#!/bin/rc
# usage: $0 url
#
# Fetch the page at url with hget, pull out the value of every href=,
# src= and link= attribute, and print (but do not run) one rc command
# line per distinct link:
#
#	hget -b 'url' 'link' >'file'
#
# preceded by "mkdir -p 'dir'; " when file lives in a subdirectory.
# Links containing ?, : or ; (queries, absolute URLs on other schemes
# or hosts, ...) are printed commented out so they can be reviewed by
# hand.  mailto:, javascript:, file:, ftp: and gopher: links, links that
# are a prefix of url itself, and links consisting only of a #fragment
# are dropped.
#
# How the pipeline works:
#   tr	 deletes newlines so the whole page is one sed input line.
#   sed	 drops the text between tags (keeping the > and < so that an
#	 unquoted attribute value still ends at its tag), strips a
#	 leading XXX so page content cannot fake a marker, then puts
#	 each attribute value on its own line as "XXX value".
#   awk	 turns every "XXX value" line into a command line.
#   sort | uniq  removes duplicate command lines.
#
# Link text is untrusted, so everything placed inside single quotes in
# the generated commands has its own single quotes doubled (the rc
# escape for a quote inside a quoted word).

if(~ $#* 0){
	echo usage: $0 url >[1=2]
	exit usage
}

hget $1 | uhtml | tr -d '
' | sed '
s!>[^<]*<!><!g
s!^XXX!!g
s!(href|src|link)\=''([^'']+)''!\nXXX \2\n!g
s!(href|src|link)\="([^"]+)"!\nXXX \2\n!g
s!(href|src|link)\=([^ >]+)!\nXXX \2\n!g' |
awk -v 'b='^$1 '
# b is the base url, set with -v on the command line.

# Return s with every single quote doubled, ready to be placed
# between single quotes in an rc command.
function q(s) {
	gsub(/''/, "''''", s)
	return s
}

/^XXX/ {
	# Skip the 4 character "XXX " marker.
	u = substr($0, 5)
	if(match(u, /^(mailto|javascript|file|ftp|gopher):/))
		next
	# The link is a prefix of the base url: nothing new to fetch.
	if(index(b, u) == 1)
		next
	# The link lies below the base url: make it relative.
	if(index(u, b) == 1)
		u = substr(u, 1+length(b))
	if(match(u, /[\?:;]/)){
		printf "# hget -b ''%s'' ''%s''\n", q(b), q(u)
		next
	}
	# Derive the local file name from the link.
	f = u
	sub(/#.*$/, "", f)
	# A bare #fragment points into the page itself and would
	# otherwise produce a redirect to an empty file name.
	if(f == "")
		next
	sub(/\/$/, "/index.html", f)
	sub(/^\//, "", f)
	if(index(f, "/")){
		d = f
		sub(/\/([^\/]+)$/, "", d)
		printf "mkdir -p ''%s''; ", q(d)
	}
	printf "hget -b ''%s'' ''%s'' >''%s''\n", q(b), q(u), q(f)
}' | sort | uniq
|