#!/bin/rc
# Configuration.
# uconv names the command that Sprint runs as the final output filter; it is
# only given a default when the variable is unset (an empty list).
if(~ $#uconv 0){
	uconv=8.uconv
}
# unicodedata names the input file read by Unicode.  It starts out as a file
# called unicodedata and is replaced by each command-line argument in turn,
# so the last argument wins.
unicodedata=unicodedata
for(i in $*){
	unicodedata=$i
}
# Sprint: last stage of the pipeline.  Runs whatever command $uconv names;
# that command inherits the standard input and output of Sprint.
fn Sprint { $uconv }
# Unicode: print the lines of the file $unicodedata that match the grep
# pattern held in $rune, with carriage returns (octal 015) deleted.
# NOTE(review): $rune is never assigned in this script; presumably it is
# supplied through the environment.  Confirm, because an unset $rune expands
# to nothing and leaves grep without a pattern.
fn Unicode {
	<$unicodedata grep $rune | tr -d '\015'
}
# awk source text for hex0, a hand-written hexadecimal parser.  It is kept in
# a variable so it can be concatenated into the awk program further down.
# The text lives inside an rc single-quoted string, so a literal single quote
# inside it would have to be doubled; the comments below avoid them.
hex='
# hex0(s): numeric value of the hexadecimal string s.
#   s  hex digits in either case, optionally prefixed with 0x or 0X
#   base, r, n, i, k, c are locals (declared as extra parameters, the usual
#   awk idiom).
# Returns 0 when s, after the prefix is removed, holds anything other than
# hex digits; an empty string also yields 0.
function hex0(s, base, r, n, i, k, c)
{
	base = 16;
	# Strip the optional prefix from the argument itself.
	if(s ~ /^0[xX]/)
		s = substr(s, 3);
	# Anchored at both ends so that every character must be a hex digit.
	if(s !~ /^[0-9a-fA-F]*$/)
		return 0;
	n = length(s)
	r = 0
	for (i = 1; i <= n; i++) {
		# index is 1-based, so subtracting 1 gives the digit value.
		c = tolower(substr(s, i, 1))
		k = index("0123456789abcdef", c) - 1;
		r = r * base + k
	}
	return r
}
'
# Build the __unfoldbase table.  awk reads the semicolon-separated records
# produced by Unicode (passed as a file name through <{...}); the program text
# is the concatenation of the hex0 source in $hex and the code below, and the
# result is piped through Sprint.
# The field numbers used below assume the UnicodeData.txt layout (field 1 code
# point in hex, field 3 general category, field 6 decomposition) -- TODO
# confirm against the input file actually used.
awk '-F;' '
' ^ $hex ^ '
# hex(x): value of the hex string x.  strtonum is a gawk extension; it is
# relied on to parse only the leading number, so a field holding several
# space-separated code points yields the first one.
function hex(x){
	return strtonum("0x" x);
}

# Letter records (category L followed by one character): remember the
# highest code point seen and, for each record with a plain decomposition
# (at least 4 characters and no < tag), the decomposition field.
$3 ~ /^L.$/ {
	cp = hex($1);
	if(cp > last)
		last = cp
#	if(length($14)>=4)
#		lc[cp] = $14
	if(length($6) >= 4 && $6 !~ /<.*/)
		val[cp] = $6
}

END {
	# Follow each decomposition chain to its end and append the starting
	# code point, as a \uXXXX escape, to the entry of that final code point.
	# The bound is inclusive because last is itself a code point that was
	# seen in the input.
	for(i = 0; i <= last; i++){
		cp = i
		while(length(r = val[cp]) > 0){
			cp = hex(r)
			# NOTE(review): this diagnostic goes to standard output and
			# therefore ends up inside the generated table.
			if(cp == 0)
				print "broke at [" $0 "] r= " r
		}
#		if(length(r = lc[cp]) > 0)
#			cp = hex(r)
		if(cp != i)
			tab[cp] = tab[cp] sprintf("\\u%04x", i)
	}

	# Emit one wide string per final code point: the code point itself
	# followed by every code point that folds to it.
	print "static";
	print "Rune\t*__unfoldbase[] =";
	print "{";
	for(i = 0; i <= last; i++)
		if(length(tab[i]) > 0){
			str = sprintf("\\u%04x", i)
			print "/* " str " */\tL\"" str tab[i] "\","
#			print "\t\"" str tab[i] "\", /* " str " */"
		}
	print "};";
	print "";
}' <{Unicode} | Sprint
|