#!/bin/rc
# Command used by Sprint to post-process the generated table.  A value
# already present (for example inherited from the environment) is kept;
# otherwise fall back to 8.uconv.
if(~ $#uconv 0){
	uconv=8.uconv
}

# Input data file.  Defaults to UnicodeData.txt; when arguments are
# given, each one overrides the previous, so the last argument wins.
unicodedata = UnicodeData.txt
for(i){
	unicodedata = $i
}
# Sprint: run the command named by $uconv, which inherits this
# function's standard input and output.
fn Sprint { $uconv }
# Unicode: write the contents of $unicodedata to standard output with
# carriage returns removed.
#
# If $rune is set (nothing in this script assigns it, so it would have
# to come from the environment -- NOTE(review): confirm) it is passed to
# grep as the pattern and only matching lines are produced.  When $rune
# is unset, grep would be run without a pattern and fail, leaving the
# awk program with no input; in that case the whole file is passed
# through instead.
fn Unicode {
	if(~ $#rune 0)
		tr -d '\015' < $unicodedata
	if not
		grep $rune < $unicodedata | tr -d '\015'
}
# awk source for hex0(), spliced into the awk program below.
# NOTE: the text between the quotes must not contain a single quote,
# since that would terminate the rc string.
hex='
# hex0(s): numeric value of the hexadecimal string s.
#   s    - hex digits, optionally prefixed with 0x or 0X
#   base, r, n, i, k, c - awk locals (extra parameters), not arguments
# Returns 0 when s contains anything other than hex digits.
# Not called by the program below, which uses strtonum via hex().
function hex0(s, base, r, n, i, k, c)
{
	base = 16;
	# Strip an optional 0x prefix.
	if(s ~ /^0[xX]/)
		s = substr(s, 3);
	# Reject anything that is not purely hex digits; the anchors make
	# sure the whole string is checked, so k below is never -1.
	if(s !~ /^[0-9a-fA-F]*$/)
		return 0;
	n = length(s)
	r = 0
	for (i = 1; i <= n; i++) {
		c = tolower(substr(s, i, 1))
		k = index("0123456789abcdef", c) - 1;
		r = r * base + k
	}
	return r
}
'
# Build the C table __base2[]: for every letter code point that has a
# canonical decomposition, emit the pair (code point, base code point),
# where the base is found by following decompositions until a code
# point without one is reached.  Input comes from Unicode (the
# semicolon-separated UnicodeData.txt records); output is piped
# through Sprint.
#
# Fields used (UnicodeData.txt layout, see UAX #44):
#   $1 code point, $3 general category, $6 decomposition mapping,
#   $14 simple lowercase mapping.
#
# NOTE: the awk text must not contain a single quote, since that would
# terminate the rc string.
awk '-F;' '
' ^ $hex ^ '
# hex(x): numeric value of the hex string x.  strtonum is a gawk
# extension, so this program needs gawk.  For a decomposition made of
# several code points only the leading one is expected to be parsed,
# which yields the base character.
function hex(x){
	return strtonum("0x" x);
}

# Letters only (general category L followed by one character).
$3 ~ /^L.$/ {
	cp = hex($1);
	# Track the highest letter code point seen.
	if(cp > last)
		last = cp
	# Lowercase mapping; only read by the disabled code in END.
	if(length($14) >= 4)
		lc[cp] = $14
	# Canonical decompositions only: tagged (compatibility) mappings
	# such as <compat> are skipped.
	if(length($6) >= 4 && $6 !~ /<.*/)
		val[cp] = $6
}

END {
	print "static";
	print "Rune\t__base2[] =";
	print "{";
	# Inclusive upper bound: the highest letter code point may itself
	# have a decomposition and must not be skipped.
	for(i = 0; i <= last; i++){
		cp = i
		# Follow the decomposition chain down to the base code point.
		while(length(r = val[cp]) > 0){
			cp = hex(r)
			if(cp == 0)
				print "broke at [" $0 "] r= " r
		}
		# if(length(r = lc[cp]) > 0)
		#	cp = hex(r)
		if(cp != i)
			printf("\t0x%04x,\t0x%04x,\t/* \\u%04x \\u%04x */\n", i, cp, i, cp);
	}
	print "};";
	print "";
}
' <{Unicode} | Sprint
|