#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dict.h"
/*
 * Use this to start making an index for a new dictionary.
 * Get the dictionary-specific nextoff and printentry(_,'h')
 * routines working, add a record to the dicts[] array below,
 * and run this program to get a list of offset,headword
 * pairs.
 */
/*
 * File-scope state. Several of these are not referenced in this
 * file beyond their definition; they are presumably consumed by
 * the dictionary-specific code reached through dict->nextoff and
 * dict->printentry -- confirm against those sources.
 */
/* storage for the output stream; main() initializes it on fd 1 for writing */
Biobuf boutbuf;
/* the dictionary file; main() opens dict->path for reading */
Biobuf *bdict;
/* output stream used for the index lines; points at boutbuf */
Biobuf *bout = &boutbuf;
/* approximate length of the current output line; main() resets it for each entry */
int linelen;
/* not referenced in this file; presumably an output line-break threshold -- TODO confirm */
int breaklen = 2000;
/* not referenced in this file; presumably lets output be suppressed -- TODO confirm */
int outinhibit;
/* incremented once for each -D flag on the command line */
int debug;
Dict *dict; /* current dictionary: dicts[0] unless -d selects another */
/* forward declaration; getentry is defined later in this file */
Entry getentry(long);
/*
 * Print the command synopsis on file descriptor 2 and terminate
 * the program with exit status "usage".
 */
void
usage(void)
{
	static char synopsis[] = "usage: mkindex [-D] [-d dictname]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
/*
 * Entry point: write one "offset<TAB>headword" record per
 * dictionary entry to bout.
 *
 * Flags:
 *	-d dictname	select the dicts[] record whose name matches
 *			(default is dicts[0])
 *	-D		increment the debug level
 * Any other flag prints the usage message and exits.
 *
 * Exits with status "nodict" if the named dictionary is unknown
 * or its file cannot be opened, and with a null status otherwise.
 */
void
main(int argc, char **argv)
{
	int i;
	long a, ae;
	char *p;
	Entry e;

	Binit(&boutbuf, 1, OWRITE);
	dict = &dicts[0];
	ARGBEGIN{
	case 'd':
		/* cleared so that a failed lookup is detectable below */
		dict = 0;
		p = EARGF(usage());
		for(i=0; dicts[i].name; i++)
			if(strcmp(p, dicts[i].name)==0) {
				dict = &dicts[i];
				break;
			}
		if(!dict) {
			err("unknown dictionary: %s", p);
			exits("nodict");
		}
		break;
	case 'D':
		debug++;
		break;
	default:
		/* reject unknown flags instead of silently ignoring them */
		usage();
	}ARGEND

	bdict = Bopen(dict->path, OREAD);
	if(!bdict) {
		err("can't open dictionary %s", dict->path);
		exits("nodict");
	}

	/* seeking to the end yields the size of the dictionary file */
	ae = Bseek(bdict, 0, 2);

	/*
	 * Walk the entries from offset 0.  getentry() treats a negative
	 * nextoff result as a failure, so stop here too rather than
	 * indexing a negative offset (which could also loop forever).
	 */
	for(a = 0; a >= 0 && a < ae; a = (*dict->nextoff)(a+1)) {
		linelen = 0;
		e = getentry(a);
		Bprint(bout, "%ld\t", a);
		linelen = 4; /* only has to be approx right */
		(*dict->printentry)(e, 'h');
	}
	exits(0);
}
/*
 * Read the dictionary entry that starts at byte offset b.
 *
 * The entry extends to the offset reported by dict->nextoff(b+1);
 * if that call fails (negative result), the entry is taken to run
 * to the end of the dictionary file.
 *
 * Returns an Entry with doff set to b and start/end delimiting the
 * bytes read.  The bytes live in a static buffer that is reused and
 * grown across calls, so the result is only valid until the next
 * call.  If b is at or beyond the end of the file, an error is
 * reported with err() and the returned Entry has start and end
 * both zero.  An entry of no bytes (or a failed read) is returned
 * with end == start.
 */
Entry
getentry(long b)
{
	long e, n, dtop;
	Entry none;
	static Entry ans;
	static int anslen = 0;

	e = (*dict->nextoff)(b+1);
	ans.doff = b;
	if(e < 0) {
		dtop = Bseek(bdict, 0L, 2);
		if(b >= dtop) {
			err("couldn't seek to entry");
			/*
			 * Hand back a null entry by value and leave the static
			 * buffer alone, so ans.start and anslen stay in step
			 * for later calls and nothing is read into a null
			 * pointer.
			 */
			none = ans;
			none.start = 0;
			none.end = 0;
			return none;
		}
		e = dtop;
	}

	n = e-b;
	if(n <= 0) {
		/* nothing to read: don't leave end pointing into the previous entry */
		ans.end = ans.start;
		return ans;
	}

	if(n > anslen) {
		ans.start = realloc(ans.start, n);
		if(!ans.start)
			sysfatal("realloc: %r");
		anslen = n;
	}
	Bseek(bdict, b, 0);
	n = Bread(bdict, ans.start, n);
	if(n < 0)
		n = 0;	/* read error: present an empty entry, not end < start */
	ans.end = ans.start + n;
	return ans;
}
|