# include "stdio.h"
# include "error.h"
#include "inv.h"
/*
 * Stop the program when an snprintf() result shows that the formatted
 * text did not fit (or could not be produced).
 *
 *	n	value returned by snprintf()
 *	size	size in bytes of the destination buffer
 *	what	text named in the diagnostic
 *
 * The explicit exit() is only a safety net in case err() returns.
 */
static void
checkfit(int n, int size, char *what)
{
	if (n < 0 || n >= size) {
		err("%s is too long", what);
		exit(1);
	}
}

/*
 * Make inverted file indexes.  Reads a stream from mkey which gives
 * record pointer items and keys.  Generates a set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 * These files are named xxx.ia, xxx.ib, xxx.ic, where xxx is taken
 * from the first non-option argument ("Index" when there is none).
 * If the files exist they are updated, unless -n is given.
 *
 * Options (each starts its own argument):
 *	-hN	size of hash table (default 256)
 *	-n	new index, don't append
 *	-a	append to old files (default)
 *	-v	verbose: print counts when done
 *	-d	keep keys on file xxx.id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i f	input is on file f, not stdin
 *	-iu f	as -i, and remove f once it has been read
 *	-Tdir	directory for sort's temporary files (default /tmp)
 *
 * Work files junk<pid>i, junk<pid>j and junk<pid>k are created in the
 * current directory.
 *
 * NOTE(review): err() is assumed not to return; the pre-existing calls
 * below rely on that.  Confirm against error.h.
 */
void
main(int argc, char **argv)
{
	FILE *fa, *fb, *fc, *fta, *ftb, *fd;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], nmd[100];
	char com[512];
	char tmpa[20], tmpb[20] = "", tmpc[20] = "";
	char *remf = NULL;
	char *base;
	int chatty = 0, docs, hashes, status;
	long keys;
	char *sortdir;

	progname = mkprogname(argv[0]);
	sortdir = "/tmp";
	for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) {
		switch (argv[1][1]) {
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			if (argc < 3) {
				/* argv[2] would be the terminating NULL */
				err("-i needs an input file name");
				exit(1);
			}
			/*
			 * With descriptor 0 closed, a successful open()
			 * returns 0, so the file takes the place of stdin.
			 */
			close(0);
			if (open(argv[2], 0) != 0)
				err("can't read input %s", argv[2]);
			if (argv[1][2] == 'u')	/* remove */
				remf = argv[2];
			/* step over the file name as well as the flag */
			argc--;
			argv++;
			break;
		case 'T':
			sortdir = &argv[1][2];
			break;
		default:	/* unknown flags are ignored, as before */
			break;
		}
	}

	/*
	 * All four names have the same length, so one check covers them;
	 * snprintf() keeps every buffer in bounds regardless.
	 */
	base = argc >= 2 ? argv[1] : "Index";
	checkfit(snprintf(nma, sizeof nma, "%s.ia", base), (int)sizeof nma, base);
	snprintf(nmb, sizeof nmb, "%s.ib", base);
	snprintf(nmc, sizeof nmc, "%s.ic", base);
	snprintf(nmd, sizeof nmd, "%s.id", base);

	snprintf(tmpa, sizeof tmpa, "junk%di", (int)getpid());
	if (pipein) {
		checkfit(snprintf(com, sizeof com, "/bin/sort -T %s -o %s",
		    sortdir, tmpa), (int)sizeof com, "sort command");
		if ((fta = popen(com, "w")) == NULL)
			err("can't pipe into %s", com);
	} else
		fta = efopen(tmpa, "w");	/* use tmp file */

	if (appflg) {
		if ((fb = fopen(nmb, "r")) != NULL) {
			snprintf(tmpb, sizeof tmpb, "junk%dj", (int)getpid());
			ftb = efopen(tmpb, "w");
			/*
			 * NOTE(review): the .ia stream is handed over
			 * unchecked and is not closed here; confirm that
			 * recopy() copes with NULL and closes it.
			 */
			nhash = recopy(ftb, fb, fopen(nma, "r"), nhash);
			fclose(ftb);
			fclose(fb);
		} else
			appflg = 0;	/* nothing to append to */
	}

	fc = efopen(nmc, appflg ? "a" : "w");
	fd = keepkey ? efopen(nmd, "w") : NULL;
	docs = newkeys(fta, stdin, fc, nhash, fd);
	fclose(stdin);
	if (remf != NULL)
		remove(remf);

	if (pipein) {
		status = pclose(fta);
		if (status != 0)
			err("pipe into sort failed");
	} else {
		fclose(fta);
		checkfit(snprintf(com, sizeof com, "/bin/sort -T %s %s -o %s",
		    sortdir, tmpa, tmpa), (int)sizeof com, "sort command");
		/* a failed sort must not go on to replace the index */
		if (system(com) != 0) {
			err("sort failed: %s", com);
			exit(1);
		}
	}

	if (appflg) {
		/* merge the old entries (tmpb) with the new ones into tmpa */
		snprintf(tmpc, sizeof tmpc, "junk%dk", (int)getpid());
		if (rename(tmpa, tmpc) != 0) {
			err("can't rename %s", tmpa);
			exit(1);
		}
		checkfit(snprintf(com, sizeof com, "/bin/sort -T %s -m %s %s -o %s",
		    sortdir, tmpb, tmpc, tmpa), (int)sizeof com, "sort command");
		if (system(com) != 0) {
			err("sort failed: %s", com);
			exit(1);
		}
	}

	fta = efopen(tmpa, "r");
	fa = efopen(nma, "w");
	fb = efopen(nmb, "w");
	whash(fta, fa, fb, nhash, &keys, &hashes);
	fclose(fta);
# ifndef D1
	remove(tmpa);
# endif
	if (appflg) {
		remove(tmpb);
		remove(tmpc);
	}
	if (chatty)
		printf ("%ld key occurrences, %d hashes, %d docs\n",
		    keys, hashes, docs);
	exit(0);
}
|