#include "refer.h"
#ifndef D1
#define D1 0
#endif
typedef struct Taglist {
List drop; /* candidate tag pointers not yet eliminated */
List count; /* count.el[i] = number of query items having drop.el[i] in list */
} Taglist;
static Taglist sprev; /* List space re-used to reduce number of alloc req */
static Taglist scurr; /* ... */
static List hh; /* ... query item hash codes */
static long *hfreq; /* frequency reference for hcomp */
#define Reset(mp) ((mp)->drop.n = (mp)->count.n = 0)
static int hcomp(int, int);
static void hexch(int, int);
#if D1
static void dnote(int, Taglist *, char *);
#define D(x) x
#else
#define dnote(a,b,c)
#define D(x)
#endif
List *doquery(Index *ind, FILE *fb, int nitem, char *qitem[])
{
Taglist *cur = &scurr, *prev = &sprev;
Fpos *hpt;
int best, nterm, i, j;
Fpos lp, k;
hpt = ind->hash.el;
hfreq = ind->freq.el;
D(fprintf(stderr, "doquery; nitem %d\n", nitem));
growlist(&hh, nitem);
hh.n = 0;
for (i = 0; i < nitem; i++)
hh.el[i] = hash(qitem[i]) % ind->n;
if (prfreqs || D1)
for (i = 0; i < nitem; i++)
fprintf(stderr, "item %s hash %d hfreq %ld\n", qitem[i], hh.el[i], hfreq[hh.el[i]]);
/* if possible, sort query: least common hash code first */
if (hfreq)
shell(nitem, hcomp, hexch);
/*
* read the tag list for the item with the shortest tag list
*/
lp = hpt[hh.el[0]];
D(fprintf(stderr, "first item hash %d lp %ld\n", hh.el[0], lp));
Reset(cur);
xseek(fb, lp, 0);
while ((lp = getl(fb)) != -1) {
applist(&cur->drop, lp);
applist(&cur->count, 1); /* ie, lp has appeared once */
}
/*
* intersect it with the tag list for each remaining item.
* since the tag lists are sorted by key, the intersection
* looks like a 2-way merge. note that when the co-ordination
* level is non-zero (not all items need match), new items can be added
* to the current tag list.
*/
for (nterm = 1; nterm < nitem; nterm++) {
int nf;
D(fprintf(stderr, "item %d, hash %d\n", nterm, hh.el[nterm]));
{ Taglist *mp = prev; prev = cur; cur = mp; } /* exchange */
Reset(cur);
nf = prev->drop.n; /* scan prev to build new cur */
lp = hpt[hh.el[nterm]]; /* tag list for this query item's hash code */
xseek(fb, lp, 0);
D(fprintf(stderr, "item %d hash %d seek to %ld\n", nterm, hh.el[nterm], lp));
for (j=0; (k=getl(fb)) != -1;) { /* each tag reference */
D(fprintf(stderr, "next term finds tag %ld\n", k));
/* skip entries that can't match this one, unless colevel gives them a chance */
for (; j < nf && prev->drop.el[j] < k; j++)
if (prev->count.el[j] + colevel > nterm) {
applist(&cur->drop, prev->drop.el[j]);
applist(&cur->count, prev->count.el[j]);
dnote(hh.el[nterm], cur, "keep/co");
}
if (colevel == 0 && j >= nf)
break;
if (j < nf && prev->drop.el[j] == k) {
applist(&cur->drop, k);
applist(&cur->count, prev->count.el[j] + 1);
j++;
dnote(hh.el[nterm], cur, "keep");
} else if (colevel >= nterm) {
/* previous entries don't include this, */
/* but nitem-colevel entries might match later */
applist(&cur->drop, k);
applist(&cur->count, 1);
dnote(hh.el[nterm], cur, "add/co");
}
}
if (colevel > 0)
for (; j < nf; j++)
if (prev->count.el[j] + colevel > nterm) {
applist(&cur->drop, prev->drop.el[j]);
applist(&cur->count, prev->count.el[j]);
dnote(hh.el[nterm], cur, "copy/co");
}
D(fprintf(stderr, "now have %d items\n", cur->drop.n));
}
if (colevel > 0) { /* save just the "best" entries in cur */
best = 0;
for (j = 0; j < cur->drop.n; j++)
if (cur->count.el[j] > best)
best = cur->count.el[j];
D(fprintf(stderr, "colevel %d best %d\n", colevel, best));
reached = best;
{ Taglist *mp = prev; prev = cur; cur = mp; } /* exchange */
Reset(cur);
for (j = 0; j < prev->drop.n; j++)
if (prev->count.el[j] == best)
applist(&cur->drop, prev->drop.el[j]);
D(fprintf(stderr, "now have %d items\n", cur->drop.n));
}
D(for (j = 0; j < cur->drop.n; j++)
fprintf(stderr, ":%ld\n", cur->drop.el[j]));
return &cur->drop;
}
static int hcomp(int n1, int n2)
{
return hfreq[hh.el[n1]] <= hfreq[hh.el[n2]];
}
static void hexch(int n1, int n2)
{
int t;
t = hh.el[n1]; hh.el[n1] = hh.el[n2]; hh.el[n2] = t;
}
#if D1
static void dnote(int i, Taglist *mp, char *why)
{
fprintf(stderr, "item %d %s tag %ld (#%d)\n",
i, why, mp->drop.el[mp->drop.n-1], mp->count.el[mp->drop.n-1]);
}
#endif
|