Plan 9 from Bell Labs’s /usr/web/sources/contrib/steve/root/sys/src/cmd/refer/doquery.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#include "refer.h"

#ifndef D1
#define	D1 0
#endif

typedef struct Taglist {
	List	drop;	/* candidate tag pointers not yet eliminated */
	List	count;	/* count.el[i] = number of query items having drop.el[i] in list */
} Taglist;
static	Taglist sprev;	/* List space re-used to reduce number of alloc req */
static	Taglist scurr;	/* ... */
static	List	hh;	/* ...  query item hash codes */
static	long	*hfreq;	/* frequency reference for hcomp */

#define	Reset(mp) ((mp)->drop.n = (mp)->count.n = 0)

static int hcomp(int, int);
static void hexch(int, int);
#if D1
static void dnote(int, Taglist *, char *);
#define	D(x) x
#else
#define	dnote(a,b,c)
#define	D(x)
#endif

List *doquery(Index *ind, FILE *fb, int nitem, char *qitem[])
{
	Taglist *cur = &scurr, *prev = &sprev;
	Fpos *hpt;
	int best, nterm, i, j;
	Fpos lp, k;

	hpt = ind->hash.el;
	hfreq = ind->freq.el;
	D(fprintf(stderr, "doquery; nitem %d\n", nitem));
	growlist(&hh, nitem);
	hh.n = 0;
	for (i = 0; i < nitem; i++)
		hh.el[i] = hash(qitem[i]) % ind->n;
	if (prfreqs || D1)
		for (i = 0; i < nitem; i++)
			fprintf(stderr, "item %s hash %d hfreq %ld\n", qitem[i], hh.el[i], hfreq[hh.el[i]]);
	/* if possible, sort query: least common hash code first */
	if (hfreq)
		shell(nitem, hcomp, hexch);
	/*
	 * read the tag list for the item with the shortest tag list
	 */
	lp = hpt[hh.el[0]];
	D(fprintf(stderr, "first item hash %d lp %ld\n", hh.el[0], lp));
	Reset(cur);
	xseek(fb, lp, 0);
	while ((lp = getl(fb)) != -1) {
		applist(&cur->drop, lp);
		applist(&cur->count, 1);	/* ie, lp has appeared once */
	}
	/*
	 * intersect it with the tag list for each remaining item.
	 * since the tag lists are sorted by key, the intersection
	 * looks like a 2-way merge.  note that when the co-ordination
	 * level is non-zero (not all items need match), new items can be added
	 * to the current tag list.
	 */
	for (nterm = 1; nterm < nitem; nterm++) {
		int nf;
		D(fprintf(stderr, "item %d, hash %d\n", nterm, hh.el[nterm]));
		{ Taglist *mp = prev; prev = cur; cur = mp; }	/* exchange */
		Reset(cur);
		nf = prev->drop.n;	/* scan prev to build new cur */
		lp = hpt[hh.el[nterm]]; /* tag list for this query item's hash code */
		xseek(fb, lp, 0);
		D(fprintf(stderr, "item %d hash %d seek to %ld\n", nterm, hh.el[nterm], lp));
		for (j=0; (k=getl(fb)) != -1;) {	/* each tag reference */
			D(fprintf(stderr, "next term finds tag %ld\n", k));
			/* skip entries that can't match this one, unless colevel gives them a chance */
			for (; j < nf && prev->drop.el[j] < k; j++)
				if (prev->count.el[j] + colevel > nterm) {
					applist(&cur->drop, prev->drop.el[j]);
					applist(&cur->count, prev->count.el[j]);
					dnote(hh.el[nterm], cur, "keep/co");
				}
			if (colevel == 0 && j >= nf) 
				break;
			if (j < nf && prev->drop.el[j] == k) {
				applist(&cur->drop, k);
				applist(&cur->count, prev->count.el[j] + 1);
				j++;
				dnote(hh.el[nterm], cur, "keep");
			} else if (colevel >= nterm) {
				/* previous entries don't include this, */
				/* but nitem-colevel entries might match later */
				applist(&cur->drop, k);
				applist(&cur->count, 1);
				dnote(hh.el[nterm], cur, "add/co");
			}
		}
		if (colevel > 0)
			for (; j < nf; j++)
				if (prev->count.el[j] + colevel > nterm) {
					applist(&cur->drop, prev->drop.el[j]);
					applist(&cur->count, prev->count.el[j]);
					dnote(hh.el[nterm], cur, "copy/co");
				}
		D(fprintf(stderr, "now have %d items\n", cur->drop.n));
	}
	if (colevel > 0) {	/* save just the "best" entries in cur */
		best = 0;
		for (j = 0; j < cur->drop.n; j++)
			if (cur->count.el[j] > best) 
				best = cur->count.el[j];
		D(fprintf(stderr, "colevel %d best %d\n", colevel, best));
		reached = best;
		{ Taglist *mp = prev; prev = cur; cur = mp; }	/* exchange */
		Reset(cur);
		for (j = 0; j < prev->drop.n; j++)
			if (prev->count.el[j] == best)
				applist(&cur->drop, prev->drop.el[j]);
		D(fprintf(stderr, "now have %d items\n", cur->drop.n));
	}
	D(for (j = 0; j < cur->drop.n; j++)
		fprintf(stderr, ":%ld\n", cur->drop.el[j]));
	return &cur->drop;
}

static int hcomp(int n1, int n2)
{
	return hfreq[hh.el[n1]] <= hfreq[hh.el[n2]];
}

static void hexch(int n1, int n2)
{
	int t;

	t = hh.el[n1]; hh.el[n1] = hh.el[n2]; hh.el[n2] = t;
}

#if D1
static void dnote(int i, Taglist *mp, char *why)
{
	fprintf(stderr, "item %d %s tag %ld (#%d)\n",
		i, why, mp->drop.el[mp->drop.n-1], mp->count.el[mp->drop.n-1]);
}
#endif

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].