Plan 9 from Bell Labs’s /usr/web/sources/contrib/steve/root/sys/src/cmd/refer/fgrep.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


#include "stdio.h"
#include "ctype.h"
#include "assert.h"
#include "err.h"
#include "re.h"

#ifdef _tolower	/* thank you, system V */
#undef tolower
#define tolower _tolower
#endif

#ifndef D2
#define D2 0
#endif

void	*zalloc(unsigned int, unsigned int);

/*
 * fgrep -- print all lines containing any of a set of keywords
 *
 *	status returns:
 *		0 - ok, and some matches
 *		1 - ok, but no matches
 *		2 - some error
 */
#define	MAXSIZ 1000
#define QSIZE 400

typedef struct Trans Trans;
typedef struct State State;

struct State {
	State	*fail;
	Trans	*trans;
	int	out;
};

struct Trans {
	State	*nst;
	Trans	*link;
	int	inp;
};

typedef union {
	State	s;
	Trans	t;
} Heapobj;

struct re_ac {
	/*unsigned char *map;*/
	Heapobj *smax, *lim;
	Heapobj	space[MAXSIZ];
};

static	int	nfound;
static	int	isnew(State *);

int re_run(re_ac *fg, char *instr, int ccount, int need)
{
	unsigned char	*p;
	State *c, *s0;
	int ch;

	nfound = 0;
	p = (unsigned char *)instr;
	c = s0 = &fg->space[0].s;	/* start state */
#if D2
	fprintf(stderr, "in execute ccount %d\n", ccount);
#endif
	while (--ccount >= 0) {
		ch = *p;
		if (isupper(ch))
			ch = tolower(ch);
		for (;; c = c->fail) {
			Trans *t;

#if D2
			fprintf(stderr, "g(0x%x,'%c')", c, ch);
#endif
			for (t = c->trans; t != 0; t = t->link)
				if (t->inp == ch) {
					c = t->nst;
					goto Found;
				}
			/* failed */
			if (c == s0)
				break;
#if D2
			fprintf(stderr, " =\n");
#endif
		}
	Found:
#if D2
		fprintf(stderr, " = 0x%x [%d]\n", c, c->out);
#endif
		if (c->out && isnew(c)) {
			nfound++;
#if D2
			fprintf(stderr, " found: nfound %d need %d\n", nfound, need);
#endif
			if (nfound >= need)
				break;
			ccount++;
			continue; /* note: restarts machine on last char of last match */
		}
		p++;
	}
	return nfound;
}

#define	NFINAL 100
static State	*seen[NFINAL];

static int isnew(State *x)
{
	int i;

	for (i = 0; i < nfound; i++)
		if (seen[i] == x)
			return 0;
	if (i >= NFINAL)
		err("too many final states (fgrep)");
	seen[i] = x;
	return 1;
}

/*
 * Aho/Corasick keyword matching algorithm:
 *	CACM Vol. 18, No. 6, June 1975, pp 333-340
 */

/*
 * initialise a structure describing an empty machine
 */
re_ac *re_acinit(void)
{
	static re_ac *sfg;	/* static in refer's application, to save time */
	State *s0;

	if (sfg == 0) {
		sfg = (re_ac *) zalloc(1, sizeof(*sfg));
		sfg->lim = &sfg->space[MAXSIZ];
	}
	sfg->smax = sfg->space;
	s0 = &sfg->smax->s;
	s0->out = 0;
	s0->trans = 0;
	s0->fail = 0;
	return sfg;
}

static State *fgoto(State *s, int c)
{
	Trans *t;

	for (t = s->trans; t != 0; t = t->link)
		if (t->inp == c)
			return t->nst;	/* non-zero by construction */
	return 0;
}

/*
 * add the new keyword represented by the string ab..ae to machine fg
 *
 * this is algorithm 2 (construction of the goto function); p 336
 *	returns 0 if construction failed (no space)
 */
int re_acadd(re_ac *fg, char *ab, char *ae) 
{
	unsigned char *b = (unsigned char *)ab;
	unsigned char *e = (unsigned char *)ae;
	State *s, *ns;
	Trans *t;

	s = &fg->space[0].s;	/* start state */
	for (; b != e && (ns = fgoto(s, *b)) != 0; b++, s = ns)	/* while g(state,*b) != fail */
		;
	for (; b != e; b++) {	/* add new trans to new state for each char */
		t = &(++fg->smax)->t;
		ns = &(++fg->smax)->s;
		if (fg->smax >= fg->lim)
			return 0;
		t->nst = ns;
		t->inp = *b;
		t->link = s->trans;
		s->trans = t;
		ns->out = 0;
		ns->fail = 0;
		ns->trans = 0;
		s = ns;
	}
	s->out = 1;
	return 1;
}

#define	enq(w) { \
	*qw++ = (w); \
	if (qw >= &queue[QSIZE]) qw = queue; \
	if (qw == qr) return 0; \
}

/*
 * complete the machine
 *
 * this is algorithm 3 (construction of failure function), page 336
 *	returns 0 if construction failed (no space)
 */
int re_accomp(re_ac *fg) 
{
	State *queue[QSIZE];
	State **qr, **qw;
	State *state, *r, *ns, *s, *s0;
	Trans *t;
	int c;

	qr = qw = queue;
	s0 = &fg->space[0].s;	/* start state; state 0 */
	for (t = s0->trans; t != 0; t = t->link) {	/* queue states of depth 1 */
		enq(t->nst);
		t->nst->fail = s0;
	}
	/* calculate states at each depth d from those at d-1: */
	while (qr != qw) {
		r = *qr++;
		if (qr >= &queue[QSIZE])
			qr = queue;
		for (t = r->trans; t != 0; t = t->link) { /* each char in state r */
			s = t->nst;
			enq(s);
			state = r->fail;
			c = t->inp;
			while ((ns = fgoto(state,c)) == 0 && state != s0)
				state = state->fail;
			if ((s->fail = ns) == 0)
				s->fail = s0;
			s->out |= s->fail->out;
		}
	}
	return 1;
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].