#include "stdio.h"
#include "ctype.h"
#include "assert.h"
#include "err.h"
#include "re.h"
#ifdef _tolower /* thank you, system V */
#undef tolower
#define tolower _tolower
#endif
#ifndef D2
#define D2 0
#endif
void *zalloc(unsigned int, unsigned int);
/*
* fgrep -- print all lines containing any of a set of keywords
*
* status returns:
* 0 - ok, and some matches
* 1 - ok, but no matches
* 2 - some error
*/
#define MAXSIZ 1000
#define QSIZE 400
typedef struct Trans Trans;
typedef struct State State;
struct State {
State *fail;
Trans *trans;
int out;
};
struct Trans {
State *nst;
Trans *link;
int inp;
};
typedef union {
State s;
Trans t;
} Heapobj;
struct re_ac {
/*unsigned char *map;*/
Heapobj *smax, *lim;
Heapobj space[MAXSIZ];
};
static int nfound;
static int isnew(State *);
int re_run(re_ac *fg, char *instr, int ccount, int need)
{
unsigned char *p;
State *c, *s0;
int ch;
nfound = 0;
p = (unsigned char *)instr;
c = s0 = &fg->space[0].s; /* start state */
#if D2
fprintf(stderr, "in execute ccount %d\n", ccount);
#endif
while (--ccount >= 0) {
ch = *p;
if (isupper(ch))
ch = tolower(ch);
for (;; c = c->fail) {
Trans *t;
#if D2
fprintf(stderr, "g(0x%x,'%c')", c, ch);
#endif
for (t = c->trans; t != 0; t = t->link)
if (t->inp == ch) {
c = t->nst;
goto Found;
}
/* failed */
if (c == s0)
break;
#if D2
fprintf(stderr, " =\n");
#endif
}
Found:
#if D2
fprintf(stderr, " = 0x%x [%d]\n", c, c->out);
#endif
if (c->out && isnew(c)) {
nfound++;
#if D2
fprintf(stderr, " found: nfound %d need %d\n", nfound, need);
#endif
if (nfound >= need)
break;
ccount++;
continue; /* note: restarts machine on last char of last match */
}
p++;
}
return nfound;
}
#define NFINAL 100
static State *seen[NFINAL];
static int isnew(State *x)
{
int i;
for (i = 0; i < nfound; i++)
if (seen[i] == x)
return 0;
if (i >= NFINAL)
err("too many final states (fgrep)");
seen[i] = x;
return 1;
}
/*
* Aho/Corasick keyword matching algorithm:
* CACM Vol. 18, No. 6, June 1975, pp 333-340
*/
/*
* initialise a structure describing an empty machine
*/
re_ac *re_acinit(void)
{
static re_ac *sfg; /* static in refer's application, to save time */
State *s0;
if (sfg == 0) {
sfg = (re_ac *) zalloc(1, sizeof(*sfg));
sfg->lim = &sfg->space[MAXSIZ];
}
sfg->smax = sfg->space;
s0 = &sfg->smax->s;
s0->out = 0;
s0->trans = 0;
s0->fail = 0;
return sfg;
}
static State *fgoto(State *s, int c)
{
Trans *t;
for (t = s->trans; t != 0; t = t->link)
if (t->inp == c)
return t->nst; /* non-zero by construction */
return 0;
}
/*
* add the new keyword represented by the string ab..ae to machine fg
*
* this is algorithm 2 (construction of the goto function); p 336
* returns 0 if construction failed (no space)
*/
int re_acadd(re_ac *fg, char *ab, char *ae)
{
unsigned char *b = (unsigned char *)ab;
unsigned char *e = (unsigned char *)ae;
State *s, *ns;
Trans *t;
s = &fg->space[0].s; /* start state */
for (; b != e && (ns = fgoto(s, *b)) != 0; b++, s = ns) /* while g(state,*b) != fail */
;
for (; b != e; b++) { /* add new trans to new state for each char */
t = &(++fg->smax)->t;
ns = &(++fg->smax)->s;
if (fg->smax >= fg->lim)
return 0;
t->nst = ns;
t->inp = *b;
t->link = s->trans;
s->trans = t;
ns->out = 0;
ns->fail = 0;
ns->trans = 0;
s = ns;
}
s->out = 1;
return 1;
}
#define enq(w) { \
*qw++ = (w); \
if (qw >= &queue[QSIZE]) qw = queue; \
if (qw == qr) return 0; \
}
/*
* complete the machine
*
* this is algorithm 3 (construction of failure function), page 336
* returns 0 if construction failed (no space)
*/
int re_accomp(re_ac *fg)
{
State *queue[QSIZE];
State **qr, **qw;
State *state, *r, *ns, *s, *s0;
Trans *t;
int c;
qr = qw = queue;
s0 = &fg->space[0].s; /* start state; state 0 */
for (t = s0->trans; t != 0; t = t->link) { /* queue states of depth 1 */
enq(t->nst);
t->nst->fail = s0;
}
/* calculate states at each depth d from those at d-1: */
while (qr != qw) {
r = *qr++;
if (qr >= &queue[QSIZE])
qr = queue;
for (t = r->trans; t != 0; t = t->link) { /* each char in state r */
s = t->nst;
enq(s);
state = r->fail;
c = t->inp;
while ((ns = fgoto(state,c)) == 0 && state != s0)
state = state->fail;
if ((s->fail = ns) == 0)
s->fail = s0;
s->out |= s->fail->out;
}
}
return 1;
}
|