#include <u.h>
#include <libc.h>
#include <bio.h>
#include "httpd.h"
#include "httpsrv.h"
enum{ URLmax = 65536, HINTmax = 20 };
#define RECIPLOG2 1.44269504089
char **urlname; /* array of url strings 1,...,nurl */
static int nurl;
static uint urltab[URLmax]; /* hashstr(url) 1,...,nurl */
static int urlnext[URLmax]; /* index urltab of next url in chain */
static int urlhash[URLmax]; /* initially 0, meaning empty buckets */
typedef struct Hint {
ushort url;
uchar prob;
} Hint;
Hint *hints[URLmax];
uchar nhint[URLmax];
vlong
Bfilelen(void *vb)
{
Biobuf *b;
vlong n;
b = vb;
n = Bseek(b, 0L, 2);
Bseek(b, 0L, 0);
return n;
}
static uint
hashstr(char* key)
{
/* asu works better than pjw for urls */
uchar *k = (unsigned char*)key;
uint h = 0;
while(*k!=0)
h = 65599*h + *k++;
return h;
}
static int
urllookup(uint url)
{
/* returns +index into urltab, else -hash */
int j, hash;
hash = 1 + url%(URLmax-1);
j = urlhash[hash];
for(;;){
if(j==0)
return -hash;
if(url==urltab[j])
return j;
j = urlnext[j];
}
}
int
Bage(Biobuf *b)
{
Dir *dir;
long mtime;
dir = dirfstat(Bfildes(b));
if(dir != nil)
mtime = dir->mtime;
else
mtime = 0;
free(dir);
return time(nil) - mtime;
}
void
urlinit(void)
{
static Biobuf *b = nil;
static vlong filelen = 0;
vlong newlen;
char *s, *arena;
int i, j, n;
uint url;
char *file;
if(filelen < 0)
return;
file = "/sys/log/httpd/url";
if(b == nil){
b = Bopen(file, OREAD); /* first time */
if(b == nil){
syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
filelen = -1;
return;
}
}
newlen = Bfilelen(b); /* side effect: rewinds b */
if(newlen == filelen || Bage(b)<300)
return;
filelen = newlen;
if(filelen < 0)
return;
if(nurl){ /* free existing tables */
free(urlname[0]); /* arena */
memset(urlhash,0,sizeof urlhash);
memset(urlnext,0,sizeof urlnext);
nurl = 0;
}
if(urlname==nil)
urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
arena = (char*)ezalloc(filelen); /* enough for all the strcpy below */
i = 1;
while((s=Brdline(b,'\n'))!=0){
/* read lines of the form: 999 /url/path */
n = Blinelen(b) - 1;
if(n>2 && s[n]=='\n'){
s[n] = '\0';
}else{
sysfatal("missing fields or newline in url-db");
}
j = strtoul(s,&s,10);
while(*s==' ')
s++;
if(i++!=j)
sysfatal("url-db synchronization error");
url = hashstr(s);
j = urllookup(url);
if(j>=0)
sysfatal("duplicate url");
j = -j;
nurl++;
if(nurl>=URLmax){
syslog(0, HTTPLOG, "urlinit overflow at %s",s);
free(urlname[0]); /* arena */
memset(urlhash,0,sizeof urlhash);
memset(urlnext,0,sizeof urlnext);
nurl = 0;
return;
}
urltab[nurl] = url;
urlnext[nurl] = urlhash[j];
urlhash[j] = nurl;
strcpy(arena,s);
urlname[nurl] = arena;
arena += strlen(s)+1;
}
syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
/* b is held open, because namespace will be chopped */
}
void
statsinit(void)
{
static Biobuf *b = nil;
static vlong filelen = 0;
vlong newlen;
int iq, n, i, nstats = 0;
uchar *s, buf[3+HINTmax*3]; /* iq, n, (url,prob)... */
Hint *arena, *h;
char *file;
static void *oldarena = nil;
file = "/sys/log/httpd/pathstat";
if(b == nil){
if(filelen == -1)
return; /* if failed first time */
b = Bopen(file, OREAD); /* first time */
if(b == nil){
syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
filelen = -1;
return;
}
}
newlen = Bfilelen(b); /* side effect: rewinds b */
if(newlen == filelen || Bage(b)<300)
return;
filelen = newlen;
if(oldarena){
free(oldarena);
memset(nhint,0,sizeof nhint);
}
arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
oldarena = arena;
for(;;){
i = Bread(b,buf,3);
if(i<3)
break;
nstats++;
iq = buf[0];
iq = (iq<<8) | buf[1];
n = buf[2];
h = arena;
arena += n;
hints[iq] = h;
nhint[iq] = n;
if(Bread(b,buf,3*n)!=3*n)
sysfatal("stats read error");
for(i=0; i<n; i++){
s = &buf[3*i];
h[i].url = (s[0]<<8) | s[1];
h[i].prob = s[2];
}
}
syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
}
void
urlcanon(char *url)
{
/* all the changes here can be implemented by rewriting in-place */
char *p, *q;
/* remove extraneous '/' in the middle and at the end */
p = url+1; /* first char needs no change */
q = p;
while(q[0]){
if(q[0]=='/' && q[-1]=='/'){
q++;
continue;
}
*p++ = *q++;
}
if(q[-1]=='/'){ /* trailing '/' */
p[-1] = '\0';
}else{
p[0] = '\0';
}
/* specific to the cm.bell-labs.com web site */
if(strncmp(url,"/cm/",4)==0){
if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
/* strip off /cm/cs */
memmove(url,url+6,strlen(url+6)+1);
else if(strncmp(url+4,"ms/what/wavelet",15)==0)
/* /cm/ms/what */
memmove(url,url+11,strlen(url+11)+1);
}
}
void
hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
{
int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
char *query, *sf, etag[32], *wurl;
Dir *dir;
Hint *h, *haveh;
query = hstrdup(hc, uri);
urlcanon(query);
j = urllookup(hashstr(query));
if(j < 0)
return;
query = strrchr(uri,'/');
if(!query)
return; /* can't happen */
prefix = query-uri+1; /* = strlen(dirname)+1 */
h = hints[j];
for(i=0; i<nhint[j]; i++){
if(havej > 0 && havej < URLmax){ /* exclude hints client has */
haveh = hints[havej];
for(havei=0; havei<nhint[havej]; havei++)
if( haveh[havei].url == h[i].url)
goto continuei;
}
sf = urlname[h[i].url];
pr = h[i].prob;
if(pr<thresh)
break;
n = strlen(webroot) + strlen(sf) + 1;
wurl = halloc(hc, n);
strcpy(wurl, webroot);
strcat(wurl, sf);
fd = open(wurl, OREAD);
if(fd<0)
continue;
dir = dirfstat(fd);
if(dir == nil){
close(fd);
continue;
}
close(fd);
snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
free(dir);
if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
sf = sf+prefix;
hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
newhint++;
continuei: ;
}
if(newhint)
hprint(hout, "Fresh: have/%d\r\n", j);
}
|