Plan 9 from Bell Labs’s /usr/web/sources/extra/9hist/port/thwack.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


## diffname port/thwack.c 1999/1001
## diff -e /dev/null /n/emeliedump/1999/1001/sys/src/brazil/port/thwack.c
0a
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"

#include "thwack.h"

typedef struct Huff	Huff;
struct Huff
{
	short	bits;				/* length of the code */
	ulong	encode;				/* the code */
};

static	Huff	lentab[MaxLen] =
{
	{1,	0x0},		/* 0 */
	{2,	0x2},		/* 10 */
	{4,	0xc},		/* 1100 */
	{4,	0xd},		/* 1101 */
	{5,	0x1e},		/* 11110 */
	{6,	0x3e},		/* 111110 */
	{6,	0x3f},		/* 111111 */
	{4,	0xe},		/* 1110 */
};

static	void	bitput(Thwack *tw, int c, int n);
static	int	iomegaput(Thwack *tw, ulong v);

void
thwackinit(Thwack *tw)
{
	int i;

	memset(tw, 0, sizeof *tw);
	for(i = 0; i < EWinBlocks; i++){
		tw->blocks[i].data = tw->data[i];
		tw->blocks[i].edata = tw->blocks[i].data;
		tw->blocks[i].hash = tw->hash[i];
		tw->blocks[i].acked = 0;
	}
}

/*
 * acknowledgement for block seq & nearby preds
 */
void
thwackack(Thwack *tw, ulong seq, ulong mask)
{
	int slot, b;

	slot = tw->slot;
	for(;;){
		for(;;){
			slot--;
			if(slot < 0)
				slot += EWinBlocks;
			if(slot == tw->slot)
				return;
			if(tw->blocks[slot].seq == seq){
				tw->blocks[slot].acked = 1;
				break;
			}
		}

		if(mask == 0)
			break;
		do{
			b = mask & 1;
			seq--;
			mask >>= 1;
		}while(!b);
	}
}

/*
 * find a string in the dictionary
 */
static int
thwmatch(ThwBlock *b, ThwBlock *eblocks, uchar **ss, uchar *esrc, ulong h)
{
	int then, toff, w;
	uchar *s, *t;

	s = *ss;
	if(esrc < s + MinMatch)
		return 0;

	toff = 0;
	for(; b < eblocks; b++){
		then = b->hash[h];
		toff += b->maxoff;
		w = (ushort)(then - b->begin);

		if(w >= b->maxoff)
			continue;

		/*
		 * don't need to check for the end because
		 * 1) s too close check above
		 * 2) entries too close not added to hash tables
		 */
		t = w + b->data;
		if(s[0] != t[0] || s[1] != t[1] || s[2] != t[2])
			continue;
		if(esrc - s > b->edata - t)
			esrc = s + (b->edata - t);

		t += 3;
		for(s += 3; s < esrc; s++){
			if(*s != *t)
				break;
			t++;
		}
		*ss = s;
		return toff - w;
	}
	return 0;
}

/*
 * knuth vol. 3 multiplicative hashing
 * each byte x chosen according to rules
 * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
 * with reasonable spread between the bytes & their complements
 *
 * the 3 byte value appears to be as almost good as the 4 byte value,
 * and might be faster on some machines
 */
//#define hashit(c)	(((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog)) & HashMask)
#define hashit(c)	((((ulong)(c) * 0x6b43a9) >> (24 - HashLog)) & HashMask)

/*
 * lz77 compression with single lookup in a hash table for each block
 */
int
thwack(Thwack *tw, uchar *dst, uchar *src, int n, ulong seq)
{
	ThwBlock *eblocks, *b, blocks[CompBlocks];
	uchar *s, *ss, *sss, *esrc, *half;
	ulong cont, cseq, bseq, cmask;
	int now, toff;
	int h, m, slot, bits, totmatched;

	if(n > ThwMaxBlock || n < MinMatch || waserror())
		return -1;

	tw->dst = dst;
	tw->dmax = dst + n;
	tw->nbits = 0;

	/*
	 * add source to the coding window
	 * there is always enough space
	 */
	slot = tw->slot;
	b = &tw->blocks[slot];
	b->seq = seq;
	b->acked = 0;
	now = b->begin + b->maxoff;
	s = b->data;
	memmove(s, src, n);
	b->edata = s + n;
	b->begin = now;
	b->maxoff = n;

	/*
	 * set up the history blocks
	 */
	cseq = seq;
	cmask = 0;
	*blocks = *b;
	b = blocks;
	b->maxoff = 0;
	b++;
	while(b < blocks + CompBlocks){
		slot--;
		if(slot < 0)
			slot += EWinBlocks;
		if(slot == tw->slot)
			break;
		if(!tw->blocks[slot].acked)
			continue;
		bseq = tw->blocks[slot].seq;
		if(cseq == seq){
			if(seq - bseq >= MaxSeqStart)
				break;
			cseq = bseq;
		}else if(cseq - bseq > MaxSeqMask)
			break;
		else
			cmask |= 1 << (cseq - bseq - 1);
		*b = tw->blocks[slot];
		b++;
	}
	eblocks = b;
	bitput(tw, ((seq - cseq) << MaxSeqMask) | cmask, 16);

	cont = (s[0] << 16) | (s[2] << 8) | s[2];

	totmatched = 0;
	esrc = s + n;
	half = s + (n >> 1);
	while(s < esrc){
		h = hashit(cont);

		sss = s;
		toff = thwmatch(blocks, eblocks, &sss, esrc, h);
		ss = sss;

		m = ss - s;
		if(m < MinMatch){
			bitput(tw, 0x100|*s, 9);
			ss = s + 1;
		}else{
			totmatched += m;

			toff--;
			for(bits = OffBase; toff >= (1 << bits); bits++)
				;
			if(bits >= MaxOff+OffBase)
				error("thwack offset");
			bitput(tw, bits - OffBase, 4);
			if(bits != OffBase)
				bits--;
			bitput(tw, toff & ((1 << bits) - 1), bits);

			m -= MinMatch;
			if(m < MaxLen-1){
				bitput(tw, lentab[m].encode, lentab[m].bits);
			}else{
				bitput(tw, lentab[MaxLen-1].encode, lentab[MaxLen-1].bits);
				iomegaput(tw, m - (MaxLen - 2));
			}
		}
		blocks->maxoff += ss - s;

		/*
		 * speed hack
		 * check for compression progress, bail if none achieved
		 */
		if(s < half && ss >= half && totmatched * 10 < n)
			error("thwack likely expanding");

		for(; s != ss; s++){
			if(s + MinMatch <= esrc){
				h = hashit(cont);
				blocks->hash[h] = now;
				if(s + MinMatch < esrc)
					cont = (cont << 8) | s[MinMatch];
			}
			now++;
		}
	}

	if(tw->nbits)
		bitput(tw, 0, 8 - tw->nbits);
	tw->slot++;
	if(tw->slot >= EWinBlocks)
		tw->slot = 0;

	poperror();
	return tw->dst - dst;
}

static void
bitput(Thwack *tw, int c, int n)
{
	tw->bits = (tw->bits << n) | c;
	for(tw->nbits += n; tw->nbits >= 8; tw->nbits -= 8){
		if(tw->dst >= tw->dmax)
			error("thwack expanding");
		*tw->dst++ = tw->bits >> (tw->nbits - 8);
	}
}

/*
 * elias's omega code, modified
 * for at least 3 bit transmission
 */
static Huff omegatab[16] = 
{
	{0,	0},		/* 1 */
	{0,	0},
	{0,	0},
	{0,	0},
	{3,	0x4},		/* 5 */
	{3,	0x5},
	{3,	0x6},
	{3,	0x7},
	{7,	0x48},
	{7,	0x49},		/* 10 */
	{7,	0x4a},
	{7,	0x4b},
	{7,	0x4c},
	{7,	0x4d},
	{7,	0x4e},		/* 15 */
	{7,	0x4f},
};

static int
iomegaput(Thwack *tw, ulong v)
{
	int b, bb;

	v--;
	if(v < 4){
		bitput(tw, v, 3);
		return 3;
	}
	if(v < 16){
		b = omegatab[v].bits + 1;
		bitput(tw, omegatab[v].encode << 1, b);
		return b;
	}

	for(bb = 5; v >= (1 << bb); bb++)
		;
	if(bb >= 16)
		error("thwack omegaput");
	b = bb + omegatab[bb].bits + 1;
	bitput(tw, (omegatab[bb].encode << (bb + 1)) | (v << 1), b);
	return b;
}
.
## diffname port/thwack.c 1999/1007
## diff -e /n/emeliedump/1999/1001/sys/src/brazil/port/thwack.c /n/emeliedump/1999/1007/sys/src/brazil/port/thwack.c
276,324d
233,234c
				code = BigLenCode;
				bits = BigLenBits;
				use = BigLenBase;
				m -= MaxFastLen;
				while(m >= use){
					m -= use;
					code = (code + use) << 1;
					use <<= bits & 1;
					bits++;
				}
				bitput(tw, (code + m), bits);
.
230c
			if(m < MaxFastLen){
.
144c
	int h, m, slot, bits, use, totmatched;
.
142c
	ulong cont, cseq, bseq, cmask, code;
.
22,25c
	{5,	0x1c},		/* 11100 */
	{6,	0x3a},		/* 111010 */
	{6,	0x3b},		/* 111011 */
	{7,	0x78},		/* 1111000 */
	{7,	0x79},		/* 1111001 */
.
16c
static	Huff	lentab[MaxFastLen] =
.
2c
#include "lib.h"
.
## diffname port/thwack.c 1999/1022
## diff -e /n/emeliedump/1999/1007/sys/src/brazil/port/thwack.c /n/emeliedump/1999/1022/sys/src/brazil/port/thwack.c
277,285c
	return twdst - dst;
.
273,275c
	stats[StatOutBytes] += twdst - dst;
.
267,268c
	stats[StatBytes] += blocks->maxoff;
	stats[StatLits] += lits;
	stats[StatMatches] += matches;
	stats[StatLitBits] += (twdst - (dst + 2)) * 8 + twnbits - offbits - lenbits;
	stats[StatOffBits] += offbits;
	stats[StatLenBits] += lenbits;

	if(twnbits & 7){
		twbits <<= 8 - (twnbits & 7);
		twnbits += 8 - (twnbits & 7);
	}
	for(; twnbits >= 8; twnbits -= 8){
		if(twdst >= twdmax)
			return -1;
		*twdst++ = twbits >> (twnbits - 8);
	}

.
259c
				blocks->hash[(h ^ blocks->seq) & HashMask] = now;
.
255a
		toff--;
		for(bits = OffBase; toff >= (1 << bits); bits++)
			;
		if(bits >= MaxOff+OffBase)
			panic("thwack offset");
		twbits = (twbits << 4) | 0x8 | (bits - OffBase);
		if(bits != OffBase)
			bits--;
		twbits = (twbits << bits) | toff & ((1 << bits) - 1);
		twnbits += bits + 4;
		offbits += bits + 4;

		len -= MinMatch;
		if(len < MaxFastLen){
			bits = lentab[len].bits;
			twbits = (twbits << bits) | lentab[len].encode;
			twnbits += bits;
			lenbits += bits;
		}else{
			for(; twnbits >= 8; twnbits -= 8){
				if(twdst >= twdmax)
					return -1;
				*twdst++ = twbits >> (twnbits - 8);
			}
			code = BigLenCode;
			bits = BigLenBits;
			use = BigLenBase;
			len -= MaxFastLen;
			while(len >= use){
				len -= use;
				code = (code + use) << 1;
				use <<= bits & 1;
				bits++;
			}
			if(bits > MaxLenDecode + BigLenBits)
				panic("length too big");
			twbits = (twbits << bits) | (code + len);
			twnbits += bits;
			lenbits += bits;
		}

.
249,254c
		blocks->maxoff += len;
		matches++;
.
247d
245a
			now++;
			s++;
			continue;
.
230,244c
			if(s + MinMatch <= esrc){
				blocks->hash[(h ^ blocks->seq) & HashMask] = now;
				if(s + MinMatch < esrc)
					cont = (cont << 8) | s[MinMatch];
.
220,228c
			/*
			 * speed hack
			 * check for compression progress, bail if none achieved
			 */
			if(s > half){
				if(4 * blocks->maxoff < 5 * lits)
					return -1;
				half = esrc;
			}
.
213,218c
		len = ss - s;
		for(; twnbits >= 8; twnbits -= 8){
			if(twdst >= twdmax)
				return -1;
			*twdst++ = twbits >> (twnbits - 8);
		}
		if(len < MinMatch){
			toff = *s;
			lithist = (lithist << 1) | toff < 32 | toff > 127;
			if(lithist & 0x1e){
				twbits = (twbits << 9) | toff;
				twnbits += 9;
			}else if(lithist & 1){
				toff = (toff + 64) & 0xff;
				if(toff < 96){
					twbits = (twbits << 10) | toff;
					twnbits += 10;
				}else{
					twbits = (twbits << 11) | toff;
					twnbits += 11;
				}
			}else{
				twbits = (twbits << 8) | toff;
				twnbits += 8;
			}
			lits++;
			blocks->maxoff++;
.
205a
	twnbits = 0;
	twbits = 0;
	lits = 0;
	matches = 0;
	offbits = 0;
	lenbits = 0;
	lithist = ~0;
.
203d
201c
	cont = (s[0] << 16) | (s[1] << 8) | s[2];
.
199c
	*twdst++ = seq - cseq;
	*twdst++ = cmask;
.
150,152c
	twdst = dst;
	twdmax = dst + n;
.
147c
	if(n > ThwMaxBlock || n < MinMatch)
.
142,145c
	uchar *s, *ss, *sss, *esrc, *half, *twdst, *twdmax;
	ulong cont, cseq, bseq, cmask, code, twbits;
	int now, toff, lithist, h, len, slot, bits, use, twnbits, lits, matches, offbits, lenbits;
.
139c
thwack(Thwack *tw, uchar *dst, uchar *src, int n, ulong seq, ulong stats[ThwStats])
.
132,133c
/*
#define hashit(c)	(((ulong)(c) * 0x6b43a9) >> (24 - HashLog))
*/
#define hashit(c)	((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
.
108,109c
		ok = b->edata - t;
		if(esrc - s > ok)
			esrc = s + ok;
.
99a

.
93c
		then = b->hash[(h ^ b->seq) & HashMask];
.
84c
	int then, toff, w, ok;
.
29,31d
9a

enum
{
	StatBytes,
	StatOutBytes,
	StatLits,
	StatMatches,
	StatLitBits,
	StatOffBits,
	StatLenBits,

	StatProbe,
	StatProbeMiss,

	MaxStat
};

.
2c
#include "../port/lib.h"
.
## diffname port/thwack.c 1999/1111
## diff -e /n/emeliedump/1999/1022/sys/src/brazil/port/thwack.c /n/emeliedump/1999/1111/sys/src/9/port/thwack.c
358a
#endif
.
355a
#ifdef NOUNCOMP
.
327a
		/*
		 * offset in history
		 */
		toff--;
		for(bits = OffBase; toff >= (1 << bits); bits++)
			;
		if(bits < MaxOff+OffBase-1){
			twbits = (twbits << 3) | (bits - OffBase);
			if(bits != OffBase)
				bits--;
			twnbits += bits + 3;
			offbits += bits + 3;
		}else{
			twbits = (twbits << 4) | 0xe | (bits - (MaxOff+OffBase-1));
			bits--;
			twnbits += bits + 4;
			offbits += bits + 4;
		}
		twbits = (twbits << bits) | toff & ((1 << bits) - 1);

.
325a

			for(; twnbits >= 8; twnbits -= 8){
				if(twdst >= twdmax)
					return -1;
				*twdst++ = twbits >> (twnbits - 8);
			}
.
321,322d
318c
				use <<= (bits & 1) ^ 1;
.
306,310d
287,298c
		/*
		 * length of match
		 */
.
272a
#endif
.
263a
#ifdef NOUNCOMP
.
217a
#ifndef NOUNCOMP
	tw->slot++;
	if(tw->slot >= EWinBlocks)
		tw->slot = 0;
#endif

.
40,43c
	{5,	0x1d},		/* 11101 */
	{6,	0x3c},		/* 111100 */
	{7,	0x7a},		/* 1111010 */
	{7,	0x7b},		/* 1111011 */
	{8,	0xf8},		/* 11111000 */
	{8,	0xf9},		/* 11111001 */
.
37,38c
	{3,	0x6},		/* 110 */
.
35d
12a
	MaxFastLen	= 9,
	BigLenCode	= 0x1f4,	/* minimum code for large lenth encoding */
	BigLenBits	= 9,
	BigLenBase	= 4		/* starting items to encode for big lens */
};

enum
{
.
8a
/*
 * don't include compressed blocks
 */
#define NOUNCOMP

.
2c
#include "lib.h"
.
## diffname port/thwack.c 1999/1116
## diff -e /n/emeliedump/1999/1111/sys/src/9/port/thwack.c /n/emeliedump/1999/1116/sys/src/9/port/thwack.c
391d
387d
375a
	stats[StatDelay] = stats[StatDelay]*7/8 + dst[0];
	stats[StatHist] = stats[StatHist]*7/8 + nhist;
.
373d
293d
283d
231,236d
224a
		nhist += b->maxoff;
.
206a
	nhist = 0;
.
175c
	int now, toff, lithist, h, len, slot, bits, use, twnbits, lits, matches, offbits, lenbits, nhist;
.
34,35c
	StatDelay,
	StatHist,
.
30d
9,13d
## diffname port/thwack.c 2001/0609
## diff -e /n/emeliedump/1999/1116/sys/src/9/port/thwack.c /n/emeliedump/2001/0609/sys/src/9/port/thwack.c
379a
	stats[StatBytes] += blocks->maxoff;
	stats[StatLits] += lits;
	stats[StatMatches] += matches;
	stats[StatOffBits] += offbits;
	stats[StatLenBits] += lenbits;
	stats[StatDelay] = stats[StatDelay]*7/8 + dst[0];
	stats[StatHist] = stats[StatHist]*7/8 + nhist;
.
358,364d
253c
			lithist = (lithist << 1) | (toff < 32) | (toff > 127);
.
88a
		tw->blocks[slot].acked = 1;

.
77,87c
		slot--;
		if(slot < 0)
			slot += EWinBlocks;
		if(slot == tw->slot)
			break;
		if(tw->blocks[slot].seq != seq)
			continue;
.

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].