Plan 9 from Bell Labs’s /usr/web/sources/extra/9hist/ip/tcp.c

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


## diffname ip/tcp.c 1997/0327
## diff -e /dev/null /n/emeliedump/1997/0327/sys/src/brazil/ip/tcp.c
0a
#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"

#include	"ip.h"

/*
 *  Constants for the TCP implementation: sizes, timer states,
 *  header flag bits, option codes, Tcpctl flag bits, round trip
 *  gain shifts and the connection states.
 */
enum
{
	QMAX		= 64*1024-1,	/* limit of the conversation queues; also the window given to tcpstart */
	IP_TCPPROTO	= 6,		/* value stored in the header's proto field */
	TCP_IPLEN	= 8,		/* bytes of Tcphdr before Unused; offset at which ptclcsum starts */
	TCP_PHDRSIZE	= 12,		/* bytes of Tcphdr from Unused through tcpdst */
	TCP_HDRSIZE	= 20,		/* TCP header without options */
	TCP_PKT		= TCP_IPLEN+TCP_PHDRSIZE,
	TimerOFF	= 0,		/* Timer.state values */
	TimerON		= 1,
	TimerDONE	= 2,
	MAX_TIME 	= (1<<20),	/* Forever */
	TCP_ACK		= 50,		/* Timed ack sequence in ms */

	URG		= 0x20,		/* Data marked urgent */
	ACK		= 0x10,		/* Acknowledge is valid */
	PSH		= 0x08,		/* Whole data pipe is pushed */
	RST		= 0x04,		/* Reset connection */
	SYN		= 0x02,		/* Pkt. is synchronise */
	FIN		= 0x01,		/* Start close down */

	EOLOPT		= 0,		/* option kinds */
	NOOPOPT		= 1,
	MAXBACKOFF	= 20,		/* backoff limit; also initial kacounter/f2counter value */
	MSSOPT		= 2,
	MSS_LENGTH	= 4,		/* Length of a maximum segment size option */
	MSL2		= 10,		/* multiplied by 1000/MSPTICK to load the Time_wait/Finwait2 timer */
	MSPTICK		= 50,		/* Milliseconds per timer tick */
	DEF_MSS		= 1024,		/* Default maximum segment size */
	DEF_RTT		= 150,		/* Default round trip */
	TCP_LISTEN	= 0,		/* Listen connection */
	TCP_CONNECT	= 1,		/* Outgoing connection */

	FORCE		= 1,		/* Tcpctl.flags: send a segment even without data */
	CLONE		= 2,		/* set by tcpstart on listeners, cleared by tcpincoming */
	RETRAN		= 4,		/* set by tcprxmit, cleared by update */
	ACTIVE		= 8,		/* set when we send the first SYN */
	SYNACK		= 16,		/* set by update on the first new ack */
	ACKED		= 32,		/* set by update when new data is acked */

	LOGAGAIN	= 3,		/* shift applied to srtt */
	LOGDGAIN	= 2,		/* shift applied to mdev */
	Closed		= 0,		/* Connection states */
	Listen,
	Syn_sent,
	Syn_received,
	Established,
	Finwait1,
	Finwait2,
	Close_wait,
	Closing,
	Last_ack,
	Time_wait
};

/*
 *  Printable names of the connection states, indexed by state.
 *  Must correspond to the enumeration above (Closed .. Time_wait).
 */
char *tcpstates[] =
{
	"Closed", 	"Listen", 	"Syn_sent", "Syn_received",
	"Established", 	"Finwait1",	"Finwait2", "Close_wait",
	"Closing", 	"Last_ack", 	"Time_wait"
};

/*
 *  A tick-driven timer.  Running timers are linked on the global
 *  `timers' list and counted down by tcpackproc.
 */
typedef struct Timer Timer;
struct Timer
{
	Timer	*next;		/* links on the active list */
	Timer	*prev;
	int	state;		/* TimerOFF, TimerON or TimerDONE */
	int	start;		/* value loaded into count by tcpgo; 0 means never start */
	int	count;		/* ticks left */
	void	(*func)(void*);	/* called by tcpackproc on expiry, if not nil */
	void	*arg;		/* argument for func */
};

/*
 *  Wire layout of a segment as handled here: TCP_IPLEN bytes of
 *  IP header fields, TCP_PHDRSIZE bytes covering Unused through
 *  tcpdst (included in the checksum), the TCP_HDRSIZE byte TCP
 *  header and room for one MSS option.
 */
typedef struct Tcphdr Tcphdr;
struct Tcphdr
{
	byte	vihl;		/* Version and header length */
	byte	tos;		/* Type of service */
	byte	length[2];	/* packet length */
	byte	id[2];		/* Identification */
	byte	frag[2];	/* Fragment information */
	byte	Unused;		/* zeroed before checksumming */
	byte	proto;
	byte	tcplen[2];	/* TCP header plus data length */
	byte	tcpsrc[4];
	byte	tcpdst[4];
	byte	tcpsport[2];
	byte	tcpdport[2];
	byte	tcpseq[4];
	byte	tcpack[4];
	byte	tcpflag[2];	/* (header length << 10) | flag bits */
	byte	tcpwin[2];
	byte	tcpcksum[2];
	byte	tcpurg[2];
	/* Options segment */
	byte	tcpopt[2];	/* option kind and length */
	byte	tcpmss[2];
};

/*
 *  TCP header fields in host form, as filled in by ntohtcp and
 *  consumed by htontcp.
 */
typedef struct Tcp Tcp;
struct	Tcp
{
	ushort	source;		/* source port */
	ushort	dest;		/* destination port */
	ulong	seq;
	ulong	ack;
	byte	flags;		/* URG, ACK, PSH, RST, SYN, FIN */
	ushort	wnd;
	ushort	urg;
	ushort	mss;		/* 0 if no MSS option */
};

/*
 *  An out of order segment held on a Tcpctl's resequencing queue,
 *  which addreseq keeps sorted by starting sequence number.
 */
typedef struct Reseq Reseq;
struct Reseq
{
	Reseq 	*next;
	Tcp	seg;		/* copy of the segment's header */
	Block	*bp;		/* segment data */
	ushort	length;		/* data length passed to addreseq */
};

/*
 *  Per conversation TCP control block, kept in Conv.ptcl.
 *  The embedded QLock protects it.
 */
typedef struct Tcpctl Tcpctl;
struct Tcpctl
{
	QLock;
	byte	state;			/* Connection state */
	byte	type;			/* Listening or active connection */
	byte	code;			/* Icmp code */
	struct {
		ulong	una;		/* Unacked data pointer */
		ulong	nxt;		/* Next sequence expected */
		ulong	ptr;		/* Data pointer */
		ushort	wnd;		/* Tcp send window */
		ulong	urg;		/* Urgent data pointer */
		ulong	wl1;		/* seq of the segment that last updated wnd */
		ulong	wl2;		/* ack of the segment that last updated wnd */
	} snd;
	struct {
		ulong	nxt;		/* Receive pointer to next byte slot */
		ushort	wnd;		/* Receive window incoming */
		ulong	urg;		/* Urgent pointer */
		int	blocked;	/* set by tcprcvwin when the window closes */
	} rcv;
	ulong	iss;			/* Initial sequence number */
	ushort	cwind;			/* Congestion window */
	ushort	ssthresh;		/* Slow start threshold */
	int	resent;			/* Bytes just resent */
	int	irs;			/* Initial received sequence */
	ushort	mss;			/* Maximum segment size */
	int	rerecv;			/* Overlap of data re-received */
	ushort	window;			/* Receive window */
	int	max_snd;		/* Max send */
	ulong	last_ack;		/* Last acknowledge sent (rcv.nxt of the last segment built) */
	byte	backoff;		/* Exponential backoff counter */
	byte	flags;			/* State flags */
	ulong	sndcnt;			/* Amount of data in send queue */
	Reseq	*reseq;			/* Resequencing queue */
	Timer	timer;			/* Activity timer */
	Timer	acktimer;		/* Acknowledge timer */
	Timer	rtt_timer;		/* Round trip timer */
	ulong	rttseq;			/* Round trip sequence */
	int	srtt;			/* Smoothed round trip, scaled by 1<<LOGAGAIN */
	int	mdev;			/* Mean deviation of round trip, scaled by 1<<LOGDGAIN */
	int	kacounter;		/* count down for keep alive */
	int	f2counter;		/* count down for finwait2 state */
	uint	sndsyntime;		/* time syn sent */
	char	ascstate[128];		/* buffer for the text returned by tcpstate */
};

/*
 *  DBG(addr)(Logtcpmsg, fmt, ...) logs through netlog when tcp message
 *  logging is enabled and iponly is either 0 or equal to addr.
 *  It expands to a bare `if', so never follow a use with `else'.
 */
#define DBG(x)	if((logmask & Logtcpmsg) && (iponly == 0 || (x) == iponly))netlog

Proto	tcp;			/* Protocol instance: conversation table and error counters */
int	tcp_irtt = DEF_RTT;	/* Initial guess at round trip time */
ushort	tcp_mss  = DEF_MSS;	/* Maximum segment size to be sent */
Timer 	*timers;		/* List of active timers */
QLock 	tl;			/* Protect timer list */

/* forward declarations for routines used before they are defined */
void	addreseq(Tcpctl*, Tcp*, Block*, ushort);
void	getreseq(Tcpctl*, Tcp*, Block**, ushort*);
void	localclose(Conv*, char*);
void	procsyn(Conv*, Tcp*);
void	tcpiput(Block*);
void	tcpoutput(Conv*);
int	tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
void	tcpstart(Conv*, int, ushort);
void	tcptimeout(void*);
void	tcpsndsyn(Tcpctl*);
void	tcprcvwin(Conv*);
void	tcpacktimer(Conv*);

/*
 *  Move conversation s to newstate.  Nothing happens if it is
 *  already there.  Entering Closed closes the queues and forgets
 *  the ports and remote address; entering Closed or Close_wait
 *  hangs up the read queue.  Leaving Syn_sent reports the
 *  conversation as connected.
 */
void
tcpsetstate(Conv *s, byte newstate)
{
	Tcpctl *tcb;
	byte prev;

	tcb = (Tcpctl*)s->ptcl;
	prev = tcb->state;
	if(prev == newstate)
		return;

	tcb->state = newstate;

	if(newstate == Closed) {
		/* This connection is toast */
		qclose(s->rq);
		qclose(s->wq);
		qclose(s->eq);
		s->lport = 0;
		s->rport = 0;
		s->raddr = 0;
	}

	/* Closed also takes the hangup that a remote close gets */
	if(newstate == Closed || newstate == Close_wait)
		qhangup(s->rq, nil);

	if(prev == Syn_sent)
		Fsconnected(&fs, s, nil);
}

/*
 *  Connect request: let Fsstdconnect process the arguments and,
 *  if it reports no error, start an active open.  Returns the
 *  error string from Fsstdconnect, or nil.
 */
static char*
tcpconnect(Conv *c, char **argv, int argc)
{
	char *err;

	err = Fsstdconnect(c, argv, argc);
	if(err == nil)
		tcpstart(c, TCP_CONNECT, QMAX);
	return err;
}

/*
 *  Format a status line for conversation c into the control
 *  block's ascstate buffer and point *msg at it.
 *  Returns 1 if the conversation is Closed, 0 otherwise.
 */
int
tcpstate(char **msg, Conv *c)
{
	Tcpctl *tcb;
	int closed;

	tcb = (Tcpctl*)(c->ptcl);
	closed = tcb->state == Closed;

	snprint(tcb->ascstate, sizeof(tcb->ascstate),
		"%s srtt %d mdev %d timer.start %d timer.count %d\n",
		tcpstates[tcb->state], tcb->srtt, tcb->mdev,
		tcb->timer.start, tcb->timer.count);
	*msg = tcb->ascstate;

	return closed;
}

/*
 *  Announce: put the conversation into Listen (tcpstart with
 *  TCP_LISTEN) and report it as connected with no error.
 */
static void
tcpannounce(Conv *c)
{
	tcpstart(c, TCP_LISTEN, QMAX);
	Fsconnected(&fs, c, nil);
}

/*
 *  Local close of a conversation.  Hangs up the queues, releases
 *  the conversation lock, then acts on the connection state:
 *  listeners and unconnected conversations are closed at once;
 *  connected ones queue a FIN (one unit of sequence space) and
 *  move to Finwait1 or Last_ack.
 *
 *  The tcb lock is taken inside each case.  States not listed are
 *  already closing; no lock is taken for them, so we must return
 *  without unlocking.
 */
static void
tcpclose(Conv *c)
{
	Tcpctl *tcb;

	tcb = (Tcpctl*)c->ptcl;

	qhangup(c->rq, nil);
	qhangup(c->wq, nil);
	qhangup(c->eq, nil);

	unlock(c);

	switch(tcb->state) {
	case Listen:
		/*
		 *  reset any incoming calls to this listener
		 */
		Fsconnected(&fs, c, "Hangup");

		qlock(tcb);
		localclose(c, nil);
		break;
	case Closed:
	case Syn_sent:
		qlock(tcb);
		localclose(c, nil);
		break;
	case Syn_received:
	case Established:
		qlock(tcb);
		tcb->sndcnt++;
		tcb->snd.nxt++;
		tcpsetstate(c, Finwait1);
		tcpoutput(c);
		break;
	case Close_wait:
		qlock(tcb);
		tcb->sndcnt++;
		tcb->snd.nxt++;
		tcpsetstate(c, Last_ack);
		tcpoutput(c);
		break;
	default:
		/* close already in progress: the lock was never taken */
		return;
	}
	qunlock(tcb);
}

/*
 *  Kick the output side after len bytes were added to the write
 *  queue.  A listener is turned into an active open first.  In a
 *  state that can carry data the bytes are accounted for and
 *  tcpoutput is run; in any other state the conversation is
 *  closed with "Hangup".
 *
 *  The tcb is locked for the whole routine: tcpsndsyn and
 *  tcpsetstate modify it, and localclose must be called with it
 *  locked.
 */
void
tcpkick(Conv *s, int len)
{
	Tcpctl *tcb;

	tcb = (Tcpctl*)s->ptcl;

	qlock(tcb);
	switch(tcb->state) {
	case Listen:
		tcb->flags |= ACTIVE;
		tcpsndsyn(tcb);
		tcpsetstate(s, Syn_sent);
		/* fall through */
	case Syn_sent:
	case Syn_received:
	case Established:
	case Close_wait:
		/*
		 * Push data
		 */
		tcb->sndcnt += len;
		tcprcvwin(s);
		tcpoutput(s);
		break;
	default:
		localclose(s, "Hangup");
		break;
	}
	qunlock(tcb);
}

/*
 *  unlink t from the list of active timers.  both callers in this
 *  file hold tl.  t's own next and prev are left as they were.
 */
static void
deltimer(Timer *t)
{
	Timer *after, *before;

	after = t->next;
	before = t->prev;

	if(timers == t)
		timers = after;
	if(after != nil)
		after->prev = before;
	if(before != nil)
		before->next = after;
}

/*
 *  Recompute the receive window as the space left in the read
 *  queue, never below 0.  A window of 0 marks the receiver as
 *  blocked.  Call with tcb locked.
 */
void
tcprcvwin(Conv *s)
{
	Tcpctl *tcb;
	int room;

	tcb = (Tcpctl*)s->ptcl;

	room = QMAX - qlen(s->rq);
	if(room <= 0) {
		room = 0;
		tcb->rcv.blocked = 1;
	}
	tcb->rcv.wnd = room;
}

/*
 *  Force a segment out with a freshly computed receive window.
 *  Installed by tcpcreate as the read queue's kick routine and by
 *  inittcpctl as the acktimer callback; it gets the remote sender
 *  going again if it was flow controlled due to a closed window.
 */
void
tcpacktimer(Conv *s)
{
	Tcpctl *tcb;

	tcb = (Tcpctl*)s->ptcl;

	qlock(tcb);
	tcb->flags |= FORCE;	/* make tcpoutput send even with no data */
	tcprcvwin(s);
	tcpoutput(s);
	qunlock(tcb);
}

/*
 *  Create the queues of a new conversation.  The read queue gets
 *  tcpacktimer as its kick routine with the conversation as
 *  argument; the write queue has none.
 */
static void
tcpcreate(Conv *c)
{
	c->rq = qopen(QMAX, 0, tcpacktimer, c);
	c->wq = qopen(QMAX, 0, 0, 0);
}

void
tcpackproc(void*)
{
	Timer *t, *tp, *timeo;
	static Rendez tcpr;

	for(;;) {
		tsleep(&tcpr, return0, 0, MSPTICK);

		qlock(&tl);
		timeo = nil;
		for(t = timers; t != nil; t = tp) {
			tp = t->next;
 			if(t->state == TimerON) {
				t->count--;
				if(t->count == 0) {
					deltimer(t);
					t->state = TimerDONE;
					t->next = timeo;
					timeo = t;
				}
			}
		}
		qunlock(&tl);

		for(;;) {
			t = timeo;
			if(t == nil)
				break;

			timeo = t->next;
			if(t->state == TimerDONE && t->func != nil)
				(*t->func)(t->arg);
		}
	}
}

/*
 *  Start or restart timer t: reload its count from start and, if
 *  it is not already running, put it at the head of the active
 *  list.  Timers that are nil or have a start of 0 are ignored.
 */
void
tcpgo(Timer *t)
{
	if(t == nil || t->start == 0)
		return;

	qlock(&tl);
	t->count = t->start;
	if(t->state == TimerON) {
		/* already on the list; the reload is all that is needed */
		qunlock(&tl);
		return;
	}

	t->state = TimerON;
	t->prev = nil;
	t->next = timers;
	if(timers != nil)
		timers->prev = t;
	timers = t;
	qunlock(&tl);
}

/*
 *  Stop timer t: take it off the active list if it is running
 *  and mark it TimerOFF.  A nil timer is ignored.
 */
void
tcphalt(Timer *t)
{
	if(t != nil) {
		qlock(&tl);
		if(t->state == TimerON)
			deltimer(t);
		t->state = TimerOFF;
		qunlock(&tl);
	}
}

/*
 *  Retransmission backoff multiplier: 2 to the n for n below 5,
 *  64 from there on.
 */
int
backoff(int n)
{
	return n < 5 ? 1 << n : 64;
}

/*
 *  Tear down the local state of a conversation and move it to
 *  Closed.  reason, possibly nil, is passed to the queue hangups
 *  and, if we were still in Syn_sent, to Fsconnected.
 *  Called with tcb locked.
 */
void
localclose(Conv *s, char *reason)
{
	Tcpctl *tcb;
	Reseq *rp,*rp1;

	tcb = (Tcpctl*)s->ptcl;

	/*
	 *  stop every timer of this conversation.  the ack timer is
	 *  started by tcpiput and must not stay on the active list
	 *  once the conversation is closed.
	 */
	tcphalt(&tcb->timer);
	tcphalt(&tcb->rtt_timer);
	tcphalt(&tcb->acktimer);

	/* Flush reassembly queue; nothing more can arrive */
	for(rp = tcb->reseq; rp != nil; rp = rp1) {
		rp1 = rp->next;
		freeblist(rp->bp);
		free(rp);
	}
	tcb->reseq = nil;

	if(tcb->state == Syn_sent)
		Fsconnected(&fs, s, reason);

	qhangup(s->rq, reason);
	qhangup(s->wq, reason);

	tcpsetstate(s, Closed);
}

/*
 *  Reset the control block of s to its initial state: everything
 *  zero except the segment size and congestion defaults and the
 *  start values, functions and arguments of the timers.
 */
void
inittcpctl(Conv *s)
{
	Tcpctl *tcb;
	Timer *t;

	tcb = (Tcpctl*)s->ptcl;
	memset(tcb, 0, sizeof(Tcpctl));

	tcb->mss = tcp_mss;
	tcb->cwind = tcp_mss;
	tcb->ssthresh = 65535;
	tcb->srtt = 0;

	/* activity (retransmit) timer */
	t = &tcb->timer;
	t->start = tcp_irtt / MSPTICK;
	t->func = tcptimeout;
	t->arg = s;

	/* delayed ack timer */
	t = &tcb->acktimer;
	t->start = TCP_ACK / MSPTICK;
	t->func = tcpacktimer;
	t->arg = s;

	/* the round trip timer only measures; it has no function */
	tcb->rtt_timer.start = MAX_TIME;
}

/*
 *  Segment size allowed by the medium that routes to s's remote
 *  address: its maxmtu less its header size and the IP and TCP
 *  headers.  DEF_MSS is returned if there is no route or the
 *  result is below 4.
 */
int
tcpmtu(Conv *s)
{
	Media *m;
	byte dst[4], dummy[4];
	int mtu;

	hnputl(dst, s->raddr);
	m = Mediaroute(dst, dummy);
	if(m == nil)
		return DEF_MSS;

	mtu = m->maxmtu - m->hsize - (TCP_PKT + TCP_HDRSIZE);
	if(mtu < 4)
		return DEF_MSS;
	return mtu;
}

/*
 *  Initialise the control block of s and start it in the given
 *  mode with the given receive window.  TCP_LISTEN marks it as a
 *  cloning listener; TCP_CONNECT sends a SYN and enters Syn_sent.
 *  Any other mode only initialises.
 */
void
tcpstart(Conv *s, int mode, ushort window)
{
	Tcpctl *tcb;

	tcb = (Tcpctl*)s->ptcl;

	inittcpctl(s);
	tcb->window = window;
	tcb->rcv.wnd = window;

	if(mode == TCP_LISTEN) {
		tcb->flags |= CLONE;
		tcpsetstate(s, Listen);
	}
	else if(mode == TCP_CONNECT) {
		/* Send SYN, go into SYN_SENT state */
		qlock(tcb);
		tcb->flags |= ACTIVE;
		tcpsndsyn(tcb);
		tcpsetstate(s, Syn_sent);
		tcpoutput(s);
		qunlock(tcb);
	}
}

/*
 *  Render a flag word for the log: the header length (the word
 *  shifted right by 10) followed by the names of the flag bits
 *  that are set.  The result is in a static buffer that the next
 *  call overwrites.
 */
static char*
tcpflag(ushort flag)
{
	static char buf[128];
	static struct {
		ushort	bit;
		char	*name;
	} names[] = {
		{ URG,	" URG" },
		{ ACK,	" ACK" },
		{ PSH,	" PSH" },
		{ RST,	" RST" },
		{ SYN,	" SYN" },
		{ FIN,	" FIN" },
	};
	int i;

	sprint(buf, "%d", flag>>10);	/* Head len */
	for(i = 0; i < sizeof(names)/sizeof(names[0]); i++)
		if(flag & names[i].bit)
			strcat(buf, names[i].name);

	return buf;
}

/*
 *  Build an outgoing segment.  tcph holds the header fields in
 *  host form, data is the payload (nil for none) and ph supplies
 *  the first TCP_PKT bytes of the header (addresses and so on).
 *  Room for the headers is made in front of data, or a new block
 *  is allocated; the header is filled in, an MSS option is added
 *  if tcph->mss is not 0, and the checksum is computed.
 *  Returns the block to send, or nil if padblock or allocb
 *  returned nil.
 */
Block *
htontcp(Tcp *tcph, Block *data, Tcphdr *ph)
{
	int dlen;
	Tcphdr *h;
	ushort csum;
	ushort hdrlen;

	hdrlen = TCP_HDRSIZE;
	if(tcph->mss)
		hdrlen += MSS_LENGTH;

	if(data) {
		dlen = blocklen(data);
		data = padblock(data, hdrlen + TCP_PKT);
		if(data == nil)
			return nil;
	}
	else {
		dlen = 0;
		data = allocb(hdrlen + TCP_PKT);
		if(data == nil)
			return nil;
		data->wp += hdrlen + TCP_PKT;
	}


	/* start from the caller's partial header */
	memmove(data->rp, ph, TCP_PKT);

	h = (Tcphdr *)(data->rp);
	h->proto = IP_TCPPROTO;
	h->frag[0] = 0;
	h->frag[1] = 0;
	hnputs(h->tcplen, hdrlen + dlen);
	hnputs(h->tcpsport, tcph->source);
	hnputs(h->tcpdport, tcph->dest);
	hnputl(h->tcpseq, tcph->seq);
	hnputl(h->tcpack, tcph->ack);
	/* hdrlen<<10 puts the length in 32 bit words into the top 4 bits */
	hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
	hnputs(h->tcpwin, tcph->wnd);
	/* the checksum field must be zero while summing */
	h->tcpcksum[0] = 0;
	h->tcpcksum[1] = 0;
	h->Unused = 0;
	hnputs(h->tcpurg, tcph->urg);

	if(tcph->mss != 0){
		h->tcpopt[0] = MSSOPT;
		h->tcpopt[1] = MSS_LENGTH;
		hnputs(h->tcpmss, tcph->mss);
	}
	/* sum from Unused on: TCP_PHDRSIZE bytes, the header and the data */
	csum = ptclcsum(data, TCP_IPLEN, hdrlen+dlen+TCP_PHDRSIZE);
	hnputs(h->tcpcksum, csum);

	DBG(nhgetl(h->tcpdst))(Logtcpmsg, "%d > %d s %l8.8ux a %8.8lux %s w %.4ux l %d\n",
		tcph->source, tcph->dest,
		tcph->seq, tcph->ack, tcpflag((hdrlen<<10)|tcph->flags),
		tcph->wnd, dlen);

	return data;
}

/*
 *  Convert the header of an incoming segment to host form in
 *  tcph and make the whole header, options included, contiguous
 *  in *bpp.
 *  Returns the TCP header length in bytes, or -1 if a pullupblock
 *  returned nil or the header length is below TCP_HDRSIZE (the
 *  block list is then freed here).
 *
 *  The only option interpreted is MSS.  Options come from the
 *  network, so each length is checked against what is left of
 *  the header: a length below 2 or beyond the header ends the
 *  scan, and unknown options are skipped by their length.
 */
int
ntohtcp(Tcp *tcph, Block **bpp)
{
	Tcphdr *h;
	byte *optr;
	ushort hdrlen;
	ushort optlen;
	int n;

	*bpp = pullupblock(*bpp, TCP_PKT+TCP_HDRSIZE);
	if(*bpp == nil)
		return -1;

	h = (Tcphdr *)((*bpp)->rp);
	tcph->source = nhgets(h->tcpsport);
	tcph->dest = nhgets(h->tcpdport);
	tcph->seq = nhgetl(h->tcpseq);
	tcph->ack = nhgetl(h->tcpack);

	/* top 4 bits hold the length in 32 bit words; >>2 yields bytes */
	hdrlen = (h->tcpflag[0] & 0xf0)>>2;
	if(hdrlen < TCP_HDRSIZE) {
		freeblist(*bpp);
		return -1;
	}

	tcph->flags = h->tcpflag[1];
	tcph->wnd = nhgets(h->tcpwin);
	tcph->urg = nhgets(h->tcpurg);
	tcph->mss = 0;

	*bpp = pullupblock(*bpp, hdrlen+TCP_PKT);
	if(*bpp == nil)
		return -1;

	/* the block may not be the one h was taken from; look again */
	h = (Tcphdr *)((*bpp)->rp);

	DBG(nhgetl(h->tcpsrc))(Logtcpmsg, "%d > %d s %l8.8ux a %8.8lux %s w %.4ux l %d\n",
		tcph->source, tcph->dest,
		tcph->seq, tcph->ack, tcpflag((hdrlen<<10)|tcph->flags),
		tcph->wnd, nhgets(h->length)-hdrlen-TCP_PKT);

	optr = h->tcpopt;
	n = hdrlen - TCP_HDRSIZE;	/* option bytes left */
	while(n > 0 && *optr != EOLOPT) {
		if(*optr == NOOPOPT) {
			/* single byte option */
			n--;
			optr++;
			continue;
		}
		if(n < 2)
			break;		/* no room for a length byte */
		optlen = optr[1];
		if(optlen < 2 || optlen > n)
			break;		/* malformed; ignore the rest */
		if(*optr == MSSOPT && optlen == MSS_LENGTH)
			tcph->mss = nhgets(optr+2);
		n -= optlen;
		optr += optlen;
	}
	return hdrlen;
}

/*
 *  Pick a random initial sequence number, point all the send
 *  state at it and account for the SYN in the send count.
 *  FORCE makes the next tcpoutput send it; the time is noted
 *  for the first round trip estimate.
 */
void
tcpsndsyn(Tcpctl *tcb)
{
	ulong isn;

	isn = (nrand(1<<16)<<16)|nrand(1<<16);

	tcb->iss = isn;
	tcb->rttseq = isn;
	tcb->snd.una = isn;
	tcb->snd.ptr = isn;
	tcb->snd.nxt = isn;
	tcb->snd.wl2 = isn;

	tcb->sndcnt++;
	tcb->flags |= FORCE;
	tcb->sndsyntime = msec;
}

/*
 *  Answer segment seg, which came from source addressed to dest
 *  and carried length bytes of data, with a reset.  Nothing is
 *  sent in reply to a segment that is itself a reset.
 *  seg is modified in place to become the reply.
 */
void
sndrst(Ipaddr source, Ipaddr dest, ushort length, Tcp *seg)
{
	ushort tmp;
	Tcphdr ph;
	Block *hbp;
	byte rflags;

	if(seg->flags & RST)
		return;


	/* the reply goes back the way the segment came */
	hnputl(ph.tcpsrc, dest);
	hnputl(ph.tcpdst, source);
	ph.proto = IP_TCPPROTO;
	hnputs(ph.tcplen, TCP_HDRSIZE);

	/* Swap port numbers */
	tmp = seg->dest;
	seg->dest = seg->source;
	seg->source = tmp;

	rflags = RST;

	/* convince the other end that this reset is in band */
	if(seg->flags & ACK) {
		seg->seq = seg->ack;
		seg->ack = 0;
	}
	else {
		/* acknowledge everything the segment occupied: SYN, data and FIN */
		rflags |= ACK;
		seg->ack = seg->seq;
		seg->seq = 0;
		if(seg->flags & SYN)
			seg->ack++;
		seg->ack += length;
		if(seg->flags & FIN)
			seg->ack++;
	}
	seg->flags = rflags;
	seg->wnd = 0;
	seg->urg = 0;
	seg->mss = 0;
	hbp = htontcp(seg, nil, &ph);
	if(hbp == nil)
		return;

	ipoput(hbp, 0, MAXTTL);
}

/*
 *  flush an incoming call; send a reset to the remote side and close the
 *  conversation
 */
void
tcpflushincoming(Conv *s)
{
	Tcp seg;
	byte dst[4];
	Tcpctl *tcb;

	tcb = (Tcpctl*)s->ptcl;

	/*
	 *  describe a segment as if it had come from the remote side;
	 *  sndrst swaps the ports and builds the reply to it.
	 */
	seg.source = s->rport;
	seg.dest = s->lport;
	seg.flags = ACK;
	seg.seq = tcb->snd.ptr;
	tcb->last_ack = tcb->rcv.nxt;
	seg.ack = tcb->rcv.nxt;

	/* no local address yet: use the source address for this destination */
	if(s->laddr == 0) {
		hnputl(dst, s->raddr);
		s->laddr = Mediagetsrc(dst);
	}
	sndrst(s->raddr, s->laddr, 0, &seg);
	localclose(s, nil);
}

/*
 *  send a reset to the remote side and close the conversation.
 *  returns nil, or the current error string if an error was
 *  raised on the way.
 *
 *  the tcb is locked before the error label is set up so that the
 *  error path only ever unlocks a lock we hold.  if no block can
 *  be had for the reset the conversation is closed without it.
 */
char*
tcphangup(Conv *s)
{
	Tcp seg;
	byte dst[4];
	Tcpctl *tcb;
	Tcphdr ph;
	Block *hbp;

	tcb = (Tcpctl*)s->ptcl;
	qlock(tcb);
	if(waserror()){
		qunlock(tcb);
		return up->error;
	}
	if(s->raddr != 0) {
		seg.source = s->lport;
		seg.dest = s->rport;
		seg.flags = RST | ACK;
		seg.ack = tcb->rcv.nxt;
		seg.seq = tcb->snd.ptr;
		seg.wnd = 0;
		seg.urg = 0;
		seg.mss = 0;
		tcb->last_ack = tcb->rcv.nxt;
		if(s->laddr == 0) {
			hnputl(dst, s->raddr);
			s->laddr = Mediagetsrc(dst);
		}
		hnputl(ph.tcpsrc, s->laddr);
		hnputl(ph.tcpdst, s->raddr);
		ph.proto = IP_TCPPROTO;
		hnputs(ph.tcplen, TCP_HDRSIZE);
		hbp = htontcp(&seg, nil, &ph);
		if(hbp != nil)
			ipoput(hbp, 0, s->ttl);
	}
	localclose(s, nil);
	poperror();
	qunlock(tcb);
	return nil;
}

/*
 *  Create a conversation for a call arriving at listener s from
 *  src (port segp->source) to dst (port segp->dest).  The new
 *  control block starts as a copy of the listener's with CLONE
 *  cleared and the activity and ack timers pointed at the new
 *  conversation and marked off.
 *  Returns the new conversation, or nil if Fsnewcall returned nil.
 */
Conv*
tcpincoming(Conv *s, Tcp *segp, Ipaddr src, Ipaddr dst)
{
	Conv *new;
	Tcpctl *tcb;

	new = Fsnewcall(&fs, s, src, segp->source, dst, segp->dest);
	if(new == nil)
		return nil;

	memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
	tcb = (Tcpctl*)new->ptcl;
	tcb->flags &= ~CLONE;
	/* the copied timers still name the listener; retarget them */
	tcb->timer.arg = new;
	tcb->timer.state = TimerOFF;
	tcb->acktimer.arg = new;
	tcb->acktimer.state = TimerOFF;

	return new;
}

/*
 *  Is sequence number x within low..high, inclusive?
 *  If low is above high the range wraps past the top of the
 *  sequence space and consists of low upward together with
 *  everything up to high.
 */
int
seq_within(ulong x, ulong low, ulong high)
{
	if(low <= high){
		if(low <= x && x <= high)
			return 1;
	}
	else {
		/* wrapped range: x lies on either side of the wrap */
		if(x >= low || x <= high)
			return 1;
	}
	return 0;
}

/*
 *  Sequence number comparisons.  Sequence numbers wrap, so the
 *  order of x and y is taken from the sign of their difference
 *  seen as a 32 bit signed number: x is before y if x-y is
 *  negative.  This holds as long as the two are less than 2^31
 *  apart.
 */
int
seq_lt(ulong x, ulong y)
{
	return (int)(x-y) < 0;
}

int
seq_le(ulong x, ulong y)
{
	return (int)(x-y) <= 0;
}

int
seq_gt(ulong x, ulong y)
{
	return (int)(x-y) > 0;
}

int
seq_ge(ulong x, ulong y)
{
	return (int)(x-y) >= 0;
}

/*
 *  use the time between the first SYN and its ack as the
 *  initial round trip time.  the estimate and its deviation are
 *  stored scaled, and the round trip timer is halted.
 */
void
tcpsynackrtt(Conv *s)
{
	Tcpctl *tcb = (Tcpctl*)s->ptcl;
	int elapsed = msec - tcb->sndsyntime;

	tcb->srtt = elapsed<<LOGAGAIN;
	tcb->mdev = elapsed<<LOGDGAIN;

	tcphalt(&tcb->rtt_timer);
}

/*
 *  Process the acknowledgement and window carried by seg:
 *  update the send window, and if new data is acked grow the
 *  congestion window, fold a round trip sample into srtt/mdev,
 *  drop the acked bytes from the write queue and restart the
 *  retransmit timer if data is still outstanding.
 *  An ack for something not yet sent only forces a reply.
 */
void
update(Conv *s, Tcp *seg)
{
	int rtt, delta;
	Tcpctl *tcb;
	ushort acked, expand;

	tcb = (Tcpctl*)s->ptcl;

	tcb->kacounter = MAXBACKOFF;	/* keep alive count down */

	/* ack for data we have not sent: answer with an ack of our own */
	if(seq_gt(seg->ack, tcb->snd.nxt)) {
		tcb->flags |= FORCE;
		return;
	}

	/* take the window only from segments not older than the last one used */
	if(seq_ge(seg->ack,tcb->snd.wl2))
	if(seq_gt(seg->seq,tcb->snd.wl1) || (seg->seq == tcb->snd.wl1)) {
		/* window reopened: start sending again from the unacked data */
		if(seg->wnd != 0 && tcb->snd.wnd == 0)
			tcb->snd.ptr = tcb->snd.una;

		tcb->snd.wnd = seg->wnd;
		tcb->snd.wl1 = seg->seq;
		tcb->snd.wl2 = seg->ack;
	}

	if(!seq_gt(seg->ack, tcb->snd.una))
		return;

	/* something new was acked in this packet */
	tcb->flags |= ACKED;

	/* Compute the new send window size */
	acked = seg->ack - tcb->snd.una;
	if(tcb->cwind < tcb->snd.wnd) {
		if(tcb->cwind < tcb->ssthresh) {
			/* below the threshold: grow by what was acked, at most one mss */
			expand = tcb->mss;
			if(acked < expand)
				expand = acked;
		}
		else
			expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;

		/*
		 * NOTE(review): if int is wider than ushort the sum below is
		 * computed as int and this test cannot succeed; the clamp to
		 * snd.wnd that follows is what bounds cwind.
		 */
		if(tcb->cwind + expand < tcb->cwind)
			expand = 65535 - tcb->cwind;
		if(tcb->cwind + expand > tcb->snd.wnd)
			expand = tcb->snd.wnd - tcb->cwind;
		if(expand != 0)
			tcb->cwind += expand;
	}

	/* Adjust the timers according to the round trip time */
	if(tcb->rtt_timer.state == TimerON && seq_ge(seg->ack, tcb->rttseq)) {
		tcphalt(&tcb->rtt_timer);
		/* samples taken across a retransmission are not used */
		if((tcb->flags&RETRAN) == 0) {
			tcb->backoff = 0;
			rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
			if(rtt == 0)
				rtt = 1;	/* otherwise all close systems will rexmit in 0 time */
			rtt *= MSPTICK;
			if (tcb->srtt == 0) {
				/* first sample */
				tcb->srtt = rtt << LOGAGAIN;
				tcb->mdev = rtt << LOGDGAIN;
			} else {
				delta = rtt - (tcb->srtt>>LOGAGAIN);
				tcb->srtt += delta;
				if(tcb->srtt <= 0)
					tcb->srtt = 1;

				delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
				tcb->mdev += delta;
				if(tcb->mdev <= 0)
					tcb->mdev = 1;
			}
		}
	}

	/* the first new ack covers our SYN, which is not in the write queue */
	if((tcb->flags & SYNACK) == 0) {
		tcb->flags |= SYNACK;
		acked--;
		tcb->sndcnt--;
	}

	qdiscard(s->wq, acked);

	tcb->sndcnt -= acked;
	tcb->snd.una = seg->ack;
	if(seq_gt(seg->ack, tcb->snd.urg))
		tcb->snd.urg = seg->ack;

	/* restart the retransmit timer only if data is still outstanding */
	tcphalt(&tcb->timer);
	if(tcb->snd.una != tcb->snd.nxt)
		tcpgo(&tcb->timer);

	if(seq_lt(tcb->snd.ptr, tcb->snd.una))
		tcb->snd.ptr = tcb->snd.una;

	tcb->flags &= ~RETRAN;
	tcb->backoff = 0;
}

/*
 *  Input routine for a received TCP segment in bp.
 *  Verifies the checksum, converts the header, finds the
 *  conversation the segment belongs to (or clones one from a
 *  listener for a SYN), then runs the input state machine with
 *  the control block locked.  Segments for which no conversation
 *  exists are answered with a reset.
 *
 *  Exits:	output - run tcpoutput, unlock, return
 *		raise  - unlock, free bp, then tcpkick(s, 0)
 */
void
tcpiput(Block *bp)
{
	Tcp seg;
	Tcphdr *h;
	int hdrlen;
	Tcpctl *tcb;
	ushort length;
	Ipaddr source, dest;
	Conv *spec, *gen, *s, **p;

	h = (Tcphdr*)(bp->rp);

	dest = nhgetl(h->tcpdst);
	source = nhgetl(h->tcpsrc);
	length = nhgets(h->length);

	/* set up the fields that are summed along with the segment */
	h->Unused = 0;
	hnputs(h->tcplen, length-TCP_PKT);
	if(ptclcsum(bp, TCP_IPLEN, length-TCP_IPLEN)) {
		tcp.csumerr++;
		netlog(Logtcp, "bad tcp proto cksum\n");
		freeblist(bp);
		return;
	}

	/* on failure bp is not freed here; ntohtcp frees on a short header length */
	hdrlen = ntohtcp(&seg, &bp);
	if(hdrlen < 0){
		tcp.hlenerr++;
		netlog(Logtcp, "bad tcp hdr len\n");
		return;
	}

	/* trim the packet to the size claimed by the datagram */
	length -= hdrlen+TCP_PKT;
	bp = trimblock(bp, hdrlen+TCP_PKT, length);
	if(bp == nil){
		tcp.lenerr++;
		netlog(Logtcp, "tcp len < 0 after trim\n");
		return;
	}


	/* Look for a connection. failing that look for a listener. */
	for(p = tcp.conv; *p; p++) {
		s = *p;
		if(s->rport == seg.source &&
		   s->lport == seg.dest && s->raddr == source)
			break;
	}
	s = *p;
	if(s){
		/* can't send packets to a listener */
		tcb = (Tcpctl*)s->ptcl;
		if(tcb->state == Listen){
			freeblist(bp);
			return;
		}
	}
	if(s == nil && (seg.flags & SYN)) {
		/*
		 *  dump packets with bogus flags
		 */
		if(seg.flags & RST){
			freeblist(bp);
			return;
		}
		if(seg.flags & ACK) {
			sndrst(source, dest, length, &seg);
			freeblist(bp);
			return;
		}

		/*
		 *  find a listener specific to this port (spec) or,
		 *  failing that, a general one (gen)
		 */
		gen = nil;
		spec = nil;
		for(p = tcp.conv; *p; p++) {
			s = *p;
			tcb = (Tcpctl*)s->ptcl;
			if((tcb->flags & CLONE) == 0)
				continue;
			if(tcb->state != Listen)
				continue;
			if(s->rport == 0 && s->raddr == 0) {
				if(s->lport == seg.dest){
					spec = s;
					break;
				}
				if(s->lport == 0)
					gen = s;
			}
		}
		/* the new conversation is a copy of the listener, still in Listen */
		s = nil;
		if(spec != nil)
			s = tcpincoming(spec, &seg, source, dest);
		else
		if(gen != nil)
			s = tcpincoming(gen, &seg, source, dest);
	}
	if(s == nil) {
		freeblist(bp);
		sndrst(source, dest, length, &seg);
		return;
	}

	/* The rest of the input state machine is run with the control block
	 * locked and implements the state machine directly out of the RFC.
	 * Out-of-band data is ignored - it was always a bad idea.
	 */
	tcb = (Tcpctl*)s->ptcl;
	qlock(tcb);

	switch(tcb->state) {
	case Closed:
		sndrst(source, dest, length, &seg);
		goto raise;
	case Listen:
		if(seg.flags & SYN) {
			procsyn(s, &seg);
			tcpsndsyn(tcb);
			tcpsetstate(s, Syn_received);
			/* a SYN carrying data or FIN goes on through the general code */
			if(length != 0 || (seg.flags & FIN))
				break;
		}
		goto raise;
	case Syn_sent:
		/* an ack must cover our SYN and nothing we have not sent */
		if(seg.flags & ACK) {
			if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
				sndrst(source, dest, length, &seg);
				goto raise;
			}
		}
		if(seg.flags & RST) {
			if(seg.flags & ACK)
				localclose(s, Econrefused);
			goto raise;
		}

		if(seg.flags & SYN) {
			procsyn(s, &seg);
			if(seg.flags & ACK){
				update(s, &seg);
				tcpsynackrtt(s);
				tcpsetstate(s, Established);
			}
			else
				tcpsetstate(s, Syn_received);

			if(length != 0 || (seg.flags & FIN))
				break;

			freeblist(bp);
			goto output;
		}
		else
			freeblist(bp);

		qunlock(tcb);
		return;
	case Syn_received:
		/* doesn't matter if it's the correct ack, we're just trying to set timing */
		if(seg.flags & ACK)
			tcpsynackrtt(s);
		break;
	}

	/* Cut the data to fit the receive window */
	if(tcptrim(tcb, &seg, &bp, &length) == -1) {
		netlog(Logtcp, "tcp len < 0, %lux\n", seg.seq);
		update(s, &seg);
		if(tcb->sndcnt == 0 && tcb->state == Closing) {
			tcpsetstate(s, Time_wait);
			tcb->timer.start = MSL2*(1000 / MSPTICK);
			tcpgo(&tcb->timer);
		}
		if(!(seg.flags & RST)) {
			tcb->flags |= FORCE;
			goto output;
		}
		qunlock(tcb);
		return;
	}

	/* Cannot accept so answer with a rst */
	if(length && tcb->state == Closed) {
		sndrst(source, dest, length, &seg);
		goto raise;
	}

	/* The segment is beyond the current receive pointer so
	 * queue the data in the resequence queue
	 */
	if(seg.seq != tcb->rcv.nxt)
	if(length != 0 || (seg.flags & (SYN|FIN))) {
		update(s, &seg);
		addreseq(tcb, &seg, bp, length);
		tcb->flags |= FORCE;
		goto output;
	}

	/*
	 *  keep looping till we've processed this packet plus any
	 *  adjacent packets in the resequence queue
	 */
	for(;;) {
		if(seg.flags & RST) {
			localclose(s, Econrefused);
			goto raise;
		}

		if((seg.flags&ACK) == 0)
			goto raise;

		switch(tcb->state) {
		case Syn_received:
			if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
				sndrst(source, dest, length, &seg);
				goto raise;
			}
			update(s, &seg);
			tcpsetstate(s, Established);
			/* falls into the next case; update is called again there */
		case Established:
		case Close_wait:
			update(s, &seg);
			break;
		case Finwait1:
			update(s, &seg);
			/* our FIN has been acked */
			if(tcb->sndcnt == 0){
				tcb->f2counter = MAXBACKOFF;
				tcpsetstate(s, Finwait2);
				tcb->timer.start = MSL2 * (1000 / MSPTICK);
				tcpgo(&tcb->timer);
			}
			break;
		case Finwait2:
			update(s, &seg);
			break;
		case Closing:
			update(s, &seg);
			if(tcb->sndcnt == 0) {
				tcpsetstate(s, Time_wait);
				tcb->timer.start = MSL2*(1000 / MSPTICK);
				tcpgo(&tcb->timer);
			}
			break;
		case Last_ack:
			update(s, &seg);
			if(tcb->sndcnt == 0) {
				localclose(s, nil);
				goto raise;
			}
			/* falls into Time_wait if our FIN is still unacked */
		case Time_wait:
			tcb->flags |= FORCE;
			if(tcb->timer.state != TimerON)
				tcpgo(&tcb->timer);
		}

		/* urgent data is not delivered; it is pulled off the front of the block */
		if((seg.flags&URG) && seg.urg) {
			if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
				tcb->rcv.urg = seg.urg + seg.seq;
				pullblock(&bp, seg.urg);
			}
		}
		else
		if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
			tcb->rcv.urg = tcb->rcv.nxt;

		if(length == 0) {
			if(bp != nil)
				freeblist(bp);
		}
		else {
			switch(tcb->state){
			default:
				/* Ignore segment text */
				if(bp != nil)
					freeblist(bp);
				break;

			case Syn_received:
			case Established:
			case Finwait1:
				/* If we still have some data place on
				 * receive queue
				 */
				if(bp) {
					qpass(s->rq, bp);
					bp = nil;
				}
				tcb->rcv.nxt += length;
				tcprcvwin(s);
				/* the ack is delayed unless something forces it out earlier */
				if(tcb->acktimer.state != TimerON)
					tcpgo(&tcb->acktimer);

				/* force an ack if there's a lot of unacked data */
				if(tcb->rcv.nxt-tcb->last_ack > (QMAX>>4))
					tcb->flags |= FORCE;

				break;
			case Finwait2:
				/* no process to read the data, send a reset */
				if(bp != nil)
					freeblist(bp);
				sndrst(source, dest, length, &seg);
				qunlock(tcb);
				return;
			}
		}

		if(seg.flags & FIN) {
			tcb->flags |= FORCE;

			/* the FIN takes one unit of sequence space */
			switch(tcb->state) {
			case Syn_received:
			case Established:
				tcb->rcv.nxt++;
				tcpsetstate(s, Close_wait);
				break;
			case Finwait1:
				tcb->rcv.nxt++;
				if(tcb->sndcnt == 0) {
					tcpsetstate(s, Time_wait);
					tcb->timer.start = MSL2*(1000/MSPTICK);
					tcpgo(&tcb->timer);
				}
				else
					tcpsetstate(s, Closing);
				break;
			case Finwait2:
				tcb->rcv.nxt++;
				tcpsetstate(s, Time_wait);
				tcb->timer.start = MSL2 * (1000/MSPTICK);
				tcpgo(&tcb->timer);
				break;
			case Close_wait:
			case Closing:
			case Last_ack:
				break;
			case Time_wait:
				tcpgo(&tcb->timer);
				break;
			}
		}

		/*
		 *  get next adjacent segment from the resequence queue.
		 *  dump/trim any overlapping segments
		 */
		for(;;) {
			if(tcb->reseq == nil)
				goto output;

			/* the head of the queue is still beyond the receive pointer */
			if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
				goto output;

			getreseq(tcb, &seg, &bp, &length);

			if(tcptrim(tcb, &seg, &bp, &length) == 0)
				break;
		}
	}
output:
	tcpoutput(s);
	qunlock(tcb);
	return;
raise:
	qunlock(tcb);
	freeblist(bp);
	tcpkick(s, 0);
}

/*
 *  Send whatever the send window, the congestion window and the
 *  small packet rules allow, at most 100 segments per call.
 *  A segment without data goes out only if FORCE is set.
 *  Nothing is sent in Listen, Closed or Finwait2.
 *
 *  always enters and exits with the tcb locked
 */
void
tcpoutput(Conv *s)
{
	int x;
	Tcp seg;
	int msgs;
	Tcphdr ph;
	Tcpctl *tcb;
	Block *hbp, *bp;
	int sndcnt, n, first;
	ulong ssize, dsize, usable, sent;

	tcb = (Tcpctl*)s->ptcl;

	switch(tcb->state) {
	case Listen:
	case Closed:
	case Finwait2:
		return;
	}

	/* force an ack when a window has opened up */
	if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
		tcb->rcv.blocked = 0;
		tcb->flags |= FORCE;
	}

	/* first: nothing was in flight when we were called */
	first = tcb->snd.ptr == tcb->snd.una;
	for(msgs = 0; msgs < 100; msgs++) {
		sndcnt = tcb->sndcnt;
		sent = tcb->snd.ptr - tcb->snd.una;

		/* Don't send anything else until our SYN has been acked */
		if(sent != 0 && (tcb->flags & (SYNACK|FORCE)) == 0)
			break;

		/* Compute usable segment based on offered window and limit
		 * window probes to one
		 */
		if(tcb->snd.wnd == 0){
			if(sent != 0) {
				if ((tcb->flags&FORCE) == 0)
					break;
				tcb->snd.ptr = tcb->snd.una;
			}
			usable = 1;
		}
		else {
			usable = tcb->cwind;
			if(tcb->snd.wnd < usable)
				usable = tcb->snd.wnd;
			usable -= sent;

			/*
			 *  hold small pieces in the hopes that more will come along.
			 *  this is pessimal in synchronous communications so go ahead
			 *  and send if:
			 *   - all previous xmits are acked
			 *   - we've forced to send anyways
			 *   - we've just gotten an ACK for a previous packet
			 */
			if(!first)
			if(!(tcb->flags&(FORCE|ACKED)))
			if((sndcnt-sent) < tcb->mss)
				usable = 0;
		}
		tcb->flags &= ~ACKED;

		/* ssize: sequence space this segment takes; dsize: data bytes in it */
		ssize = sndcnt-sent;
		if(usable < ssize)
			ssize = usable;
		if(tcb->mss < ssize)
			ssize = tcb->mss;
		dsize = ssize;
		seg.urg = 0;

		if(ssize == 0)
		if((tcb->flags&FORCE) == 0)
			break;

		/* this segment carries the ack, so the delayed one is not needed */
		tcphalt(&tcb->acktimer);

		tcb->flags &= ~FORCE;
		tcprcvwin(s);

		/* By default we will generate an ack */
		seg.source = s->lport;
		seg.dest = s->rport;
		seg.flags = ACK;
		seg.mss = 0;

		switch(tcb->state){
		case Syn_sent:
			seg.flags = 0;
			/* No break */
		case Syn_received:
			/* sending from the very start: this is the SYN, which is not data */
			if(tcb->snd.ptr == tcb->iss){
				seg.flags |= SYN;
				dsize--;
				seg.mss = tcpmtu(s);
			}
			break;
		}
		tcb->last_ack = tcb->rcv.nxt;
		seg.seq = tcb->snd.ptr;
		seg.ack = tcb->rcv.nxt;
		seg.wnd = tcb->rcv.wnd;

		/* Pull out data to send */
		bp = nil;
		if(dsize != 0) {
			bp = qcopy(s->wq, dsize, sent);
			/* the queue came up short, so the missing unit of sequence is our FIN */
			if(BLEN(bp) != dsize) {
				seg.flags |= FIN;
				dsize--;
			}
			netlog(Logtcp, "qcopy: dlen %d blen %d sndcnt %d qlen %d sent %d rp[0] %d\n",
				dsize, BLEN(bp), sndcnt, qlen(s->wq), sent, bp->rp[0]);
		}

		if(sent+dsize == sndcnt)
			seg.flags |= PSH;

		/* keep track of balance of resent data */
		if(tcb->snd.ptr < tcb->snd.nxt) {
			n = tcb->snd.nxt - tcb->snd.ptr;
			if(ssize < n)
				n = ssize;
			tcb->resent += n;
		}

		tcb->snd.ptr += ssize;

		/* Pull up the send pointer so we can accept acks
		 * for this window
		 */
		if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
			tcb->snd.nxt = tcb->snd.ptr;

		/* Fill in fields of pseudo IP header */
		hnputl(ph.tcpdst, s->raddr);
		if(s->laddr == 0)
			s->laddr = Mediagetsrc(ph.tcpdst);

		hnputl(ph.tcpsrc, s->laddr);
		hnputs(ph.tcpsport, s->lport);
		hnputs(ph.tcpdport, s->rport);

		/* Build header, link data and compute cksum */
		hbp = htontcp(&seg, bp, &ph);
		if(hbp == nil) {
			freeblist(bp);
			return;
		}

		/* Start the transmission timers if there is new data and we
		 * expect acknowledges
		 */
		if(ssize != 0){
			/* timeout in ticks from the rtt estimate, capped at 10000 ms */
			x = backoff(tcb->backoff) *
			    (tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
			if(x > (10000/MSPTICK))
				x = 10000/MSPTICK;
			tcb->timer.start = x;

			if(tcb->timer.state != TimerON)
				tcpgo(&tcb->timer);

			/* If round trip timer isn't running, start it */
			if(tcb->rtt_timer.state != TimerON) {
				tcpgo(&tcb->rtt_timer);
				tcb->rttseq = tcb->snd.ptr;
			}
		}

		ipoput(hbp, 0, s->ttl);
	}
}

/*
 *  the BSD convention (hack?) for keep alives.  resend last byte acked.
 *  the segment carries sequence number snd.una-1 and one byte of
 *  data, or a FIN instead of the byte when in Finwait2.
 *  nothing is sent if no block can be had.
 */
void
tcpkeepalive(Conv *s)
{
	Tcp seg;
	Tcphdr ph;
	Tcpctl *tcb;
	Block *hbp,*dbp;

	tcb = (Tcpctl*)s->ptcl;


	dbp = nil;
	seg.urg = 0;
	seg.source = s->lport;
	seg.dest = s->rport;
	seg.flags = ACK|PSH;
	seg.mss = 0;
	seg.seq = tcb->snd.una-1;
	seg.ack = tcb->rcv.nxt;
	seg.wnd = tcb->rcv.wnd;
	tcb->last_ack = tcb->rcv.nxt;
	if(tcb->state == Finwait2){
		seg.flags |= FIN;
	} else {
		/* htontcp treats allocb as able to return nil; so must we */
		dbp = allocb(1);
		if(dbp == nil)
			return;
		dbp->wp++;
	}

	/* Fill in fields of pseudo IP header */
	hnputl(ph.tcpdst, s->raddr);
	if(s->laddr == 0)
		s->laddr = Mediagetsrc(ph.tcpdst);
	hnputl(ph.tcpsrc, s->laddr);
	hnputs(ph.tcpsport, s->lport);
	hnputs(ph.tcpdport, s->rport);

	/* Build header, link data and compute cksum */
	hbp = htontcp(&seg, dbp, &ph);
	if(hbp == nil) {
		freeblist(dbp);
		return;
	}

	ipoput(hbp, 0, s->ttl);
}

/*
 *  Retransmit: back the send pointer up to the first unacked
 *  byte, shrink the congestion window and resend.  Takes the
 *  tcb lock itself.
 */
void
tcprxmit(Conv *s)
{
	Tcpctl *tcb;

	tcb = (Tcpctl*)s->ptcl;


	qlock(tcb);
	tcb->flags |= RETRAN|FORCE;
	tcb->snd.ptr = tcb->snd.una;

	/* Pull window down to a single packet and halve the slow
	 * start threshold, but never below one segment
	 */
	tcb->ssthresh = tcb->cwind / 2;
	if(tcb->mss > tcb->ssthresh)
		tcb->ssthresh = tcb->mss;

	tcb->cwind = tcb->mss;
	tcpoutput(s);

	tcp.rexmit++;

	qunlock(tcb);
}

/*
 *  Timer callback for a connection's main timer; arg is the Conv.
 *
 *  Time_wait:	close the connection with no error.
 *  Finwait2:	count down f2counter; while it is positive send a keep
 *		alive and restart the timer, otherwise close with Etimedout.
 *  otherwise:	bump the backoff count and retransmit, or close with
 *		Etimedout once the count reaches the limit (3/4 of
 *		MAXBACKOFF while in Syn_sent, MAXBACKOFF in other states).
 */
void
tcptimeout(void *arg)
{
	Conv *s;
	Tcpctl *tcb;
	int limit;

	s = (Conv*)arg;
	tcb = (Tcpctl*)s->ptcl;

	switch(tcb->state){
	case Time_wait:
		localclose(s, nil);
		break;
	case Finwait2:
		if(--(tcb->f2counter) > 0) {
			qlock(tcb);
			tcpkeepalive(s);
			qunlock(tcb);
			tcpgo(&tcb->timer);
		} else
			localclose(s, Etimedout);
		break;
	default:
		tcb->backoff++;
		limit = (tcb->state == Syn_sent) ? (3*MAXBACKOFF)/4 : MAXBACKOFF;
		if(tcb->backoff < limit)
			tcprxmit(s);
		else
			localclose(s, Etimedout);
		break;
	}
}

/*
 *  Report whether sequence number seq falls inside the receive window,
 *  i.e. within [rcv.nxt, rcv.nxt+rcv.wnd-1] as judged by seq_within.
 */
int
inwindow(Tcpctl *tcb, int seq)
{
	int inside;

	inside = seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
	return inside;
}

/*
 *  Initialise the control block of s from segment seg (presumably one
 *  carrying SYN; the flag is not checked here).  Records the peer's
 *  initial sequence number and window, adopts its mss if one was
 *  supplied, limits the mss to what tcpmtu reports, and starts the
 *  congestion window at one mss.  FORCE is set in the flags.
 */
void
procsyn(Conv *s, Tcp *seg)
{
	Tcpctl *tcb;
	int mtu;

	tcb = (Tcpctl*)s->ptcl;
	tcb->flags |= FORCE;

	/* peer's sequence space: first expected byte follows the SYN */
	tcb->irs = seg->seq;
	tcb->snd.wl1 = seg->seq;
	tcb->rcv.nxt = seg->seq + 1;
	tcb->rcv.urg = tcb->rcv.nxt;

	/* peer's advertised window */
	tcb->snd.wnd = seg->wnd;
	tcb->max_snd = seg->wnd;

	/* take the peer's mss if it sent one, then clamp to the mtu */
	if(seg->mss != 0)
		tcb->mss = seg->mss;

	mtu = tcpmtu(s);
	if(tcb->mss > mtu)
		tcb->mss = mtu;

	tcb->cwind = tcb->mss;
}

/*
 *  Queue a segment on the connection's resequencing list.
 *
 *  The list is kept sorted by starting sequence number; the new entry
 *  goes in front of the first entry whose sequence number is strictly
 *  greater (per seq_lt), so entries with equal sequence numbers stay in
 *  arrival order.  bp is always consumed: it is either stored in the new
 *  entry or, if the entry cannot be allocated, freed.
 */
void
addreseq(Tcpctl *tcb, Tcp *seg, Block *bp, ushort length)
{
	Reseq *ent, **link;

	ent = malloc(sizeof(Reseq));
	if(ent == nil){
		freeblist(bp);	/* bp always consumed by addreseq */
		return;
	}

	ent->seg = *seg;
	ent->bp = bp;
	ent->length = length;

	/* Walk the links until the entry we must precede, or the end */
	for(link = &tcb->reseq; *link != nil; link = &(*link)->next)
		if(seq_lt(seg->seq, (*link)->seg.seq))
			break;

	ent->next = *link;
	*link = ent;
}

/*
 *  Remove the first entry from the connection's resequencing list and
 *  hand its segment header, data and length back through seg, bp and
 *  length.  If the list is empty the out parameters are left untouched.
 */
void
getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
	Reseq *head;

	head = tcb->reseq;
	if(head != nil){
		tcb->reseq = head->next;

		*seg = head->seg;
		*bp = head->bp;
		*length = head->length;

		free(head);
	}
}

/*
 *  Trim an incoming segment so that it fits the receive window.
 *
 *  seg, *bp and *length describe the segment header, its data and the
 *  data length; all three may be modified.
 *
 *  Returns 0 if the segment is acceptable (possibly after trimming).
 *  Returns -1 if no part of it is acceptable; in that case *bp has been
 *  freed (the pointer itself is not cleared).
 */
int
tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
	ushort len;
	Block *nbp;
	byte accept;
	int dupcnt, excess;

	/* len is the sequence space used: data plus one each for SYN and FIN */
	accept = 0;
	len = *length;
	if(seg->flags & SYN)
		len++;
	if(seg->flags & FIN)
		len++;

	if(tcb->rcv.wnd == 0) {
		/*
		 * Closed window: only an empty segment sitting exactly at
		 * rcv.nxt is accepted; anything else falls through with
		 * accept still 0 and is rejected below.
		 */
		if(len == 0 && seg->seq == tcb->rcv.nxt)
			return 0;
	}
	else {
		/* Some part of the segment should be in the window */
		if(inwindow(tcb,seg->seq))
			accept++;
		else
		if(len != 0) {
			/* last byte in window, or segment spans rcv.nxt */
			if(inwindow(tcb, seg->seq+len-1) ||
			seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1))
				accept++;
		}
	}
	if(!accept) {
		freeblist(*bp);
		return -1;
	}

	/* dupcnt: sequence space at the front that we have already received */
	dupcnt = tcb->rcv.nxt - seg->seq;
	if(dupcnt > 0){
		tcb->rerecv += dupcnt;
		if(seg->flags & SYN){
			/* the SYN accounts for the first duplicated sequence number */
			seg->flags &= ~SYN;
			seg->seq++;

			if (seg->urg > 1)
				seg->urg--;
			else
				seg->flags &= ~URG;
			dupcnt--;
		}
		if(dupcnt > 0){
			/* drop the duplicated data and slide the header to match */
			pullblock(bp, (ushort)dupcnt);
			seg->seq += dupcnt;
			*length -= dupcnt;

			/* urgent pointer moves with the data, or is cancelled */
			if (seg->urg > dupcnt)
				seg->urg -= dupcnt;
			else {
				seg->flags &= ~URG;
				seg->urg = 0;
			}
		}
	}

	/* excess: data that extends past the right edge of the window */
	excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd);
	if(excess > 0) {
		tcb->rerecv += excess;
		*length -= excess;
		/* presumably copyblock keeps the first *length bytes -- confirm */
		nbp = copyblock(*bp, *length);
		freeblist(*bp);
		*bp = nbp;
		/* the tail was cut off, so the FIN is dropped with it */
		seg->flags &= ~FIN;
	}
	return 0;
}

/*
 *  Deliver an advisory message to the conversation matching the TCP
 *  header at the start of bp.  The header's source address/port are
 *  matched against a conversation's local side and its destination
 *  address/port against the remote side (presumably bp is a returned
 *  copy of a packet we sent -- confirm against the caller).
 *
 *  Only a connection still in Syn_sent is affected: it is closed with
 *  msg.  bp is always freed.
 */
void
tcpadvise(Block *bp, char *msg)
{
	Tcphdr *hdr;
	Tcpctl *tcb;
	Ipaddr laddr, raddr;
	ushort lport, rport;
	Conv *s, **p;

	hdr = (Tcphdr*)(bp->rp);

	raddr = nhgetl(hdr->tcpdst);
	laddr = nhgetl(hdr->tcpsrc);
	lport = nhgets(hdr->tcpsport);
	rport = nhgets(hdr->tcpdport);

	/* Look for a connection; stop at the first full match */
	for(p = tcp.conv; (s = *p) != nil; p++) {
		if(s->rport != rport || s->lport != lport)
			continue;
		if(s->raddr != raddr || s->laddr != laddr)
			continue;

		tcb = (Tcpctl*)s->ptcl;
		qlock(tcb);
		if(tcb->state == Syn_sent)
			localclose(s, msg);
		qunlock(tcb);
		break;
	}
	freeblist(bp);
}

/*
 *  Handle a control request of n fields f[0..n-1] for conversation c.
 *  The only request understood is the single word "hangup", which is
 *  passed to tcphangup; its result is returned.  Anything else yields
 *  an error string.
 */
char*
tcpctl(Conv* c, char** f, int n)
{
	if(n != 1 || strcmp(f[0], "hangup") != 0)
		return "unknown control request";

	return tcphangup(c);
}

/*
 *  Fill in the tcp protocol descriptor, start the "tcpack" kernel
 *  process running tcpackproc, and register the protocol with fs.
 */
void
tcpinit(Fs *fs)
{
	/* identity and sizing */
	tcp.name = "tcp";
	tcp.ipproto = IP_TCPPROTO;
	tcp.nc = Nchans;
	tcp.ptclsize = sizeof(Tcpctl);

	/* conversation life cycle */
	tcp.create = tcpcreate;
	tcp.connect = tcpconnect;
	tcp.announce = tcpannounce;
	tcp.close = tcpclose;

	/* data path and control */
	tcp.kick = tcpkick;
	tcp.rcv = tcpiput;
	tcp.advise = tcpadvise;
	tcp.ctl = tcpctl;
	tcp.state = tcpstate;

	kproc("tcpack", tcpackproc, 0);

	Fsproto(fs, &tcp);
}
.
## diffname ip/tcp.c 1997/0403
## diff -e /n/emeliedump/1997/0327/sys/src/brazil/ip/tcp.c /n/emeliedump/1997/0403/sys/src/brazil/ip/tcp.c
280a

	return nil;
.
278a
	char *e;

	e = Fsstdannounce(c, argv, argc);
	if(e != nil)
		return e;
.
276,277c
static char*
tcpannounce(Conv *c, char **argv, int argc)
.
248c

	return nil;
.
244,246c
	e = Fsstdconnect(c, argv, argc);
	if(e != nil)
		return e;
.
242c
	char *e;
.
## diffname ip/tcp.c 1997/0423
## diff -e /n/emeliedump/1997/0403/sys/src/brazil/ip/tcp.c /n/emeliedump/1997/0423/sys/src/brazil/ip/tcp.c
1064a
	USED(m);
.
1055c
tcpiput(Media *m, Block *bp)
.
193c
void	tcpiput(Media*, Block*);
.
## diffname ip/tcp.c 1997/0515
## diff -e /n/emeliedump/1997/0423/sys/src/brazil/ip/tcp.c /n/emeliedump/1997/0515/sys/src/brazil/ip/tcp.c
235c
	if(oldstate == Syn_sent && newstate != Closed)
.
## diffname ip/tcp.c 1997/0802
## diff -e /n/emeliedump/1997/0515/sys/src/brazil/ip/tcp.c /n/emeliedump/1997/0802/sys/src/brazil/ip/tcp.c
903c
		if(x >= low || x <= high)
.
## diffname ip/tcp.c 1997/0806
## diff -e /n/emeliedump/1997/0802/sys/src/brazil/ip/tcp.c /n/emeliedump/1997/0806/sys/src/brazil/ip/tcp.c
1471a
			tcp.wclosed++;
.
## diffname ip/tcp.c 1997/0916
## diff -e /n/emeliedump/1997/0806/sys/src/brazil/ip/tcp.c /n/emeliedump/1997/0916/sys/src/brazil/ip/tcp.c
1939a
	tcp.stats = tcpstats;
.
1926a
int
tcpstats(char *buf, int len)
{
	return snprint(buf, len, "\tdupp %d dupb %d\n", tstats.dup, tstats.dupb);
}

.
1848a
		tstats.dup++;
		tstats.dupb += dupcnt;
.
1472d
1252a
		tcp.order++;
.
188a
static struct Tcpstats
{
	ulong	dup;		/* (partially) duplicated packets */
	ulong	dupb;		/* duplicated bytes */
} tstats;

.
## diffname ip/tcp.c 1997/1104
## diff -e /n/emeliedump/1997/0916/sys/src/brazil/ip/tcp.c /n/emeliedump/1997/1104/sys/src/brazil/ip/tcp.c
808,835d
## diffname ip/tcp.c 1998/0306
## diff -e /n/emeliedump/1997/1104/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0306/sys/src/brazil/ip/tcp.c
1934a

.
1926a
	tcp.inuse = tcpinuse;
.
1910c
	int n;

	n = snprint(buf, len,
		"tcp: csum %d hlen %d len %d order %d rexmit %d",
		tcp.csumerr, tcp.hlenerr, tcp.lenerr, tcp.order, tcp.rexmit);
	n += snprint(buf+n, len-n, " dupp %d dupb %d\n",
		tstats.dup, tstats.dupb);
	return n;
.
1898a
/* called with c->car qlocked */
.
1883,1884c
		if(s->rport == pdest)
		if(s->lport == psource)
		if(ipcmp(s->raddr, dest) == 0)
		if(ipcmp(s->laddr, source) == 0){
.
1875,1876c
	v4tov6(dest, h->tcpdst);
	v4tov6(source, h->tcpsrc);
.
1869c
	uchar source[IPaddrlen];
	uchar dest[IPaddrlen];
.
1795c
	uchar accept;
.
1630c
	hbp = htontcp(&seg, dbp, &tcb->protohdr);
.
1621,1628d
1597d
1591c
 *  the BSD convention (hack?) for keep alives.  resend last uchar acked.
.
1560c
		hbp = htontcp(&seg, bp, &tcb->protohdr);
.
1550,1558d
1417d
1119c
			if(s->rport == 0 && ipcmp(s->raddr, IPnoaddr) == 0) {
.
1079,1080c
		if(s->rport == seg.source)
		if(s->lport == seg.dest)
		if(ipcmp(s->raddr, source) == 0)
.
1046,1047c
	v4tov6(dest, h->tcpdst);
	v4tov6(source, h->tcpsrc);
.
1043d
1040c
	uchar source[IPaddrlen], dest[IPaddrlen];
.
1033c
tcpiput(uchar*, Block *bp)
.
869a
	h = &tcb->protohdr;
	memset(h, 0, sizeof(*h));
	h->proto = IP_TCPPROTO;
	hnputs(h->tcpsport, new->lport);
	hnputs(h->tcpdport, new->rport);
	v6tov4(h->tcpsrc, dst);
	v6tov4(h->tcpdst, src);

.
856a
	Tcphdr *h;
.
853c
tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst)
.
843c
		hbp = htontcp(&seg, nil, &tcb->protohdr);
.
835,841d
826,827d
814d
774,778d
772a
	hnputs(ph.tcpsport, seg->dest);
	hnputs(ph.tcpdport, seg->source);
.
768,770c
	/* make pseudo header */
	memset(&ph, 0, sizeof(ph));
	v6tov4(ph.tcpsrc, dest);
	v6tov4(ph.tcpdst, source);
.
763c
	uchar rflags;
.
760d
758c
sndrst(uchar *source, uchar *dest, ushort length, Tcp *seg)
.
756a

/*
 *  called with v4 (4 byte) addresses
 */
.
718c
	DBG(h->tcpsrc)(Logtcpmsg, "%d > %d s %l8.8ux a %8.8lux %s w %.4ux l %d\n",
.
689c
	uchar *optr;
.
677c
	DBG(h->tcpdst)(Logtcpmsg, "%d > %d s %l8.8ux a %8.8lux %s w %.4ux l %d\n",
.
664,666d
658,659d
654,656c
	memmove(h, ph, TCP_TCBPHDRSZ);

	/* copy in variable bits */
.
650,652c
	/* copy in pseudo ip header plus port numbers */
.
564,567c
	ifc = findipifc(s->raddr, 0);
	if(ifc != nil)
		mtu = ifc->maxmtu - ifc->m->hsize - (TCP_PKT + TCP_HDRSIZE);
.
559,560c
	Ipifc *ifc;
.
552a

	/* create a prototype(pseudo) header */
	if(ipcmp(s->laddr, IPnoaddr) == 0)
		findlocalip(s->laddr, s->raddr);
	h = &tcb->protohdr;
	memset(h, 0, sizeof(*h));
	h->proto = IP_TCPPROTO;
	hnputs(h->tcpsport, s->lport);
	hnputs(h->tcpdport, s->rport);
	v6tov4(h->tcpsrc, s->laddr);
	v6tov4(h->tcpdst, s->raddr);
.
535a
	Tcphdr *h;
.
279,280c
static int
tcpinuse(Conv *c)
{
	Tcpctl *s;

	s = (Tcpctl*)(c->ptcl);
	return s->state != Closed;
.
277a
}
.
266,274c
	return snprint(state, n,
.
262d
258,259c
static int
tcpstate(Conv *c, char *state, int n)
.
234c
		ipmove(s->raddr, IPnoaddr);
.
212c
	uchar oldstate;
.
209c
tcpsetstate(Conv *s, uchar newstate)
.
199c
void	tcpiput(uchar*, Block*);
.
192c
	ulong	dupb;		/* duplicated uchars */
.
181c
#define DBG(x)	if((logmask & Logtcpmsg) && (iponlyset == 0 || memcmp(x, iponly+12, 4) == 0))netlog
.
178c

	Tcphdr	protohdr;		/* prototype header */
.
165,166c
	uchar	backoff;		/* Exponential backoff counter */
	uchar	flags;			/* State flags */
.
150c
		ulong	nxt;		/* Receive pointer to next uchar slot */
.
137,139c
	uchar	state;			/* Connection state */
	uchar	type;			/* Listening or active connection */
	uchar	code;			/* Icmp code */
.
118c
	uchar	flags;
.
107,108c
	uchar	tcpopt[2];
	uchar	tcpmss[2];
.
88,105c
	uchar	vihl;		/* Version and header length */
	uchar	tos;		/* Type of service */
	uchar	length[2];	/* packet length */
	uchar	id[2];		/* Identification */
	uchar	frag[2];	/* Fragment information */
	uchar	Unused;
	uchar	proto;
	uchar	tcplen[2];
	uchar	tcpsrc[4];
	uchar	tcpdst[4];
	uchar	tcpsport[2];
	uchar	tcpdport[2];
	uchar	tcpseq[4];
	uchar	tcpack[4];
	uchar	tcpflag[2];
	uchar	tcpwin[2];
	uchar	tcpcksum[2];
	uchar	tcpurg[2];
.
16a
	TCP_TCBPHDRSZ	= 40,
.
## diffname ip/tcp.c 1998/0313
## diff -e /n/emeliedump/1998/0306/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0313/sys/src/brazil/ip/tcp.c
1930a
	kproc("tcpack", tcpackproc, tcp);
}
.
1928,1929c
	Fsproto(fs, tcp);
.
1926c
	tcp = smalloc(sizeof(Proto));
	tpriv = tcp->priv = smalloc(sizeof(Tcppriv));
	tcp->name = "tcp";
	tcp->kick = tcpkick;
	tcp->connect = tcpconnect;
	tcp->announce = tcpannounce;
	tcp->ctl = tcpctl;
	tcp->state = tcpstate;
	tcp->create = tcpcreate;
	tcp->close = tcpclose;
	tcp->rcv = tcpiput;
	tcp->advise = tcpadvise;
	tcp->stats = tcpstats;
	tcp->inuse = tcpinuse;
	tcp->ipproto = IP_TCPPROTO;
	tcp->nc = Nchans;
	tcp->ptclsize = sizeof(Tcpctl);
	tpriv->tstats.tcpMaxConn = Nchans;
.
1910,1924c
	Proto *tcp;
	Tcppriv *tpriv;
.
1899,1904c
	tstats = tcp->priv;
	return snprint(buf, len, "%d %d %d %d %d %d %d %d %d %d %d %d %d %d",
		tstats->tcpRtoAlgorithm,
		tstats->tcpRtoMin,
		tstats->tcpRtoMax,
		tstats->tcpMaxConn,
		tstats->tcpActiveOpens,
		tstats->tcpPassiveOpens,
		tstats->tcpAttemptFails,
		tstats->tcpEstabResets,
		tstats->tcpCurrEstab,
		tstats->tcpInSegs,
		tstats->tcpOutSegs,
		tstats->tcpRetransSegs,
		tstats->InErrs,
		tstats->OutRsts);
.
1897c
	Tcpstats *tstats;
.
1895c
tcpstats(Proto *tcp, char *buf, int len)
.
1865c
	for(p = tcp->conv; *p; p++) {
.
1848c
tcpadvise(Proto *tcp, Block *bp, char *msg)
.
1810,1811d
1681c
			tcpgo(s->p->priv, &tcb->timer);
.
1645,1646c
	tpriv->tstats.tcpRetransSegs++;
.
1629d
1626a
	tpriv = s->p->priv;
.
1625a
	Tcppriv *tpriv;
.
1619c
	ipoput(s->p->f, hbp, 0, s->ttl);
.
1578c
		tpriv->tstats.tcpOutSegs++;
		ipoput(f, hbp, 0, s->ttl);
.
1573c
				tcpgo(tpriv, &tcb->rtt_timer);
.
1569c
				tcpgo(tpriv, &tcb->timer);
.
1528c
			netlog(f, Logtcp, "qcopy: dlen %d blen %d sndcnt %d qlen %d sent %d rp[0] %d\n",
.
1492c
		tcphalt(tpriv, &tcb->acktimer);
.
1423a
	f = s->p->f;
	tpriv = s->p->priv;

.
1422a
	Fs *f;
	Tcppriv *tpriv;
.
1378c
				tcpgo(tpriv, &tcb->timer);
.
1371c
				tcpgo(tpriv, &tcb->timer);
.
1362c
					tcpgo(tpriv, &tcb->timer);
.
1342c
				sndrst(tcp, source, dest, length, &seg);
.
1331c
					tcpgo(tpriv, &tcb->acktimer);
.
1293c
				tcpgo(tpriv, &tcb->timer);
.
1281c
				tcpgo(tpriv, &tcb->timer);
.
1270c
				tcpgo(tpriv, &tcb->timer);
.
1255c
				sndrst(tcp, source, dest, length, &seg);
.
1244a
			if(tcb->state == Established)
				tpriv->tstats.tcpEstabResets++;
.
1233c
		tpriv->order++;
.
1223c
		sndrst(tcp, source, dest, length, &seg);
.
1211c
			tcpgo(tpriv, &tcb->timer);
.
1206c
		netlog(f, Logtcp, "tcp len < 0, %lux\n", seg.seq);
.
1166c
				sndrst(tcp, source, dest, length, &seg);
.
1152c
		sndrst(tcp, source, dest, length, &seg);
.
1139c
		sndrst(tcp, source, dest, length, &seg);
.
1114c
		for(p = tcp->conv; *p; p++) {
.
1103c
			sndrst(tcp, source, dest, length, &seg);
.
1078c
	for(p = tcp->conv; *p; p++) {
.
1071,1072c
		tpriv->lenerr++;
		netlog(f, Logtcp, "tcp len < 0 after trim\n");
.
1062,1063c
		tpriv->hlenerr++;
		netlog(f, Logtcp, "bad tcp hdr len\n");
.
1054,1055c
		tpriv->csumerr++;
		netlog(f, Logtcp, "bad tcp proto cksum\n");
.
1044a
	f = tcp->f;
	tpriv = tcp->priv;
	
	tpriv->tstats.tcpInSegs++;

.
1043a
	Fs *f;
	Tcppriv *tpriv;
.
1035c
tcpiput(Proto *tcp, uchar*, Block *bp)
.
1025c
		tcpgo(tpriv, &tcb->timer);
.
1023c
	tcphalt(tpriv, &tcb->timer);
.
986c
		tcphalt(tpriv, &tcb->rtt_timer);
.
939a
	tpriv = s->p->priv;
.
938a
	Tcppriv *tpriv;
.
930c
	tcphalt(tpriv, &tcb->rtt_timer);
.
923a
	tpriv = s->p->priv;
.
921a
	Tcppriv *tpriv;
.
852c
	new = Fsnewcall(s, src, segp->source, dst, segp->dest);
.
837c
		ipoput(s->p->f, hbp, 0, s->ttl);
.
824c
		return commonerror();
.
807c
	ipoput(tcp->f, hbp, 0, MAXTTL);
.
781a
	tpriv->tstats.OutRsts++;
.
769a
	tpriv = tcp->priv;

.
768a
	Tcppriv *tpriv;
.
764c
sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg)
.
723c
		tcph->wnd, nhgets(h->length)-hdrlen-TCP_PKT); */
.
720c
/*	netlog(Logtcpmsg, "%d > %d s %l8.8ux a %8.8lux %s w %.4ux l %d\n",
.
682c
		tcph->wnd, dlen); */
.
679c
/*	netlog(f, Logtcpmsg, "%d > %d s %l8.8ux a %8.8lux %s w %.4ux l %d\n",
.
599a
		tpriv->tstats.tcpActiveOpens++;
.
594a
		tpriv->tstats.tcpPassiveOpens++;
.
586a
	tpriv = s->p->priv;

.
585a
	Tcppriv *tpriv;
.
574c
	ifc = findipifc(s->p->f, s->raddr, 0);
.
556c
		findlocalip(s->p->f, s->laddr, s->raddr);
.
522c
		Fsconnected(s, reason);
.
511,512c
	tcphalt(tpriv, &tcb->timer);
	tcphalt(tpriv, &tcb->rtt_timer);
.
508a
	tpriv = s->p->priv;
.
507a
	Tcppriv *tpriv;
.
491c
	qunlock(&tpriv->tl);
.
489c
		deltimer(tpriv, t);
.
487c
	qlock(&tpriv->tl);
.
482c
tcphalt(Tcppriv *tpriv, Timer *t)
.
478c
	qunlock(&tpriv->tl);
.
476c
		tpriv->timers = t;
.
473c
		t->next = tpriv->timers;
.
468c
	qlock(&tpriv->tl);
.
463c
tcpgo(Tcppriv *tpriv, Timer *t)
.
448c
		qunlock(&tpriv->tl);
.
441c
					deltimer(tpriv, t);
.
436c
		for(t = tpriv->timers; t != nil; t = tp) {
.
434c
		qlock(&tpriv->tl);
.
432c
		tsleep(&tpriv->tcpr, return0, 0, MSPTICK);
.
430a
	tcp = a;
	tpriv = tcp->priv;

.
429c
	Proto *tcp;
	Tcppriv *tpriv;
.
426c
tcpackproc(void *a)
.
381,382c
	if(tpriv->timers == t)
		tpriv->timers = t->next;
.
379c
deltimer(Tcppriv *tpriv, Timer *t)
.
314c
		Fsconnected(c, "Hangup");
.
291c
	Fsconnected(c, nil);
.
244c
		Fsconnected(s, nil);
.
221a
	if(oldstate == Established)
		tpriv->tstats.tcpCurrEstab--;
	if(newstate == Established)
		tpriv->tstats.tcpCurrEstab++;

.
215a
	tpriv = s->p->priv;

.
214a
	Tcppriv *tpriv;
.
201c
void	tcpiput(Proto*, uchar*, Block*);
.
196a
typedef struct Tcppriv Tcppriv;
struct Tcppriv
{
	Timer 	*timers;		/* List of active timers */
	QLock 	tl;			/* Protect timer list */
	Rendez	tcpr;			/* used by tcpackproc */

	/* MIB stats */
	Tcpstats tstats;

	/* non-MIB stats */
	ulong		csumerr;		/* checksum errors */
	ulong		hlenerr;		/* header length error */
	ulong		lenerr;			/* short packet */
	ulong		order;			/* out of order */

};

.
193,195c
	ulong	tcpRtoAlgorithm;
	ulong	tcpRtoMin;
	ulong	tcpRtoMax;
	ulong	tcpMaxConn;
	ulong	tcpActiveOpens;
	ulong	tcpPassiveOpens;
	ulong	tcpAttemptFails;
	ulong	tcpEstabResets;
	ulong	tcpCurrEstab;
	ulong	tcpInSegs;
	ulong	tcpOutSegs;
	ulong	tcpRetransSegs;
	ulong	InErrs;
	ulong	OutRsts;
};
.
191c
/* MIB II counters */
typedef struct Tcpstats Tcpstats;
struct Tcpstats
.
188,189d
183,185d
## diffname ip/tcp.c 1998/0421
## diff -e /n/emeliedump/1998/0313/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0421/sys/src/brazil/ip/tcp.c
585a
	tcb->mdev = tcp_irtt << LOGDGAIN;
.
40c
	DEF_RTT		= 500,		/* Default round trip */
.
## diffname ip/tcp.c 1998/0630
## diff -e /n/emeliedump/1998/0421/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0630/sys/src/brazil/ip/tcp.c
1969,1982c
		tpriv->tstats.tcpRtoAlgorithm,
		tpriv->tstats.tcpRtoMin,
		tpriv->tstats.tcpRtoMax,
		tpriv->tstats.tcpMaxConn,
		tpriv->tstats.tcpActiveOpens,
		tpriv->tstats.tcpPassiveOpens,
		tpriv->tstats.tcpAttemptFails,
		tpriv->tstats.tcpEstabResets,
		tpriv->tstats.tcpCurrEstab,
		tpriv->tstats.tcpInSegs,
		tpriv->tstats.tcpOutSegs,
		tpriv->tstats.tcpRetransSegs,
		tpriv->tstats.InErrs,
		tpriv->tstats.OutRsts);
.
1967c
	tpriv = tcp->priv;



.
1965c
	Tcppriv *tpriv;
.
586d
257,260c
	/**
	print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
		tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
	**/
.
40c
	DEF_RTT		= 150,		/* Default round trip */
.
## diffname ip/tcp.c 1998/0724
## diff -e /n/emeliedump/1998/0630/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0724/sys/src/brazil/ip/tcp.c
1938d
1935a
		if(tcb->state != Closed)
.
1933a
		tcb = (Tcpctl*)s->ptcl;
.
1142a
		if(tcb->state != Closed)
.
1140a
		tcb = (Tcpctl*)s->ptcl;
.
269,271c
//		s->lport = 0;		/* This connection is toast */
//		s->rport = 0;
//		ipmove(s->raddr, IPnoaddr);
.
## diffname ip/tcp.c 1998/0808
## diff -e /n/emeliedump/1998/0724/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0808/sys/src/brazil/ip/tcp.c
1816c
	for(i = 0;; i++) {
		if(i > 100)
			print("very long tcp resequence queue\n");
.
1796a
	int i;
.
## diffname ip/tcp.c 1998/0813
## diff -e /n/emeliedump/1998/0808/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0813/sys/src/brazil/ip/tcp.c
1580a
				seg.mss = tcpmtu(s);
			}
			break;
		case Syn_received:
			/*
			 *  don't send any data with a SYN/ACK packet
			 *  because Linux rejects the packet in its
			 *  attempt to solve the SYN attack problem
			 */
			if(tcb->snd.ptr == tcb->iss){
				seg.flags |= SYN;
				dsize = 0;
.
1576,1577d
269,271d
## diffname ip/tcp.c 1998/0825
## diff -e /n/emeliedump/1998/0813/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0825/sys/src/brazil/ip/tcp.c
1983c
	return snprint(buf, len, "%lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud",
.
## diffname ip/tcp.c 1998/0831
## diff -e /n/emeliedump/1998/0825/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0831/sys/src/brazil/ip/tcp.c
1587a
				ssize = 1;
.
## diffname ip/tcp.c 1998/0901
## diff -e /n/emeliedump/1998/0831/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0901/sys/src/brazil/ip/tcp.c
1512c
		if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
.
## diffname ip/tcp.c 1998/0918
## diff -e /n/emeliedump/1998/0901/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0918/sys/src/brazil/ip/tcp.c
1641a

			/* take into account delayed ack */
			if(sent <= 2*tcb->mss)
				x += TCP_ACK/MSPTICK;

			/* sanity check */
.
1639a
			/* round trip depenency */
.
1397a
				else if(tcb->acktimer.state != TimerON)
					tcpgo(tpriv, &tcb->acktimer);
.
1395,1396c
				/*
				 *  force an ack if we've got 2 segs
				 *  and the user isn't backing up
				 */
				if(tcb->rcv.nxt - tcb->last_ack >= 2*tcb->mss &&
				   qlen(s->rq) < 8*tcb->mss)
.
1392,1393d
1387c
					qpassnolim(s->rq, packblock(bp));
.
23c
	TCP_ACK		= 200,		/* Timed ack sequence in ms */
.
## diffname ip/tcp.c 1998/0923
## diff -e /n/emeliedump/1998/0918/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0923/sys/src/brazil/ip/tcp.c
1387c
					bp = packblock(bp);
					if(bp == nil)
						panic("tcp packblock");
					qpassnolim(s->rq, bp);
.
## diffname ip/tcp.c 1998/0924
## diff -e /n/emeliedump/1998/0923/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0924/sys/src/brazil/ip/tcp.c
2041,2042d
627a
	if(tpriv->ackprocstarted == 0){
		qlock(&tpriv->apl);
		if(tpriv->ackprocstarted == 0){
			sprint(kpname, "#I%dtcpack", s->p->f->dev);
			kproc(kpname, tcpackproc, s->p);
			tpriv->ackprocstarted = 1;
		}
		qunlock(&tpriv->apl);
	}

.
624a
	char kpname[NAMELEN];
.
221a
	/* for keeping track of tcpackproc */
	int	ackprocstarted;
	QLock	apl;
.
## diffname ip/tcp.c 1998/0925
## diff -e /n/emeliedump/1998/0924/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0925/sys/src/brazil/ip/tcp.c
1417,1418c
					if(tcb->acktimer.state == TimerON)
						tcphalt(tpriv, &tcb->acktimer);
				} else {
					if(tcb->acktimer.state != TimerON)
						tcpgo(tpriv, &tcb->acktimer);
				}
.
1415c
				   qlen(s->rq) < 8*tcb->mss){
.
553a
	tcphalt(tpriv, &tcb->acktimer);
.
489,494c
		loop = 0;
		for(t = timeo; t != nil; t = t->readynext) {
			if(loop++ > 10000)
				panic("tcpackproc2");
.
482c
					t->readynext = timeo;
.
475a
			if(loop++ > 10000)
				panic("tcpackproc1");
.
474a
		loop = 0;
.
465a
	int loop;
.
421a
	t->next = t->prev = nil;
.
277a
	tcb->state = newstate;

.
271a
		break;
.
265,266d
78a
	Timer	*readynext;
.
## diffname ip/tcp.c 1998/0927
## diff -e /n/emeliedump/1998/0925/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/0927/sys/src/brazil/ip/tcp.c
535c
	qunlock(&priv->tl);
.
533c
		deltimer(priv, t);
.
531c
	qlock(&priv->tl);
.
526c
tcphalt(Tcppriv *priv, Timer *t)
.
522c
	qunlock(&priv->tl);
.
520c
		priv->timers = t;
.
517c
		t->next = priv->timers;
.
512c
	qlock(&priv->tl);
.
507c
tcpgo(Tcppriv *priv, Timer *t)
.
494c
		qunlock(&priv->tl);
.
487c
					deltimer(priv, t);
.
480c
		for(t = priv->timers; t != nil; t = tp) {
.
477c
		qlock(&priv->tl);
.
475c
		tsleep(&priv->tcpr, return0, 0, MSPTICK);
.
472c
	priv = tcp->priv;
.
468c
	Tcppriv *priv;
.
418,419c
	if(priv->timers == t)
		priv->timers = t->next;
.
416c
deltimer(Tcppriv *priv, Timer *t)
.
412,414d
## diffname ip/tcp.c 1998/1008
## diff -e /n/emeliedump/1998/0927/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/1008/sys/src/brazil/ip/tcp.c
1570c
			if((sndcnt-sent) < 5)
.
1566a
			 *   - there's more than 5 bytes queued
.
1563,1564c
			 *  and send if any of the following:
			 *   - there's no unacked packets outstanding
.
1559d
1425a
				if(tcb->acktimer.state != TimerON)
					tcpgo(tpriv, &tcb->acktimer);
.
1420,1424d
23c
	TCP_ACK		= 50,		/* Timed ack sequence in ms */
.
## diffname ip/tcp.c 1998/1118
## diff -e /n/emeliedump/1998/1008/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/1118/sys/src/brazil/ip/tcp.c
2003a
	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
		return tcpstartka(c, f, n);
.
1785,1794d
1733a
tcpkeepalive(Conv *s)
{
	Tcpctl *tcb;

	tcb = (Tcpctl*)s->ptcl;
	if(--(tcb->kacounter) <= 0)
		localclose(s, Etimedout);
	else {
		qlock(tcb);
		tcpsendka(s);
		qunlock(tcb);
		tcpgo(s->p->priv, &tcb->katimer);
	}
}

/*
 *  start keepalive timer
 */
char*
tcpstartka(Conv *s, char **f, int n)
{
	Tcpctl *tcb;
	int x;

	tcb = (Tcpctl*)s->ptcl;
	if(n > 1){
		x = atoi(f[1]);
		if(x >= MSPTICK)
			tcb->katimer.start = x/MSPTICK;
	}
	tcb->kacounter = MAXBACKOFF;
	tcpgo(s->p->priv, &tcb->katimer);

	return nil;
}

void
.
1732a
/*
 *  if we've timed out, close the connection
 *  otherwise, send a keepalive and restart the timer
 */
.
1697c
tcpsendka(Conv *s)
.
1688a
		if(tcb->kacounter > 0)
			tcpgo(tpriv, &tcb->katimer);
.
1348c
				tcb->katimer.start = MSL2 * (1000 / MSPTICK);
.
1346c
				tcb->kacounter = MAXBACKOFF;
.
1226a
	if(tcb->kacounter > 0)
		tcb->kacounter = MAXBACKOFF;

.
1010,1011d
596a
	tcb->kacounter = 0;
	tcb->katimer.start = DEF_KAT / MSPTICK;
	tcb->katimer.func = tcpkeepalive;
	tcb->katimer.arg = s;
.
556a
	tcphalt(tpriv, &tcb->katimer);
.
239a
void	tcpkeepalive(Conv*);
.
178d
173a
	Timer	katimer;		/* keep alive timer */
.
40a
	DEF_KAT		= 10000,	/* Default keep alive trip in ms */
.
## diffname ip/tcp.c 1998/1127
## diff -e /n/emeliedump/1998/1118/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/1127/sys/src/brazil/ip/tcp.c
569a
	if(s->state == Announced)
		wakeup(&s->listenr);
.
## diffname ip/tcp.c 1998/1202
## diff -e /n/emeliedump/1998/1127/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/1202/sys/src/brazil/ip/tcp.c
1359c
				tcpgo(tpriv, &tcb->katimer);
.
## diffname ip/tcp.c 1998/1204
## diff -e /n/emeliedump/1998/1202/sys/src/brazil/ip/tcp.c /n/emeliedump/1998/1204/sys/src/brazil/ip/tcp.c
1465a
				tcphalt(tpriv, &tcb->rtt_timer);
				tcphalt(tpriv, &tcb->acktimer);
				tcphalt(tpriv, &tcb->katimer);
.
1456a
					tcphalt(tpriv, &tcb->rtt_timer);
					tcphalt(tpriv, &tcb->acktimer);
					tcphalt(tpriv, &tcb->katimer);
.
1367a
				tcphalt(tpriv, &tcb->rtt_timer);
				tcphalt(tpriv, &tcb->acktimer);
				tcphalt(tpriv, &tcb->katimer);
.
1355a
				tcphalt(tpriv, &tcb->rtt_timer);
				tcphalt(tpriv, &tcb->acktimer);
.
1295a
			tcphalt(tpriv, &tcb->rtt_timer);
			tcphalt(tpriv, &tcb->acktimer);
			tcphalt(tpriv, &tcb->katimer);
.
935a
	tcb->katimer.arg = new;
	tcb->katimer.state = TimerOFF;
	tcb->rtt_timer.arg = new;
	tcb->rtt_timer.state = TimerOFF;
.
## diffname ip/tcp.c 1999/0302
## diff -e /n/emeliedump/1998/1204/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0302/sys/src/brazil/ip/tcp.c
2055a
	qunlock(tcp);
.
2052,2053c
			qunlock(s);
			freeblist(bp);
			return;
.
2046c
			qlock(s);
			qunlock(tcp);
.
2037a
	qlock(tcp);
.
1829c
	qunlock(s);
.
1813c
	qlock(s);
.
1778c
		qunlock(s);
.
1776c
		qlock(s);
.
1520c
	qunlock(s);
.
1517c
	qunlock(s);
.
1452c
				qunlock(s);
.
1311c
		qunlock(s);
.
1286c
		qunlock(s);
.
1236c
	qlock(s);
	qunlock(tcp);
.
1227a
		freeblist(bp);
.
1226c
		qunlock(tcp);
.
1190a
			qunlock(tcp);
.
1189a

.
1186a
			qunlock(tcp);
.
1177a
			qunlock(tcp);
.
1161a
	/* lock protocol while searching for a conversation */
	qlock(tcp);
.
914c
	qunlock(s);
.
899c
	qlock(s);
.
896c
		qunlock(s);
.
675c
		qunlock(s);
.
670c
		qlock(s);
.
452c
	qunlock(s);
.
448c
	qlock(s);
.
407c
		qunlock(s);
.
403c
		qlock(s);
.
379c
	qunlock(c);
.
372d
365d
360d
355d
346,347d
139d
135a
/*
 *  the qlock in the Conv locks this structure
 */
.
## diffname ip/tcp.c 1999/0320
## diff -e /n/emeliedump/1999/0302/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0320/sys/src/brazil/ip/tcp.c
1775a
	s = v;
.
1774a
	Conv *s;
.
1772c
tcpkeepalive(void *v)
.
441a
	s = v;
.
440a
	Conv *s;
.
438c
tcpacktimer(void *v)
.
242,243c
void	tcpacktimer(void*);
void	tcpkeepalive(void*);
.
## diffname ip/tcp.c 1999/0327
## diff -e /n/emeliedump/1999/0320/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0327/sys/src/brazil/ip/tcp.c
457c
	c->wq = qopen(2*QMAX, 0, 0, 0);
.
## diffname ip/tcp.c 1999/0401
## diff -e /n/emeliedump/1999/0327/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0401/sys/src/brazil/ip/tcp.c
2103a
		tpriv->tstats.tcpRetransTimeouts,
.
2091c
	return snprint(buf, len, "%lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud %lud",
.
1862c
		tcprxmit(s, 1);
		tpriv->tstats.tcpRetransTimeouts++;
		tcb->snd.dupacks = 0;
.
1847a
	tpriv = s->p->priv;
.
1845a
	Tcppriv *tpriv;
.
1836,1837c
	if(dolock)
		qunlock(s);
.
1832a
	/*
	 *  pull window down to a single packet
	 */
.
1828,1831c
//	win = (tcb->cwind<tcb->snd.wnd)?tcb->cwind:tcb->snd.wnd/ tcb->mss;
//	win = win/2;
//	if ( win < 2 )
//		win = 2;
//	tcb->ssthresh = win * tcb->mss;
 	tcb->ssthresh = tcb->mss;
.
1825,1826c
	/*
	 *  We should be halving the slow start thershhold (down to one
	 *  mss) but leaving it at mss seems to work well enough
.
1821c
	if(dolock)
		qlock(s);
.
1818d
1816d
1813c
tcprxmit(Conv *s, int dolock)
.
1715,1716c
			/*  If round trip timer isn't running, start it.
			 *  measure the longest packet only in case the
			 *  transmission time dominates RTT
			 */
			if(tcb->rtt_timer.state != TimerON)
			if(ssize == tcb->mss) {
.
1677a
			tpriv->tstats.tcpRetransSegs++;
.
1673c
		if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
.
1617a
		/* avoid sending short packets unless... */
		if(dsize != 0) {
			/* ...we have a full segment */
			if(dsize != tcb->mss)
			/* ...the data was just queued */
			if((dsize + sent) != sndcnt)
			/* ...we're being forced */
			if(!(tcb->flags&FORCE))
			/* ...we have at least half a window's worth to send */
			if(dsize < tcb->snd.wnd/2 || tcb->snd.wnd == 0)
				return;
		}

.
1604,1605d
1590,1602d
1565d
1542c
	int sndcnt, n;
.
1087,1092c
done:
.
1044c

	/* avoid slow start and timers for SYN acks */
	if((tcb->flags & SYNACK) == 0) {
		tcb->flags |= SYNACK;
		acked--;
		tcb->sndcnt--;
		goto done;
	}

	/* slow start as long as we're not recovering from lost packets */
	if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
.
1039,1040c
        /*
	 *  any positive ack turns off fast rxt,
         *  (should we do new-reno on partial acks?)
	 */
	if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
		tcb->snd.dupacks = 0;
		tcb->snd.recovery = 0;
	} else {
//		print("rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
	}
.
1025a
	/* added by Dong for fast retransmission */
	if( seg->ack == tcb->snd.una &&
	    seg->len == 0 && seg->wnd == tcb->snd.wnd ) {

		/* this is a pure ack w/o window update */
//		print("dupack %lud ack %lud sndwnd %d advwin %d\n",
//		tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);

		if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
			/*
			 *  tahoe tcp rxt the packet, half sshthresh,
 			 *  and set cwnd to one packet
			 */
			tcb->snd.recovery = 1;
			tcb->snd.rxt = tcb->snd.nxt;
//			print("fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
			tcprxmit(s, 0);
		} else {
			/* do reno tcp here. */
		}
	}

.
985c
	return (int)(x-y) >= 0;
.
979c
	return (int)(x-y) > 0;
.
973c
	return (int)(x-y) <= 0;
.
967c
	return (int)(x-y) < 0;
.
781a
	tcph->len = nhgets(h->length) - (hdrlen + TCP_PKT);
.
617,630c
	tcb->mss = tcb->cwind = tcpmtu(s);
.
615d
600d
591d
588,589d
577a
/* mtu (- TCP + IP hdr len) of 1st hop */
int
tcpmtu(Conv *s)
{
	Ipifc *ifc;
	int mtu;

	mtu = 0;
	ifc = findipifc(s->p->f, s->raddr, 0);
	if(ifc != nil)
		mtu = ifc->maxmtu - ifc->m->hsize - (TCP_PKT + TCP_HDRSIZE);
	if(mtu < 4)
		mtu = DEF_MSS;
	return mtu;
}

.
243a
void    tcprxmit(Conv*, int);
.
205a
        ulong   tcpRetransTimeouts;
.
152a

                /* to implement tahoe and reno TCP */
                ulong dupacks;          /* number of duplicate acks rcvd */
	        int   recovery;         /* loss recovery flag */
	        ulong rxt;              /* right window marker for recovery */
.
124a
        ushort  len;  /* size of data */
.
50d
44a
	TCPREXMTTHRESH  = 3,            /* dupack threshhold for rxt */

.
39,40c
	DEF_MSS		= 1460,		/* Default mean segment */
	DEF_RTT		= 1000,		/* Default round trip */
.
## diffname ip/tcp.c 1999/0529
## diff -e /n/emeliedump/1999/0401/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0529/sys/src/brazil/ip/tcp.c
1909,1910c
			maxback = MAXBACKMS;
		back = backoff(tcb->backoff) * (tcb->mdev + (tcb->srtt>>LOGAGAIN));
		if(back >= maxback) {
.
1907c
			maxback = MAXBACKMS/2;
.
1895c
	int back, maxback;
.
1851c
	tcpsetkacounter(tcb);
.
1818a
tcpsetkacounter(Tcpctl *tcb)
{
	tcb->kacounter = MAXBACKMS / (tcb->katimer.start*MSPTICK);;
	if(tcb->kacounter < 3)
		tcb->kacounter = 3;
}
void
.
1414c
				tcpsetkacounter(tcb);
.
1288c
		tcb->kacounter = MAXBACKMS / (tcb->katimer.start*MSPTICK);
	if(tcb->kacounter < 3)
		tcb->kacounter = 3;
.
251a
void	tcpsetkacounter(Tcpctl*);
.
41c
	DEF_KAT		= 10000,	/* Default time ms) between keep alives */
.
34c
	MAXBACKMS	= 30000,	/* longest backoff time (ms) before hangup */
.
## diffname ip/tcp.c 1999/0607
## diff -e /n/emeliedump/1999/0529/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0607/sys/src/brazil/ip/tcp.c
1920,1921c
		tcb->backedoff += tcb->timer.start * MSPTICK;
		if(tcb->backedoff >= maxback) {
.
1905c
	int maxback;
.
1152a
	tcb->backedoff = 0;
.
1114a
			tcb->backedoff = 0;
.
177a
	int	backedoff;		/* ms we've backed off for rexmits */
.
## diffname ip/tcp.c 1999/0623
## diff -e /n/emeliedump/1999/0607/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0623/sys/src/brazil/ip/tcp.c
41c
	DEF_KAT		= 30000,	/* Default time ms) between keep alives */
.
## diffname ip/tcp.c 1999/0810
## diff -e /n/emeliedump/1999/0623/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0810/sys/src/brazil/ip/tcp.c
2003a
	if(i > 100)
		print("very long tcp resequence queue: %d\n", length);
.
1995,1996c
		length += rp1->length;
.
1993a
	length = 0;
.
1974c
	int i, x;
.
## diffname ip/tcp.c 1999/0811
## diff -e /n/emeliedump/1999/0810/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0811/sys/src/brazil/ip/tcp.c
1974c
	int i;
.
## diffname ip/tcp.c 1999/0817
## diff -e /n/emeliedump/1999/0811/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0817/sys/src/brazil/ip/tcp.c
1817c
	ipoput(s->p->f, hbp, 0, s->ttl, s->tos);
.
1776c
		ipoput(f, hbp, 0, s->ttl, s->tos);
.
918c
		ipoput(s->p->f, hbp, 0, s->ttl, s->tos);
.
888c
	ipoput(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
.
## diffname ip/tcp.c 1999/0825
## diff -e /n/emeliedump/1999/0817/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0825/sys/src/brazil/ip/tcp.c
813c
				tcph->mss = nhgets(optr+2);
.
811c
			optlen = optr[i+1];
.
804c
		switch(optr[i]) {
		default:
			i += optr[i+1];
			break;
.
## diffname ip/tcp.c 1999/0827
## diff -e /n/emeliedump/1999/0825/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/0827/sys/src/brazil/ip/tcp.c
819a
		n -= optlen;
		optr += optlen;
.
817d
814d
808,812c
if(0) print("tcpopt %d %d\n", *optr, optlen);
		switch(*optr) {
.
803,806c
	n = hdrlen - TCP_HDRSIZE;
	while(n > 0 && *optr != EOLOPT) {
		if(*optr == NOOPOPT) {
			n--;
			optr++;
			continue;
		}
		optlen = optr[1];
		if(optlen < 2 || optlen > n)
.
769c
	ushort optlen;
	int n;
.
## diffname ip/tcp.c 1999/1006
## diff -e /n/emeliedump/1999/0827/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/1006/sys/src/brazil/ip/tcp.c
2011a
		for(rp1 = tcb->reseq, i = 0; i < 10 && rp1 != nil; rp1 = rp1->next, i++)
			print("0x%lux 0x%lux 0x%ux", rp1->seg.seq, rp1->seg.ack,
				rp1->seg.flags);
		return -1;
	}
	return 0;
.
2010c
	if(i > 100 && once++ == 0){
.
1997c
		return 0;
.
1985c
		return 0;
.
1980a
	static int once;
.
1976c
int
.
1389c
		if(addreseq(tcb, &seg, bp, length) < 0)
			print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
.
240c
int	addreseq(Tcpctl*, Tcp*, Block*, ushort);
.
## diffname ip/tcp.c 1999/1224
## diff -e /n/emeliedump/1999/1006/sys/src/brazil/ip/tcp.c /n/emeliedump/1999/1224/sys/src/9/ip/tcp.c
2015c
			print("0x%lux 0x%lux 0x%ux\n", rp1->seg.seq, rp1->seg.ack,
.
## diffname ip/tcp.c 2000/0101
## diff -e /n/emeliedump/1999/1224/sys/src/9/ip/tcp.c /n/emeliedump/2000/0101/sys/src/9/ip/tcp.c
2012c
	if(length > QMAX && once++ == 0){
.
540,542c
	timerstate(priv, t, TimerOFF);
.
522,529c
	timerstate(priv, t, TimerON);
.
495,496c
					timerstate(priv, t, TimerDONE);
.
470a
static void	/* set t->state to newstate, linking t into or out of priv->timers as it enters or leaves TimerON */
timerstate(Tcppriv *priv, Timer *t, int newstate)
{
	if(newstate != TimerON){
		if(t->state == TimerON){
			// unchain: t is leaving TimerON, so take it off the priv->timers list
			if(priv->timers == t){
				priv->timers = t->next;
				if(t->prev != nil)	/* the list head must not have a predecessor */
					panic("timerstate1");
			}
			if(t->next)
				t->next->prev = t->prev;
			if(t->prev)
				t->prev->next = t->next;
			t->next = t->prev = nil;	/* leave the timer fully unlinked */
		}
	} else {
		if(t->state != TimerON){
			// chain: t is entering TimerON, so push it on the front of priv->timers
			if(t->prev != nil || t->next != nil)	/* a timer that is not ON must be unlinked */
				panic("timerstate2");
			t->prev = nil;
			t->next = priv->timers;
			if(t->next)
				t->next->prev = t;
			priv->timers = t;
		}
	}
	t->state = newstate;	/* recorded in every case, even when no relinking was needed */
}

.
421,432d
## diffname ip/tcp.c 2000/0102
## diff -e /n/emeliedump/2000/0101/sys/src/9/ip/tcp.c /n/emeliedump/2000/0102/sys/src/9/ip/tcp.c
1952a
	qunlock(s);
	poperror();
.
1951a
	case Closed:
		break;
.
1945c
		tcprxmit(s);
.
1932c
	if(waserror()){
		qunlock(s);
		nexterror();
	}
	qlock(s);
.
1915,1917d
1894,1895d
1888c
tcprxmit(Conv *s)
.
1863a
	qlock(s);
	if(tcb->state != Closed){
		if(--(tcb->kacounter) <= 0) {
			localclose(s, Etimedout);
		} else {
			tcpsendka(s);
			tcpgo(s->p->priv, &tcb->katimer);
		}
	}
	qunlock(s);
	poperror();
.
1862c
		nexterror();
.
1856,1860c
	if(waserror()){
.
1592a
	poperror();
.
1589a
	poperror();
.
1524a
				poperror();
.
1382a
		poperror();
.
1357a
		poperror();
.
1303a
	if(waserror()){
		qunlock(s);
		nexterror();
	}
.
1068c
			tcprxmit(s);
.
692a
		poperror();
.
687a
		if(waserror()){
			qunlock(s);
			nexterror();
		}
.
595d
585a
	tcb->reseq = nil;
.
449a
	poperror();
.
446,448c
	if(tcb->state != Closed){
		tcb->flags |= FORCE;
		tcprcvwin(s);
		tcpoutput(s);
	}
.
444a
	if(waserror()){
		qunlock(s);
		nexterror();
	}
.
417a
		qunlock(s);
.
416a
		qlock(s);
.
414a
		poperror();
.
409a
		if(waserror()){
			qunlock(s);
			nexterror();
		}
.
254c
void    tcprxmit(Conv*);
.
## diffname ip/tcp.c 2000/0424
## diff -e /n/emeliedump/2000/0102/sys/src/9/ip/tcp.c /n/emeliedump/2000/0424/sys/src/9/ip/tcp.c
2260a
	tcp->gc = tcpgc;
.
2240a
/*
 *  garbage collect any stale conversations:
 *	- in Syn_received for more than 5 seconds (could be the SYN attack)
 *	- in Finwait2 for more than 5 minutes
 *  ages are msec - tcb->time.  returns the number of conversations closed.
 *  this is called whenever we run out of channels.  Both checks are
 *  of questionable validity so we try to use them only when we're
 *  up against the wall.
 */
int
tcpgc(Proto *tcp)
{
	Conv *c, **pp, **ep;
	int n;
	Tcpctl *tcb;


	n = 0;
	ep = &tcp->conv[tcp->nc];
	for(pp = tcp->conv; pp < ep; pp++) {
		c = *pp;
		if(c == nil)	/* a nil slot ends the scan */
			break;
		if(!canqlock(c))	/* skip any conversation whose lock we can't get right now */
			continue;
		tcb = (Tcpctl*)c->ptcl;
		switch(tcb->state){
		case Syn_received:
			if(msec - tcb->time > 5000){	/* 5 seconds, in ms */
				localclose(c, "timed out");
				n++;
			}
			break;
		case Finwait2:
			if(msec - tcb->time > 5*60*1000){	/* 5 minutes, in ms */
				localclose(c, "timed out");
				n++;
			}
			break;
		}
		qunlock(c);
	}
	return n;
}

.
1462a
				tcb->time = msec;
.
1369a
			}
.
1368c
			else {
				tcb->time = msec;
.
1342a
			tcb->time = msec;
.
190a
	ulong	time;			/* time Finwait2 or Syn_received was sent */
.
126c
        ushort  len;	/* size of data */
.
## diffname ip/tcp.c 2000/0706
## diff -e /n/emeliedump/2000/0424/sys/src/9/ip/tcp.c /n/emeliedump/2000/0706/sys/src/9/ip/tcp.c
2315c
	tpriv->stats[MaxConn] = Nchans;
.
2224,2243c
	priv = tcp->priv;
	p = buf;
	e = p+len;
	for(i = 0; i < Nstats; i++)
		p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
	return p - buf;
.
2222c
	Tcppriv *priv;
	char *p, *e;
	int i;
.
1985c
		tpriv->stats[RetransTimeouts]++;
.
1823c
		tpriv->stats[OutSegs]++;
.
1774c
			tpriv->stats[RetransSegs]++;
.
1441c
				tpriv->stats[EstabResets]++;
.
1427c
		tpriv->stats[OutOfOrder]++;
.
1238c
		tpriv->stats[LenErrs]++;
		tpriv->stats[InErrs]++;
.
1229c
		tpriv->stats[HlenErrs]++;
		tpriv->stats[InErrs]++;
.
1221c
		tpriv->stats[CsumErrs]++;
		tpriv->stats[InErrs]++;
.
1210c
	tpriv->stats[InSegs]++;
.
898c
	tpriv->stats[OutRsts]++;
.
700c
		tpriv->stats[ActiveOpens]++;
.
694c
		tpriv->stats[PassiveOpens]++;
.
275c
		tpriv->stats[CurrEstab]++;
.
273c
		tpriv->stats[CurrEstab]--;
.
230,235d
227,228c
	ulong	stats[Nstats];
.
203,217c
[MaxConn]	"MaxConn",
[ActiveOpens]	"ActiveOpens",
[PassiveOpens]	"PassiveOpens",
[EstabResets]	"EstabResets",
[CurrEstab]	"CurrEstab",
[InSegs]	"InSegs",
[OutSegs]	"OutSegs",
[RetransSegs]	"RetransSegs",
[RetransTimeouts]	"RetransTimeouts",
[InErrs]	"InErrs",
[OutRsts]	"OutRsts",
[CsumErrs]	"CsumErrs",
[HlenErrs]	"HlenErrs",
[LenErrs]	"LenErrs",
[OutOfOrder]	"OutOfOrder",
.
199,201c
enum {
	/* MIB stats */
	MaxConn,
	ActiveOpens,
	PassiveOpens,
	EstabResets,
	CurrEstab,
	InSegs,
	OutSegs,
	RetransSegs,
	RetransTimeouts,
	InErrs,
	OutRsts,

	/* non-MIB stats */
	CsumErrs,
	HlenErrs,
	LenErrs,
	OutOfOrder,

	Nstats
};

static char *statnames[] =
.
## diffname ip/tcp.c 2000/0914
## diff -e /n/emeliedump/2000/0706/sys/src/9/ip/tcp.c /n/emeliedump/2000/0914/sys/src/9/ip/tcp.c
2232a
	if(n >= 1 && strcmp(f[0], "checksum") == 0)
		return tcpsetchecksum(c, f, n);
.
1943a
/*
 *  turn checksums on/off: f[1] == 0 stops us sending them, non-zero resumes
 */
char*
tcpsetchecksum(Conv *s, char **f, int n)
{
	Tcpctl *tcb = (Tcpctl*)s->ptcl;

	if(n < 2)	/* the ctl dispatch only guarantees f[0]; don't read a missing f[1] */
		return "checksum: missing argument";
	tcb->nochecksum = !atoi(f[1]);
	return nil;
}

.
1878c
	hbp = htontcp(&seg, dbp, &tcb->protohdr, tcb);
.
1803c
		hbp = htontcp(&seg, bp, &tcb->protohdr, tcb);
.
1234c
	if((h->tcpcksum[0] || h->tcpcksum[0]) && 
	    ptclcsum(bp, TCP_IPLEN, length-TCP_IPLEN)) {
.
967c
		hbp = htontcp(&seg, nil, &tcb->protohdr, tcb);
.
934c
	hbp = htontcp(seg, nil, &ph, nil);
.
796,797c
	if(tcb != nil && tcb->nochecksum){
		h->tcpcksum[0] = h->tcpcksum[1] = 0;
	} else {
		csum = ptclcsum(data, TCP_IPLEN, hdrlen+dlen+TCP_PHDRSIZE);
		hnputs(h->tcpcksum, csum);
	}
.
754c
htontcp(Tcp *tcph, Block *data, Tcphdr *ph, Tcpctl *tcb)
.
191a
	int	nochecksum;		/* non-zero means don't send checksums */ 
.
## diffname ip/tcp.c 2000/1012
## diff -e /n/emeliedump/2000/0914/sys/src/9/ip/tcp.c /n/emeliedump/2000/1012/sys/src/9/ip/tcp.c
1822a
			/*  allow for slow initial response
			 *  ([email protected])
			 */
			if(tcb->state == Syn_sent && x < 500/MSPTICK)
				x = 500/MSPTICK;

.
## diffname ip/tcp.c 2000/1219
## diff -e /n/emeliedump/2000/1012/sys/src/9/ip/tcp.c /n/emeliedump/2000/1219/sys/src/9/ip/tcp.c
729a
	qunlock(s);
	poperror();
.
726,727d
716,721d
702a
	qlock(s);
	/* Send SYN, go into SYN_SENT state */
	if(waserror()){
		qunlock(s);
		nexterror();
	}

.
363a
/*
 *  tcpclose is always called with the q locked
 */
.
## diffname ip/tcp.c 2000/1220
## diff -e /n/emeliedump/2000/1219/sys/src/9/ip/tcp.c /n/emeliedump/2000/1220/sys/src/9/ip/tcp.c
732,733d
708,712d
706d
684a
/*
 *  called with s qlocked
 */
.
444a

	qunlock(s);
	poperror();
.
443c
		break;
.
441d
437,438d
429,433d
414a
	if(waserror()){
		qunlock(s);
		nexterror();
	}
	qlock(s);
.
405d
## diffname ip/tcp.c 2000/1221
## diff -e /n/emeliedump/2000/1220/sys/src/9/ip/tcp.c /n/emeliedump/2000/1221/sys/src/9/ip/tcp.c
1853a
		qlock(s);
		poperror();
.
1852a
		qunlock(s);
		if(waserror()){
			qlock(s);
			nexterror();
		}
.
1685a
		tcb = (Tcpctl*)s->ptcl;
	
		switch(tcb->state) {
		case Listen:
		case Closed:
		case Finwait2:
			return;
		}
	
		/* force an ack when a window has opened up */
		if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
			tcb->rcv.blocked = 0;
			tcb->flags |= FORCE;
		}
	
.
1670,1684d
1652c
 *  always enters and exits with the s locked.  We drop
 *  the lock to ipoput the packet so some care has to be
 *  taken by callers.
.
## diffname ip/tcp.c 2001/0117
## diff -e /n/emeliedump/2000/1221/sys/src/9/ip/tcp.c /n/emeliedump/2001/0117/sys/src/9/ip/tcp.c
337a
		s->cwind, s->snd.wnd,
.
336c
		"%s srtt %d mdev %d cwin %d swin %d timer.start %d timer.count %d\n",
.
## diffname ip/tcp.c 2001/0118
## diff -e /n/emeliedump/2001/0117/sys/src/9/ip/tcp.c /n/emeliedump/2001/0118/sys/src/9/ip/tcp.c
2034a
		netlog(s->p->f, Logtcp, "timeout rexmit 0x%lux\n", tcb->snd.una);
.
1992,1996d
1785,1786c
/*			netlog(f, Logtcp, "qcopy: dlen %d blen %d sndcnt %d qlen %d sent %d rp[0] %d\n",
				dsize, BLEN(bp), sndcnt, qlen(s->wq), sent, bp->rp[0]); */
.
1713a
		if(ssize && usable < 2)
			netlog(s->p->f, Logtcp, "throttled snd.wnd 0x%ux cwind 0x%ux\n",
				tcb->snd.wnd, tcb->cwind);
.
1566c

				/*
				 *  turn on the acktimer if there's something
				 *  to ack
				 */
.
1563,1564c
				if(tcb->rcv.nxt - tcb->last_ack >= 2*tcb->mss)
.
1560,1561c
				 *  force an ack if we've got 2 segs since we
				 *  last acked.
.
1556a

				/*
				 *  update our rcv window
				 */
.
1132,1134c
	} else
		netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
.
1113c
	if(seq_ge(seg->seq,tcb->snd.wl1)) {

		/* a closed window opened, start retransmitting.  why? - presotto */
.
1111a
	/*
	 *  update our send window if this is a new ack (ignore old packets
	 *  even if the ack is new)
	 */
.
1105c
			netlog(s->p->f, Logtcpmsg, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
.
1095,1096c
		netlog(s->p->f, Logtcpmsg, "dupack %lud ack %lud sndwnd %d advwin %d\n",
			tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
.
1084a
	/* if everything has been acked, force output(?) */
.
338c
		s->cwind, s->snd.wnd, s->rcv.wnd,
.
336c
		"%s srtt %d mdev %d cwin %d swin %d rwin %d timer.start %d timer.count %d\n",
.
## diffname ip/tcp.c 2001/0119
## diff -e /n/emeliedump/2001/0118/sys/src/9/ip/tcp.c /n/emeliedump/2001/0119/sys/src/9/ip/tcp.c
2078d
1741,1753d
1128a

.
1125d
1117,1123c
	if( seq_gt(seg->ack, tcb->snd.wl2)
	||  (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
.
1114,1115c
	 *  update window
.
710c
	inittcpctl(s, mode);
.
671a
	if(mode != TCP_LISTEN)
.
649c
inittcpctl(Conv *s, int mode)
.
192c
	int	nochecksum;		/* non-zero means don't send checksums */
.
153d
## diffname ip/tcp.c 2001/0127
## diff -e /n/emeliedump/2001/0119/sys/src/9/ip/tcp.c /n/emeliedump/2001/0127/sys/src/9/ip/tcp.c
2200,2202c
		*bp = trimblock(*bp, 0, *length);
		if(*bp == nil)
			panic("presotto is a boofhead");
.
2140d
773c
		data = allocb(hdrlen + TCP_PKT + 64);	/* the 64 pad is to meet mintu's */
.
487c
	c->rq = qopen(QMAX, -1, tcpacktimer, c);
.
338c
		s->timer.start, s->timer.count, s->rerecv);
.
335c
		"%s srtt %d mdev %d cwin %d swin %d rwin %d timer.start %d timer.count %d rerecv %d\n",
.
## diffname ip/tcp.c 2001/0301
## diff -e /n/emeliedump/2001/0127/sys/src/9/ip/tcp.c /n/emeliedump/2001/0301/sys/src/9/ip/tcp.c
1298,1338c
		s = tcpincoming(s, &seg, source, dest);
		if(s == nil)
			goto reset;
.
1296a
		if((seg.flags & SYN) == 0 || (seg.flags & ACK) != 0)
			goto reset;
.
1278,1291c

	/* if it's a listener, look for the right flags and get a new conv */
	tcb = (Tcpctl*)s->ptcl;
	if(tcb->state == Listen){
.
1268,1276c
	/* Look for a matching conversation */
	s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
	if(s == nil){
reset:
		qunlock(tcp);
		sndrst(tcp, source, dest, length, &seg);
		freeblist(bp);
		return;
.
1234d
1231a
	length = nhgets(h->length);
.
1221c
	Conv *s;
.
1010a
	tpriv = new->p->priv;
	iphtadd(&tpriv->ht, new);

.
985a
	Tcppriv *tpriv;
.
713a
	iphtadd(&tpriv->ht, s);
.
606a
	iphtrem(&tpriv->ht, s);

.
247a
	/* hash table for matching conversations */
	Ipht	ht;

.
## diffname ip/tcp.c 2001/0306
## diff -e /n/emeliedump/2001/0301/sys/src/9/ip/tcp.c /n/emeliedump/2001/0306/sys/src/9/ip/tcp.c
1659c
		sndcnt = qlen(s->wq)+tcb->flgcnt;
.
1619c
	tcpkick(s);
.
1563c
				if(qlen(s->wq)+tcb->flgcnt == 0) {
.
1474c
			if(qlen(s->wq)+tcb->flgcnt == 0) {
.
1463c
			if(qlen(s->wq)+tcb->flgcnt == 0) {
.
1448c
			if(qlen(s->wq)+tcb->flgcnt == 0){
.
1383c
		if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
.
1205d
1203c
	if(qdiscard(s->wq, acked) < acked)
		tcb->flgcnt--;
.
1153c
		tcb->flgcnt--;
.
891c
	tcb->flgcnt++;
.
436d
411c
tcpkick(Conv *s)
.
402c
		tcb->flgcnt++;
.
396c
		tcb->flgcnt++;
.
191a
	int	flgcnt;			/* 1 when we're waiting for a SYN/FIN ACK */
.
179d
## diffname ip/tcp.c 2001/0308
## diff -e /n/emeliedump/2001/0306/sys/src/9/ip/tcp.c /n/emeliedump/2001/0308/sys/src/9/ip/tcp.c
1133a
	}
.
1132c
	if(!seq_gt(seg->ack, tcb->snd.una)){
		/*
		 *  don't let us hangup if sending into a closed window and
		 *  we're still getting acks
		 */
		if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
			tcb->backedoff = MAXBACKMS/4;
		}
.
## diffname ip/tcp.c 2001/0316
## diff -e /n/emeliedump/2001/0308/sys/src/9/ip/tcp.c /n/emeliedump/2001/0316/sys/src/9/ip/tcp.c
1790,1795d
662a
	/* setup timers */
.
661a
	tcb->srtt = tcp_irtt<<LOGAGAIN;
	tcb->mdev = 0;
.
40c
	DEF_RTT		= 500,		/* Default round trip */
.
## diffname ip/tcp.c 2001/0317
## diff -e /n/emeliedump/2001/0316/sys/src/9/ip/tcp.c /n/emeliedump/2001/0317/sys/src/9/ip/tcp.c
379a
	qflush(c->rq);
.
## diffname ip/tcp.c 2001/0323
## diff -e /n/emeliedump/2001/0317/sys/src/9/ip/tcp.c /n/emeliedump/2001/0323/sys/src/9/ip/tcp.c
2218c
/* called with c qlocked */
.
985d
969,970d
966,967c
	if(waserror())
.
955a
 *  called with s qlocked
.
## diffname ip/tcp.c 2001/0504
## diff -e /n/emeliedump/2001/0323/sys/src/9/ip/tcp.c /n/emeliedump/2001/0504/sys/src/9/ip/tcp.c
563a
				poperror();
			}
.
562c
			if(t->state == TimerDONE && t->func != nil && !waserror()){
.
## diffname ip/tcp.c 2001/0505
## diff -e /n/emeliedump/2001/0504/sys/src/9/ip/tcp.c /n/emeliedump/2001/0505/sys/src/9/ip/tcp.c
1258c
	if((h->tcpcksum[0] || h->tcpcksum[1]) && 
.
## diffname ip/tcp.c 2001/0527
## diff -e /n/emeliedump/2001/0505/sys/src/9/ip/tcp.c /n/emeliedump/2001/0527/sys/src/9/ip/tcp.c
703c
	char kpname[KNAMELEN];
.
## diffname ip/tcp.c 2001/0530
## diff -e /n/emeliedump/2001/0527/sys/src/9/ip/tcp.c /n/emeliedump/2001/0530/sys/src/9/ip/tcp.c
2287a
}

void
tcpsettimer(Tcpctl *tcb)
{
	int x;

	/* round trip dependency */
	x = backoff(tcb->backoff) *
	    (tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;

	/* take into account delayed ack */
	if((tcb->snd.ptr - tcb->snd.una) <= 2*tcb->mss)
		x += TCP_ACK/MSPTICK;

	/* sanity check */
	if(x > (10000/MSPTICK))
		x = 10000/MSPTICK;
	tcb->timer.start = x;
.
1993a
		tcpsettimer(tcb);
.
1790,1802d
1640d
1208a
			tcpsettimer(tcb);
.
272a
void	tcpsettimer(Tcpctl*);
.
23c
	TCP_ACK		= 200,		/* Timed ack sequence in ms */
.
## diffname ip/tcp.c 2001/0531
## diff -e /n/emeliedump/2001/0530/sys/src/9/ip/tcp.c /n/emeliedump/2001/0531/sys/src/9/ip/tcp.c
1768a
			netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
				s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
.
1105,1106c
	if(seg->ack == tcb->snd.una
	&& tcb->snd.una != tcb->snd.nxt
	&& seg->len == 0
	&& seg->wnd == tcb->snd.wnd) {
.
## diffname ip/tcp.c 2001/0623
## diff -e /n/emeliedump/2001/0531/sys/src/9/ip/tcp.c /n/emeliedump/2001/0623/sys/src/9/ip/tcp.c
1237c
tcpiput(Proto *tcp, Ipifc*, Block *bp)
.
262c
void	tcpiput(Proto*, Ipifc*, Block*);
.
## diffname ip/tcp.c 2001/0922
## diff -e /n/emeliedump/2001/0623/sys/src/9/ip/tcp.c /n/emeliedump/2001/0922/sys/src/9/ip/tcp.c
2326c
	tpriv->stats[MaxConn] = tcp->nc;
.
2324c
	tcp->nc = scalednconv();
.
## diffname ip/tcp.c 2001/1117
## diff -e /n/emeliedump/2001/0922/sys/src/9/ip/tcp.c /n/emeliedump/2001/1117/sys/src/9/ip/tcp.c
2201,2203c
		}
	} else {
		for(p = tcp->conv; *p; p++) {
			s = *p;
			tcb = (Tcpctl*)s->ptcl;
			if(s->rport == pdest)
			if(s->lport == psource)
			if(tcb->state != Closed)
			if(ipcmp(s->raddr, dest) == 0)
			if(ipcmp(s->laddr, source) == 0){
				qlock(s);
				qunlock(tcp);
				switch(tcb->state){
				case Syn_sent:
					localclose(s, msg);
					break;
				}
				qunlock(s);
				freeblist(bp);
				return;
			}
.
2186,2199c
	if(strcmp(msg, "unfragmentable") == 0){
		for(p = tcp->conv; *p; p++) {
			s = *p;
			tcb = (Tcpctl*)s->ptcl;
			if(tcb->state != Closed)
			if(ipcmp(s->raddr, dest) == 0)
			if(ipcmp(s->laddr, source) == 0){
				qlock(s);
				qunlock(tcp);
				switch(tcb->state){
				case Syn_sent:
					localclose(s, msg);
					break;
				}
				qunlock(s);
				freeblist(bp);
				return;
.
652c
	return restrict_mtu(s->raddr, mtu);
.
## diffname ip/tcp.c 2002/0405
## diff -e /n/emeliedump/2001/1117/sys/src/9/ip/tcp.c /n/emeliedump/2002/0405/sys/src/9/ip/tcp.c
1802c
			if(tcb->rtt_timer.state != TcptimerON)
.
1795c
			if(tcb->timer.state != TcptimerON)
.
1551c
				if(tcb->acktimer.state != TcptimerON)
.
1494c
			if(tcb->timer.state != TcptimerON)
.
1189c
	if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
.
1011c
	tcb->rtt_timer.state = TcptimerOFF;
.
1009c
	tcb->katimer.state = TcptimerOFF;
.
1007c
	tcb->acktimer.state = TcptimerOFF;
.
1005c
	tcb->timer.state = TcptimerOFF;
.
590c
	timerstate(priv, t, TcptimerOFF);
.
584c
tcphalt(Tcppriv *priv, Tcptimer *t)
.
579c
	timerstate(priv, t, TcptimerON);
.
572c
tcpgo(Tcppriv *priv, Tcptimer *t)
.
563c
			if(t->state == TcptimerDONE && t->func != nil && !waserror()){
.
551c
					timerstate(priv, t, TcptimerDONE);
.
548c
 			if(t->state == TcptimerON) {
.
530c
	Tcptimer *t, *tp, *timeo;
.
513c
		if(t->state != TcptimerON){
.
498,499c
	if(newstate != TcptimerON){
		if(t->state == TcptimerON){
.
496c
timerstate(Tcppriv *priv, Tcptimer *t, int newstate)
.
244c
	Tcptimer 	*timers;		/* List of active timers */
.
180,183c
	Tcptimer	timer;			/* Activity timer */
	Tcptimer	acktimer;		/* Acknowledge timer */
	Tcptimer	rtt_timer;		/* Round trip timer */
	Tcptimer	katimer;		/* keep alive timer */
.
79,81c
	Tcptimer	*next;
	Tcptimer	*prev;
	Tcptimer	*readynext;
.
76,77c
typedef struct Tcptimer Tcptimer;
struct Tcptimer
.
19,21c
	TcptimerOFF	= 0,
	TcptimerON		= 1,
	TcptimerDONE	= 2,
.
## diffname ip/tcp.c 2002/0507
## diff -e /n/emeliedump/2002/0405/sys/src/9/ip/tcp.c /n/emeliedump/2002/0507/sys/src/9/ip/tcp.c
2204,2224c
			qunlock(s);
			freeblist(bp);
			return;
.
2186,2202c
	for(p = tcp->conv; *p; p++) {
		s = *p;
		tcb = (Tcpctl*)s->ptcl;
		if(s->rport == pdest)
		if(s->lport == psource)
		if(tcb->state != Closed)
		if(ipcmp(s->raddr, dest) == 0)
		if(ipcmp(s->laddr, source) == 0){
			qlock(s);
			qunlock(tcp);
			switch(tcb->state){
			case Syn_sent:
				localclose(s, msg);
				break;
.
2179,2182c
	if((h4->vihl&0xF0)==IP_VER4) {
		v4tov6(dest, h4->tcpdst);
		v4tov6(source, h4->tcpsrc);
		psource = nhgets(h4->tcpsport);
		pdest = nhgets(h4->tcpdport);
	} 
	else {
		ipmove(dest, h6->tcpdst);
		ipmove(source, h6->tcpsrc);
		psource = nhgets(h6->tcpsport);
		pdest = nhgets(h6->tcpdport);
	}
.
2177c
	h4 = (Tcp4hdr*)(bp->rp);
	h6 = (Tcp6hdr*)(bp->rp);
.
2170c
	Tcp4hdr *h4;
	Tcp6hdr *h6;
.
2147c
			if(seg->urg > dupcnt)
.
2136c
			if(seg->urg > 1)
.
1859,1860c
	else {
		/* Build header, link data and compute cksum */
		tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
		hbp = htontcp6(&seg, dbp, &tcb->protohdr.tcp6hdr, tcb);
		if(hbp == nil) {
			freeblist(dbp);
			return;
		}
		ipoput6(s->p->f, hbp, 0, s->ttl, s->tos);
	}
.
1853,1857c
	if(isv4(s->raddr)) {
		/* Build header, link data and compute cksum */
		tcb->protohdr.tcp4hdr.vihl = IP_VER4;
		hbp = htontcp4(&seg, dbp, &tcb->protohdr.tcp4hdr, tcb);
		if(hbp == nil) {
			freeblist(dbp);
			return;
		}
		ipoput4(s->p->f, hbp, 0, s->ttl, s->tos);
.
1835d
1817c
		if(version == 4)
			ipoput4(f, hbp, 0, s->ttl, s->tos);
		else
			ipoput6(f, hbp, 0, s->ttl, s->tos);
.
1789a
		else {
			tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
			hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
			if(hbp == nil) {
				freeblist(bp);
				return;
			}
		}
.
1785,1788c
		if(version == 4) {
			tcb->protohdr.tcp4hdr.vihl = IP_VER4;
			hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
			if(hbp == nil) {
				freeblist(bp);
				return;
			}
.
1758,1759d
1684c
				if((tcb->flags&FORCE) == 0)
.
1652a
	if( (memcmp(s->raddr, v4prefix, IPv4off) == 0 &&
		memcmp(s->laddr, v4prefix, IPv4off) == 0)
		|| ipcmp(s->raddr, IPnoaddr) == 0)
		version = 4;
	else
		version = 6;

.
1651a
	//int version = isv4(s->raddr) ? 4 : 6;
	int version;
.
1559c
				sndrst(tcp, source, dest, length, &seg, version);
.
1451c
				sndrst(tcp, source, dest, length, &seg, version);
.
1416c
		sndrst(tcp, source, dest, length, &seg, version);
.
1352c
				sndrst(tcp, source, dest, length, &seg, version);
.
1337c
		sndrst(tcp, source, dest, length, &seg, version);
.
1297c
		sndrst(tcp, source, dest, length, &seg, version);
.
1279,1286c
		version = 6;
		length = nhgets(h6->ploadlen);
		ipmove(dest, h6->tcpdst);
		ipmove(source, h6->tcpsrc);

		h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
		h6->ttl = proto;
		hnputl(h6->vcf, length);
		if((h6->tcpcksum[0] || h6->tcpcksum[1]) && 
			ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
			tpriv->stats[CsumErrs]++;
			tpriv->stats[InErrs]++;
			netlog(f, Logtcp, "bad tcp proto cksum\n");
			freeblist(bp);
			return;
		}
		h6->ttl = ttl;
		h6->proto = proto;
		hnputs(h6->ploadlen, length);

		hdrlen = ntohtcp6(&seg, &bp);
		if(hdrlen < 0){
			tpriv->stats[HlenErrs]++;
			tpriv->stats[InErrs]++;
			netlog(f, Logtcp, "bad tcp hdr len\n");
			return;
		}

		/* trim the packet to the size claimed by the datagram */
		length -= hdrlen;
		bp = trimblock(bp, hdrlen+TCP6_PKT, length);
		if(bp == nil){
			tpriv->stats[LenErrs]++;
			tpriv->stats[InErrs]++;
			netlog(f, Logtcp, "tcp len < 0 after trim\n");
			return;
		}
.
1277a
	else {
		int ttl = h6->ttl;
		int proto = h6->proto;
.
1271,1276c
		hdrlen = ntohtcp4(&seg, &bp);
		if(hdrlen < 0){
			tpriv->stats[HlenErrs]++;
			tpriv->stats[InErrs]++;
			netlog(f, Logtcp, "bad tcp hdr len\n");
			return;
		}

		/* trim the packet to the size claimed by the datagram */
		length -= hdrlen+TCP4_PKT;
		bp = trimblock(bp, hdrlen+TCP4_PKT, length);
		if(bp == nil){
			tpriv->stats[LenErrs]++;
			tpriv->stats[InErrs]++;
			netlog(f, Logtcp, "tcp len < 0 after trim\n");
			return;
		}
.
1260,1269c
		h4->Unused = 0;
		hnputs(h4->tcplen, length-TCP4_PKT);
		if((h4->tcpcksum[0] || h4->tcpcksum[1]) && 
			ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
			tpriv->stats[CsumErrs]++;
			tpriv->stats[InErrs]++;
			netlog(f, Logtcp, "bad tcp proto cksum\n");
			freeblist(bp);
			return;
		}
.
1256,1258c
	if((h4->vihl&0xF0)==IP_VER4) {
		version = 4;
		length = nhgets(h4->length);
		v4tov6(dest, h4->tcpdst);
		v4tov6(source, h4->tcpsrc);
.
1254c
	h4 = (Tcp4hdr*)(bp->rp);
	h6 = (Tcp6hdr*)(bp->rp);
.
1247a
	int version;
.
1240c
	Tcp4hdr *h4;
	Tcp6hdr *h6;
.
1198c
			if(tcb->srtt == 0) {
.
1013,1019c
	if(isv4(src)) {
		Tcp4hdr *h = &tcb->protohdr.tcp4hdr;
		memset(h, 0, sizeof(*h));
		h->proto = IP_TCPPROTO;
		hnputs(h->tcpsport, new->lport);
		hnputs(h->tcpdport, new->rport);
		v6tov4(h->tcpsrc, dst);
		v6tov4(h->tcpdst, src);
	}
	else {
		Tcp6hdr *h = &tcb->protohdr.tcp6hdr;
		memset(h, 0, sizeof(*h));
		h->proto = IP_TCPPROTO;
		hnputs(h->tcpsport, new->lport);
		hnputs(h->tcpdport, new->rport);
		ipmove(h->tcpsrc, dst);
		ipmove(h->tcpdst, src);
	}
.
994d
980,982c
		if(version == 4) {
			tcb->protohdr.tcp4hdr.vihl = IP_VER4;
			hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
			ipoput4(s->p->f, hbp, 0, s->ttl, s->tos);
		}
		else {
			tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
			hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
			ipoput6(s->p->f, hbp, 0, s->ttl, s->tos);
		}
.
972a
		int version = isv4(s->raddr) ? 4 : 6;
.
966d
950,954c
	if(version == 4) {
		hbp = htontcp4(seg, nil, &ph4, nil);
		if(hbp == nil)
			return;
		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
	}
	else {
		hbp = htontcp6(seg, nil, &ph6, nil);
		if(hbp == nil)
			return;
		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
	}
.
920,926c
	if(version == 4) {
		memset(&ph4, 0, sizeof(ph4));
		ph4.vihl = IP_VER4;
		v6tov4(ph4.tcpsrc, dest);
		v6tov4(ph4.tcpdst, source);
		ph4.proto = IP_TCPPROTO;
		hnputs(ph4.tcplen, TCP4_HDRSIZE);
		hnputs(ph4.tcpsport, seg->dest);
		hnputs(ph4.tcpdport, seg->source);
	}
	else {
		memset(&ph6, 0, sizeof(ph6));
		ph6.vcf[0] = IP_VER6;
		ipmove(ph6.tcpsrc, dest);
		ipmove(ph6.tcpdst, source);
		ph6.proto = IP_TCPPROTO;
		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
		hnputs(ph6.tcpsport, seg->dest);
		hnputs(ph6.tcpdport, seg->source);
	}
.
912a
	Tcp4hdr ph4;
	Tcp6hdr ph6;
.
909d
907c
sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, int version)
.
902,905d
874d
864c
	n = hdrlen - TCP4_HDRSIZE;
.
862a
int
ntohtcp4(Tcp *tcph, Block **bpp)
{
	Tcp4hdr *h;
	uchar *optr;
	ushort hdrlen;
	ushort optlen;
	int n;

	*bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
	if(*bpp == nil)
		return -1;

	h = (Tcp4hdr *)((*bpp)->rp);
	tcph->source = nhgets(h->tcpsport);
	tcph->dest = nhgets(h->tcpdport);
	tcph->seq = nhgetl(h->tcpseq);
	tcph->ack = nhgetl(h->tcpack);

	hdrlen = (h->tcpflag[0] & 0xf0)>>2;
	if(hdrlen < TCP4_HDRSIZE) {
		freeblist(*bpp);
		return -1;
	}

	tcph->flags = h->tcpflag[1];
	tcph->wnd = nhgets(h->tcpwin);
	tcph->urg = nhgets(h->tcpurg);
	tcph->mss = 0;
	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);

	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
	if(*bpp == nil)
		return -1;

.
858,861c
	optr = h->tcpopt;
	n = hdrlen - TCP6_HDRSIZE;
	while(n > 0 && *optr != EOLOPT) {
		if(*optr == NOOPOPT) {
			n--;
			optr++;
			continue;
		}
		optlen = optr[1];
		if(optlen < 2 || optlen > n)
			break;
		switch(*optr) {
		case MSSOPT:
			if(optlen == MSS_LENGTH)
				tcph->mss = nhgets(optr+2);
			break;
		}
		n -= optlen;
		optr += optlen;
	}
	return hdrlen;
}
.
854c
	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
.
852c
	tcph->len = nhgets(h->ploadlen) - hdrlen;
.
843c
	if(hdrlen < TCP6_HDRSIZE) {
.
841d
836c
	h = (Tcp6hdr *)((*bpp)->rp);
.
832c
	*bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
.
826c
	Tcp6hdr *h;
.
824c
ntohtcp6(Tcp *tcph, Block **bpp)
.
822a
Block *	/* put an IPv4 TCP header for tcph in front of data (a fresh block if data is nil); nil if padblock/allocb gave nil */
htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
{
	int dlen;
	Tcp4hdr *h;
	ushort csum;
	ushort hdrlen;

	hdrlen = TCP4_HDRSIZE;
	if(tcph->mss)	/* a non-zero mss means the MSS option is sent too */
		hdrlen += MSS_LENGTH;

	if(data) {
		dlen = blocklen(data);	/* payload length, taken before the header is prepended */
		data = padblock(data, hdrlen + TCP4_PKT);
		if(data == nil)
			return nil;
	}
	else {
		dlen = 0;
		data = allocb(hdrlen + TCP4_PKT + 64);	/* the 64 pad is to meet mintu's */
		if(data == nil)
			return nil;
		data->wp += hdrlen + TCP4_PKT;	/* account for the header filled in below */
	}

	/* copy in pseudo ip header plus port numbers */
	h = (Tcp4hdr *)(data->rp);
	memmove(h, ph, TCP4_TCBPHDRSZ);

	/* copy in variable bits */
	hnputs(h->tcplen, hdrlen + dlen);
	hnputl(h->tcpseq, tcph->seq);
	hnputl(h->tcpack, tcph->ack);
	hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);	/* hdrlen/4 in the top 4 bits: (hdrlen/4)<<12 == hdrlen<<10 */
	hnputs(h->tcpwin, tcph->wnd);
	hnputs(h->tcpurg, tcph->urg);

	if(tcph->mss != 0){
		h->tcpopt[0] = MSSOPT;
		h->tcpopt[1] = MSS_LENGTH;
		hnputs(h->tcpmss, tcph->mss);
	}
	if(tcb != nil && tcb->nochecksum){	/* tcb may be nil (sndrst passes nil); then we always checksum */
		h->tcpcksum[0] = h->tcpcksum[1] = 0;
	} else {
		csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE);
		hnputs(h->tcpcksum, csum);
	}

	return data;
}

.
815,818c
	/* move from pseudo header back to normal ip header */
	memset(h->vcf, 0, 4);
	h->vcf[0] = IP_VER6;
	hnputs(h->ploadlen, hdrlen+dlen);
	h->proto = ph->proto;
.
811c
		csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
.
796d
794a
	/* compose pseudo tcp header, do cksum calculation */
	hnputl(h->vcf, hdrlen + dlen);
	h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
	h->ttl = ph->proto;

.
792,793c
	h = (Tcp6hdr *)(data->rp);
	memmove(h, ph, TCP6_TCBPHDRSZ);
.
788c
		data->wp += hdrlen + TCP6_PKT;
.
785c
		data = allocb(hdrlen + TCP6_PKT + 64);	/* the 64 pad is to meet mintu's */
.
779c
		data = padblock(data, hdrlen + TCP6_PKT);
.
773c
	hdrlen = TCP6_HDRSIZE;
.
769c
	Tcp6hdr *h;
.
766c
htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
.
692a
//	if(isv4(s->raddr)) {
	if(memcmp(s->raddr, v4prefix, IPv4off) == 0 &&
		memcmp(s->laddr, v4prefix, IPv4off) == 0) {
		Tcp4hdr* h4 = &tcb->protohdr.tcp4hdr;
		memset(h4, 0, sizeof(*h4));
		h4->proto = IP_TCPPROTO;
		hnputs(h4->tcpsport, s->lport);
		hnputs(h4->tcpdport, s->rport);
		v6tov4(h4->tcpsrc, s->laddr);
		v6tov4(h4->tcpdst, s->raddr);
	} else {
		Tcp6hdr* h6 = &tcb->protohdr.tcp6hdr;
		memset(h6, 0, sizeof(*h6));
		h6->proto = IP_TCPPROTO;
		hnputs(h6->tcpsport, s->lport);
		hnputs(h6->tcpdport, s->rport);
		ipmove(h6->tcpsrc, s->laddr);
		ipmove(h6->tcpdst, s->raddr);
	}

.
685,691d
659,660d
648,652c
	if(ifc != nil) {
		if(version == 4)
			mtu = ifc->maxmtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
		else
			mtu = ifc->maxmtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
	}

	if(mtu < 32) {
		mtu = 1280;
	}

	return mtu;
.
644a
	int version = (isv4(s->raddr) && isv4(s->laddr)) ? 4 : 6;
.
244c
	Tcptimer 	*timers;	/* List of active timers */
.
193c
	union {
		Tcp4hdr	tcp4hdr;
		Tcp6hdr	tcp6hdr;
	} protohdr;		/* prototype header */
.
154,158c
		/* to implement tahoe and reno TCP */
		ulong dupacks;    /* number of duplicate acks rcvd */
		int   recovery;   /* loss recovery flag */
		ulong rxt;              /* right window marker for recovery */
.
151c
		ushort wnd;		/* Tcp send window */
.
132c
	Reseq	*next;
.
126c
	ushort	len;	/* size of data */
.
114a
typedef struct Tcp6hdr Tcp6hdr;	/* v6 ip header fields followed by the tcp header; overlaid on a block's rp by htontcp6 */
struct Tcp6hdr
{
	uchar	vcf[4];		/* IP_VER6 in vcf[0]; htontcp6 parks the tcp length here while checksumming */
	uchar	ploadlen[2];	/* payload length: tcp header plus data */
	uchar	proto;		/* IP_TCPPROTO; zeroed while checksumming */
	uchar	ttl;		/* htontcp6 parks the protocol number here while checksumming */
	uchar	tcpsrc[IPaddrlen];	/* source address */
	uchar	tcpdst[IPaddrlen];	/* destination address */
	uchar	tcpsport[2];	/* source port */
	uchar	tcpdport[2];	/* destination port */
	uchar	tcpseq[4];
	uchar	tcpack[4];
	uchar	tcpflag[2];
	uchar	tcpwin[2];
	uchar	tcpcksum[2];
	uchar	tcpurg[2];
	/* Options segment */
	uchar	tcpopt[2];
	uchar	tcpmss[2];
};


.
89,90c
typedef struct Tcp4hdr Tcp4hdr;
struct Tcp4hdr
.
39a
	DEF_MSS6	= 1280,		/* Default mean segment (min) for v6 */
.
20c
	TcptimerON	= 1,
.
14,18c

	TCP4_IPLEN	= 8,
	TCP4_PHDRSIZE	= 12,
	TCP4_HDRSIZE	= 20,
	TCP4_TCBPHDRSZ	= 40,
	TCP4_PKT	= TCP4_IPLEN+TCP4_PHDRSIZE,

	TCP6_IPLEN	= 0,
	TCP6_PHDRSIZE	= 40,
	TCP6_HDRSIZE	= 20,
	TCP6_TCBPHDRSZ	= 60,
	TCP6_PKT	= TCP6_IPLEN+TCP6_PHDRSIZE,

.
## diffname ip/tcp.c 2002/0601
## diff -e /n/emeliedump/2002/0507/sys/src/9/ip/tcp.c /n/emeliedump/2002/0601/sys/src/9/ip/tcp.c
2082a
			break;
		default:
			panic("tcpoutput2: version %d", version);
		}
.
2081c
			break;
		case V6:
.
2079c
		switch(version){
		case V4:
.
2050a
			break;
		default:
			hbp = nil;	/* to suppress a warning */
			panic("tcpoutput: version %d", version);
.
2043,2044c
			break;
		case V6:
.
2036c
		switch(version){
		case V4:
.
1907a
	version = s->ipversion;
.
1899,1905d
1896,1897c
	uchar version;
.
1557c
		s = tcpincoming(s, &seg, source, dest, version);
.
1494c
		version = V6;
.
1456c
		version = V4;
.
1445c
	uchar version;
.
1207,1215d
1198,1205c
	switch(version){
	case V4:
		h4 = &tcb->protohdr.tcp4hdr;
		memset(h4, 0, sizeof(*h4));
		h4->proto = IP_TCPPROTO;
		hnputs(h4->tcpsport, new->lport);
		hnputs(h4->tcpdport, new->rport);
		v6tov4(h4->tcpsrc, dst);
		v6tov4(h4->tcpdst, src);
		break;
	case V6:
		h6 = &tcb->protohdr.tcp6hdr;
		memset(h6, 0, sizeof(*h6));
		h6->proto = IP_TCPPROTO;
		hnputs(h6->tcpsport, new->lport);
		hnputs(h6->tcpdport, new->rport);
		ipmove(h6->tcpsrc, dst);
		ipmove(h6->tcpdst, src);
		break;
	default:
		panic("tcpincoming: version %d", new->ipversion);
.
1182c
	new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
.
1180a
	Tcp4hdr *h4;
	Tcp6hdr *h6;
.
1175,1176c
static Conv*
tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
.
1167a
			break;
		default:
			panic("tcphangup: version %d", s->ipversion);
.
1163,1164c
			break;
		case V6:
.
1159c
		switch(s->ipversion) {
		case V4:
.
1151d
1132a
		break;
	default:
		panic("sndrst2: version %d", version);
.
1127,1128c
		break;
	case V6:
.
1122c
	switch(version) {
	case V4:
.
1097a
		break;
	default:
		panic("sndrst: version %d", version);
.
1088,1089c
		break;
	case V6:
.
1079c
	switch(version) {
	case V4:
.
1065c
sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version)
.
726,743c
		switch(s->ipversion){
		case V4:
			h4 = &tcb->protohdr.tcp4hdr;
			memset(h4, 0, sizeof(*h4));
			h4->proto = IP_TCPPROTO;
			hnputs(h4->tcpsport, s->lport);
			hnputs(h4->tcpdport, s->rport);
			v6tov4(h4->tcpsrc, s->laddr);
			v6tov4(h4->tcpdst, s->raddr);
			break;
		case V6:
			h6 = &tcb->protohdr.tcp6hdr;
			memset(h6, 0, sizeof(*h6));
			h6->proto = IP_TCPPROTO;
			hnputs(h6->tcpsport, s->lport);
			hnputs(h6->tcpdport, s->rport);
			ipmove(h6->tcpsrc, s->laddr);
			ipmove(h6->tcpdst, s->raddr);
			break;
		default:
			panic("inittcpctl: version %d", s->ipversion);
		}
.
722,724c
	if(mode != TCP_LISTEN){
		if(ipcmp(s->laddr, IPnoaddr) == 0)
			findlocalip(s->p->f, s->laddr, s->raddr);
.
700a
	Tcp4hdr* h4;
	Tcp6hdr* h6;

.
687a
			break;
		default:
			panic("tcpmtu: version %d", version);
		}
.
686c
			break;
		case V6:
.
684c
		switch(version){
		case V4:
.
680a
	version = s->ipversion;
.
679c
	uchar version;
.
## diffname ip/tcp.c 2002/0704
## diff -e /n/emeliedump/2002/0601/sys/src/9/ip/tcp.c /n/emeliedump/2002/0704/sys/src/9/ip/tcp.c
2337,2340d
2335d
2333c
	if(seg->mss != 0 && seg->mss < tcb->mss)
.
2323d
2141c
	seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20);
.
2124a
 *
 *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
 *  that number gets acked by the other end, we shut down the connection.
 *  See the equivalent code in tcpiput().
.
1667a
	if(tcb->state != Syn_received){
		/*
		 *  One DOS attack is to open connections to us and then forget about them,
		 *  thereby tying up a conv at no long term cost to the attacker.
		 *  This is an attempt to defeat these stateless DOS attacks.  See
		 *  corresponding code in tcpsendka().
		 */
		if(seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
			print("stateless hog %lux - %lux - %lux\n", tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
			localclose(s, "stateless hog");
		}
	}

.
697,700d
691,694c
		break;
.
688,689c
		break;
	case V6:
		mtu = DEF_MSS6;
		if(ifc != nil)
.
684,686c
	switch(version){
	default:
	case V4:
		mtu = DEF_MSS;
		if(ifc != nil)
.
682d
## diffname ip/tcp.c 2002/0710
## diff -e /n/emeliedump/2002/0704/sys/src/9/ip/tcp.c /n/emeliedump/2002/0710/sys/src/9/ip/tcp.c
2600c
			if(NOW - tcb->time > 5*60*1000){
.
2594c
			if(NOW - tcb->time > 5000){
.
2347c

	/* the congestion window always starts out as a single segment */
	tcb->snd.wnd = seg->wnd;
.
2344a
	/* our sending max segment size cannot be bigger than what he asked for */
.
2343d
2331a
/*
 *  set up state for a received SYN (or SYN ACK) packet
 */
.
2027c
				seg.mss = tcpmtu(s->p, s->laddr, s->ipversion);
.
2014c
				seg.mss = tcpmtu(s->p, s->laddr, s->ipversion);
.
1750c
				tcb->time = NOW;
.
1672c
			print("stateless hog %lux - %lux - %lux\n", tcb->snd.una-(1<<31), seg.ack,
				tcb->snd.una-(1<<29));
.
1665,1670d
1663a
	/*
	 *  One DOS attack is to open connections to us and then forget about them,
	 *  thereby tying up a conv at no long term cost to the attacker.
	 *  This is an attempt to defeat these stateless DOS attacks.  See
	 *  corresponding code in tcpsendka().
	 */
.
1641c
				tcb->time = NOW;
.
1610,1619d
1583a
		/* if this is a new SYN, put the call into limbo */
		if((seg.flags & SYN) && (seg.flags & ACK) == 0){
			limbo(s, source, dest, &seg, version);
			qunlock(tcp);
			freeblist(bp);
			return;
		}

		/*
		 *  if there's a matching call in limbo, tcpincoming will
		 *  return it in state Syn_received
		 */
.
1581,1582d
1493a
print("cksum is %ux\n", ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN));
.
1360d
1302c
	delta = NOW - tcb->sndsyntime;
.
1244c
	tcpsetstate(new, Established);

.
1220a
	tcb->irs = lp->irs;
	tcb->rcv.nxt = tcb->irs+1;
	tcb->rcv.urg = tcb->rcv.nxt;

	tcb->iss = lp->iss;
	tcb->rttseq = tcb->iss;
	tcb->snd.wl2 = tcb->iss;
	tcb->snd.una = tcb->iss+1;
	tcb->snd.ptr = tcb->iss+1;
	tcb->snd.nxt = tcb->iss+1;
	tcb->flgcnt = 0;
	tcb->flags |= SYNACK;

	/* our sending max segment size cannot be bigger than what he asked for */
	if(lp->mss != 0 && lp->mss < tcb->mss)
		tcb->mss = lp->mss;

	/* the congestion window always starts out as a single segment */
	tcb->snd.wnd = segp->wnd;
	tcb->cwind = tcb->mss;

	/* set initial round trip time */
	tcb->sndsyntime = lp->lastsend;
	tcpsynackrtt(new);

	free(lp);

	/* set up proto header */
.
1204a
	/* unless it's just an ack, it can't be someone coming out of limbo */
	if((segp->flags & SYN) || (segp->flags & ACK) == 0)
		return nil;

	tpriv = s->p->priv;

	/* find a call in limbo */
	lp = nil;
	h = hashipa(src);
	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
		lp = *l;
		if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
			continue;
		if(ipcmp(lp->laddr, dst) != 0)
			continue;
		if(ipcmp(lp->raddr, src) != 0)
			continue;

		/* we're assuming no data with the initial SYN */
		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1)
			lp = nil;
		else{
			tpriv->nlimbo--;
			*l = lp->next;
		}
		break;
	}
	if(lp == nil)
		return nil;

.
1203a
	Limbo *lp, **l;
	int h;
.
1195a
/*
 *  (re)send a SYN ACK for a call in limbo; returns 0, or -1 if no packet could be built
 */
int
sndsynack(Proto *tcp, Limbo *lp)
{
	Block *hbp;
	Tcp4hdr ph4;
	Tcp6hdr ph6;
	Tcp seg;

	/* make pseudo header */
	switch(lp->version) {
	case V4:
		memset(&ph4, 0, sizeof(ph4));
		ph4.vihl = IP_VER4;
		v6tov4(ph4.tcpsrc, lp->laddr);
		v6tov4(ph4.tcpdst, lp->raddr);
		ph4.proto = IP_TCPPROTO;
		hnputs(ph4.tcplen, TCP4_HDRSIZE);
		hnputs(ph4.tcpsport, lp->lport);
		hnputs(ph4.tcpdport, lp->rport);
		break;
	case V6:
		memset(&ph6, 0, sizeof(ph6));
		ph6.vcf[0] = IP_VER6;
		ipmove(ph6.tcpsrc, lp->laddr);
		ipmove(ph6.tcpdst, lp->raddr);
		ph6.proto = IP_TCPPROTO;
		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
		hnputs(ph6.tcpsport, lp->lport);
		hnputs(ph6.tcpdport, lp->rport);
		break;
	default:
		panic("sndsynack: version %d", lp->version);	/* message used to name sndrst */
	}

	seg.seq = lp->iss;
	seg.ack = lp->irs+1;	/* acknowledges the caller's SYN */
	seg.flags = SYN|ACK;
	seg.wnd = 0;
	seg.urg = 0;
	seg.mss = tcpmtu(tcp, lp->laddr, lp->version);

	switch(lp->version) {
	case V4:
		hbp = htontcp4(&seg, nil, &ph4, nil);	/* no data, no tcb */
		if(hbp == nil)
			return -1;
		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
		break;
	case V6:
		hbp = htontcp6(&seg, nil, &ph6, nil);
		if(hbp == nil)
			return -1;
		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS);
		break;
	default:
		panic("sndsynack: version %d", lp->version);
	}
	lp->lastsend = NOW;	/* limborexmit() paces retransmits from this */
	return 0;
}

/*
 *  fold the bytes of an address through the permutation table, one at a time
 */
static int
limbohash(uchar *perm, uchar *addr)
{
	uchar *p, *end;
	uchar sum;

	sum = 0;
	for(p = addr, end = addr + IPaddrlen; p < end; p++)
		sum = perm[(*p + sum) & 0xff];
	return sum;
}

#define hashipa(a) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] )&LHTMASK )	/* limbo bucket: sum of the last two address bytes, masked with LHTMASK */

/*
 *  put a call into limbo and respond with a SYN ACK.  a repeated SYN reuses
 *  its entry; when the table is full the head of the caller's bucket is recycled.
 *  called with proto locked
 */
static void
limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
{
	Limbo *lp, **l;
	Tcppriv *tpriv;
	int h;

	tpriv = s->p->priv;
	h = hashipa(source);

	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
		lp = *l;
		if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version)
			continue;
		if(ipcmp(lp->raddr, source) != 0)
			continue;
		if(ipcmp(lp->laddr, dest) != 0)
			continue;

		/* a repeated SYN: adopt its sequence number and answer it again */
		lp->irs = seg->seq;
		break;
	}
	lp = *l;	/* the matching call, or nil with l left at the end of the bucket */
	if(lp == nil){
		if(tpriv->nlimbo >= Maxlimbo && (lp = tpriv->lht[h]) != nil){
			if((tpriv->lht[h] = lp->next) == nil)	/* sole entry: l pointed at lp->next, which would self-link it */
				l = &tpriv->lht[h];
			memset(lp, 0, sizeof(*lp));	/* forget the old call, including next and rexmits */
		} else {
			lp = malloc(sizeof(*lp));
			if(lp == nil)
				return;
			tpriv->nlimbo++;
		}
		*l = lp;
		lp->version = version;
		ipmove(lp->laddr, dest);
		ipmove(lp->raddr, source);
		lp->lport = seg->dest;
		lp->rport = seg->source;
		lp->mss = seg->mss;
		lp->irs = seg->seq;
		lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);	/* random initial send sequence */
	}

	if(sndsynack(s->p, lp) < 0){
		*l = lp->next;	/* couldn't answer: take the call back out of limbo */
		tpriv->nlimbo--;
		free(lp);
	}
}

/*
 *  resend SYN ACK's once every 250 ms; a call is freed once it runs out of retransmits
 */
static void
limborexmit(Proto *tcp)
{
	Tcppriv *tpriv;
	Limbo **l, *lp;
	int h;
	int seen;
	ulong now;

	tpriv = tcp->priv;

	if(!canqlock(tcp))	/* couldn't take the proto lock: leave it for the next pass */
		return;
	seen = 0;	/* entries visited so far; the scan stops once it reaches nlimbo */
	now = NOW;
	for(h = 0; h < nelem(tpriv->lht) && seen < tpriv->nlimbo; h++){
		for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){
			lp = *l;
			seen++;
			if(now - lp->lastsend < 250)
				goto advance;	/* not due yet; a bare continue would revisit this same entry */

			/* time it out after 1 second */
			if(++(lp->rexmits) > 4){
				tpriv->nlimbo--;
				*l = lp->next;	/* l now names the successor, so don't advance */
				free(lp);
				continue;
			}

			/* if we're being attacked, don't bother resending SYN ACK's */
			if(tpriv->nlimbo > 100)
				goto advance;

			if(sndsynack(tcp, lp) < 0){
				tpriv->nlimbo--;
				*l = lp->next;
				free(lp);
				continue;
			}
advance:
			l = &lp->next;
		}
	}
	qunlock(tcp);
}

/*
 *  lookup call in limbo.  if found, create a new conversation
 *
 *  called with proto locked
 */
.
1071c
	tcb->sndsyntime = NOW;
.
1059c
/*
 *  For outgoing calls, generate an initial sequence
 *  number and put a SYN on the send queue
 */
.
756c
	tcb->mss = tcb->cwind = tcpmtu(s->p, s->laddr, s->ipversion);
.
681,682c
	ifc = findipifc(tcp->f, addr, 0);
.
679d
675c
tcpmtu(Proto *tcp, uchar *addr, int version)
.
601a

		limborexmit(tcp);
.
460,464d
308a
static void limborexmit(Proto*);
static void limbo(Conv*, uchar*, uchar*, Tcp*, int);

.
307a
void	tcpsynackrtt(Conv*);
.
289a
	int	ackprocstarted;

	ulong	stats[Nstats];
.
288d
285c
	/* calls in limbo waiting for an ACK to our SYN ACK */
	int	nlimbo;
	Limbo	*lht[NLHT];
.
278,279c
	/* List of active timers */
	QLock 	tl;
	Tcptimer *timers;

.
229a
/*
 *  New calls are put in limbo rather than having a conversation structure
 *  allocated.  Thus, a SYN attack results in lots of limbo'd calls but not
 *  any real Conv structures mucking things up.  Calls in limbo rexmit their
 *  SYN ACK every 250 ms up to 4 times, i.e., they disappear after 1 second.
 *
 *  In particular they aren't on a listener's queue so that they don't figure
 *  in the input queue limit.
 *
 *  If 1/2 of a T3 was attacking SYN packets, we'd have a permanent queue
 *  of 70000 limbo'd calls.  Not great for a linear list, so the calls are
 *  kept in a small hash table (lht), one list per hashipa() bucket.
 */
typedef struct Limbo Limbo;
struct Limbo
{
	Limbo	*next;		/* next call in the same hash bucket */

	uchar	laddr[IPaddrlen];	/* our address: the destination of the SYN */
	uchar	raddr[IPaddrlen];	/* caller's address: the source of the SYN */
	ushort	lport;		/* our port */
	ushort	rport;		/* caller's port */
	ulong	irs;		/* initial received sequence */
	ulong	iss;		/* initial sent sequence */
	ushort	mss;		/* mss from the other end */
	ulong	lastsend;	/* last time we sent a synack */
	uchar	version;	/* v4 or v6 */
	uchar	rexmits;	/* number of retransmissions */
};

.
222c
	int	flgcnt;			/* number of flags in the sequence (FIN,SEQ) */
.
205d
160a
/*
 *  this header is malloc'd to thread together fragments
 *  waiting to be coalesced
 */
.
146c
/*
 *  this represents the control info
 *  for a single packet.  It is derived from
 *  a packet in ntohtcp{4,6}() and stuck into
 *  a packet in htontcp{4,6}().
 */
.
97a
/*
 *  v4 and v6 pseudo headers used for
 *  checksuming tcp
 */
.
74c
	Time_wait,

	Maxlimbo	= 1000,		/* maximum procs waiting for response to SYN ACK */
	NLHT		= 256,		/* hash table size, must be a power of 2 */
	LHTMASK		= NLHT-1
.
63a

.
## diffname ip/tcp.c 2002/0711
## diff -e /n/emeliedump/2002/0710/sys/src/9/ip/tcp.c /n/emeliedump/2002/0711/sys/src/9/ip/tcp.c
2955c
	tcp->kick = nil;
.
1886a
			limborst(s, &seg, source, dest, version);
.
1803d
1464c
	h = hashipa(src, segp->source);
.
1440a
 *  lookup call in limbo.  if found, throw it out.
 *
 *  called with proto locked
 */
static void
limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
{
	Tcppriv *priv;
	Limbo **link, *call;

	/* walk the caller's bucket looking for the call this RST names */
	priv = s->p->priv;
	link = &priv->lht[hashipa(src, segp->source)];
	while((call = *link) != nil){
		if(call->lport == segp->dest && call->rport == segp->source
		&& call->version == version
		&& ipcmp(call->laddr, dst) == 0
		&& ipcmp(call->raddr, src) == 0)
			break;
		link = &call->next;
	}
	if(call == nil)
		return;

	/* RST can only follow the SYN: anything else leaves the call alone */
	if(segp->seq != call->irs+1)
		return;

	/* unlink and discard */
	priv->nlimbo--;
	*link = call->next;
	free(call);
}

/*
.
1345c
	h = hashipa(source, seg->source);
.
1324,1331d
1315,1322c
#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + (p) )&LHTMASK )	/* limbo bucket: last two address bytes plus port, masked with LHTMASK */
.
577,578c
	c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
	c->wq = qopen(2*QMAX, Qkick, tcpqkick, c);
.
536a
tcpqkick(void *x)
{
	tcpkick((Conv*)x);
}

void
.
## diffname ip/tcp.c 2002/0712
## diff -e /n/emeliedump/2002/0711/sys/src/9/ip/tcp.c /n/emeliedump/2002/0712/sys/src/9/ip/tcp.c
2981d
2962,2967c
	/* bounded twixt 1/2 and 10 seconds */
	if(x < 500/MSPTICK)
		x = 500/MSPTICK;
	else if(x > (10000/MSPTICK))
.
2494d
2492a
	tcb->rcv.lastacked = tcb->rcv.nxt;
.
2370a
		tcb->rcv.lastacked = tcb->rcv.nxt;
.
2368d
2165c
				if(tcb->rcv.nxt - tcb->rcv.lastacked >= 2*tcb->mss)
.
2008,2009c
			print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
				source, seg.source, dest, seg.dest, seg.flags,
				tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
.
2006c
	if(tcb->state != Syn_received && (seg.flags & RST) == 0){
.
1784a
	else
		tcphalt(tpriv, &tcb->timer);
.
1782d
1663c
	/* added by Dong Lin for fast retransmission */
.
1299a
	seg.wnd = QMAX;
.
1297d
1236d
1231a
		tcb->rcv.lastacked = tcb->rcv.nxt;
.
584c
	c->wq = qopen(2*QMAX, Qkick, tcpkick, c);
.
537,542d
505a
	Conv *s = x;
.
504c
tcpkick(void *x)
.
223d
212a
		ulong	lastacked;	/* Last ack sent */
.
205,207c
		ulong	dupacks;	/* number of duplicate acks rcvd */
		int	recovery;	/* loss recovery flag */
		ulong	rxt;		/* right window marker for recovery */
.
201c
		ushort	wnd;		/* Tcp send window */
.
## diffname ip/tcp.c 2002/0713
## diff -e /n/emeliedump/2002/0712/sys/src/9/ip/tcp.c /n/emeliedump/2002/0713/sys/src/9/ip/tcp.c
2645d
2643a
		netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
.
2461,2462c
		if((msgs%4) == 1){
			qunlock(s);
			sched();
			qlock(s);
		}
.
2446,2450c

.
2306c
//				tcb->snd.ptr = tcb->snd.una;
.
1675c
			netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
.
1665c
		netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
.
579c
	c->wq = qopen(4*QMAX, Qkick, tcpkick, c);
.
## diffname ip/tcp.c 2002/0714
## diff -e /n/emeliedump/2002/0713/sys/src/9/ip/tcp.c /n/emeliedump/2002/0714/sys/src/9/ip/tcp.c
579c
	c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
.
## diffname ip/tcp.c 2002/0720
## diff -e /n/emeliedump/2002/0714/sys/src/9/ip/tcp.c /n/emeliedump/2002/0720/sys/src/9/ip/tcp.c
1486,1487c
	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
.
1484d
1394c
	for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
.
## diffname ip/tcp.c 2002/0801
## diff -e /n/emeliedump/2002/0720/sys/src/9/ip/tcp.c /n/emeliedump/2002/0801/sys/src/9/ip/tcp.c
2485c
	if(tcpporthogdefense)
		seg.seq = tcb->snd.una-(1<<30)-nrand(1<<20);
	else
		seg.seq = tcb->snd.una-1;
.
2465,2468d
2001c
		if(tcpporthogdefense
		&& seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
.
1543c
	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
.
1402c
			if(++(lp->rexmits) > 5){
.
1398c
			if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER)
.
1377c
 *  resend SYN ACK's once every SYNACK_RXTIMER ms.
.
344a
/* 
 *  Setting tcpporthogdefense to non-zero enables Dong Lin's
 *  solution to hijacked systems staking out ports as a form
 *  of DoS attack.
 *
 *  To avoid stateless Conv hogs, we pick a sequence number at random.  If
 *  that number gets acked by the other end, we shut down the connection.
 *  Look for tcpporthogdefense in the code.
 */
int tcpporthogdefense = 0;

.
251c
 *  SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
.
52a
	SYNACK_RXTIMER	= 250,		/* ms between SYNACK retransmits */
.
## diffname ip/tcp.c 2002/0806
## diff -e /n/emeliedump/2002/0801/sys/src/9/ip/tcp.c /n/emeliedump/2002/0806/sys/src/9/ip/tcp.c
2188c
				sndrst(tcp, source, dest, length, &seg, version,
					"send to Finwait2");
.
2080c
				sndrst(tcp, source, dest, length, &seg, version,
					"bad seq in Syn_received");
.
2045c
		sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
.
1965c
				sndrst(tcp, source, dest, length, &seg, version,
					 "bad seq in Syn_sent");
.
1960c
		sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
.
1909c
		sndrst(tcp, source, dest, length, &seg, version, "no conversation");
.
1906a
		netlog(f, Logtcp, "iphtlook failed");
.
1508c
		} else {
.
1506c
		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
.
1497a
		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
			src, segp->source, lp->raddr, lp->rport,
			dst, segp->dest, lp->laddr, lp->lport,
			version, lp->version
 		);

.
1150a
	netlog(tcp->f, Logtcp, "sndrst: %s", reason);

.
1143c
sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason)
.
## diffname ip/tcp.c 2002/0821
## diff -e /n/emeliedump/2002/0806/sys/src/9/ip/tcp.c /n/emeliedump/2002/0821/sys/src/9/ip/tcp.c
2743a
			if(rp->next != nil)
				tpriv->stats[OutOfOrder]++;
.
2734a
		if(rp->next != nil)
			tpriv->stats[OutOfOrder]++;
.
2714c
addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
.
2067,2068c
		if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
.
357c
int	addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
.
## diffname ip/tcp.c 2003/0125
## diff -e /n/emeliedump/2002/0821/sys/src/9/ip/tcp.c /n/emeliedump/2003/0125/sys/src/9/ip/tcp.c
1356c
		/* each new SYN restarts the retransmits */
.
## diffname ip/tcp.c 2003/0130
## diff -e /n/emeliedump/2003/0125/sys/src/9/ip/tcp.c /n/emeliedump/2003/0130/sys/src/9/ip/tcp.c
2360a
		tcb->rcv.una = 0;
.
2172a

					/* force an ack every 2 data messages */
					if(++(tcb->rcv.una) >= 2)
						tcb->flags |= FORCE;
.
215a
		int	una;		/* unacked data segs */
.
31c
	TCP_ACK		= 50,		/* Timed ack sequence in ms */
.
## diffname ip/tcp.c 2003/0206
## diff -e /n/emeliedump/2003/0130/sys/src/9/ip/tcp.c /n/emeliedump/2003/0206/sys/src/9/ip/tcp.c
2174,2177d
## diffname ip/tcp.c 2003/0207
## diff -e /n/emeliedump/2003/0206/sys/src/9/ip/tcp.c /n/emeliedump/2003/0207/sys/src/9/ip/tcp.c
2343c
			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
.
1964a
	/* scale up window */
	seg.wnd <<= tcb->rcv.scale;

.
1755c
			expand = tcb->snd.wnd - tcb->cwind;
.
1667c
	ulong acked;
	ulong expand;
.
992c
	hnputs(h->tcpwin, tcph->wnd>>tcb->snd.scale);
.
933c
	hnputs(h->tcpwin, tcph->wnd>>tcb->snd.scale);
.
852a
	tcb->rcv.scale = 0;
	tcb->snd.scale = 0;
.
829c
tcpstart(Conv *s, int mode, ulong window)
.
557c
	w = tcb->window - qlen(s->rq);
.
443c
		"%s srtt %d mdev %d cwin %lud swin %lud rwin %lud timer.start %d timer.count %d rerecv %d\n",
.
365c
void	tcpstart(Conv*, int, ulong);
.
225c
	ulong	window;			/* Recevive window */
.
219c
	ulong	cwind;			/* Congestion window */
.
216a
		int	scale;		/* how much to left shift window in rcved packets */
.
212c
		ulong	wnd;		/* Receive window incoming */
.
208a
		int	scale;		/* how much to right shift window in xmitted packets */
.
202c
		ulong	wnd;		/* Tcp send window */
.
170c
	ulong	wnd;
.
## diffname ip/tcp.c 2003/0208
## diff -e /n/emeliedump/2003/0207/sys/src/9/ip/tcp.c /n/emeliedump/2003/0208/sys/src/9/ip/tcp.c
2351c
			netlog(s->p->f, Logtcp, "throttled snd.wnd 0x%ux cwind 0x%ux\n",
.
1970,1972d
1760c
			expand = 65535 - tcb->cwind;
.
1671,1672c
	ushort acked, expand;
.
996c
	hnputs(h->tcpwin, tcph->wnd);
.
937c
	hnputs(h->tcpwin, tcph->wnd);
.
855,856d
831c
tcpstart(Conv *s, int mode, ushort window)
.
559c
	w = QMAX - qlen(s->rq);
.
445c
		"%s srtt %d mdev %d cwin %d swin %d rwin %d timer.start %d timer.count %d rerecv %d\n",
.
367c
void	tcpstart(Conv*, int, ushort);
.
227c
	ushort	window;			/* Recevive window */
.
221c
	ushort	cwind;			/* Congestion window */
.
218d
213c
		ushort	wnd;		/* Receive window incoming */
.
209d
202c
		ushort	wnd;		/* Tcp send window */
.
170c
	ushort	wnd;
.
## diffname ip/tcp.c 2003/0209
## diff -e /n/emeliedump/2003/0208/sys/src/9/ip/tcp.c /n/emeliedump/2003/0209/sys/src/9/ip/tcp.c
3020a
}

void
tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
{
	if(rcvscale){
		tcb->rcv.scale = rcvscale & 0xff;
		tcb->snd.scale = sndscale & 0xff;
		tcb->window = QMAX<<tcb->snd.scale;
		qsetlimit(s->rq, tcb->window);
	} else {
		tcb->rcv.scale = 0;
		tcb->snd.scale = 0;
		tcb->window = QMAX;
		qsetlimit(s->rq, tcb->window);
	}
.
2508a
	seg.ws = 0;
.
2387c
				seg.mss = tcb->mss;
				seg.ws = tcb->scale;
.
2374c
				seg.mss = tcb->mss;
				seg.ws = tcb->scale;
.
2366a
		seg.ws = 0;
.
2343c
			netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
.
1997c
				tcpsetstate(s, Syn_received);	/* DLP - shouldn't this be a reset? */
.
1993a
				tcpsetscale(s, tcb, seg.ws, tcb->scale);
.
1964a
	/* fix up window */
	seg.wnd <<= tcb->rcv.scale;

.
1755c
			expand = tcb->snd.wnd - tcb->cwind;
.
1667c
	ulong acked;
	ulong expand;
.
1560a
	/* window scaling */
	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);

.
1476a
 *  come here when we finally get an ACK to our SYN-ACK.
.
1379a
		lp->rcvscale = seg->ws;
.
1310a
	/* if the other side set scale, we should too */
	if(lp->rcvscale){
		seg.ws = scale;
		lp->sndscale = scale;
	} else {
		seg.ws = 0;
		lp->sndscale = 0;
	}

.
1308c
	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
.
1276a
	int scale;
.
1246a
		seg.ws = 0;
.
1206a
	seg->ws = 0;
.
1140a

	/* set desired mss and scale */
	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
.
1130c
tcpsndsyn(Conv *s, Tcpctl *tcb)
.
1117a
		case WSOPT:
			if(optlen == WS_LENGTH)
				tcph->ws = HaveWS | *(optr+2);
			break;
.
1095a
	tcph->ws = 0;
.
1086c
	hdrlen = h->tcpflag[0]>>2;
.
1059a
		case WSOPT:
			if(optlen == WS_LENGTH)
				tcph->ws = HaveWS | *(optr+2);
			break;
.
1037a
	tcph->ws = 0;
.
1028c
	hdrlen = h->tcpflag[0]>>2;
.
999a
	if(tcph->ws != 0){
		*opt++ = WSOPT;
		*opt++ = WS_LENGTH;
		*opt = tcph->ws;
	}

.
996,998c
		*opt++ = MSSOPT;
		*opt++ = MSS_LENGTH;
		hnputs(opt, tcph->mss);
		opt += 2;
.
994a
	opt = h->tcpopt;
.
992c
	hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
.
967a
	if(tcph->ws)
		hdrlen += WS_LENGTH;
.
963a
	uchar *opt;
.
940a
	if(tcph->ws != 0){
		*opt++ = WSOPT;
		*opt++ = WS_LENGTH;
		*opt = tcph->ws;
	}

.
937,939c
		*opt++ = MSSOPT;
		*opt++ = MSS_LENGTH;
		hnputs(opt, tcph->mss);
		opt += 2;
.
935a
	opt = h->tcpopt;
.
933c
	hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
.
904a
	if(tcph->ws)
		hdrlen += WS_LENGTH;
.
900a
	uchar *opt;
.
865c
		tcpsndsyn(s, tcb);
.
851,852d
849d
829c
tcpstart(Conv *s, int mode)
.
822c
	tcb->mss = tcb->cwind = mss;

	/* default is no window scaling */
	tcb->window = QMAX;
	tcb->rcv.wnd = QMAX;
	tcb->rcv.scale = 0;
	tcb->snd.scale = 0;
	qsetlimit(s->rq, QMAX);
.
815a
			mss = DEF_MSS6;
.
792a
	mss = DEF_MSS;

.
771a
	int mss;
.
761a
	if(ifc != nil){
		if(ifc->mbps > 100)
			*scale = HaveWS | 3;
		else if(ifc->mbps > 10)
			*scale = HaveWS | 1;
		else
			*scale = HaveWS | 0;
	} else
		*scale = HaveWS | 0;
.
759c
			mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
.
754c
			mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
.
743c
tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
.
557c
	w = tcb->window - qlen(s->rq);
.
466c
	tcpstart(c, TCP_LISTEN);
.
445c
		s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
.
443c
		"%s srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d\n",
.
430c
	tcpstart(c, TCP_CONNECT);
.
374a
void	tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
.
367c
void	tcpsndsyn(Conv*, Tcpctl*);
.
365c
void	tcpstart(Conv*, int);
.
273a
	ushort	rcvscale;	/* how much to scale rcvd windows */
	ushort	sndscale;	/* how much to scale sent windows */
.
225c
	ulong	window;			/* Recevive window */
.
219c
	int	sawwsopt;		/* true if we saw a wsopt on the incoming SYN */
	ulong	cwind;			/* Congestion window */
	int	scale;			/* desired snd.scale */
.
216a
		int	scale;		/* how much to left shift window in rcved packets */
.
212c
		ulong	wnd;		/* Receive window incoming */
.
204a
		int	scale;		/* how much to right shift window in xmitted packets */
.
202c
		ulong	wnd;		/* Tcp send window */
.
172c
	ushort	mss;	/* max segment size option (if not zero) */
.
170c
	ushort	ws;	/* window scale option (if not zero) */
	ulong	wnd;
.
152,153c
	uchar	tcpopt[1];
.
130,131c
	uchar	tcpopt[1];
.
80c
	LHTMASK		= NLHT-1,

	HaveWS		= 1<<8,
.
44a
	WSOPT		= 3,
	WS_LENGTH	= 3,		/* Bits to scale window size by */
.
42d
31a
	MAXBACKMS	= 30000,	/* longest backoff time (ms) before hangup */
.
## diffname ip/tcp.c 2003/0210
## diff -e /n/emeliedump/2003/0209/sys/src/9/ip/tcp.c /n/emeliedump/2003/0210/sys/src/9/ip/tcp.c
1174c
			if(optlen == WS_LENGTH && *(optr+2) <= 14)
.
1140c
	hdrlen = (h->tcpflag[0]>>2) & ~3;
.
1111c
			if(optlen == WS_LENGTH && *(optr+2) <= 14)
.
1077c
	hdrlen = (h->tcpflag[0]>>2) & ~3;
.
1043,1047d
1036,1041c
	if(tcph->flags & SYN){
		opt = h->tcpopt;
		if(tcph->mss != 0){
			*opt++ = MSSOPT;
			*opt++ = MSS_LENGTH;
			hnputs(opt, tcph->mss);
			opt += 2;
		}
		if(tcph->ws != 0){
			*opt++ = WSOPT;
			*opt++ = WS_LENGTH;
			*opt++ = tcph->ws;
		}
		while(optpad-- > 0)
			*opt++ = NOOPOPT;
.
1005,1008c
	if(tcph->flags & SYN){
		if(tcph->mss)
			hdrlen += MSS_LENGTH;
		if(tcph->ws)
			hdrlen += WS_LENGTH;
		optpad = hdrlen & 3;
		if(optpad)
			optpad = 4 - optpad;
		hdrlen += optpad;
	}
.
1001c
	ushort hdrlen, optpad = 0;
.
973,977d
966,971c
	if(tcph->flags & SYN){
		opt = h->tcpopt;
		if(tcph->mss != 0){
			*opt++ = MSSOPT;
			*opt++ = MSS_LENGTH;
			hnputs(opt, tcph->mss);
			opt += 2;
		}
		if(tcph->ws != 0){
			*opt++ = WSOPT;
			*opt++ = WS_LENGTH;
			*opt++ = tcph->ws;
		}
		while(optpad-- > 0)
			*opt++ = NOOPOPT;
.
931,934c
	if(tcph->flags & SYN){
		if(tcph->mss)
			hdrlen += MSS_LENGTH;
		if(tcph->ws)
			hdrlen += WS_LENGTH;
		optpad = hdrlen & 3;
		if(optpad)
			optpad = 4 - optpad;
		hdrlen += optpad;
	}
.
927c
	ushort hdrlen, optpad = 0;
.
455c
		s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
.
## diffname ip/tcp.c 2003/0211
## diff -e /n/emeliedump/2003/0210/sys/src/9/ip/tcp.c /n/emeliedump/2003/0211/sys/src/9/ip/tcp.c
2622c
	tcb->rcv.una = 0;
.
2500d
2472d
2465a
		tcphalt(tpriv, &tcb->acktimer);
.
2460,2461d
2287,2293d
2277a

					/* 
					 *  Force an ack every 2 data messages.  This is
					 *  a hack for rob to make his home system run
					 *  faster.
					 *
					 *  this also keeps the standard TCP congestion
					 *  control working since it needs an ack every
					 *  2 max segs worth.  This is not quite that,
					 *  but under a real stream is equivalent since
					 *  every packet has a max seg in it.
					 */
					if(++(tcb->rcv.una) >= 2)
						tcb->flags |= FORCE;
.
1325c
		tcb->rcv.una = 0;
.
365c
int tcpporthogdefense = 1;
.
218d
## diffname ip/tcp.c 2003/0213
## diff -e /n/emeliedump/2003/0211/sys/src/9/ip/tcp.c /n/emeliedump/2003/0213/sys/src/9/ip/tcp.c
3025a
	if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0)
		return tcpporthogdefensectl(f[1]);
.
3015a
/* set tcpporthogdefense from "on" or "off"; returns nil, or an error string for anything else */
static char*
tcpporthogdefensectl(char *val)
{
	int enable = strcmp(val, "on") == 0;

	if(!enable && strcmp(val, "off") != 0)
		return "unknown value for tcpporthogdefense";
	tcpporthogdefense = enable;
	return nil;
}

.
364c
int tcpporthogdefense = 0;
.
## diffname ip/tcp.c 2003/0214
## diff -e /n/emeliedump/2003/0213/sys/src/9/ip/tcp.c /n/emeliedump/2003/0214/sys/src/9/ip/tcp.c
2598a
out:
	poperror();
.
2553c
				goto out;
.
2545c
				goto out;
.
2418c
			goto out;
.
2410a
	if(waserror()){
		localclose(s, up->errstr);
		return;
	}

.
1322,1342c
		if(!waserror()){
			seg.flags = RST | ACK;
			seg.ack = tcb->rcv.nxt;
			tcb->rcv.una = 0;
			seg.seq = tcb->snd.ptr;
			seg.wnd = 0;
			seg.urg = 0;
			seg.mss = 0;
			seg.ws = 0;
			switch(s->ipversion) {
			case V4:
				tcb->protohdr.tcp4hdr.vihl = IP_VER4;
				hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
				ipoput4(s->p->f, hbp, 0, s->ttl, s->tos);
				break;
			case V6:
				tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
				hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
				ipoput6(s->p->f, hbp, 0, s->ttl, s->tos);
				break;
			default:
				panic("tcphangup: version %d", s->ipversion);
			}
			poperror();
.
## diffname ip/tcp.c 2003/0218
## diff -e /n/emeliedump/2003/0214/sys/src/9/ip/tcp.c /n/emeliedump/2003/0218/sys/src/9/ip/tcp.c
1944c
		if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) && 
.
## diffname ip/tcp.c 2003/0220
## diff -e /n/emeliedump/2003/0218/sys/src/9/ip/tcp.c /n/emeliedump/2003/0220/sys/src/9/ip/tcp.c
2748c
	 *  We should be halving the slow start threshold (down to one
.
2607,2608d
2596c
			if(ipoput6(f, hbp, 0, s->ttl, s->tos) < 0){
				/* a negative return means no route */
				localclose(s, "no route");
			}
.
2593c
			if(ipoput4(f, hbp, 0, s->ttl, s->tos) < 0){
				/* a negative return means no route */
				localclose(s, "no route");
			}
.
2561c
				return;
.
2553c
				return;
.
2426c
			return;
.
2414,2418d
## diffname ip/tcp.c 2003/0308
## diff -e /n/emeliedump/2003/0220/sys/src/9/ip/tcp.c /n/emeliedump/2003/0308/sys/src/9/ip/tcp.c
2661c
		ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
.
2651c
		ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
.
2594c
			if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){
.
2588c
			if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){
.
1418c
		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
.
1412c
		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
.
1340c
				ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
.
1335c
				ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
.
1300c
		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
.
1294c
		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
.
## diffname ip/tcp.c 2003/0407
## diff -e /n/emeliedump/2003/0308/sys/src/9/ip/tcp.c /n/emeliedump/2003/0407/sys/src/9/ip/tcp.c
648c
		tsleep(&up->sleep, return0, 0, MSPTICK);
.
339,340d

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].