/*
* intel pci-express 10Gb ethernet driver for 8259[89]
* copyright © 2007, coraid, inc.
* depessimised and made to work on the 82599 at bell labs, 2013.
*
* 82599 requests should ideally not cross a 4KB (page) boundary.
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "../port/netif.h"
#include "etherif.h"
#define NEXTPOW2(x, m) (((x)+1) & (m))
enum {
Rbsz = ETHERMAXTU+32, /* +slop is for vlan headers, crcs, etc. */
Descalign= 128, /* 599 manual needs 128-byte alignment */
/* tunable parameters */
Goslow = 0, /* flag: go slow by throttling intrs, etc. */
/* were 256, 1024 & 64, but 30, 47 and 1 are ample. */
Nrd = 64, /* multiple of 8, power of 2 for NEXTPOW2 */
Nrb = 128,
Ntd = 32, /* multiple of 8, power of 2 for NEXTPOW2 */
};
enum {
/* general */
Ctrl = 0x00000/4, /* Device Control */
Status = 0x00008/4, /* Device Status */
Ctrlext = 0x00018/4, /* Extended Device Control */
Esdp = 0x00020/4, /* extended sdp control */
Esodp = 0x00028/4, /* extended od sdp control (i2cctl on 599) */
Ledctl = 0x00200/4, /* led control */
Tcptimer = 0x0004c/4, /* tcp timer */
Ecc = 0x110b0/4, /* errata ecc control magic (pcie intr cause on 599) */
/* nvm */
Eec = 0x10010/4, /* eeprom/flash control */
Eerd = 0x10014/4, /* eeprom read */
Fla = 0x1001c/4, /* flash access */
Flop = 0x1013c/4, /* flash opcode */
Grc = 0x10200/4, /* general rx control */
/* interrupt */
Icr = 0x00800/4, /* interrupt cause read */
Ics = 0x00808/4, /* " set */
Ims = 0x00880/4, /* " mask read/set (actually enable) */
Imc = 0x00888/4, /* " mask clear */
Iac = 0x00810/4, /* " auto clear */
Iam = 0x00890/4, /* " auto mask enable */
Itr = 0x00820/4, /* " throttling rate regs (0-19) */
Ivar = 0x00900/4, /* " vector allocation regs. */
/* msi interrupt */
Msixt = 0x0000/4, /* msix table (bar3) */
Msipba = 0x2000/4, /* msix pending bit array (bar3) */
Pbacl = 0x11068/4, /* pba clear */
Gpie = 0x00898/4, /* general purpose int enable */
/* flow control */
Pfctop = 0x03008/4, /* priority flow ctl type opcode */
Fcttv = 0x03200/4, /* " transmit timer value (0-3) */
Fcrtl = 0x03220/4, /* " rx threshold low (0-7) +8n */
Fcrth = 0x03260/4, /* " rx threshold high (0-7) +8n */
Rcrtv = 0x032a0/4, /* " refresh value threshold */
Tfcs = 0x0ce00/4, /* " tx status */
/* rx dma */
Rbal = 0x01000/4, /* rx desc base low (0-63) +0x40n */
Rbah = 0x01004/4, /* " high */
Rdlen = 0x01008/4, /* " length */
Rdh = 0x01010/4, /* " head */
Rdt = 0x01018/4, /* " tail */
Rxdctl = 0x01028/4, /* " control */
Srrctl = 0x02100/4, /* split & replication rx ctl. array */
Dcarxctl = 0x02200/4, /* rx dca control */
Rdrxctl = 0x02f00/4, /* rx dma control */
Rxpbsize = 0x03c00/4, /* rx packet buffer size */
Rxctl = 0x03000/4, /* rx control */
Dropen = 0x03d04/4, /* drop enable control (598 only) */
/* rx */
Rxcsum = 0x05000/4, /* rx checksum control */
Rfctl = 0x05008/4, /* rx filter control */
Mta = 0x05200/4, /* multicast table array (0-127) */
Ral98 = 0x05400/4, /* rx address low (598) */
Rah98 = 0x05404/4,
Ral99 = 0x0a200/4, /* rx address low array (599) */
Rah99 = 0x0a204/4,
Psrtype = 0x05480/4, /* packet split rx type. */
Vfta = 0x0a000/4, /* vlan filter table array. */
Fctrl = 0x05080/4, /* filter control */
Vlnctrl = 0x05088/4, /* vlan control */
Msctctrl = 0x05090/4, /* multicast control */
Mrqc = 0x05818/4, /* multiple rx queues cmd */
Vmdctl = 0x0581c/4, /* vmdq control (598 only) */
Imir = 0x05a80/4, /* immediate irq rx (0-7) (598 only) */
Imirext = 0x05aa0/4, /* immediate irq rx ext (598 only) */
Imirvp = 0x05ac0/4, /* immediate irq vlan priority (598 only) */
Reta = 0x05c00/4, /* redirection table */
Rssrk = 0x05c80/4, /* rss random key */
/* tx */
Tdbal = 0x06000/4, /* tx desc base low +0x40n array */
Tdbah = 0x06004/4, /* " high */
Tdlen = 0x06008/4, /* " len */
Tdh = 0x06010/4, /* " head */
Tdt = 0x06018/4, /* " tail */
Txdctl = 0x06028/4, /* " control */
Tdwbal = 0x06038/4, /* " write-back address low */
Tdwbah = 0x0603c/4,
Dtxctl98 = 0x07e00/4, /* tx dma control (598 only) */
Dtxctl99 = 0x04a80/4, /* tx dma control (599 only) */
Tdcatxctrl98 = 0x07200/4, /* tx dca register (0-15) (598 only) */
Tdcatxctrl99 = 0x0600c/4, /* tx dca register (0-127) (599 only) */
Tipg = 0x0cb00/4, /* tx inter-packet gap (598 only) */
Txpbsize = 0x0cc00/4, /* tx packet-buffer size (0-15) */
/* mac */
Hlreg0 = 0x04240/4, /* highlander control reg 0 */
Hlreg1 = 0x04244/4, /* highlander control reg 1 (ro) */
Msca = 0x0425c/4, /* mdi signal cmd & addr */
Msrwd = 0x04260/4, /* mdi single rw data */
Mhadd = 0x04268/4, /* mac addr high & max frame */
Pcss1 = 0x04288/4, /* xgxs status 1 */
Pcss2 = 0x0428c/4,
Xpcss = 0x04290/4, /* 10gb-x pcs status */
Serdesc = 0x04298/4, /* serdes control */
Macs = 0x0429c/4, /* fifo control & report */
Autoc = 0x042a0/4, /* autodetect control & status */
Links = 0x042a4/4, /* link status */
Links2 = 0x04324/4, /* 599 only */
Autoc2 = 0x042a8/4,
};
enum {
Factive = 1<<0,
Enable = 1<<31,
/* Ctrl */
Rst = 1<<26, /* full nic reset */
/* Txdctl */
Ten = 1<<25,
/* Dtxctl99 */
Te = 1<<0, /* dma tx enable */
/* Fctrl */
Bam = 1<<10, /* broadcast accept mode */
Upe = 1<<9, /* unicast promiscuous */
Mpe = 1<<8, /* multicast promiscuous */
/* Rxdctl */
Pthresh = 0, /* prefresh threshold shift in bits */
Hthresh = 8, /* host buffer minimum threshold " */
Wthresh = 16, /* writeback threshold */
Renable = 1<<25,
/* Rxctl */
Rxen = 1<<0,
Dmbyps = 1<<1, /* descr. monitor bypass (598 only) */
/* Rdrxctl */
Rdmt½ = 0, /* 598 */
Rdmt¼ = 1, /* 598 */
Rdmt⅛ = 2, /* 598 */
Crcstrip = 1<<1, /* 599 */
Rscfrstsize = 037<<17, /* 599; should be zero */
/* Rxcsum */
Ippcse = 1<<12, /* ip payload checksum enable */
/* Eerd */
EEstart = 1<<0, /* Start Read */
EEdone = 1<<1, /* Read done */
/* interrupts */
Irx0 = 1<<0, /* driver defined */
Itx0 = 1<<1, /* driver defined */
Lsc = 1<<20, /* link status change */
/* Links */
Lnkup = 1<<30,
Lnkspd = 1<<29,
/* Hlreg0 */
Txcrcen = 1<<0, /* add crc during xmit */
Rxcrcstrip = 1<<1, /* strip crc during recv */
Jumboen = 1<<2,
Txpaden = 1<<10, /* pad short frames during xmit */
/* Autoc */
Flu = 1<<0, /* force link up */
Lmsshift = 13, /* link mode select shift */
Lmsmask = 7,
};
typedef struct Ctlr Ctlr;
typedef struct Rd Rd;
typedef struct Td Td;
typedef struct {
uint reg;
char *name;
} Stat;
Stat stattab[] = {
0x4000, "crc error",
0x4004, "illegal byte",
0x4008, "short packet",
0x3fa0, "missed pkt0",
0x4034, "mac local flt",
0x4038, "mac rmt flt",
0x4040, "rx length err",
0x3f60, "xon tx",
0xcf60, "xon rx",
0x3f68, "xoff tx",
0xcf68, "xoff rx",
0x405c, "rx 040",
0x4060, "rx 07f",
0x4064, "rx 100",
0x4068, "rx 200",
0x406c, "rx 3ff",
0x4070, "rx big",
0x4074, "rx ok",
0x4078, "rx bcast",
0x3fc0, "rx no buf0",
0x40a4, "rx runt",
0x40a8, "rx frag",
0x40ac, "rx ovrsz",
0x40b0, "rx jab",
0x40d0, "rx pkt",
0x40d4, "tx pkt",
0x40d8, "tx 040",
0x40dc, "tx 07f",
0x40e0, "tx 100",
0x40e4, "tx 200",
0x40e8, "tx 3ff",
0x40ec, "tx big",
0x40f4, "tx bcast",
0x4120, "xsum err",
};
/* status */
enum {
Pif = 1<<7, /* past exact filter (sic) */
Ipcs = 1<<6, /* ip checksum calculated */
L4cs = 1<<5, /* layer 2 */
Tcpcs = 1<<4, /* tcp checksum calculated */
Vp = 1<<3, /* 802.1q packet matched vet */
Ixsm = 1<<2, /* ignore checksum */
Reop = 1<<1, /* end of packet */
Rdd = 1<<0, /* descriptor done */
};
struct Rd { /* Receive Descriptor */
u32int addr[2];
ushort length;
ushort cksum;
uchar status;
uchar errors;
ushort vlan;
};
enum {
/* Td cmd */
Rs = 1<<3, /* report status */
Ic = 1<<2, /* insert checksum */
Ifcs = 1<<1, /* insert FCS (ethernet crc) */
Teop = 1<<0, /* end of packet */
/* Td status */
Tdd = 1<<0, /* descriptor done */
};
struct Td { /* Transmit Descriptor */
u32int addr[2];
ushort length;
uchar cso;
uchar cmd;
uchar status;
uchar css;
ushort vlan;
};
struct Ctlr {
Pcidev *p;
Ether *edev;
int type;
/* virtual */
u32int *reg;
u32int *msix; /* unused */
/* physical */
u32int *physreg;
u32int *physmsix; /* unused */
uchar flag;
int nrd;
int ntd;
int nrb; /* # bufs this Ctlr has in the pool */
uint rbsz;
int procsrunning;
int attached;
Watermark wmrb;
Watermark wmrd;
Watermark wmtd;
QLock slock;
QLock alock; /* attach lock */
QLock tlock;
Rendez lrendez;
Rendez trendez;
Rendez rrendez;
uint im; /* interrupt mask */
uint lim;
uint rim;
uint tim;
Lock imlock;
Rd* rdba; /* receive descriptor base address */
Block** rb; /* receive buffers */
int rdt; /* receive descriptor tail */
int rdfree; /* rx descriptors awaiting packets */
Td* tdba; /* transmit descriptor base address */
int tdh; /* transmit descriptor head */
int tdt; /* transmit descriptor tail */
Block** tb; /* transmit buffers */
uchar ra[Eaddrlen]; /* receive address */
uchar mta[128]; /* multicast table array */
ulong stats[nelem(stattab)];
uint speeds[3];
};
enum {
I82598 = 1,
I82599,
};
static Ctlr *ctlrtab[4];
static int nctlr;
static Lock rblock;
static Block *rbpool;
static int nrbfull; /* # of rcv Blocks with data awaiting processing */
static void
readstats(Ctlr *ctlr)
{
int i;
qlock(&ctlr->slock);
for(i = 0; i < nelem(ctlr->stats); i++)
ctlr->stats[i] += ctlr->reg[stattab[i].reg >> 2];
qunlock(&ctlr->slock);
}
static int speedtab[] = {
0,
1000,
10000,
};
static long
ifstat(Ether *edev, void *a, long n, ulong offset)
{
uint i, *t;
char *s, *p, *e;
Ctlr *ctlr;
ctlr = edev->ctlr;
p = s = malloc(READSTR);
if(p == nil)
error(Enomem);
e = p + READSTR;
readstats(ctlr);
for(i = 0; i < nelem(stattab); i++)
if(ctlr->stats[i] > 0)
p = seprint(p, e, "%.10s %uld\n", stattab[i].name,
ctlr->stats[i]);
t = ctlr->speeds;
p = seprint(p, e, "speeds: 0:%d 1000:%d 10000:%d\n", t[0], t[1], t[2]);
p = seprint(p, e, "mtu: min:%d max:%d\n", edev->minmtu, edev->maxmtu);
p = seprint(p, e, "rdfree %d rdh %d rdt %d\n", ctlr->rdfree, ctlr->reg[Rdt],
ctlr->reg[Rdh]);
p = seprintmark(p, e, &ctlr->wmrb);
p = seprintmark(p, e, &ctlr->wmrd);
p = seprintmark(p, e, &ctlr->wmtd);
USED(p);
n = readstr(offset, a, n, s);
free(s);
return n;
}
static void
ienable(Ctlr *ctlr, int i)
{
ilock(&ctlr->imlock);
ctlr->im |= i;
ctlr->reg[Ims] = ctlr->im;
iunlock(&ctlr->imlock);
}
static int
lim(void *v)
{
return ((Ctlr*)v)->lim != 0;
}
static void
lproc(void *v)
{
int r, i;
Ctlr *ctlr;
Ether *e;
e = v;
ctlr = e->ctlr;
for (;;) {
r = ctlr->reg[Links];
e->link = (r & Lnkup) != 0;
i = 0;
if(e->link)
i = 1 + ((r & Lnkspd) != 0);
ctlr->speeds[i]++;
e->mbps = speedtab[i];
ctlr->lim = 0;
ienable(ctlr, Lsc);
sleep(&ctlr->lrendez, lim, ctlr);
ctlr->lim = 0;
}
}
static long
ctl(Ether *, void *, long)
{
error(Ebadarg);
return -1;
}
static Block*
rballoc(void)
{
Block *bp;
ilock(&rblock);
if((bp = rbpool) != nil){
rbpool = bp->next;
bp->next = 0;
_xinc(&bp->ref); /* prevent bp from being freed */
}
iunlock(&rblock);
return bp;
}
void
rbfree(Block *b)
{
b->rp = b->wp = (uchar*)PGROUND((uintptr)b->base);
b->flag &= ~(Bipck | Budpck | Btcpck | Bpktck);
ilock(&rblock);
b->next = rbpool;
rbpool = b;
nrbfull--;
iunlock(&rblock);
}
static int
cleanup(Ctlr *ctlr, int tdh)
{
Block *b;
uint m, n;
m = ctlr->ntd - 1;
while(ctlr->tdba[n = NEXTPOW2(tdh, m)].status & Tdd){
tdh = n;
b = ctlr->tb[tdh];
ctlr->tb[tdh] = 0;
if (b)
freeb(b);
ctlr->tdba[tdh].status = 0;
}
return tdh;
}
void
transmit(Ether *e)
{
uint i, m, tdt, tdh;
Ctlr *ctlr;
Block *b;
Td *t;
ctlr = e->ctlr;
if(!canqlock(&ctlr->tlock)){
ienable(ctlr, Itx0);
return;
}
tdh = ctlr->tdh = cleanup(ctlr, ctlr->tdh);
tdt = ctlr->tdt;
m = ctlr->ntd - 1;
for(i = 0; ; i++){
if(NEXTPOW2(tdt, m) == tdh){ /* ring full? */
ienable(ctlr, Itx0);
break;
}
if((b = qget(e->oq)) == nil)
break;
assert(ctlr->tdba != nil);
t = ctlr->tdba + tdt;
t->addr[0] = PCIWADDR(b->rp);
t->length = BLEN(b);
t->cmd = Ifcs | Teop;
if (!Goslow)
t->cmd |= Rs;
ctlr->tb[tdt] = b;
/* note size of queue of tds awaiting transmission */
notemark(&ctlr->wmtd, (tdt + Ntd - tdh) % Ntd);
tdt = NEXTPOW2(tdt, m);
}
if(i) {
coherence();
ctlr->reg[Tdt] = ctlr->tdt = tdt; /* make new Tds active */
coherence();
ienable(ctlr, Itx0);
}
qunlock(&ctlr->tlock);
}
static int
tim(void *c)
{
return ((Ctlr*)c)->tim != 0;
}
static void
tproc(void *v)
{
Ctlr *ctlr;
Ether *e;
e = v;
ctlr = e->ctlr;
for (;;) {
sleep(&ctlr->trendez, tim, ctlr); /* xmit interrupt kicks us */
ctlr->tim = 0;
transmit(e);
}
}
static void
rxinit(Ctlr *ctlr)
{
int i, is598, autoc;
ulong until;
Block *b;
ctlr->reg[Rxctl] &= ~Rxen;
ctlr->reg[Rxdctl] = 0;
for(i = 0; i < ctlr->nrd; i++){
b = ctlr->rb[i];
ctlr->rb[i] = 0;
if(b)
freeb(b);
}
ctlr->rdfree = 0;
coherence();
ctlr->reg[Fctrl] |= Bam;
ctlr->reg[Fctrl] &= ~(Upe | Mpe);
/* intel gets some csums wrong (e.g., errata 44) */
ctlr->reg[Rxcsum] &= ~Ippcse;
ctlr->reg[Hlreg0] &= ~Jumboen; /* jumbos are a bad idea */
ctlr->reg[Hlreg0] |= Txcrcen | Rxcrcstrip | Txpaden;
ctlr->reg[Srrctl] = (ctlr->rbsz + 1024 - 1) / 1024;
ctlr->reg[Mhadd] = ctlr->rbsz << 16;
ctlr->reg[Rbal] = PCIWADDR(ctlr->rdba);
ctlr->reg[Rbah] = 0;
ctlr->reg[Rdlen] = ctlr->nrd*sizeof(Rd); /* must be multiple of 128 */
ctlr->reg[Rdh] = 0;
ctlr->reg[Rdt] = ctlr->rdt = 0;
coherence();
is598 = (ctlr->type == I82598);
if (is598)
ctlr->reg[Rdrxctl] = Rdmt¼;
else {
ctlr->reg[Rdrxctl] |= Crcstrip;
ctlr->reg[Rdrxctl] &= ~Rscfrstsize;
}
if (Goslow && is598)
ctlr->reg[Rxdctl] = 8<<Wthresh | 8<<Pthresh | 4<<Hthresh | Renable;
else
ctlr->reg[Rxdctl] = Renable;
coherence();
/*
* don't wait forever like an idiot (and hang the system),
* maybe it's disconnected.
*/
until = TK2MS(MACHP(0)->ticks) + 250;
while (!(ctlr->reg[Rxdctl] & Renable) && TK2MS(MACHP(0)->ticks) < until)
;
if(!(ctlr->reg[Rxdctl] & Renable))
print("#l%d: Renable didn't come on, might be disconnected\n",
ctlr->edev->ctlrno);
ctlr->reg[Rxctl] |= Rxen | (is598? Dmbyps: 0);
if (is598){
autoc = ctlr->reg[Autoc];
/* what is this rubbish and why do we care? */
print("#l%d: autoc %#ux; lms %d (3 is 10g sfp)\n",
ctlr->edev->ctlrno, autoc, (autoc>>Lmsshift) & Lmsmask);
ctlr->reg[Autoc] |= Flu;
coherence();
delay(50);
}
}
static void
replenish(Ctlr *ctlr, uint rdh)
{
int rdt, m, i;
Block *b;
Rd *r;
m = ctlr->nrd - 1;
i = 0;
for(rdt = ctlr->rdt; NEXTPOW2(rdt, m) != rdh; rdt = NEXTPOW2(rdt, m)){
r = ctlr->rdba + rdt;
if((b = rballoc()) == nil){
print("#l%d: no buffers\n", ctlr->edev->ctlrno);
break;
}
ctlr->rb[rdt] = b;
r->addr[0] = PCIWADDR(b->rp);
r->status = 0;
ctlr->rdfree++;
i++;
}
if(i) {
coherence();
ctlr->reg[Rdt] = ctlr->rdt = rdt; /* hand back recycled rdescs */
coherence();
}
}
static int
rim(void *v)
{
return ((Ctlr*)v)->rim != 0;
}
void
rproc(void *v)
{
int passed;
uint m, rdh;
Block *bp;
Ctlr *ctlr;
Ether *e;
Rd *r;
e = v;
ctlr = e->ctlr;
m = ctlr->nrd - 1;
for (rdh = 0; ; ) {
replenish(ctlr, rdh);
ienable(ctlr, Irx0);
sleep(&ctlr->rrendez, rim, ctlr);
passed = 0;
for (;;) {
ctlr->rim = 0;
r = ctlr->rdba + rdh;
if(!(r->status & Rdd))
break; /* wait for pkts to arrive */
bp = ctlr->rb[rdh];
ctlr->rb[rdh] = 0;
if (r->length > ETHERMAXTU)
print("#l%d: got jumbo of %d bytes\n",
e->ctlrno, r->length);
bp->wp += r->length;
bp->lim = bp->wp; /* lie like a dog */
// r->status = 0;
ilock(&rblock);
nrbfull++;
iunlock(&rblock);
notemark(&ctlr->wmrb, nrbfull);
etheriq(e, bp, 1);
passed++;
ctlr->rdfree--;
rdh = NEXTPOW2(rdh, m);
if (ctlr->rdfree <= ctlr->nrd - 16)
replenish(ctlr, rdh);
}
/* note how many rds had full buffers */
notemark(&ctlr->wmrd, passed);
}
}
static void
promiscuous(void *a, int on)
{
Ctlr *ctlr;
Ether *e;
e = a;
ctlr = e->ctlr;
if(on)
ctlr->reg[Fctrl] |= Upe | Mpe;
else
ctlr->reg[Fctrl] &= ~(Upe | Mpe);
}
static void
multicast(void *a, uchar *ea, int on)
{
int b, i;
Ctlr *ctlr;
Ether *e;
e = a;
ctlr = e->ctlr;
/*
* multiple ether addresses can hash to the same filter bit,
* so it's never safe to clear a filter bit.
* if we want to clear filter bits, we need to keep track of
* all the multicast addresses in use, clear all the filter bits,
* then set the ones corresponding to in-use addresses.
*/
i = ea[5] >> 1;
b = (ea[5]&1)<<4 | ea[4]>>4;
b = 1 << b;
if(on)
ctlr->mta[i] |= b;
// else
// ctlr->mta[i] &= ~b;
ctlr->reg[Mta+i] = ctlr->mta[i];
}
static void
freemem(Ctlr *ctlr)
{
Block *b;
while(b = rballoc()){
b->free = 0;
freeb(b);
}
free(ctlr->rdba);
ctlr->rdba = nil;
free(ctlr->tdba);
ctlr->tdba = nil;
free(ctlr->rb);
ctlr->rb = nil;
free(ctlr->tb);
ctlr->tb = nil;
}
static int
detach(Ctlr *ctlr)
{
int i, is598;
ctlr->reg[Imc] = ~0;
ctlr->reg[Ctrl] |= Rst;
for(i = 0; i < 100; i++){
delay(1);
if((ctlr->reg[Ctrl] & Rst) == 0)
break;
}
if (i >= 100)
return -1;
is598 = (ctlr->type == I82598);
if (is598) { /* errata */
delay(50);
ctlr->reg[Ecc] &= ~(1<<21 | 1<<18 | 1<<9 | 1<<6);
}
/* not cleared by reset; kill it manually. */
for(i = 1; i < 16; i++)
ctlr->reg[is598? Rah98: Rah99] &= ~Enable;
for(i = 0; i < 128; i++)
ctlr->reg[Mta + i] = 0;
for(i = 1; i < (is598? 640: 128); i++)
ctlr->reg[Vfta + i] = 0;
// freemem(ctlr); // TODO
ctlr->attached = 0;
return 0;
}
static void
shutdown(Ether *e)
{
detach(e->ctlr);
// freemem(e->ctlr);
}
/* ≤ 20ms */
static ushort
eeread(Ctlr *ctlr, int i)
{
ctlr->reg[Eerd] = EEstart | i<<2;
while((ctlr->reg[Eerd] & EEdone) == 0)
;
return ctlr->reg[Eerd] >> 16;
}
static int
eeload(Ctlr *ctlr)
{
ushort u, v, p, l, i, j;
if((eeread(ctlr, 0) & 0xc0) != 0x40)
return -1;
u = 0;
for(i = 0; i < 0x40; i++)
u += eeread(ctlr, i);
for(i = 3; i < 0xf; i++){
p = eeread(ctlr, i);
l = eeread(ctlr, p++);
if((int)p + l + 1 > 0xffff)
continue;
for(j = p; j < p + l; j++)
u += eeread(ctlr, j);
}
if(u != 0xbaba)
return -1;
if(ctlr->reg[Status] & (1<<3))
u = eeread(ctlr, 10);
else
u = eeread(ctlr, 9);
u++;
for(i = 0; i < Eaddrlen;){
v = eeread(ctlr, u + i/2);
ctlr->ra[i++] = v;
ctlr->ra[i++] = v>>8;
}
ctlr->ra[5] += (ctlr->reg[Status] & 0xc) >> 2;
return 0;
}
static int
reset(Ctlr *ctlr)
{
int i, is598;
uchar *p;
if(detach(ctlr)){
print("82598: reset timeout\n");
return -1;
}
if(eeload(ctlr)){
print("82598: eeprom failure\n");
return -1;
}
p = ctlr->ra;
is598 = (ctlr->type == I82598);
ctlr->reg[is598? Ral98: Ral99] = p[3]<<24 | p[2]<<16 | p[1]<<8 | p[0];
ctlr->reg[is598? Rah98: Rah99] = p[5]<<8 | p[4] | Enable;
readstats(ctlr);
for(i = 0; i<nelem(ctlr->stats); i++)
ctlr->stats[i] = 0;
ctlr->reg[Ctrlext] |= 1 << 16; /* required by errata (spec change 4) */
if (Goslow) {
/* make some guesses for flow control */
ctlr->reg[Fcrtl] = 0x10000 | Enable;
ctlr->reg[Fcrth] = 0x40000 | Enable;
ctlr->reg[Rcrtv] = 0x6000;
} else
ctlr->reg[Fcrtl] = ctlr->reg[Fcrth] = ctlr->reg[Rcrtv] = 0;
/* configure interrupt mapping (don't ask) */
ctlr->reg[Ivar+0] = 0 | 1<<7;
ctlr->reg[Ivar+64/4] = 1 | 1<<7;
// ctlr->reg[Ivar+97/4] = (2 | 1<<7) << (8*(97%4));
if (Goslow) {
/* interrupt throttling goes here. */
for(i = Itr; i < Itr + 20; i++)
ctlr->reg[i] = 128; /* ¼µs intervals */
ctlr->reg[Itr + Itx0] = 256;
} else { /* don't throttle */
for(i = Itr; i < Itr + 20; i++)
ctlr->reg[i] = 0; /* ¼µs intervals */
ctlr->reg[Itr + Itx0] = 0;
}
return 0;
}
static void
txinit(Ctlr *ctlr)
{
Block *b;
int i;
if (Goslow)
ctlr->reg[Txdctl] = 16<<Wthresh | 16<<Pthresh;
else
ctlr->reg[Txdctl] = 0;
if (ctlr->type == I82599)
ctlr->reg[Dtxctl99] = 0;
coherence();
for(i = 0; i < ctlr->ntd; i++){
b = ctlr->tb[i];
ctlr->tb[i] = 0;
if(b)
freeb(b);
}
assert(ctlr->tdba != nil);
memset(ctlr->tdba, 0, ctlr->ntd * sizeof(Td));
ctlr->reg[Tdbal] = PCIWADDR(ctlr->tdba);
ctlr->reg[Tdbah] = 0;
ctlr->reg[Tdlen] = ctlr->ntd*sizeof(Td); /* must be multiple of 128 */
ctlr->reg[Tdh] = 0;
ctlr->tdh = ctlr->ntd - 1;
ctlr->reg[Tdt] = ctlr->tdt = 0;
coherence();
if (ctlr->type == I82599)
ctlr->reg[Dtxctl99] |= Te;
coherence();
ctlr->reg[Txdctl] |= Ten;
coherence();
while (!(ctlr->reg[Txdctl] & Ten))
;
}
static void
attach(Ether *e)
{
Block *b;
Ctlr *ctlr;
char buf[KNAMELEN];
ctlr = e->ctlr;
ctlr->edev = e; /* point back to Ether* */
qlock(&ctlr->alock);
if(waserror()){
reset(ctlr);
freemem(ctlr);
qunlock(&ctlr->alock);
nexterror();
}
if(ctlr->rdba == nil) {
ctlr->nrd = Nrd;
ctlr->ntd = Ntd;
ctlr->rdba = mallocalign(ctlr->nrd * sizeof *ctlr->rdba,
Descalign, 0, 0);
ctlr->tdba = mallocalign(ctlr->ntd * sizeof *ctlr->tdba,
Descalign, 0, 0);
ctlr->rb = malloc(ctlr->nrd * sizeof(Block *));
ctlr->tb = malloc(ctlr->ntd * sizeof(Block *));
if (ctlr->rdba == nil || ctlr->tdba == nil ||
ctlr->rb == nil || ctlr->tb == nil)
error(Enomem);
for(ctlr->nrb = 0; ctlr->nrb < 2*Nrb; ctlr->nrb++){
b = allocb(ctlr->rbsz + BY2PG); /* see rbfree() */
if(b == nil)
error(Enomem);
b->free = rbfree;
freeb(b);
}
}
if (!ctlr->attached) {
rxinit(ctlr);
txinit(ctlr);
nrbfull = 0;
if (!ctlr->procsrunning) {
snprint(buf, sizeof buf, "#l%dl", e->ctlrno);
kproc(buf, lproc, e);
snprint(buf, sizeof buf, "#l%dr", e->ctlrno);
kproc(buf, rproc, e);
snprint(buf, sizeof buf, "#l%dt", e->ctlrno);
kproc(buf, tproc, e);
ctlr->procsrunning = 1;
}
initmark(&ctlr->wmrb, Nrb, "rcv bufs unprocessed");
initmark(&ctlr->wmrd, Nrd-1, "rcv descrs processed at once");
initmark(&ctlr->wmtd, Ntd-1, "xmit descr queue len");
ctlr->attached = 1;
}
qunlock(&ctlr->alock);
poperror();
}
static void
interrupt(Ureg*, void *v)
{
int icr, im;
Ctlr *ctlr;
Ether *e;
e = v;
ctlr = e->ctlr;
ilock(&ctlr->imlock);
ctlr->reg[Imc] = ~0; /* disable all intrs */
im = ctlr->im;
while((icr = ctlr->reg[Icr] & ctlr->im) != 0){
if(icr & Irx0){
im &= ~Irx0;
ctlr->rim = Irx0;
wakeup(&ctlr->rrendez);
}
if(icr & Itx0){
im &= ~Itx0;
ctlr->tim = Itx0;
wakeup(&ctlr->trendez);
}
if(icr & Lsc){
im &= ~Lsc;
ctlr->lim = Lsc;
wakeup(&ctlr->lrendez);
}
}
ctlr->reg[Ims] = ctlr->im = im; /* enable only intrs we didn't service */
iunlock(&ctlr->imlock);
}
static void
scan(void)
{
int pciregs, pcimsix, type;
ulong io, iomsi;
void *mem, *memmsi;
Ctlr *ctlr;
Pcidev *p;
p = 0;
while(p = pcimatch(p, Vintel, 0)){
switch(p->did){
case 0x10b6: /* 82598 backplane */
case 0x10c6: /* 82598 af dual port */
case 0x10c7: /* 82598 af single port */
case 0x10dd: /* 82598 at cx4 */
case 0x10ec: /* 82598 at cx4 dual port */
pcimsix = 3;
type = I82598;
break;
case 0x10f7: /* 82599 kx/kx4 */
case 0x10f8: /* 82599 kx/kx4/kx */
case 0x10f9: /* 82599 cx4 */
case 0x10fb: /* 82599 sfi/sfp+ */
case 0x10fc: /* 82599 xaui/bx4 */
case 0x1557: /* 82599 single-port sfi */
pcimsix = 4;
type = I82599;
break;
default:
continue;
}
pciregs = 0;
if(nctlr >= nelem(ctlrtab)){
print("i82598: too many controllers\n");
return;
}
io = p->mem[pciregs].bar & ~0xf;
mem = vmap(io, p->mem[pciregs].size);
if(mem == nil){
print("i82598: can't map regs %#p\n",
p->mem[pciregs].bar);
continue;
}
iomsi = p->mem[pcimsix].bar & ~0xf;
memmsi = vmap(iomsi, p->mem[pcimsix].size);
if(memmsi == nil){
print("i82598: can't map msi-x regs %#p\n",
p->mem[pcimsix].bar);
vunmap(mem, p->mem[pciregs].size);
continue;
}
ctlr = malloc(sizeof *ctlr);
if(ctlr == nil) {
vunmap(mem, p->mem[pciregs].size);
vunmap(memmsi, p->mem[pcimsix].size);
error(Enomem);
}
ctlr->p = p;
ctlr->type = type;
ctlr->physreg = (u32int*)io;
ctlr->physmsix = (u32int*)iomsi;
ctlr->reg = (u32int*)mem;
ctlr->msix = (u32int*)memmsi; /* unused */
ctlr->rbsz = Rbsz;
if(reset(ctlr)){
print("i82598: can't reset\n");
free(ctlr);
vunmap(mem, p->mem[pciregs].size);
vunmap(memmsi, p->mem[pcimsix].size);
continue;
}
pcisetbme(p);
ctlrtab[nctlr++] = ctlr;
}
}
static int
pnp(Ether *e)
{
int i;
Ctlr *ctlr;
if(nctlr == 0)
scan();
ctlr = nil;
for(i = 0; i < nctlr; i++){
ctlr = ctlrtab[i];
if(ctlr == nil || ctlr->flag & Factive)
continue;
if(e->port == 0 || e->port == (ulong)ctlr->reg)
break;
}
if (i >= nctlr)
return -1;
ctlr->flag |= Factive;
e->ctlr = ctlr;
e->port = (uintptr)ctlr->physreg;
e->irq = ctlr->p->intl;
e->tbdf = ctlr->p->tbdf;
e->mbps = 10000;
e->maxmtu = ETHERMAXTU;
memmove(e->ea, ctlr->ra, Eaddrlen);
e->arg = e;
e->attach = attach;
e->detach = shutdown;
e->transmit = transmit;
e->interrupt = interrupt;
e->ifstat = ifstat;
e->shutdown = shutdown;
e->ctl = ctl;
e->multicast = multicast;
e->promiscuous = promiscuous;
return 0;
}
void
ether82598link(void)
{
addethercard("i82598", pnp);
addethercard("i10gbe", pnp);
}
|