/*
* xen ethernet driver derived from:
* Etherlink III, Fast EtherLink and Fast EtherLink XL adapters.
* and Linux Xen Front end driver
* To do:
* clean up and fix up by plan 9 experts
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "../port/netif.h"
#include "../xen/xennet.h"
/*
 * Printable names for the NETIF_INTERFACE_STATUS_* codes, indexed
 * directly by the status code.  Used only in diagnostic output.
 */
static char *status_name[] = {
[NETIF_INTERFACE_STATUS_CLOSED] = "closed",
[NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
[NETIF_INTERFACE_STATUS_CONNECTED] = "connected",
[NETIF_INTERFACE_STATUS_CHANGED] = "changed",
};
/* fix me later ... this xen ether breaks a lot of rules ... not my code ... RGM */
/* Defined outside this file; netif_poll() calls it for every received page. */
void queue_machphys_update(ulong mfn, ulong pfn);
#include "etherif.h"
/*
 * LOG() expands to nothing, so every LOG(dp(...);) statement in this
 * file is compiled out.  Functions re-#undef/#define it locally so it
 * can be switched on for one function at a time.
 */
#define LOG(a)
/* The Linux-style print macros all map onto dp(). */
#define DPRINTK dp
#define WPRINTK dp
#define IPRINTK dp
#define XCVRDEBUG if(1)print
/* DEBUG enables the body of vif_show(). */
#define DEBUG
/******************************************************************************
* Virtual network driver for conversing with remote driver backends.
*
* Copyright (c) 2002-2004, K A Fraser
*/
#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
/*
 * Frame numbers collected by network_alloc_rx_buffers() and handed to
 * xen_mm_decrease_reservation() in one batch.
 */
static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
/*
 * Scratch space for the batched multicall path.  That path is under
 * #ifdef NOT; the only live reference is an unused cursor in netif_poll().
 */
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
/*
 * One packet buffer.  descalloc() points `page` at a page-aligned
 * address inside `data`; that page is what gets handed to (or received
 * from) the backend.
 */
struct Desc {
/* can't do this until xspanalloc is fixed ... */
// unsigned char page[BY2PG];
ulong stat;
ulong size; /* packet length in bytes */
ulong offset; /* start of the packet within page (set on receive) */
unsigned char *page; /* will point into a page-aligned place in data */
/* just to mess around -- we're having free list corruption and
* it occurred in at least one case when two of these desc's
* butted up against each other
* so make it four not 2
* (later) turned out to be a long-standing bug in
* xspanalloc! But leave this here for now anyway.
*/
unsigned char data[4*BY2PG];
struct Desc *next; /* links received descriptors in netif_poll() */
};
/* Running totals bumped by descalloc() and descfree(); not updated atomically. */
int desc_alloc_count = 0, desc_free_count = 0;
/*
 * Debug table of live descriptors, maintained by recordalloc() and
 * recordfree() and walked by checkit() and dumplist().  A zero entry
 * is an empty slot.
 */
void *descarray[2048];
/*
 * Remember p in the first empty slot of descarray[].
 * If every slot is taken the pointer is silently not recorded.
 */
void
recordalloc(void *p)
{
	void **slot;

	for(slot = descarray; slot < &descarray[2048]; slot++){
		if(*slot == 0){
			*slot = p;
			return;
		}
	}
}
/*
 * Clear the first descarray[] slot that holds p.
 * Does nothing when p was never recorded.
 */
void
recordfree(void *p)
{
	int slot;

	slot = 0;
	while(slot < 2048 && descarray[slot] != p)
		slot++;
	if(slot < 2048)
		descarray[slot] = 0;
}
/*
 * Print the two words that sit immediately before every recorded
 * descriptor, then panic.  Only reached from checkit() once corruption
 * has been detected; it never returns.
 */
void
dumplist(void)
{
	int slot;
	ulong *hdr;

	for(slot = 0; slot < 2048; slot++){
		if(descarray[slot] == 0)
			continue;
		hdr = (ulong *)((ulong)descarray[slot] - 8);
		dp("%p: size 0x%ulx magic 0x%ulx\n", descarray[slot], hdr[0], hdr[1]);
	}
	panic("shit");
}
/*
 * Sanity-check every recorded descriptor: the word just before it must
 * hold 0x484f4c45 (ASCII "HOLE").  On the first mismatch the offending
 * entry is reported, tagged with s, and dumplist() panics.
 */
void
checkit(char *s)
{
	int slot;
	void *desc;
	ulong *magic;

	for(slot = 0; slot < 2048; slot++){
		desc = descarray[slot];
		if(desc != 0){
			magic = (ulong *)((ulong)desc - 4);
			if(*magic != 0x484f4c45){
				dp("%s: For %p hole is bad: 0x%ulx, mfn 0x%ulx\n",
					s, desc, *magic, xen_mm_mfn(magic));
				dumplist();
			}
		}
	}
}
/*
 * Allocate a packet descriptor and point its page member at a
 * page-aligned address inside its own data[] area.
 * The string argument is an unused caller tag.
 * Returns 0 when xallocz() fails.
 */
struct Desc *descalloc(char *) {
	struct Desc *desc;
	unsigned long va;

	checkit("PREALLOC");
	desc = xallocz(sizeof(*desc), 0);
	if(desc == 0)
		return 0;

	/* step two pages into data[], then round down to a page boundary */
	va = (unsigned long)desc->data;
	va = (va + 2*BY2PG) & ~(BY2PG-1);
	desc->page = (unsigned char *)va;

	/* plain increment; not atomic */
	desc_alloc_count++;
	recordalloc(desc);
	checkit("ALLOC");
	return desc;
}
/*
 * Release a descriptor obtained from descalloc(): verify the debug
 * table, drop d from it, then give the memory back with xfree().
 * The string argument is an unused caller tag.
 */
void descfree(struct Desc *d, char *) {
	checkit("FREE");

	recordfree(d);
	xfree(d);

	/* plain increment; not atomic */
	desc_free_count += 1;
}
typedef struct Desc Desc;
/*
 * Per-interface driver state, one entry of controllers[] per handle.
 *
 * txd[] and rxd[] double as descriptor tables and free lists: slot 0
 * holds the id at the head of the free list, and a free slot holds the
 * id of the next free slot (stored as a small integer cast to a
 * pointer).  create_netdev() and network_connect() touch indices
 * 0..RING_SIZE inclusive, so both arrays need RING_SIZE+1 entries;
 * with only RING_SIZE entries the last store lands outside the array.
 */
struct Ctlr
{
	Lock;	/* consider a Qlock here a la the rhine */
	struct netif_st *next;
	Ether *dev;	/* set by xenfrontendreset() */
	int attached;	/* set once by attach() */

	/* private consumer indices into the shared response rings */
	NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
	unsigned int tx_full, txbusy;
	Block *txbp;	/* block held back by txstart() while tx_full */

	/* shared ring pages, allocated in vif_disconnect() */
	netif_tx_interface_t *tx;
	netif_rx_interface_t *rx;

	Lock tx_lock;
	Lock rx_lock;

	unsigned int handle;
	unsigned int evtchn;
	unsigned int irq;

	int interrupts;	/* counted in netif_poll(), reported by ifstat() */

	/* What is the status of our connection to the remote backend? */
#define BEST_CLOSED 0
#define BEST_DISCONNECTED 1
#define BEST_CONNECTED 2
	unsigned int backend_state;

	/* Is this interface open or closed (down or up)? */
#define UST_CLOSED 0
#define UST_OPEN 1
	unsigned int user_state;

	/* slot 0 is the free-list head; ids 1..RING_SIZE are descriptor slots */
	Desc *txd[NETIF_TX_RING_SIZE+1];
	Desc *rxd[NETIF_RX_RING_SIZE+1];
};
/* Printable names for the BEST_* backend states, indexed by Ctlr.backend_state. */
static char *be_state_name[] = {
[BEST_CLOSED] = "closed",
[BEST_DISCONNECTED] = "disconnected",
[BEST_CONNECTED] = "connected",
};
/* keep it simple. Just statically allocate it for maxether. */
typedef struct Ctlr Ctlr;
/* Indexed by interface handle (see find_dev_by_handle() and create_netdev()). */
Ctlr controllers[MaxEther];
/*
 * Free-list helper for the txd[]/rxd[] tables.
 * Pushes slot _id onto the free list whose head lives in _list[0]:
 * the old head is stored in the freed slot and _id becomes the new
 * head.  Wrapped in do/while(0) so the macro is a single statement and
 * stays correct under an unbraced if/else.  _id is evaluated twice.
 */
#define ADD_ID_TO_FREELIST(_list, _id) \
	do { \
		(_list)[(_id)] = (_list)[0]; \
		(_list)[0] = (void *)(unsigned long)(_id); \
	} while(0)
/* I hate this kind of code anyway ...
#define GET_ID_FROM_FREELIST(_list) \
({ unsigned long _id = (unsigned long)(_list)[0]; \
(_list)[0] = (_list)[_id]; \
(unsigned short)_id; })
*/
/*
 * Pop the id at the head of a txd[]/rxd[] free list.
 * _list[0] holds the head id; the popped slot's content becomes the
 * new head.  Panics when the head id is larger than max.
 */
unsigned short GET_ID_FROM_FREELIST(Desc *_list[], int max) {
	unsigned short head;

	head = (unsigned short)(_list)[0];
	if(head > max)
		panic("Bogus ID in GET_ID_FROM_FREELIST: 0x%x, max 0x%x\n",
			head, max);

	/* unlink: the slot being handed out stores the next free id */
	_list[0] = _list[head];
	return head;
}
/*
 * Hex-dump size bytes starting at x through dp(), sixteen bytes per
 * line, each line prefixed with its offset.
 */
void dumpit(void *x, int size)
{
	unsigned char *bytes;
	int row, col;

	bytes = x;
	dp("New packet: %p %d bytes\n", x, size);
	for(row = 0; row < size; row += 16){
		dp("0x%x: ", row);
		for(col = 0; col < 16; col++){
			if(row+col >= size)
				break;
			dp("%02x ", bytes[row+col]);
		}
		dp("\n");
	}
	dp("end of packet\n");
}
static struct Ether *find_dev_by_handle(unsigned int handle)
{
struct Ctlr *np = &controllers[handle];
print("XENFE:find_dev_by_handle %d np %p np->dev %p\n",
handle, np, np->dev);
return np->dev;
}
/** Network interface info. */
struct netif_ctrl {
/** Number of interfaces. */
int interface_n;
/** Number of connected interfaces. */
int connected_n;
/** Error code. */
int err;
/* Driver status as last reported by the controller (see netif_driver_status()). */
int iface_up;
};
/* Single driver-wide instance; reset by netctrl_init(). */
static struct netif_ctrl netctrl;
static void netctrl_init(void)
{
memset(&netctrl, 0, sizeof(netctrl));
netctrl.interface_n = -1;
}
/** Get or set the driver-wide error code.
 *
 * A negative err is latched (and printed) only if no error has been
 * recorded yet; any other value just reads the current code.
 *
 * @return the recorded error code, 0 if none
 */
static int netctrl_err(int err)
{
	if(netctrl.err != 0 || err >= 0)
		return netctrl.err;

	netctrl.err = err;
	print("XENFE:netctrl_err err=%d\n", err);
	return netctrl.err;
}
/** Test if all network interfaces are connected.
 *
 * @return the recorded error code if one is set; otherwise 1 when
 *         connected_n equals interface_n and 0 when it does not
 */
static int netctrl_connected(void)
{
	if(netctrl.err)
		return netctrl.err;
	return netctrl.connected_n == netctrl.interface_n;
}
/** Count the connected network interfaces.
 *
 * Walks every slot of controllers[], counts those whose backend_state
 * is BEST_CONNECTED and stores the result in netctrl.connected_n.
 *
 * @return connected count
 */
static int netctrl_connected_count(void)
{
	unsigned int connected;
	int i;

	connected = 0;
	/*
	 * No ';' after the for header: a stray one used to leave the loop
	 * body empty, so only the element one past the end was examined.
	 */
	for(i = 0; i < MaxEther; i++){
		if(controllers[i].backend_state == BEST_CONNECTED)
			connected++;
	}
	LOG(dp("XENFE: connected count is now %d\n", connected);)
	netctrl.connected_n = connected;
	return connected;
}
/*
 * Reclaim transmit descriptors the backend has finished with.
 * For every response between our private consumer index and the
 * backend's response producer, the descriptor slot is returned to the
 * txd[] free list and the descriptor is freed.  Does nothing unless the
 * backend is CONNECTED.  Both callers in this file (network_start_xmit
 * and network_connect) hold np->tx_lock.
 */
static void network_tx_buf_gc(Ctlr *np)
{
#undef LOG
#define LOG(a)
NETIF_RING_IDX i, prod;
unsigned short id;
Desc *d;
LOG(dp("network_tx_buf_gc backend state is %d, BEST_CONNECTED is %d\n",
np->backend_state, BEST_CONNECTED);)
if ( np->backend_state != BEST_CONNECTED )
return;
LOG(dp("network_tx_buf_gc resp_prod %d resp_cons %d req_prd %d\n",
np->tx->resp_prod, np->tx_resp_cons, np->tx->req_prod);)
checkit("TX_BUF_GC START");
/* Repeat until the producer index stops moving underneath us. */
do {
/* snapshot the producer so the inner loop has a fixed end point */
prod = np->tx->resp_prod;
for ( i = np->tx_resp_cons; i != prod; i++ )
{
#undef LOG
#define LOG(a)
id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
LOG(dp("id. Index in ring %d, gc to free is %d\n",
MASK_NETIF_TX_IDX(i), id);)
/* fetch the descriptor before its slot is overwritten by the free-list link */
d = np->txd[id];
LOG(dp(" d for that is %p\n", d);)
ADD_ID_TO_FREELIST(np->txd, id);
LOG(dp("so free %p\n", d);)
descfree(d, "TX");
}
#undef LOG
#define LOG(a)
np->tx_resp_cons = prod;
/*
* Set a new event, then check for race with update of tx_cons. Note
* that it is essential to schedule a callback, no matter how few
* buffers are pending. Even if there is space in the transmit ring,
* higher layers may be blocked because too much data is outstanding:
* in such cases notification from Xen is likely to be the only kick
* that we'll get.
*/
np->tx->event =
prod + ((np->tx->req_prod - prod) >> 1) + 1;
mb();
}
while ( prod != np->tx->resp_prod );
/* Clear the full flag once fewer than a ring's worth of requests are outstanding. */
if ( np->tx_full &&
((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) )
{
np->tx_full = 0;
// if ( np->user_state == UST_OPEN )
// netif_wake_queue(dev);
}
checkit("TX_BUF_GC_END");
#undef LOG
#define LOG(a)
}
/*
 * Top up the receive ring with fresh buffers.
 * Each new descriptor gets a slot id from the rxd[] free list, its page
 * is unmapped with set_va_mfn(page, 0, 0) and its frame number is
 * collected in rx_pfn_array[]; the whole batch is then handed to
 * xen_mm_decrease_reservation() and the request producer index is
 * published.  Returns early when the backend is not CONNECTED or when
 * more than half a ring of requests is already outstanding.
 * Callers in this file (netif_poll, network_connect) hold np->rx_lock.
 */
static void network_alloc_rx_buffers(Ctlr *np)
{
unsigned short id;
NETIF_RING_IDX i = np->rx->req_prod;
int nr_pfns = 0;
Desc *d;
int xen_mm_decrease_reservation(unsigned long *pfn_array, int npfn);
#undef LOG
#define LOG(a)
LOG(dp("newtwork_alloc_rx_buffers\n");)
/* Make sure the batch is large enough to be worthwhile (1/2 ring). */
if (((i - np->rx_resp_cons) > (NETIF_RX_RING_SIZE/2)) ||
(np->backend_state != BEST_CONNECTED) )
return;
LOG(dp("network_alloc_rx_buffers and i is %d np->rx_resp_cons is %d\n",
i, np->rx_resp_cons);)
do {
LOG(dp(" top of loop ...\n");)
d = descalloc("RX");
LOG(dp("descalloc d returns %p\n", d);)
/* out of memory: stop here and post whatever was gathered so far */
if ( d== nil)
break;
/**/
id = GET_ID_FROM_FREELIST(np->rxd, NETIF_RX_RING_SIZE);
LOG(dp("np->rxd is %p, i is %d, d is %p\n", np->rxd, id, d);)
np->rxd[id] = d;
LOG(dp("np->rx->ring is %p\n", np->rx->ring);)
np->rx->ring[MASK_NETIF_RX_IDX(i)].req.id = id;
LOG(dp("rx_pfn_array is %p, nr_pfns is %d, PADDR(d)) is 0x%ulx\n",
rx_pfn_array, nr_pfns, PADDR(d->page));)
/**/
/* the below is cute and all; it just doesn't work.
* the error control is just too coarse-grained; which one of the
* many ops failed? You don't really know. So we do it slower
* but with more knowledge.
* we can fix it later once we're more sure that this is all
* working right.
*/
if (set_va_mfn(d->page, 0, 0)) {
dp("XENFE: Failed to set va %p to 0 and 0\n", d->page);
}
/**/
rx_pfn_array[nr_pfns] = xen_mm_mfn(d->page) >> PGSHIFT;
/* * /
rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
rx_mcl[nr_pfns].args[0] = (unsigned long)d->page >> PGSHIFT;
rx_mcl[nr_pfns].args[1] = 0;
rx_mcl[nr_pfns].args[2] = 0;
/* */
LOG(dp("bottom of loop ...\n");)
nr_pfns++;
}
/* keep going until a full ring of requests is outstanding */
while ( (++i - np->rx_resp_cons) != NETIF_RX_RING_SIZE );
LOG(dp(" nr_pfns is %d\n", nr_pfns);)
if ((nr_pfns == 0) )
return;
/*
* We may have allocated buffers which have entries outstanding in the page
* update queue -- make sure we flush those first!
*/
_flush_page_update_queue();
#ifdef NOT
/* After all PTEs have been zapped we blow away stale TLB entries. */
/* done above in non-multi-call
rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB; */
/* Give away a batch of pages. */
rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation;
rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array;
rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns;
rx_mcl[nr_pfns].args[3] = 0;
rx_mcl[nr_pfns].args[4] = DOMID_SELF;
/* Zap PTEs and give away pages in one big multicall. */
(void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1);
/* Check return status of HYPERVISOR_dom_mem_op(). */
if ( rx_mcl[nr_pfns].args[5] != nr_pfns )
panic("Unable to reduce memory reservation, err 0x%x\n",
rx_mcl[nr_pfns].args[5]);
#endif
/* A failure here is only reported; the requests are still published below. */
if (xen_mm_decrease_reservation(rx_pfn_array, nr_pfns) < 0) {
dp("XENFE: decrease reservation failed\n");
}
np->rx->req_prod = i;
LOG(dp("ALL DONE i is %d\n", i);)
#undef LOG
#define LOG(a)
}
/*
 * Queue one frame for transmission on the shared TX ring.
 *
 * The frame is copied into a freshly allocated Desc, so the caller
 * keeps ownership of data.  Once the request is on the ring the Desc
 * is released by network_tx_buf_gc() when the backend responds.
 *
 * Returns 0 when the request was posted, -1 when the ring is flagged
 * full or no Desc could be allocated, and 1 when the backend is not
 * connected.  Panics when size exceeds one page.
 */
static int network_start_xmit(Ctlr *np, void *data, int size)
{
#undef LOG
#define LOG(a)
	unsigned short id;
	Desc *d;
	netif_tx_request_t *tx;
	NETIF_RING_IDX i;
	extern int faultpanic;

	if(size > BY2PG)
		panic("xen network_start_xmit: size %d > 4096\n", size);
	if(np->tx_full){
		print("%s: full queue wasn't stopped!\n","ether");
		return -1;
	}

	/*
	 * xspanalloc'd memory cannot be freed, so descalloc() over-allocates
	 * and carves a page-aligned buffer out of the middle instead.
	 */
	d = descalloc("TX");
	LOG(dp("XENFE: tx: allocated %p\n", d);)
	if(d == nil){
		print("Trouble in network_start_xmit: descalloc failed\n");
		return -1;
	}
	d->size = size;
	memmove(d->page, data, size);

	ilock(&np->tx_lock);
	if(np->backend_state != BEST_CONNECTED){
		iunlock(&np->tx_lock);
		/*
		 * The descriptor never reached the ring, so nothing else will
		 * ever free it: release it here instead of leaking it.
		 */
		descfree(d, "TXDROP");
		return 1;
	}

	/* claim a slot id and fill in the next request entry */
	i = np->tx->req_prod;
	id = GET_ID_FROM_FREELIST(np->txd, NETIF_TX_RING_SIZE);
	np->txd[id] = d;
	tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
	LOG(dp("np->tx is %p, np->tx->ring is %p\n", np->tx, np->tx->ring);)
	LOG(dp("tx is %p, PADDR(d)) is 0x%lx, size is %d\n", tx, PADDR(d), d->size);)
	faultpanic = 0;
	LOG(dp("assign id %d\n", id);)
	tx->id = id;
	checkit("XMITMIDDLE");
	tx->addr = xen_mm_mfn(d->page);
	tx->size = d->size;

	/* the request must be fully written before the producer index moves */
	wmb();
	LOG(dp("network_stat_xmit: id %d, addr %p, size %d, set req_prod to %d\n",
		id, d, d->size, i+1);)
	np->tx->req_prod = i + 1;

	network_tx_buf_gc(np);
	if((i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1)){
		np->tx_full = 1;
		// netif_stop_queue(dev);
	}
	iunlock(&np->tx_lock);

	/* The backend is notified unconditionally for every packet. */
	mb();
	LOG(dp("network_start_xmit: notify via evtchn %d\n", np->evtchn);)
	notify_via_evtchn(np->evtchn);

	checkit("XMIT DONE");
	LOG(dp("network_start_xmit: done\n");)
	return 0;
#undef LOG
#define LOG(a)
}
/*
 * Drain the receive ring.
 * Pass 1 walks every new response: failed ones have their id re-queued
 * on the request ring, good ones have their descriptor chained onto a
 * local list and the page the backend supplied mapped at the
 * descriptor's page address.  Pass 2 copies each packet into a Block,
 * frees the descriptor and hands the Block to etheriq().  Finally the
 * consumer index is advanced, the ring is refilled and the next event
 * index is set.  Always returns 0.  Runs with np->rx_lock held for the
 * whole call and counts each call in np->interrupts.
 */
static int netif_poll(Ether *ether)
{
#undef LOG
#define LOG(a)
extern unsigned long *mfn;
unsigned char *packet;
Ctlr *np = ether->ctlr;
/* d is the head of the list of received descriptors, newd its tail */
Desc *d = 0, *newd = 0;
netif_rx_response_t *rx;
NETIF_RING_IDX i;
/* mmu_update_t *mmu = rx_mmu;*/
/* mcl is only advanced inside #ifdef NOT code, so it stays at rx_mcl */
multicall_entry_t *mcl = rx_mcl;
LOG(dp("netif_poll\n");)
np->interrupts++;
ilock(&np->rx_lock);
if ( np->backend_state != BEST_CONNECTED )
{
LOG(dp("XENFE: poll on unconnected %p\n", ether);)
iunlock(&np->rx_lock);
return 0;
}
LOG(dp("i will be from rx_resp_cons(%d) to rx->resp_prod(%d))\n",
np->rx_resp_cons, np->rx->resp_prod);)
#undef LOG
#define LOG(a)
/* Pass 1: collect responses; resp_prod is re-read on every iteration. */
for ( i = np->rx_resp_cons;
i != np->rx->resp_prod;
i++ )
{
rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
LOG(dp("XENFE: poll: rx is %p, status %d, addr 0x%ulx\n", rx, rx->status, rx->addr);)
/*
* An error here is very odd. Usually indicates a backend bug,
* low-memory condition, or that we didn't have reservation headroom.
* Whatever - print an error and queue the id again straight away.
*/
if (rx->status <= 0)
{
LOG(dp("Status for el %d is <= 0 (0x%x)\n", i, rx->status);)
/* Gate this error. We get a (valid) slew of them on suspend. */
if ( np->user_state == UST_OPEN ) {
LOG(dp( "bad buffer on RX ring!(%d)\n", rx->status);)
}
np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
wmb();
np->rx->req_prod++;
continue;
}
LOG(dp("id is %d\n", rx->id);)
LOG(dp("np %p np->rxd %p\n", np, np->rxd);)
/* append the descriptor for this id to the local list */
if (! d) {
d = np->rxd[rx->id];
LOG(dp("d %p\n", d);)
d->next = 0;
newd = d;
LOG(dp("newd %p\n", newd);)
}
else {
newd->next = np->rxd[rx->id];
newd = newd->next;
newd->next = 0;
}
LOG(dp("Collected Desc %p id %d\n", newd, rx->id);)
ADD_ID_TO_FREELIST(np->rxd, rx->id);
/*
np->stats.rx_packets++;
np->stats.rx_bytes += rx->status;
*/
LOG(dp("time to remap the page\n");)
/* Remap the page. */
LOG(dp("remap rx->addr %p (PPN 0x%ulx) to be 0x%ulx (PADDR 0x%ulx))\n",
rx->addr, PPN(rx->addr), newd->page, PADDR(newd->page));)
#ifdef NOT
mmu->ptr = PPN(rx->addr) | MMU_MACHPHYS_UPDATE;
mmu->val = PADDR(newd->page) >> PGSHIFT;
mmu++;
mcl->op = __HYPERVISOR_update_va_mapping;
mcl->args[0] = (unsigned long)newd->page >> PGSHIFT;
mcl->args[1] = PPN(rx->addr) | KZERO;
mcl->args[2] = 0;
mcl++;
#endif
/*
* Unbatched remap: record the new frame in the mfn[] table, queue the
* machine-to-physical update, then map the frame at newd->page.
*/
mfn[PADDR(newd->page) >> PGSHIFT] = rx->addr >> PGSHIFT;
queue_machphys_update(PPN(rx->addr)>>PGSHIFT,
PADDR(newd->page)>>PGSHIFT);
_flush_page_update_queue();
set_va_mfn(newd->page, rx->addr>>PGSHIFT, PTEWRITE|PTEVALID);
_flush_page_update_queue();
/* the packet starts at the in-page offset of rx->addr; a good status is its length */
newd->offset = rx->addr & (BY2PG-1);
newd->size = rx->status;
}
if ((mcl-rx_mcl ) > 0) {
LOG(dp("mcl-rx_mcl is %d\n", mcl-rx_mcl);)
}
/* Do all the remapping work, and M->P updates, in one big hypercall. */
#ifdef NOT
if ((mcl - rx_mcl) != 0)
{
mcl->op = __HYPERVISOR_mmu_update;
mcl->args[0] = (unsigned long)rx_mmu;
mcl->args[1] = mmu - rx_mmu;
mcl->args[2] = 0;
mcl++;
(void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
}
LOG(dp("DONE multicall\n");)
#endif
/* Pass 2: copy each packet into a Block and pass it up the stack. */
while (d)
{
Block *bp;
Desc *nextd;
/* throw away the MAC header? no */
packet = &d->page[d->offset];
LOG(dp("packet is %p and offset is %d\n", packet, d->offset);)
LOG(dp("Packet size is %d bytes\n", d->size);)
bp = iallocb(d->size);
if (! bp) {
/* too bad, drop it */
nextd = d->next;
descfree(d, "RXDROP");
d = nextd;
continue;
}
LOG(dp("bp->rp is %p and packet is %p\n", bp->rp, packet);)
// dp("and val of first byte of page is 0x%ux\n",
// *(unsigned char *)d->page);
// dumpit(packet, d->size);
memmove(bp->rp, packet, d->size);
bp->wp = bp->rp + d->size;
/* save the link before the descriptor is freed */
nextd = d->next;
descfree(d, "RXOK");
/* NOTE(review): if LOG is ever enabled, the next line reads d after descfree() */
LOG(dp("etheriq bp %p size %d\n", bp, d->size);)
d = nextd;
etheriq(ether, bp, 1);
}
np->rx_resp_cons = i;
// LOG(dp("Set rx_resp_cons to %d\n", i);)
network_alloc_rx_buffers(np);
// LOG(dp("Set np->rx->event to %d\n", i+1);)
np->rx->event = i + 1;
iunlock(&np->rx_lock);
return 0;
#undef LOG
#define LOG(a)
}
/*
 * Interrupt handler registered by vif_connect(); v is the Ether.
 * Receive work is done by netif_poll().  The transmit lock is taken
 * and dropped with nothing in between because the tx garbage
 * collection call that used to sit there is disabled.
 */
static void
interrupt(Ureg *, void *v)
{
	Ether *ether = (Ether *)v;
	Ctlr *np = ether->ctlr;

	netif_poll(ether);

	ilock(&np->tx_lock);
	/* network_tx_buf_gc(np) is intentionally not called here */
	iunlock(&np->tx_lock);
}
/*
 * Bring the rings back to a sane state and mark the backend CONNECTED.
 * Resets the private ring indices, rebuilds the request rings, kicks
 * the backend on status->evtchn, then runs tx garbage collection and
 * refills the receive ring.  Holds both rx_lock and tx_lock throughout.
 * NOTE(review): vif_connect() passes a netif_fe_interface_status_t *
 * for status; only status->evtchn is read here.
 */
static void network_connect(struct Ether *dev,
netif_fe_interface_status_changed_t *status)
{
struct Ctlr *np;
int i, requeue_idx;
// netif_tx_request_t *tx;
np = dev->ctlr;
ilock(&np->rx_lock);
ilock(&np->tx_lock);
/* Recovery procedure: */
/* Step 1: Reinitialise variables. */
np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
np->rx->event = 1;
/* Step 2: Rebuild the RX and TX ring contents.
* NB. We could just free the queued TX packets now but we hope
* that sending them out might do some good. We have to rebuild
* the RX ring because some of our pages are currently flipped out
* so we can't just free the RX skbs.
* NB2. Freelist index entries are always going to be less than
* __PAGE_OFFSET, whereas pointers to skbs will always be equal or
* greater than __PAGE_OFFSET: we use this property to distinguish
* them.
*/
/* Rebuild the TX buffer freelist and the TX ring itself.
* NB. This reorders packets. We could keep more private state
* to avoid this but maybe it doesn't matter so much given the
* interface has been down.
*/
/* The TX requeue body is commented out, so requeue_idx stays 0 and nothing is re-sent. */
for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ )
{
/*
if ( (unsigned long)np->tx_skbs[i] >= KZERO )
{
struct sk_buff *skb = np->tx_skbs[i];
tx = &np->tx->ring[requeue_idx++].req;
tx->id = i;
tx->addr = virt_to_machine(skb->data);
tx->size = skb->len;
np->stats.tx_bytes += skb->len;
np->stats.tx_packets++;
}
*/
}
wmb();
np->tx->req_prod = requeue_idx;
/* Rebuild the RX buffer freelist and the RX ring itself. */
/* An rxd[] entry >= KZERO is treated as a live descriptor pointer, not a free-list id. */
for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ )
if ( (unsigned long)np->rxd[i] >= KZERO )
np->rx->ring[requeue_idx++].req.id = i;
wmb();
np->rx->req_prod = requeue_idx;
/* Step 3: All public and private state should now be sane. Get
* ready to start sending and receiving packets and give the driver
* domain a kick because we've probably just requeued some
* packets.
*/
np->backend_state = BEST_CONNECTED;
notify_via_evtchn(status->evtchn);
/**/
network_tx_buf_gc(np);
network_alloc_rx_buffers(np);
/**/
/*
IS there a plan 9 thing we should do here?
if ( np->user_state == UST_OPEN )
netif_start_queue(dev);
*/
iunlock(&np->tx_lock);
iunlock(&np->rx_lock);
}
/*
 * Print a one-line summary of an interface (or "<vif NULL>" for a nil
 * pointer).  Compiled to an empty function unless DEBUG is defined.
 */
static void vif_show(struct Ctlr *np)
{
#ifdef DEBUG
	if(np == nil){
		IPRINTK("<vif NULL>\n");
		return;
	}
	IPRINTK("<(%p):vif handle=%d %s(%s) evtchn=%d irq=%d tx=%p rx=%p>\n",
		np, np->handle,
		be_state_name[np->backend_state],
		np->user_state ? "open" : "closed",
		np->evtchn,
		np->irq,
		np->tx,
		np->rx);
#endif
}
/* Send a connect message to xend to tell it to bring up the interface. */
static void send_interface_connect(struct Ctlr *np)
{
ctrl_msg_t cmsg = {
.type = CMSG_NETIF_FE,
.subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
.length = sizeof(netif_fe_interface_connect_t),
};
netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
DPRINTK(">\n"); vif_show(np);
msg->handle = np->handle;
msg->tx_shmem_frame = xen_mm_mfn(np->tx) >> PGSHIFT;
msg->rx_shmem_frame = xen_mm_mfn(np->rx) >> PGSHIFT;
/* Tell the controller to bring up the interface. */
ctrl_if_send_message_block(&cmsg, nil, 0, 0);
print("CONNECT: message sent. Set something to 0 just to see\n");
/* np->tx->ring[0].req.id = 0;*/
print("CONNECT: set it\n");
DPRINTK("<\n");
}
/* Send a driver status notification to the domain controller. */
static int send_driver_status(int ok)
{
int err;
ctrl_msg_t cmsg = {
.type = CMSG_NETIF_FE,
.subtype = CMSG_NETIF_FE_DRIVER_STATUS,
.length = sizeof(netif_fe_driver_status_t),
};
netif_fe_driver_status_t *msg = (void*)cmsg.msg;
msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
err = ctrl_if_send_message_block(&cmsg, nil, 0, 0);
return err;
}
/*
 * Forget the interface's irq, event channel and ring pointers.
 * Nothing is actually freed or unbound yet (see the leak note below),
 * and the backend state is left untouched.
 */
static void vif_release(struct Ctlr *np)
{
	/*
	 * Stop old i/f to prevent errors whilst we rebuild the state.
	 * With the queue stop and state change disabled this only
	 * acquires and releases both locks.
	 */
	ilock(&np->tx_lock);
	ilock(&np->rx_lock);
	// netif_stop_queue(np->dev);
	/* np->backend_state = BEST_DISCONNECTED; */
	iunlock(&np->rx_lock);
	iunlock(&np->tx_lock);

	if(np->tx == nil)
		return;

	/* leave leak here ...
	 * this will matter at some point, I want plan 9 experts to
	 * get all the ins/outs of release done right.
	free_irq(np->irq, np->dev);
	unbind_evtchn_from_irq(np->evtchn);
	free_page((unsigned long)np->tx);
	free_page((unsigned long)np->rx);
	 */
	np->irq = 0;
	np->evtchn = 0;
	np->tx = nil;
	np->rx = nil;
}
/*
 * Handle a CLOSED status: report it as unexpected, release the
 * interface's resources and move it to BEST_CLOSED.
 */
static void vif_close(struct Ctlr *np)
{
	DPRINTK(">\n");
	vif_show(np);

	WPRINTK("Unexpected netif-CLOSED message in state %s\n",
		be_state_name[np->backend_state]);

	vif_release(np);
	np->backend_state = BEST_CLOSED;
	/* todo: take dev down and free. */

	vif_show(np);
	DPRINTK("<\n");
}
/* Move the vif into disconnected state.
* Allocates tx/rx pages.
* Sends connect message to xend.
* N.B. On plan 9 we don't expect this to get called (yet)
*/
static void vif_disconnect(struct Ctlr *np){
DPRINTK(">\n");
/* LEAK
if(np->tx) free_page((unsigned long)np->tx);
if(np->rx) free_page((unsigned long)np->rx);
*/
print("There's still a leak in vif_disconnect\n");
// Before this np->tx and np->rx had better be null.
np->tx = (netif_tx_interface_t *)xspanalloc(BY2PG, BY2PG, 0);
np->rx = (netif_rx_interface_t *)xspanalloc(BY2PG, BY2PG, 0);
memset(np->tx, 0, BY2PG);
memset(np->rx, 0, BY2PG);
LOG(dp("CONNECT: np->tx is %p, np->rx is %p\n", np->tx, np->rx);)
LOG(dp("CONNECT: MFN of tx is 0x%lx, PADDR is 0x%lx\n",
xen_mm_mfn(np->tx), PADDR(np->tx));)
np->backend_state = BEST_DISCONNECTED;
send_interface_connect(np);
vif_show(np); DPRINTK("<\n");
}
/* Begin interface recovery.
*
* NB. Whilst we're recovering, we turn the carrier state off. We
* take measures to ensure that this device isn't used for
* anything. We also stop the queue for this device. Various
* different approaches (e.g. continuing to buffer packets) have
* been tested but don't appear to improve the overall impact on
* TCP connections.
*
* TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
* is initiated by a special "RESET" message - disconnect could
* just mean we're not allowed to use this interface any more.
*/
static void
vif_reset(struct Ctlr *np)
{
	DPRINTK(">\n");
	IPRINTK("Attempting to reconnect network interface: handle=%d\n",
		np->handle);

	/* drop the old ring state, then go through disconnect again */
	vif_release(np);
	vif_disconnect(np);

	vif_show(np);
	DPRINTK("<\n");
}
/*
 * Move the vif into connected state.
 * Copies the MAC address from the status message, reinitialises the
 * controller slot, rebuilds the rings (network_connect), binds the
 * event channel from the message to an irq and enables it, recounts
 * the connected interfaces and registers the ethernet card type.
 */
static void
vif_connect(
	struct Ctlr *np, netif_fe_interface_status_t *status)
{
	/*
	 * NOTE(review): this local prototype says struct Ether ** but
	 * create_netdev() is defined with struct Ctlr **; only nil is
	 * passed here.
	 */
	static int create_netdev(int, struct Ether **);
	int xenfrontendreset(Ether *ether);
	struct Ether *dev = np->dev;

	DPRINTK(">\n");
	memmove(dev->ea, status->mac, sizeof(dev->ea));
	/* NOTE(review): copies sizeof(dev->addr) bytes out of status->mac; confirm the two sizes agree */
	memmove(dev->addr, status->mac, sizeof(dev->addr));

	create_netdev(status->handle, nil);
	network_connect(dev, status);

	np->evtchn = status->evtchn;
	np->irq = bind_evtchn_to_irq(np->evtchn, 0);
	intrenable(np->irq, interrupt, dev, 0, "xen network");
	unmask_evtchn(np->evtchn);

	/* one recount is enough; it used to be called twice back to back */
	netctrl_connected_count();
	// vif_wake(dev);
	addethercard("xenfrontend", xenfrontendreset);

	vif_show(np);
	DPRINTK("<\n");
}
/* Get the target interface for a status message.
 * Creates the interface when it makes sense.
 * The returned interface may be null when there is no error.
 *
 * A handle outside controllers[] is rejected with -1 before it is used
 * as an index.
 *
 * @param status status message
 * @param np return parameter for interface state (may be nil)
 * @return 0 on success, error code otherwise
 */
static int
target_vif(
	netif_fe_interface_status_t *status, struct Ctlr **np)
{
	static int create_netdev(int, struct Ether **);
	int err = 0;
	/* must start out nil: the exit path reads dev on every route */
	struct Ether *dev = nil;

	DPRINTK("> handle=%d\n", status->handle);
	if ( status->handle < 0 || status->handle >= MaxEther )
	{
		err = -1;
		goto exit;
	}
	if ( (dev = find_dev_by_handle(status->handle)) != nil )
		goto exit;
	if ( status->status == NETIF_INTERFACE_STATUS_CLOSED )
		goto exit;
	if ( status->status == NETIF_INTERFACE_STATUS_CHANGED )
		goto exit;

	/* It's a new interface in a good state - create it. */
	DPRINTK("> create device...\n");
	/*
	 * NOTE(review): create_netdev() never stores through its second
	 * argument, so dev is still nil after a successful create and the
	 * caller sees "no vif".
	 */
	if ( (err = create_netdev(status->handle, &dev)) != 0 )
		goto exit;
	netctrl.interface_n++;
exit:
	if ( np != nil )
		*np = ((dev && !err) ? dev->ctlr : nil);
	DPRINTK("< err=%d\n", err);
	return err;
}
/*
 * Handle an interface status message: find the target interface and
 * drive its state machine according to the reported status and the
 * current backend state.  Messages for unknown or invalid interfaces
 * are logged and dropped.
 */
static void netif_interface_status(netif_fe_interface_status_t *status)
{
	int err;
	struct Ctlr *np = nil;
	char *name;

	/*
	 * The status code comes straight from the message, so range-check
	 * it before using it as an index into status_name[].
	 */
	name = "invalid";
	if ( status->status < sizeof(status_name)/sizeof(status_name[0])
	&& status_name[status->status] != nil )
		name = status_name[status->status];

	DPRINTK(">\n");
	DPRINTK("> status=%s handle=%ud\n", name, status->handle);
	if ( (err = target_vif(status, &np)) != 0 )
	{
		WPRINTK("Invalid netif: handle=%ud, err %d\n", status->handle, err);
		return;
	}
	if ( np == nil )
	{
		DPRINTK("> no vif\n");
		return;
	}
	DPRINTK(">\n"); vif_show(np);

	switch ( status->status )
	{
	case NETIF_INTERFACE_STATUS_CLOSED:
		/* every backend state is closed the same way */
		switch ( np->backend_state )
		{
		case BEST_CLOSED:
		case BEST_DISCONNECTED:
		case BEST_CONNECTED:
			vif_close(np);
			break;
		}
		break;
	case NETIF_INTERFACE_STATUS_DISCONNECTED:
		switch ( np->backend_state )
		{
		case BEST_CLOSED:
			vif_disconnect(np);
			break;
		case BEST_DISCONNECTED:
		case BEST_CONNECTED:
			vif_reset(np);
			break;
		}
		break;
	case NETIF_INTERFACE_STATUS_CONNECTED:
		/* an already-connected interface is left alone */
		switch ( np->backend_state )
		{
		case BEST_CLOSED:
			WPRINTK("Unexpected netif status %s in state %s\n",
				name,
				be_state_name[np->backend_state]);
			vif_disconnect(np);
			vif_connect(np, status);
			break;
		case BEST_DISCONNECTED:
			vif_connect(np, status);
			break;
		}
		break;
	case NETIF_INTERFACE_STATUS_CHANGED:
		/*
		 * The domain controller is notifying us that a device has been
		 * added or removed.
		 */
		break;
	default:
		WPRINTK("Invalid netif status code %d\n", status->status);
		break;
	}
	vif_show(np);
	DPRINTK("<\n");
}
/*
 * Handle a driver status message: record the reported status in
 * netctrl.iface_up and recount the connected interfaces.
 */
static void
netif_driver_status(netif_fe_driver_status_t *status)
{
	DPRINTK("> status=%d\n", status->status);

	netctrl.iface_up = status->status;
	/* interface_n and connected_n are deliberately not reset here */
	netctrl_connected_count();
}
/** Create a network device.
* @param handle device handle
* @param val return parameter for created device
* @return 0 on success, error code otherwise
*/
static int create_netdev(int handle, struct Ctlr ** /*val*/)
{
int i;
struct Ctlr *np = &controllers[handle];
dp("XENFE: create_netdev %d\n", handle);
np->backend_state = BEST_CLOSED;
np->user_state = UST_CLOSED;
np->handle = handle;
// np->tx_lock = np->rx_lock = 0;
/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
np->txd[i] = (void *)(i+1);
for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ )
np->rxd[i] = (void *)(i+1);
LOG(dp("XENFE: all done set up the rings\n");)
return 0;
}
/*
 * Initialize the network control interface from a driver status
 * change message: record the interface count and status, then create
 * a device for each handle from 0 up to that count.  The first
 * creation failure is latched with netctrl_err() and stops the loop.
 */
static void netif_driver_status_change(
	netif_fe_driver_status_changed_t *status)
{
	int i, err;

	LOG(dp("XENFE: netif_driverr_status_chnage # ifaces %d\n", netctrl.interface_n );)
	LOG(dp("XENFE: status says %d interfaces\n", status->nr_interfaces);)
	netctrl.interface_n = status->nr_interfaces;
	netctrl.connected_n = 0;
	netctrl.iface_up = status->status;

	/* leave this here? Probably not*/
	for(i = 0; i < netctrl.interface_n; i++){
		err = create_netdev(i, nil);
		if(err == 0)
			continue;
		netctrl_err(err);
		LOG(dp("create netdev failed...\n");)
		break;
	}

	netctrl_connected_count();
	LOG(dp("XENFE DONE driver status change\n");)
}
/*
 * Receiver for CMSG_NETIF_FE control messages (registered by
 * netif_init()).  Dispatches on the message subtype after checking the
 * payload length.  A bad length or an unknown subtype zeroes
 * msg->length.  Every message is answered with ctrl_if_send_response()
 * except a well-formed driver status message, which is itself a
 * response.
 */
static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long /*id*/)
{
	int respond;

	respond = 1;
	LOG(dp("XENFE: xenfe netif_ctrlif_rx subtype %d\n", msg->type);)

	if(msg->subtype == CMSG_NETIF_FE_INTERFACE_STATUS){
		dp("XENFE: got an iface status changed message, length %d want %d\n",
			msg->length, sizeof(netif_fe_interface_status_changed_t));
		/* the expected length is hard-coded as 18 rather than taken from sizeof */
		if(msg->length == 18){
			netif_interface_status((netif_fe_interface_status_t *)
				&msg->msg[0]);
			dp("Done iface status\n");
		}else
			msg->length = 0;
	}else if(msg->subtype == CMSG_NETIF_FE_DRIVER_STATUS){
		dp("XENFE: got a driver status changed message, len %d, want %d\n",
			msg->length, sizeof(netif_fe_driver_status_changed_t));
		if(msg->length == sizeof(netif_fe_driver_status_changed_t)){
			dp("Call netif_driver_status_change ...\n");
			netif_driver_status((netif_fe_driver_status_t *)
				&msg->msg[0]);
			dp("Done driver status\n");
			/* Message is a response */
			respond = 0;
		}else
			msg->length = 0;
	}else
		msg->length = 0;

	if(respond)
		ctrl_if_send_response(msg);
}
/*
 * One-time driver start-up: reset the bookkeeping, register
 * netif_ctrlif_rx() as the receiver for CMSG_NETIF_FE messages and
 * send a driver-UP notification to the domain controller.
 * Always returns 0.
 */
static int netif_init(void)
{
	ctrl_msg_t cmsg;
	netif_fe_driver_status_changed_t st;
	int err = 0;

	/*
	if ( (start_info.flags & SIF_INITDOMAIN) ||
	(start_info.flags & SIF_NET_BE_DOMAIN) )
	return 0;
	*/
	print("XENFE:Initialising Xen virtual ethernet frontend driver");
	netctrl_init();
	ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
		CALLBACK_IN_BLOCKING_CONTEXT);

	/*
	 * Both structures live on the stack and are copied out verbatim,
	 * so clear them first; otherwise any field not set below goes out
	 * holding stack garbage.
	 */
	memset(&cmsg, 0, sizeof(cmsg));
	memset(&st, 0, sizeof(st));

	/* Send a driver-UP notification to the domain controller. */
	cmsg.type = CMSG_NETIF_FE;
	cmsg.subtype = CMSG_NETIF_FE_DRIVER_STATUS;
	cmsg.length = sizeof(netif_fe_driver_status_changed_t);
	st.status = NETIF_DRIVER_STATUS_UP;
	st.nr_interfaces = 0;
	memmove(cmsg.msg, &st, sizeof(st));
	ctrl_if_send_message_block(&cmsg, nil, 0, 0);
	return err;
}
/* Placeholder controller initialisation; does nothing beyond optional logging. */
static void
initxen(Ctlr* ctlr)
{
	USED(ctlr);
	LOG(dp("XENFE: xenfe init %p\n", ctlr);)
}
/* Stub: reports the call and returns a null pointer. */
static uchar*
startdma(Ether* ether, ulong address)
{
	print("XENFE:xenfe startdma %p %ulx\n", ether, address);

	return 0;
}
/* Promiscuous-mode hook installed by xenfrontendreset(); only reports the request. */
static void
promiscuous(void* arg, int on)
{
	print("XENFE:xenfe promisc %p %d\n",
		arg, on);
}
/* Multicast hook installed by xenfrontendreset(); only reports the request. */
static void
multicast(void* arg, uchar *addr, int on)
{
	print("XENFE:xenfe mc %p %p %d\n",
		arg, addr, on);
}
/*
 * Attach hook installed by xenfrontendreset().
 * Yields to the hypervisor for as long as netctrl_connected() reports
 * a negative (error) value, then marks the controller attached.
 * Further calls return immediately once attached is set.
 * NOTE(review): the loop does not wait while netctrl_connected()
 * returns 0 (not yet connected) -- confirm that is the intent.
 */
static void
attach(Ether* ether)
{
	Ctlr *ctlr;
	int first;

	dp("XENFE: etherxenfrontend: attach ether port 0x%lux ctrl %p\n",
		ether->port, ether->ctlr);

	for(;;){
		if(netctrl_connected() >= 0)
			break;
		LOG(dp("XENFE: waiting for one connect\n");)
		HYPERVISOR_yield();
	}

	ctlr = ether->ctlr;
	ilock(ctlr);
	first = !ctlr->attached;
	if(first)
		ctlr->attached = 1;
	iunlock(ctlr);
	if(!first)
		return;
	LOG(dp("XENFE: attach done for port %d\n", ether->port);)
}
/* Placeholder statistics refresh, called from ifstat(); gathers nothing. */
static void
statistics(Ether* ether)
{
	USED(ether);
	LOG(dp("XENFE: xenfe statistics %p\n", ether);)
}
/*
 * Push queued output packets to the backend until the output queue is
 * empty or the transmit ring is flagged full.
 * A packet that cannot be sent is parked in ctlr->txbp and retried
 * first on the next call.  Each packet sent has bytes 6 onward
 * overwritten with ether->ea, is passed to network_start_xmit() with
 * its length rounded up to an even number, and is then freed; the
 * result of network_start_xmit() is not examined.
 * transmit() calls this with the controller lock held.
 */
static void
txstart(Ether* ether)
{
#undef LOG
#define LOG(a)
	Ctlr *ctlr;
	Block *bp;
	int len;

	LOG(dp("XENFE: xenfe txstart %p\n", ether);)
	ctlr = ether->ctlr;
	for(;;){
		/* a parked packet goes first, otherwise take the next queued one */
		bp = ctlr->txbp;
		if(bp)
			ctlr->txbp = 0;
		else{
			bp = qget(ether->oq);
			if(bp == nil)
				break;
		}

		if(ctlr->tx_full){
			/* no room: park the packet and stop */
			ctlr->txbp = bp;
			if(ctlr->txbusy == 0)
				ctlr->txbusy = 1;
			break;
		}

		len = ROUNDUP(BLEN(bp), 2);
		memmove(&bp->rp[6], ether->ea, sizeof(ether->ea));
		network_start_xmit(ctlr, bp->rp, len);
		freeb(bp);
		ether->outpackets++;
	}
	LOG(dp("txstart: done\n");)
#undef LOG
#define LOG(a)
}
/* Transmit hook installed by xenfrontendreset(): run txstart() under the controller lock. */
static void
transmit(Ether* ether)
{
#undef LOG
#define LOG(a)
	Ctlr *ctlr = ether->ctlr;

	ilock(ctlr);
	LOG(dp("XENFE: xenfe xmit port %d ctlr %p\n", ether->port, ctlr);)
	txstart(ether);
	iunlock(ctlr);

	LOG(dp("XENFE: transmit done\n");)
#undef LOG
#define LOG(a)
}
/*
 * ifstat hook installed by xenfrontendreset(): format the driver
 * statistics (currently just the interrupt count) and copy the part
 * selected by offset and n into a.
 * Returns the number of bytes produced by readstr(), or 0 when n is 0.
 * Raises Enomem through error() when the scratch buffer cannot be
 * allocated.
 */
static long
ifstat(Ether* ether, void* a, long n, ulong offset)
{
	char *p;
	Ctlr *ctlr;

	LOG(dp("XENFE: ifstat %p, %p, %ld, %ld\n", ether, a, n, offset);)
	if(n == 0)
		return 0;

	ctlr = ether->ctlr;
	ilock(ctlr);
	statistics(ether);
	iunlock(ctlr);

	/* malloc can fail; formatting into a nil buffer would fault */
	p = malloc(READSTR);
	if(p == nil)
		error(Enomem);
	snprint(p, READSTR, "interrupts: %d\n", ctlr->interrupts);
	/*
	 * The up/down queue statistics inherited from the 3Com driver were
	 * dropped: they referred to members this Ctlr does not have.
	 */
	n = readstr(offset, a, n, p);
	free(p);
	return n;
}
static void
txrxreset(int port)
{
LOG(dp("XENFE: txrxreset %d\n", port);)
USED(port);
}
/* Shutdown hook installed by xenfrontendreset(); the controller reset is disabled, so this is a no-op. */
static void
shutdown(Ether *ether)
{
	USED(ether);
	LOG(dp("XENFE: xenfrontend shutting down %p\n", ether);)
	// resetctlr(ether->ctlr);
}
/* this is an experiment ... */
/* Last Ether set up by xenfrontendreset(); polled by xenpoll(). */
Ether *theEther = nil;
void
xenpoll(void) {
dp("X");
if (theEther)
netif_poll(theEther);
}
/*
 * Reset routine registered with addethercard().
 * Binds ether to its slot in controllers[] and installs this driver's
 * entry points.  Only controller number 0 is accepted.
 * Returns 0 on success, -1 when ether->ctlrno is greater than 0.
 */
int
xenfrontendreset(Ether* ether)
{
	static int scandone;
	Ctlr *ctlr;

	LOG(dp("XENFE: xenfrontendreset!\n");)
	if(ether->ctlrno > 0)
		return -1;
	ctlr = &controllers[ether->ctlrno];

	ilock(ctlr);
	/* tie the controller slot and the Ether together */
	ether->ctlr = ctlr;
	ctlr->dev = ether;
	ether->arg = ether;
	ether->maxmtu = 1514;

	/* Linkage to the generic ethernet driver. */
	ether->attach = attach;
	ether->transmit = transmit;
	ether->interrupt = interrupt;
	ether->ifstat = ifstat;
	ether->promiscuous = promiscuous;
	ether->multicast = multicast;
	ether->shutdown = shutdown;
	iunlock(ctlr);

	theEther = ether;
	LOG(dp("XENFE: xenfrontendreset: OK\n");)
	return 0;
}
/*
 * Link-time entry point: start the control-interface side of the
 * driver, then register the "xenfrontend" ethernet card type.
 */
void
etherxenfrontendlink(void)
{
	LOG(dp("XENFE: etherxenfrontendlink!\n");)

	netif_init();
	addethercard("xenfrontend", xenfrontendreset);
}
|