/*
 * Plan 9 from Bell Labs: /usr/web/sources/xen/xen2/9/xenpc/etherxenfrontend.c
 *
 * Copyright © 2021 Plan 9 Foundation.
 * Distributed under the MIT License.
 */


/*
 * xen ethernet driver, derived from:
 * the Etherlink III, Fast EtherLink and Fast EtherLink XL driver,
 * and the Linux Xen front-end driver.
 * To do:
 * clean up and fix up by plan 9 experts
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "../port/netif.h"
#include "../xen/xennet.h"

static char *status_name[] = {
    [NETIF_INTERFACE_STATUS_CLOSED]       = "closed",
    [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
    [NETIF_INTERFACE_STATUS_CONNECTED]    = "connected",
    [NETIF_INTERFACE_STATUS_CHANGED]      = "changed",
};


/* fix me later ... this xen ether breaks a lot of rules ... not my code ... RGM */
void queue_machphys_update(ulong mfn, ulong pfn);

#include "etherif.h"
#define LOG(a) 
#define DPRINTK dp
#define WPRINTK dp
#define IPRINTK dp
#define XCVRDEBUG		if(1)print
#define DEBUG

/******************************************************************************
 * Virtual network driver for conversing with remote driver backends.
 * 
 * Copyright (c) 2002-2004, K A Fraser
 */

#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */


static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];

struct Desc {
	/* can't do this until xspanalloc is fixed ... */
//	unsigned char page[BY2PG];
	ulong stat;
	ulong size;
	ulong offset;
	unsigned char *page; /* will point into a page-aligned place in data */
	/* just to mess around -- we're having free list corruption and
	 * it occurred in at least one case when two of these descs
	 * butted up against each other, so make it four pages, not two.
	 * (later) turned out to be a long-standing bug in
	 * xspanalloc! But leave this here for now anyway.
	 */

	unsigned char data[4*BY2PG];
	struct Desc *next;
};
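
/*
 * descalloc() below carves a page-aligned buffer out of data[]: with
 * 4*BY2PG bytes of slack, rounding (p + 2*BY2PG) down to a page
 * boundary always lands on a page-aligned address with at least one
 * whole page of data[] left after it.  For example, if data starts at
 * 0x80301010, page becomes 0x80303000 and that page ends at 0x80304000,
 * well inside data[], which ends at 0x80305010.
 */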

int desc_alloc_count = 0, desc_free_count = 0;
void *descarray[2048];

void
recordalloc(void *p) {
	int i, found;
	for(i = found = 0; (! found) && (i < 2048); i++) {
		if (descarray[i] == 0) {
			descarray[i] = p;
			found++;
		}
	}
}

void
recordfree(void *p) {
	int i;
	for(i = 0; i < 2048; i++) {
		if (descarray[i] == p) {
			descarray[i] = 0;
			break;
		}
	}
}

void
dumplist(void) {
	int i;
	for(i = 0; i < 2048; i++) {
		void *p = descarray[i];
		ulong *l;
		if (! p)
			continue;
		l = (ulong *) ((ulong) p - 8);
		dp("%p: size 0x%ulx magic 0x%ulx\n", p, *l, l[1]);
	}
	panic("shit");
}

void
checkit(char *s) {
	int i;
	for(i = 0; i < 2048; i++) {
		void *p = descarray[i];
		ulong *l;
		if (! p)
			continue;
		l = (ulong *) ((ulong) p - 4);
		if (*l != 0x484f4c45) {
			dp("%s: For %p hole is bad: 0x%ulx, mfn 0x%ulx\n", 
				s, p, *l, xen_mm_mfn(l));
			dumplist();
		}
	}
}

		
struct Desc *descalloc(char *) {
	struct Desc *d;
	unsigned long p;
	checkit("PREALLOC");
	d = xallocz(sizeof(*d), 0);
	/* I need to learn how to use waserror/poperror better */
	if (! d)
		return 0;
	p = (unsigned long) d->data;
	p = (p + 2*BY2PG) & ~(BY2PG-1);
	d->page = (unsigned char *) p;
	/* not atomic, sorry */
	desc_alloc_count++;
	/*
	dp("DESC: %s: alloc %p(0x%ulx), page %p(0x%ulx)\n", 
		s, d, xen_mm_mfn(d), d->page, xen_mm_mfn(d->page));
	*/
	recordalloc(d);
	checkit("ALLOC");
	return d;
}

void descfree(struct Desc *d, char *) {
/*	dp("DESC: %s: free %p(0x%ulx), page %p(0x%ulx)\n", 
	type, d, xen_mm_mfn(d), d->page, xen_mm_mfn(d->page));
	*/
	checkit("FREE");
	recordfree(d);
	xfree(d);
	desc_free_count++;
}

	
typedef struct Desc Desc;

struct Ctlr
{
	Lock; /* consider a Qlock here a la the rhine */
	struct netif_st *next;
	Ether *dev;
	int attached;
	NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
	unsigned int tx_full, txbusy;
	Block *txbp;

	netif_tx_interface_t *tx;
	netif_rx_interface_t *rx;

	Lock   tx_lock;
	Lock   rx_lock;

	unsigned int handle;
	unsigned int evtchn;
	unsigned int irq;
	int interrupts;

	/* What is the status of our connection to the remote backend? */
#define BEST_CLOSED       0
#define BEST_DISCONNECTED 1
#define BEST_CONNECTED    2
	unsigned int backend_state;

	/* Is this interface open or closed (down or up)? */
#define UST_CLOSED        0
#define UST_OPEN          1
	unsigned int user_state;

	/* entry 0 is the freelist head, so size these one larger than the rings */
	Desc *txd[NETIF_TX_RING_SIZE+1];
	Desc *rxd[NETIF_RX_RING_SIZE+1];
};

static char *be_state_name[] = {
    [BEST_CLOSED]       = "closed",
    [BEST_DISCONNECTED] = "disconnected",
    [BEST_CONNECTED]    = "connected",
};

/* keep it simple. Just statically allocate it for maxether. */
typedef struct Ctlr Ctlr;
Ctlr controllers[MaxEther];
/* Access macros for acquiring/freeing slots in txd[]/rxd[]. */
#define ADD_ID_TO_FREELIST(_list, _id)             \
    do {                                           \
        (_list)[(_id)] = (_list)[0];               \
        (_list)[0]     = (void *)(unsigned long)(_id); \
    } while(0)
/* I hate this kind of code anyway ...
#define GET_ID_FROM_FREELIST(_list)                \
 ({ unsigned long _id = (unsigned long)(_list)[0]; \
    (_list)[0]  = (_list)[_id];                    \
    (unsigned short)_id; })
*/

unsigned short GET_ID_FROM_FREELIST(Desc *_list[], int max) {
	unsigned short _id = (unsigned short)(unsigned long)(_list)[0];
	if (_id > max)
		panic("Bogus ID in GET_ID_FROM_FREELIST: 0x%x, max 0x%x\n", 
				_id, max);
	_list[0] = _list[_id];
/*
	LOG(dp("get id from freelist _list %p _id %d\n", _list, _id);)
	LOG(dp("New list[0] is %d\n", _list[0]);)
 */
	return _id;
}
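
/*
 * Freelist convention (inherited from the Linux driver): txd[]/rxd[]
 * entry 0 holds the index of the first free slot, and each free slot
 * holds the index of the next free one, cast to a pointer.  Slots that
 * hold a real Desc hold a kernel pointer, i.e. a value at or above
 * KZERO, which is how network_connect below tells the two apart.
 */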

void dumpit(void *x, int size) 
{
	int i, j;
	unsigned char *cp = x;

	dp("New packet: %p %d bytes\n", x, size);
	for(i = 0; i < size; i += 16) {
		dp("0x%x: ", i);
		for(j = 0; j < 16 && (i+j) < size; j++) {
			dp("%02x ", cp[i+j]);
		}
		dp("\n");
	}
	dp("end of packet\n");
}

static struct Ether *find_dev_by_handle(unsigned int handle)
{
    struct Ctlr *np = &controllers[handle];    
    print("XENFE:find_dev_by_handle %d np %p np->dev %p\n", 
		handle, np, np->dev);
    return np->dev;

}


/** Network interface info. */
struct netif_ctrl {
    /** Number of interfaces. */
    int interface_n;
    /** Number of connected interfaces. */
    int connected_n;
    /** Error code. */
    int err;
  int iface_up;
};

static struct netif_ctrl netctrl;

static void netctrl_init(void)
{
    memset(&netctrl, 0, sizeof(netctrl));
    netctrl.interface_n = -1;
}

/** Get or set a network interface error.
 */
static int netctrl_err(int err)
{
    if(err < 0 && !netctrl.err){
        netctrl.err = err;
        print("XENFE:netctrl_err err=%d\n", err);
    }
    return netctrl.err;
}

/** Test if all network interfaces are connected.
 *
 * @return 1 if all connected, 0 if not, negative error code otherwise
 */
static int netctrl_connected(void)
{
    int ok;
    ok = (netctrl.err ? netctrl.err :
          (netctrl.connected_n == netctrl.interface_n));
    return ok;
}

/** Count the connected network interfaces.
 *
 * @return connected count
 */
static int netctrl_connected_count(void)
{
    
    struct Ctlr *np = controllers;
    unsigned int connected;
    int i;

    connected = 0;
    
    for(i = 0; i < MaxEther; i++, np++)
    {

        if ( np->backend_state == BEST_CONNECTED )
            connected++;
    }
    LOG(dp("XENFE: connected count is now %d\n", connected);)
    netctrl.connected_n = connected;
    return connected;
}

static void network_tx_buf_gc(Ctlr *np)
{
#undef LOG
#define LOG(a) 
    NETIF_RING_IDX i, prod;
    unsigned short id;
    Desc *d;

	LOG(dp("network_tx_buf_gc backend state is %d, BEST_CONNECTED is %d\n", 
		np->backend_state, BEST_CONNECTED);)

    if ( np->backend_state != BEST_CONNECTED )
        return;
	LOG(dp("network_tx_buf_gc resp_prod %d resp_cons %d req_prd %d\n", 
		np->tx->resp_prod, np->tx_resp_cons, np->tx->req_prod);)
	checkit("TX_BUF_GC START");
    do {
        prod = np->tx->resp_prod;

        for ( i = np->tx_resp_cons; i != prod; i++ )
        {
#undef LOG
#define LOG(a) 
            id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
	   LOG(dp("id. Index in ring %d, gc to free is %d\n", 
		MASK_NETIF_TX_IDX(i), id);)
            d = np->txd[id];
	   LOG(dp("   d for that is %p\n", d);)
            ADD_ID_TO_FREELIST(np->txd, id);
	   LOG(dp("so free %p\n", d);)
            descfree(d, "TX");
        }
#undef LOG
#define LOG(a) 
        np->tx_resp_cons = prod;
        
        /*
         * Set a new event, then check for race with update of tx_cons. Note
         * that it is essential to schedule a callback, no matter how few
         * buffers are pending. Even if there is space in the transmit ring,
         * higher layers may be blocked because too much data is outstanding:
         * in such cases notification from Xen is likely to be the only kick
         * that we'll get.
         */
        np->tx->event = 
            prod + ((np->tx->req_prod - prod) >> 1) + 1;
        mb();
    }
    while ( prod != np->tx->resp_prod );

    if ( np->tx_full && 
         ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) )
    {
        np->tx_full = 0;
   //     if ( np->user_state == UST_OPEN )
     //       netif_wake_queue(dev);
    }
	checkit("TX_BUF_GC_END");
#undef LOG
#define LOG(a) 
}


static void network_alloc_rx_buffers(Ctlr *np)
{
    unsigned short id;
    NETIF_RING_IDX i = np->rx->req_prod;
    int nr_pfns = 0;
    Desc *d;
    int xen_mm_decrease_reservation(unsigned long *pfn_array, int npfn);
#undef LOG
#define LOG(a) 
    LOG(dp("newtwork_alloc_rx_buffers\n");)
    /* Make sure the batch is large enough to be worthwhile (1/2 ring). */
    if (((i - np->rx_resp_cons) > (NETIF_RX_RING_SIZE/2)) || 
        (np->backend_state != BEST_CONNECTED) )
        return;
    LOG(dp("network_alloc_rx_buffers and i is %d np->rx_resp_cons is %d\n", 
				i, np->rx_resp_cons);)
    do {
	LOG(dp("  top of loop ...\n");)
        d = descalloc("RX");
	LOG(dp("descalloc d returns %p\n", d);)
        if ( d== nil)
            break;
    /**/
   	id = GET_ID_FROM_FREELIST(np->rxd, NETIF_RX_RING_SIZE);
	LOG(dp("np->rxd is %p, i is %d, d is %p\n", np->rxd, id, d);)
        np->rxd[id] = d;
        
	LOG(dp("np->rx->ring is %p\n", np->rx->ring);)
        np->rx->ring[MASK_NETIF_RX_IDX(i)].req.id = id;
        
	LOG(dp("rx_pfn_array is %p, nr_pfns is %d, PADDR(d)) is 0x%ulx\n",
			rx_pfn_array, nr_pfns, PADDR(d->page));)
/**/
	/* the below is cute and all; it just doesn't work.
	 * the error control is just too coarse-grained; which one of the
	 * many ops failed? You don't really know. So we do it slower
	 * but with more knowledge.
	 * we can fix it later once we're more sure that this is all
	 * working right.
	 */
	if (set_va_mfn(d->page, 0, 0)) {
		dp("XENFE: Failed to set va %p to 0 and 0\n", d->page);
	}
/**/
        rx_pfn_array[nr_pfns] = xen_mm_mfn(d->page) >> PGSHIFT;
	/* multicall version, disabled:
        rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping;
        rx_mcl[nr_pfns].args[0] = (unsigned long)d->page >> PGSHIFT;
        rx_mcl[nr_pfns].args[1] = 0;
        rx_mcl[nr_pfns].args[2] = 0;
	*/

	LOG(dp("bottom of loop ...\n");)
        nr_pfns++;
    }
    while ( (++i - np->rx_resp_cons) != NETIF_RX_RING_SIZE );


	LOG(dp("   nr_pfns is %d\n", nr_pfns);)
    if ((nr_pfns == 0) )
        return;

    /*
     * We may have allocated buffers which have entries outstanding in the page
     * update queue -- make sure we flush those first!
     */
    _flush_page_update_queue();
#ifdef NOT
    /* After all PTEs have been zapped we blow away stale TLB entries. */
	/* done above in non-multi-call
    rx_mcl[nr_pfns-1].args[2] = UVMF_FLUSH_TLB; */

    /* Give away a batch of pages. */
    rx_mcl[nr_pfns].op = __HYPERVISOR_dom_mem_op;
    rx_mcl[nr_pfns].args[0] = MEMOP_decrease_reservation;
    rx_mcl[nr_pfns].args[1] = (unsigned long)rx_pfn_array;
    rx_mcl[nr_pfns].args[2] = (unsigned long)nr_pfns;
    rx_mcl[nr_pfns].args[3] = 0;
    rx_mcl[nr_pfns].args[4] = DOMID_SELF;

    /* Zap PTEs and give away pages in one big multicall. */
    (void)HYPERVISOR_multicall(rx_mcl, nr_pfns+1);

    /* Check return status of HYPERVISOR_dom_mem_op(). */
    if ( rx_mcl[nr_pfns].args[5] != nr_pfns )
        panic("Unable to reduce memory reservation, err 0x%x\n", 
			rx_mcl[nr_pfns].args[5]);
#endif
    if (xen_mm_decrease_reservation(rx_pfn_array, nr_pfns) < 0) {
	dp("XENFE: decrease reservation failed\n");
    }
    np->rx->req_prod = i;
    LOG(dp("ALL DONE i is %d\n", i);)
#undef LOG
#define LOG(a) 
}
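
/*
 * My reading of the receive-buffer protocol: for each fresh Desc we
 * zap the PTE for its page (set_va_mfn to 0/0), record its machine
 * frame number in rx_pfn_array, and then give the whole batch back to
 * Xen via xen_mm_decrease_reservation() so the backend can fill those
 * frames with incoming packets.  netif_poll() later maps the frame
 * named in each response back under the Desc's virtual page before
 * copying the packet out.
 */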


static int network_start_xmit(Ctlr *np, void *data, int size)
{
#undef LOG
#define LOG(a) 
    unsigned short id;
    Desc *d;
    netif_tx_request_t *tx;
    NETIF_RING_IDX i;
	extern int faultpanic;

	if (size > BY2PG) {
		panic("xen network_start_xmit: size %d > 4096\n", size);
	}
    if ((np->tx_full) )
    {
        print("%s: full queue wasn't stopped!\n","ether");
        return -1;
    }

	/* xspanalloc is broken, you cannot free memory allocated with
	 * xspanalloc!
	 */
    //d = xspanalloc(sizeof(*d), BY2PG, 0);
	/* so we over-allocate the Desc and take a page-aligned page out of
	 * its data buffer instead. blech.
	 */
	d = descalloc("TX");
	LOG(dp("XENFE: tx: allocated %p\n", d);)
	if (! d) {
		print("Trouble in network_start_xmit: descalloc failed\n");
		return -1;
	}

    d->size = size;
    memmove(d->page, data, size);

    ilock(&np->tx_lock);

    if (np->backend_state != BEST_CONNECTED )
    {
        iunlock(&np->tx_lock);
        descfree(d, "TX");	/* don't leak the descriptor we just allocated */
        return 1;
    }

    i = np->tx->req_prod;

    id = GET_ID_FROM_FREELIST(np->txd, NETIF_TX_RING_SIZE);
    np->txd[id] = d;

    tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
	LOG(dp("np->tx is %p, np->tx->ring is %p\n", np->tx, np->tx->ring);)
	LOG(dp("tx is %p, PADDR(d)) is 0x%lx, size is %d\n", 	tx, PADDR(d), d->size);)
	LOG(dp("offsets: &tx->addr is %p, &tx->id is %p, &tx->size is %p\n", 
		&tx->addr, &tx->id, &tx->size);)
	faultpanic = 0;
	LOG(dp("tx->id BEFORE assign is %d, tx->addr is 0x%lx, size is 0x%lx\n", 
			tx->id, tx->addr, tx->size);)
	LOG(dp("assign id %d\n", id);)
    tx->id   = id;
	LOG(dp("E");)
	checkit("XMITMIDDLE");
    tx->addr = xen_mm_mfn(d->page);
    tx->size = d->size;

    wmb();
    LOG(dp("network_stat_xmit: id %d, addr %p, size %d, set req_prod to %d\n",
			id, d, d->size, i+1);)
    np->tx->req_prod = i + 1;

    network_tx_buf_gc(np);

    if ( (i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1) )
    {
        np->tx_full = 1;
 //       netif_stop_queue(dev);
    }

    iunlock(&np->tx_lock);

/*
    np->stats.tx_bytes += d->size;
    np->stats.tx_packets++;
*/

    /* Only notify Xen if there are no outstanding responses. */
    mb();
 //   if ( np->tx->resp_prod == i ) {
	if (1) {
	LOG(dp("network_start_xmit: notify via evtchn %d\n", 
				np->evtchn);)
        notify_via_evtchn(np->evtchn);
/*
	LOG(dp("   TRY A GC\n");)
	network_tx_buf_gc(np);
	LOG(dp("    DONE THE GC\n");)
*/
    }
	checkit("XMIT DONE");
    LOG(dp("network_start_xmit: done\n");)
    return 0;
#undef LOG
#define LOG(a) 
}
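
/*
 * The Linux original only notifies the backend when there are no
 * outstanding responses (the commented-out resp_prod == i test above);
 * this port currently kicks the event channel on every transmit, which
 * is safe but presumably costs an extra hypercall per packet.
 */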

static int netif_poll(Ether *ether)
{
#undef LOG
#define LOG(a) 
	extern unsigned long *mfn;
	unsigned char *packet;
    Ctlr *np = ether->ctlr;
    Desc *d = 0, *newd = 0;
    netif_rx_response_t *rx;
    NETIF_RING_IDX i;
   /* mmu_update_t *mmu = rx_mmu;*/
    multicall_entry_t *mcl = rx_mcl;
	LOG(dp("netif_poll\n");)
	np->interrupts++;
    ilock(&np->rx_lock);

    if ( np->backend_state != BEST_CONNECTED )
    {
	LOG(dp("XENFE: poll on unconnected %p\n", ether);)
        iunlock(&np->rx_lock);
        return 0;
    }
    LOG(dp("i will be from rx_resp_cons(%d) to rx->resp_prod(%d))\n",
		np->rx_resp_cons, np->rx->resp_prod);)
#undef LOG
#define LOG(a) 
    for ( i = np->rx_resp_cons; 
          i != np->rx->resp_prod; 
          i++ )
    {
        rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
	LOG(dp("XENFE: poll: rx is %p, status %d, addr 0x%ulx\n", rx, rx->status, rx->addr);)
        /*
         * An error here is very odd. Usually indicates a backend bug,
         * low-memory condition, or that we didn't have reservation headroom.
         * Whatever - print an error and queue the id again straight away.
         */
        if (rx->status <= 0)
        {
		LOG(dp("Status for el %d is <= 0 (0x%x)\n", i, rx->status);)
            /* Gate this error. We get a (valid) slew of them on suspend. */
            if ( np->user_state == UST_OPEN ) {
                LOG(dp( "bad buffer on RX ring!(%d)\n", rx->status);)
	   }
            np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
            wmb();
            np->rx->req_prod++;
            continue;
        }

	LOG(dp("id is %d\n", rx->id);)
	LOG(dp("np %p np->rxd %p\n", np, np->rxd);)
        if (! d) {
		d = np->rxd[rx->id];
		LOG(dp("d %p\n", d);)
		d->next = 0;
		newd = d;
		LOG(dp("newd %p\n", newd);)
	}
	else {
		newd->next = np->rxd[rx->id];
		newd = newd->next;
		newd->next = 0;
	}
		
	LOG(dp("Collected Desc %p id %d\n", newd, rx->id);)
        ADD_ID_TO_FREELIST(np->rxd, rx->id);

/*
        np->stats.rx_packets++;
        np->stats.rx_bytes += rx->status;
*/
	LOG(dp("time to remap the page\n");)
        /* Remap the page. */
	LOG(dp("remap rx->addr %p (PPN 0x%ulx) to be 0x%ulx (PADDR 0x%ulx))\n",
			rx->addr, PPN(rx->addr), newd->page, PADDR(newd->page));)
#ifdef NOT
        mmu->ptr  = PPN(rx->addr)  | MMU_MACHPHYS_UPDATE;
        mmu->val  = PADDR(newd->page) >> PGSHIFT;
        mmu++;
        mcl->op = __HYPERVISOR_update_va_mapping;
        mcl->args[0] = (unsigned long)newd->page >> PGSHIFT;
        mcl->args[1] = PPN(rx->addr) | KZERO;
        mcl->args[2] = 0;
        mcl++;
#endif
	mfn[PADDR(newd->page) >> PGSHIFT] = rx->addr >> PGSHIFT;
	queue_machphys_update(PPN(rx->addr)>>PGSHIFT, 
				PADDR(newd->page)>>PGSHIFT);
	_flush_page_update_queue();
	set_va_mfn(newd->page, rx->addr>>PGSHIFT, PTEWRITE|PTEVALID);
	_flush_page_update_queue();
	newd->offset =  rx->addr & (BY2PG-1);
	newd->size = rx->status;
    }
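
	/*
	 * The remap just above is the unrolled version of the multicall in
	 * the #ifdef NOT block: update the pseudo-phys-to-machine array
	 * mfn[], queue the reverse machine-to-phys update, then point the
	 * Desc's virtual page at the newly arrived machine frame with
	 * set_va_mfn(), flushing the page-update queue at each step.
	 * Slower, but each step can be checked individually (see the
	 * matching comment in network_alloc_rx_buffers).
	 */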

	if ((mcl-rx_mcl ) > 0) {
		LOG(dp("mcl-rx_mcl is %d\n", mcl-rx_mcl);)
	}
    /* Do all the remapping work, and M->P updates, in one big hypercall. */
#ifdef NOT
    if ((mcl - rx_mcl) != 0)
    {
        mcl->op = __HYPERVISOR_mmu_update;
        mcl->args[0] = (unsigned long)rx_mmu;
        mcl->args[1] = mmu - rx_mmu;
        mcl->args[2] = 0;
        mcl++;
        (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
    }
    LOG(dp("DONE multicall\n");)
#endif
    while (d)
    {
	Block *bp;
	Desc *nextd;
	/* throw away the MAC header? no */
	packet = &d->page[d->offset];
	LOG(dp("packet is %p and offset is %d\n", packet, d->offset);)
	LOG(dp("Packet size is %d bytes\n", d->size);)
	bp = iallocb(d->size);
	if (! bp) {
		/* too bad, drop it */
		nextd = d->next;
		descfree(d, "RXDROP");
		d = nextd;
		continue;
	}
	LOG(dp("bp->rp is %p and packet is %p\n", bp->rp, packet);)
//	dp("and val of first byte of page is 0x%ux\n", 
//			*(unsigned char *)d->page);
//	dumpit(packet, d->size);
	memmove(bp->rp, packet, d->size);
	bp->wp = bp->rp + d->size;
	LOG(dp("etheriq bp %p size %d\n", bp, d->size);)
	nextd = d->next;
	descfree(d, "RXOK");
	d = nextd;
	etheriq(ether, bp, 1);
    }

    np->rx_resp_cons = i;
//	LOG(dp("Set rx_resp_cons to %d\n", i);)
    network_alloc_rx_buffers(np);

//    LOG(dp("Set np->rx->event to %d\n", i+1);)
    np->rx->event = i + 1;
    iunlock(&np->rx_lock);

    return 0;
#undef LOG
#define LOG(a) 
}

static void 
interrupt(Ureg *, void *v)
{
	Ether *ether;
	Ctlr *np;
//	LOG(dp("XENFE: xen fe interrupt v %p\n", v);)
	ether = (Ether *) v;
	np = ether->ctlr;
	netif_poll(ether);
	ilock(&np->tx_lock);
//	network_tx_buf_gc(np);
	iunlock(&np->tx_lock);
//	LOG(dp("XENFE: interrupt done\n");)
}

static void network_connect(struct Ether *dev,
                            netif_fe_interface_status_changed_t *status)
{
    struct Ctlr *np;
    int i, requeue_idx;
  //  netif_tx_request_t *tx;

    np = dev->ctlr;
    ilock(&np->rx_lock);
    ilock(&np->tx_lock);

    /* Recovery procedure: */

    /* Step 1: Reinitialise variables. */
    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
    np->rx->event = 1;

    /* Step 2: Rebuild the RX and TX ring contents.
     * NB. We could just free the queued TX packets now but we hope
     * that sending them out might do some good.  We have to rebuild
     * the RX ring because some of our pages are currently flipped out
     * so we can't just free the RX Descs.
     * NB2. Freelist index entries are always going to be less than
     * KZERO, whereas pointers to Descs will always be equal to or
     * greater than KZERO: we use this property to distinguish
     * them.
     */

    /* Rebuild the TX buffer freelist and the TX ring itself.
     * NB. This reorders packets.  We could keep more private state
     * to avoid this but maybe it doesn't matter so much given the
     * interface has been down.
     */
    for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ )
    {
	/*
            if ( (unsigned long)np->tx_skbs[i] >= KZERO )
            {
                struct sk_buff *skb = np->tx_skbs[i];
                
                tx = &np->tx->ring[requeue_idx++].req;
                
                tx->id   = i;
                tx->addr = virt_to_machine(skb->data);
                tx->size = skb->len;
                
                np->stats.tx_bytes += skb->len;
                np->stats.tx_packets++;
            }
	*/
    }
    wmb();
    np->tx->req_prod = requeue_idx;

    /* Rebuild the RX buffer freelist and the RX ring itself. */
    for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ )
        if ( (unsigned long)np->rxd[i] >= KZERO )
            np->rx->ring[requeue_idx++].req.id = i;

    wmb();                
    np->rx->req_prod = requeue_idx;

    /* Step 3: All public and private state should now be sane.  Get
     * ready to start sending and receiving packets and give the driver
     * domain a kick because we've probably just requeued some
     * packets.
     */
    np->backend_state = BEST_CONNECTED;
    notify_via_evtchn(status->evtchn);  
/**/
   network_tx_buf_gc(np);
    network_alloc_rx_buffers(np);
/**/
/*
	IS there a plan 9 thing we should do here?
    if ( np->user_state == UST_OPEN )
        netif_start_queue(dev);
*/

    iunlock(&np->tx_lock);
    iunlock(&np->rx_lock);
}


static void vif_show(struct Ctlr *np)
{
#ifdef DEBUG
    if (np) {
        IPRINTK("<(%p):vif handle=%d %s(%s) evtchn=%d irq=%d tx=%p rx=%p>\n",
               np, np->handle,
               be_state_name[np->backend_state],
               np->user_state ? "open" : "closed",
               np->evtchn,
               np->irq,
               np->tx,
               np->rx);
    } else {
        IPRINTK("<vif NULL>\n");
    }
#endif
}

/* Send a connect message to xend to tell it to bring up the interface. */
static void send_interface_connect(struct Ctlr *np)
{
    ctrl_msg_t cmsg = {
        .type    = CMSG_NETIF_FE,
        .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
        .length  = sizeof(netif_fe_interface_connect_t),
    };
    netif_fe_interface_connect_t *msg = (void*)cmsg.msg;

    DPRINTK(">\n"); vif_show(np); 
    msg->handle = np->handle;
    msg->tx_shmem_frame = xen_mm_mfn(np->tx) >> PGSHIFT;
    msg->rx_shmem_frame = xen_mm_mfn(np->rx) >> PGSHIFT;
        
    /* Tell the controller to bring up the interface. */
    ctrl_if_send_message_block(&cmsg, nil, 0, 0);
    print("CONNECT: message sent. Set something to 0 just to see\n");
    /*    np->tx->ring[0].req.id = 0;*/
    print("CONNECT: set it\n");
    
    DPRINTK("<\n");
}

/* Send a driver status notification to the domain controller. */
static int send_driver_status(int ok)
{
    int err;
    ctrl_msg_t cmsg = {
        .type    = CMSG_NETIF_FE,
        .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
        .length  = sizeof(netif_fe_driver_status_t),
    };
    netif_fe_driver_status_t *msg = (void*)cmsg.msg;

    msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
    err = ctrl_if_send_message_block(&cmsg, nil, 0, 0);
    return err;
}

/* Stop network device and free tx/rx queues and irq.
 */
static void vif_release(struct Ctlr *np)
{
    /* Stop old i/f to prevent errors whilst we rebuild the state. */
  ilock(&np->tx_lock);
  ilock(&np->rx_lock);
  //    netif_stop_queue(np->dev);
    /* np->backend_state = BEST_DISCONNECTED; */
    iunlock(&np->rx_lock);
    iunlock(&np->tx_lock);
    
    /* Free resources. */
    if(np->tx != nil){
      /* leave leak here ...
        * this will matter at some point, I want plan 9 experts to 
        * get all the ins/outs of release done right. 
        free_irq(np->irq, np->dev);
        unbind_evtchn_from_irq(np->evtchn);
        free_page((unsigned long)np->tx);
        free_page((unsigned long)np->rx);
      */
        np->irq = 0;
        np->evtchn = 0;
        np->tx = nil;
        np->rx = nil;
    }
}

/* Release vif resources and close it down completely.
 */
static void vif_close(struct Ctlr *np)
{
    DPRINTK(">\n"); vif_show(np);
    WPRINTK("Unexpected netif-CLOSED message in state %s\n",
            be_state_name[np->backend_state]);
    vif_release(np);
    np->backend_state = BEST_CLOSED;
    /* todo: take dev down and free. */
    vif_show(np); DPRINTK("<\n");
}

/* Move the vif into disconnected state.
 * Allocates tx/rx pages.
 * Sends connect message to xend.
 * N.B. On plan 9 we don't expect this to get called (yet)
 */
static void vif_disconnect(struct Ctlr *np){
    DPRINTK(">\n");
    /* LEAK
    if(np->tx) free_page((unsigned long)np->tx);
    if(np->rx) free_page((unsigned long)np->rx);
    */
    print("There's still a leak in vif_disconnect\n");
    // Before this np->tx and np->rx had better be null.
    np->tx = (netif_tx_interface_t *)xspanalloc(BY2PG, BY2PG, 0);
    np->rx = (netif_rx_interface_t *)xspanalloc(BY2PG, BY2PG, 0);
    memset(np->tx, 0, BY2PG);
    memset(np->rx, 0, BY2PG);
    LOG(dp("CONNECT: np->tx is %p, np->rx is %p\n", np->tx, np->rx);)
      LOG(dp("CONNECT: MFN of tx is 0x%lx, PADDR is 0x%lx\n", 
	     xen_mm_mfn(np->tx), PADDR(np->tx));)
      np->backend_state = BEST_DISCONNECTED;
    
    send_interface_connect(np);
    vif_show(np); DPRINTK("<\n");
}

/* Begin interface recovery.
 *
 * NB. Whilst we're recovering, we turn the carrier state off.  We
 * take measures to ensure that this device isn't used for
 * anything.  We also stop the queue for this device.  Various
 * different approaches (e.g. continuing to buffer packets) have
 * been tested but don't appear to improve the overall impact on
 * TCP connections.
 *
 * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
 * is initiated by a special "RESET" message - disconnect could
 * just mean we're not allowed to use this interface any more.
 */
static void 
vif_reset(
    struct Ctlr *np)
{
    DPRINTK(">\n");
    IPRINTK("Attempting to reconnect network interface: handle=%d\n",
            np->handle);    
    vif_release(np);
    vif_disconnect(np);
    vif_show(np); DPRINTK("<\n");
}

/* Move the vif into connected state.
 * Sets the mac and event channel from the message.
 * Binds the irq to the event channel.
 */
static void
vif_connect(
    struct Ctlr *np, netif_fe_interface_status_t *status)
{
    static int create_netdev(int, struct Ether **);
    int xenfrontendreset(Ether *ether);
    struct Ether *dev = np->dev;
    DPRINTK(">\n");
    memmove(dev->ea, status->mac, sizeof(dev->ea));
    memmove(dev->addr, status->mac, sizeof(dev->ea));
    create_netdev(status->handle, nil);
    network_connect(dev, status);
    np->evtchn = status->evtchn;
    np->irq = bind_evtchn_to_irq(np->evtchn, 0);
    intrenable(np->irq, interrupt, dev, 0, "xen network");
    unmask_evtchn(np->evtchn);
    netctrl_connected_count();
    //    vif_wake(dev);
    addethercard("xenfrontend", xenfrontendreset);
    vif_show(np); DPRINTK("<\n");
}

/* Get the target interface for a status message.
 * Creates the interface when it makes sense.
 * The returned interface may be null when there is no error.
 *
 * @param status status message
 * @param np return parameter for interface state
 * @return 0 on success, error code otherwise
 */
static int 
target_vif(
    netif_fe_interface_status_t *status, struct Ctlr **np)
{
static	int create_netdev(int, struct Ether **);
    int err = 0;
    struct Ether *dev;

    DPRINTK("> handle=%d\n", status->handle);
    if ( status->handle < 0 || status->handle >= MaxEther )
    {
        err = -1;
        goto exit;
    }

    if ( (dev = find_dev_by_handle(status->handle)) != nil )
        goto exit;

    if ( status->status == NETIF_INTERFACE_STATUS_CLOSED )
        goto exit;
    if ( status->status == NETIF_INTERFACE_STATUS_CHANGED )
        goto exit;

    /* It's a new interface in a good state - create it. */
    DPRINTK("> create device...\n");
    if ( (err = create_netdev(status->handle, &dev)) != 0 )
        goto exit;

    netctrl.interface_n++;

  exit:
    if ( np != nil )
        *np = ((dev && !err) ? dev->ctlr : nil);
    DPRINTK("< err=%d\n", err);
    return err;
}
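
/*
 * Note that create_netdev() never fills in its second argument, so for
 * a brand-new interface dev stays nil here and *np comes back nil even
 * though err is 0; the caller then just logs "no vif" and drops the
 * status message.  That matches the "may be null when there is no
 * error" remark above, as far as I can tell.
 */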


/* Handle an interface status message. */
static void netif_interface_status(netif_fe_interface_status_t *status)
{
    int err;
    struct Ctlr *np = nil;
    
    DPRINTK(">\n");
    DPRINTK("> status=%s handle=%ud\n",
            status_name[status->status], status->handle);

    if ( (err = target_vif(status, &np)) != 0 )
    {
        WPRINTK("Invalid netif: handle=%ud, err %d\n", status->handle, err);
        return;
    }

    if ( np == nil )
    {
        DPRINTK("> no vif\n");
        return;
    }

    DPRINTK(">\n"); vif_show(np);

    switch ( status->status )
    {
    case NETIF_INTERFACE_STATUS_CLOSED:
        switch ( np->backend_state )
        {
        case BEST_CLOSED:
        case BEST_DISCONNECTED:
        case BEST_CONNECTED:
            vif_close(np);
            break;
        }
        break;

    case NETIF_INTERFACE_STATUS_DISCONNECTED:
        switch ( np->backend_state )
        {
        case BEST_CLOSED:
            vif_disconnect(np);
            break;
        case BEST_DISCONNECTED:
        case BEST_CONNECTED:
            vif_reset(np);
            break;
        }
        break;

    case NETIF_INTERFACE_STATUS_CONNECTED:
        switch ( np->backend_state )
        {
        case BEST_CLOSED:
            WPRINTK("Unexpected netif status %s in state %s\n",
                    status_name[status->status],
                    be_state_name[np->backend_state]);
            vif_disconnect(np);
            vif_connect(np, status);
            break;
        case BEST_DISCONNECTED:
            vif_connect(np, status);
            break;
        }
        break;

    case NETIF_INTERFACE_STATUS_CHANGED:
        /*
         * The domain controller is notifying us that a device has been
         * added or removed.
         */
        break;

    default:
        WPRINTK("Invalid netif status code %d\n", status->status);
        break;
    }
    vif_show(np);
    DPRINTK("<\n");
}

/*
 * Handle a driver status notification from the domain controller.
 */
static void netif_driver_status(netif_fe_driver_status_t *status)
{
    DPRINTK("> status=%d\n", status->status);
    netctrl.iface_up = status->status;
    //netctrl.interface_n = status->max_handle;
    //netctrl.connected_n = 0;
    netctrl_connected_count();
}

/** Create a network device.
 * @param handle device handle
 * @param val return parameter for created device
 * @return 0 on success, error code otherwise
 */
static int create_netdev(int handle, struct Ether ** /*val*/)
{
    int i;
    struct Ctlr *np = &controllers[handle];

    dp("XENFE: create_netdev %d\n", handle);
    np->backend_state = BEST_CLOSED;
    np->user_state    = UST_CLOSED;
    np->handle        = handle;
    
//	np->tx_lock = np->rx_lock = 0;

    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */

    for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
        np->txd[i] = (void *)(i+1);

    for ( i = 0; i <=  NETIF_RX_RING_SIZE; i++ )
        np->rxd[i] = (void *)(i+1);

	LOG(dp("XENFE: all done set up the rings\n");)
	return 0;
}
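
/*
 * The loops above seed the freelists: slot i holds the small integer
 * i+1, so slot 0 points at slot 1, slot 1 at slot 2, and so on;
 * GET_ID_FROM_FREELIST then hands the ids out in order starting at 1.
 */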


/*
 * Initialize the network control interface. Set the number of network devices
 * and create them.
 */
static void netif_driver_status_change(
    netif_fe_driver_status_changed_t *status)
{
    int err;
    int i;
    LOG(dp("XENFE: netif_driverr_status_chnage # ifaces %d\n", netctrl.interface_n );)
    LOG(dp("XENFE: status says %d interfaces\n", status->nr_interfaces);)
    
    netctrl.interface_n = status->nr_interfaces;
    netctrl.connected_n = 0;
    netctrl.iface_up = status->status;

    /* leave this here? Probably not*/
    for ( i = 0; i < netctrl.interface_n; i++ )
    {
        if ( (err = create_netdev(i, nil)) != 0 )
        {
            netctrl_err(err);
	   LOG(dp("create netdev failed...\n");)
            break;
        }
    }
    /**/
    netctrl_connected_count();

    LOG(dp("XENFE DONE driver status change\n");)
}

static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long /*id*/)
{
    int respond = 1;
	LOG(dp("XENFE: xenfe netif_ctrlif_rx subtype %d\n", msg->type);)
    switch ( msg->subtype )
    {
    case CMSG_NETIF_FE_INTERFACE_STATUS:
	dp("XENFE: got an iface status changed message, length %d want %d\n",
			msg->length, sizeof(netif_fe_interface_status_changed_t));
        if ( msg->length != 18 ) /* XXX should be sizeof(netif_fe_interface_status_changed_t) */
            goto error;

        netif_interface_status((netif_fe_interface_status_t *)
                               &msg->msg[0]);
	dp("Done iface status\n");
        break;

    case CMSG_NETIF_FE_DRIVER_STATUS:
	dp("XENFE: got a driver status changed message, len %d, want %d\n", 
		msg->length, sizeof(netif_fe_driver_status_changed_t));
        if ( msg->length != sizeof(netif_fe_driver_status_changed_t) )
            goto error;
	dp("Call netif_driver_status_change ...\n");
        netif_driver_status((netif_fe_driver_status_t *)
                            &msg->msg[0]);
	dp("Done driver status\n");

        /* Message is a response */
        respond = 0;
        break;

    error:
    default:
        msg->length = 0;
        break;
    }

    if ( respond )
        ctrl_if_send_response(msg);
}


static int netif_init(void)
{
    ctrl_msg_t                       cmsg;
    netif_fe_driver_status_changed_t st;
    int err = 0;
    //Rendez r;
/*
    if ( (start_info.flags & SIF_INITDOMAIN) ||
         (start_info.flags & SIF_NET_BE_DOMAIN) )
        return 0;
*/

    print("XENFE:Initialising Xen virtual ethernet frontend driver");


    netctrl_init();

    ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
                                    CALLBACK_IN_BLOCKING_CONTEXT);

    /* Send a driver-UP notification to the domain controller. */
    cmsg.type      = CMSG_NETIF_FE;
    cmsg.subtype   = CMSG_NETIF_FE_DRIVER_STATUS;
    cmsg.length    = sizeof(netif_fe_driver_status_changed_t);
    st.status      = NETIF_DRIVER_STATUS_UP;
    st.nr_interfaces = 0;
    memmove(cmsg.msg, &st, sizeof(st));
    ctrl_if_send_message_block(&cmsg, nil, 0, 0);
    return err;
}
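
/*
 * Start-of-day handshake, as I understand it: netif_init() registers
 * netif_ctrlif_rx() for CMSG_NETIF_FE messages and sends a
 * DRIVER_STATUS "up" message to the domain controller.  xend then
 * replies with a driver status message (number of interfaces) followed
 * by per-interface status messages, which drive the vif state machine
 * above (disconnect -> connect).
 */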

static void
initxen(Ctlr* ctlr)
{
	LOG(dp("XENFE: xenfe init %p\n", ctlr);)
	USED(ctlr);
}


static uchar*
startdma(Ether* ether, ulong address)
{
	print("XENFE:xenfe startdma %p %ulx\n", ether, address);
	return 0;
}

static void
promiscuous(void* arg, int on)
{
	print("XENFE:xenfe promisc %p %d\n", arg, on);
}

static void
multicast(void* arg, uchar *addr, int on)
{
	print("XENFE:xenfe mc %p %p %d\n", arg, addr, on);
}

static void
attach(Ether* ether)
{
	Ctlr *ctlr;
//	int wait_i, wait_n = 20;
//	int err;

	dp("XENFE: etherxenfrontend: attach ether port 0x%lux ctrl %p\n", 
		ether->port, ether->ctlr);
    /* Wait for all interfaces to be connected. */
	while (netctrl_connected() < 0) {
		LOG(dp("XENFE: waiting for one connect\n");)
		HYPERVISOR_yield();
	}
	ctlr = ether->ctlr;
	ilock(ctlr);
	if(ctlr->attached){
		iunlock(ctlr);
		return;
	}

	ctlr->attached = 1;
	iunlock(ctlr);
	LOG(dp("XENFE: attach done for port %d\n", ether->port);)
}

static void
statistics(Ether* ether)
{
	LOG(dp("XENFE: xenfe statistics %p\n", ether);)
	USED(ether);
}

static void
txstart(Ether* ether)
{
#undef LOG
#define LOG(a) 
	int len;
	Ctlr *ctlr;
	Block *bp;

	LOG(dp("XENFE: xenfe txstart %p\n", ether);)
	ctlr = ether->ctlr;

	/*
	 * Attempt to drain the output queue into the Xen transmit ring.
	 * If the ring is full, stash the block in ctlr->txbp and stop;
	 * it will be retried on the next call.
	 * Expects to be called with the ctlr already ilocked.
	 */
	for(;;){
		if(ctlr->txbp){
			bp = ctlr->txbp;
			ctlr->txbp = 0;
		}
		else{
			bp = qget(ether->oq);
			if(bp == nil)
				break;
		}

		len = ROUNDUP(BLEN(bp), 2);
		if(! ctlr->tx_full){
		//	int i;
		//	for(i = 0; i < 16; i++)
		//		LOG(dp("0x%x ", bp->rp[i]);)
		//	LOG(dp("\n");)
			memmove(&bp->rp[6], ether->ea, sizeof(ether->ea));
			network_start_xmit(ctlr, bp->rp, len);

			freeb(bp);

			ether->outpackets++;
		}
		else{
			ctlr->txbp = bp;
			if(ctlr->txbusy == 0){
				ctlr->txbusy = 1;
			}
			break;
		}
	}
	LOG(dp("txstart: done\n");)
#undef LOG
#define LOG(a) 
}

static void
transmit(Ether* ether)
{
#undef LOG
#define LOG(a) 
	Ctlr *ctlr;

	ctlr = ether->ctlr;

	ilock(ctlr);
	LOG(dp("XENFE: xenfe xmit port %d ctlr %p\n", ether->port, ctlr);)
	txstart(ether);
	iunlock(ctlr);
	LOG(dp("XENFE: transmit done\n");)
#undef LOG
#define LOG(a) 
}


static long
ifstat(Ether* ether, void* a, long n, ulong offset)
{

	char *p;
	int len;
	Ctlr *ctlr;

	LOG(dp("XENFE: ifstat %p, %p, %ld, %ld\n", ether, a, n, offset);)


	if(n == 0)
		return 0;

	ctlr = ether->ctlr;

	ilock(ctlr);
	statistics(ether);
	iunlock(ctlr);

	p = malloc(READSTR);
	if(p == nil)
		error(Enomem);
	len = snprint(p, READSTR, "interrupts: %d\n", ctlr->interrupts);
//	len += snprint(p+len, READSTR-len, "bogusinterrupts: %lud\n", ctlr->bogusinterrupts);
	USED(len);
#ifdef NOT
	if(ctlr->upenabled){
		if(ctlr->upqmax > ctlr->upqmaxhw)
			ctlr->upqmaxhw = ctlr->upqmax;
		len += snprint(p+len, READSTR-len, "up: q %lud i %lud m %d h %d s %lud\n",
			ctlr->upqueued, ctlr->upinterrupts,
			ctlr->upqmax, ctlr->upqmaxhw, ctlr->upstalls);
		ctlr->upqmax = 0;
	}
	if(ctlr->dnenabled){
		if(ctlr->dnqmax > ctlr->dnqmaxhw)
			ctlr->dnqmaxhw = ctlr->dnqmax;
		len += snprint(p+len, READSTR-len, "dn: q %lud i %lud m %d h %d\n",
			ctlr->dnqueued, ctlr->dninterrupts, ctlr->dnqmax, ctlr->dnqmaxhw);
		ctlr->dnqmax = 0;
	}

	snprint(p+len, READSTR-len, "badssd: %lud\n", ctlr->stats[BytesRcvdOk+2]);
#endif
	n = readstr(offset, a, n, p);
	free(p);
	return n;
}

static void
txrxreset(int port)
{
	LOG(dp("XENFE: txrxreset %d\n", port);)
	USED(port);
}


static void
shutdown(Ether *ether)
{
	LOG(dp("XENFE: xenfrontend shutting down %p\n", ether);)
//	resetctlr(ether->ctlr);
	USED(ether);
}

/* this is an experiment ... */
Ether *theEther = nil;

void
xenpoll(void) {
	dp("X");
	if (theEther)
		netif_poll(theEther);
}


int
xenfrontendreset(Ether* ether)
{
//	char *p;
	Ctlr *ctlr;
//	uchar ea[Eaddrlen];
	static int scandone;

	LOG(dp("XENFE: xenfrontendreset!\n");)
	if (ether->ctlrno > 0)
		return -1;
	ctlr = &controllers[ether->ctlrno];

	/*
	 * Nothing to reset on the virtual device.
	 */
	ilock(ctlr);
	/*
	 * Linkage to the generic ethernet driver.
	 */
	ether->attach = attach;
	ether->transmit = transmit;
	ether->interrupt = interrupt;
	ether->ifstat = ifstat;

	ether->promiscuous = promiscuous;
	ether->multicast = multicast;
	ether->shutdown = shutdown;
	ether->arg = ether;
	ether->ctlr = ctlr;
	ctlr->dev = ether;
	ether->maxmtu = 1514;
	iunlock(ctlr);
	theEther = ether;
	LOG(dp("XENFE: xenfrontendreset: OK\n");)
	return 0;
}

void
etherxenfrontendlink(void)
{
	LOG(dp("XENFE: etherxenfrontendlink!\n");)
	netif_init();
	addethercard("xenfrontend", xenfrontendreset);
}
