#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "ureg.h"
#include "../port/error.h"
#include "../port/sd.h"
#include "../xen/xenblk.h"
/* debug tracing is compiled out; define LOG(a) a to enable the dp() calls */
#define LOG(a)
/* serialises ring indices and driver state; name kept from the Linux original */
Lock io_request_lock;
/******************************************************************************
* arch/xen/drivers/blkif/frontend/vbd.c
*
* Xenolinux virtual block-device driver.
*
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
* Modifications by Mark A. Williamson are (c) Intel Research Cambridge
*/
/* Information about our VBDs. */
#define MAX_VBDS 64
static int nr_vbds; /* number of disks found by the probe; 0 if none */
static vdisk_t *vbd_info; /* nr_vbds per-disk records, allocated in xlvbd_init */
/*
 * Debug aid: hex-dump size bytes starting at x to the console,
 * sixteen bytes per line, each line prefixed with its offset.
 */
static void dumpit(void *x, int size)
{
	int off, col;
	unsigned char *p = x;

	print("New packet: %p %d bytes\n", x, size);
	for(off = 0; off < size; off += 16){
		print("0x%x: ", off);
		for(col = 0; col < 16; col++){
			if(off+col >= size)
				break;
			print("%02x ", p[off+col]);
		}
		print("\n");
	}
	print("end of packet\n");
}
/*
 * Probe the backend for the virtual block devices (VBDs) exported to
 * this domain.  Sends a BLKIF_OP_PROBE control request whose single
 * segment points at a page the backend fills with an array of per-disk
 * records; the records are then unpacked into disk_info[].
 *
 * disk_info must have room for at least MAX_VBDS entries.
 * Returns the number of disks found, or -1 on failure.
 */
static int xlvbd_get_vbd_info(vdisk_t *disk_info)
{
	int i;
	vdisk_t *buf;
	blkif_request_t req;
	blkif_response_t rsp;
	int nr;
	void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);

	/* one page, page-aligned, so the backend can map it whole */
	buf = mallocalign(BY2PG, BY2PG, 0, 0);
	if(buf == nil)
		return -1;
	memset(&req, 0, sizeof(req));
	req.operation = BLKIF_OP_PROBE;
	req.nr_segments = 1;
	/* low 3 bits = first sector, next 3 = last; 7 covers the full page */
	req.frame_and_sects[0] = xen_mm_mfn(buf) | 7; /* the 7 is icky! */
	blkif_control_send(&req, &rsp);
	LOG( dp("===> blkif_control-=send returns %d\n", rsp.status);)
	dumpit(&rsp, sizeof(rsp));
	if ( rsp.status <= 0 )
	{
		dp( "Could not probe disks (%d)\n", rsp.status);
		free(buf);
		return -1;
	}
	/* a positive status is the disk count */
	if ( (nr = rsp.status) > MAX_VBDS )
		nr = MAX_VBDS;
	for(i = 0; i < nr; i++) {
		/*
		 * The on-the-wire record is 12 bytes: 8-byte capacity,
		 * 2-byte device id, 2-byte info flags.  Unpack by hand
		 * because the compiler may pad vdisk_t differently.
		 */
		unsigned char *c = (unsigned char *) buf;
		c = &c[i*12];
		print("Pointer is %p\n", c);
		disk_info[i].capacity = *(blkif_sector_t *) c;
		disk_info[i].device = *(blkif_vdev_t *) &c[8];
		disk_info[i].info = *(u16 *)&c[10];
		print("Disk %d cap %lld dev %d info 0x%x\n", i,
			disk_info[i].capacity, disk_info[i].device, disk_info[i].info);
	}
	free(buf);
	return nr;
}
/*
* xlvbd_init_device - initialise a VBD device
* @disk: a vdisk_t describing the VBD
*
* Takes a vdisk_t * that describes a VBD the domain has access to.
* Performs appropriate initialisation and registration of the device.
*
* Care needs to be taken when making re-entrant calls to ensure that
* corruption does not occur. Also, devices that are in use should not have
* their details updated. This is the caller's responsibility.
*/
/*
 * Initialise one VBD described by *xd.  Currently only reports the
 * capacity; device registration is handled by the sd layer.
 */
static int xlvbd_init_device(vdisk_t *xd)
{
	unsigned long cap;

	/* NB. only 32-bit sector capacities are handled here. */
	cap = (unsigned long)xd->capacity;
	print("capacity is %lud\n", cap);
	return 0;
}
/*
* Set up all the linux device goop for the virtual block devices (vbd's) that
* we know about. Note that although from the backend driver's p.o.v. VBDs are
* addressed simply an opaque 16-bit device number, the domain creation tools
* conventionally allocate these numbers to correspond to those used by 'real'
* linux -- this is just for convenience as it means e.g. that the same
* /etc/fstab can be used when booting with or without Xen.
*/
/*
 * Discover the VBDs the backend exports and initialise each one:
 * allocate vbd_info (MAX_VBDS entries), probe via xlvbd_get_vbd_info,
 * then run xlvbd_init_device on every disk found.  On failure
 * vbd_info is released and nr_vbds left at 0.
 * Returns 0 on success, -1 on allocation or probe failure.
 */
int xlvbd_init(void)
{
	int i;

	vbd_info = malloc(MAX_VBDS * sizeof(vdisk_t));
	if(vbd_info == nil){
		nr_vbds = 0;
		return -1;
	}
	nr_vbds = xlvbd_get_vbd_info(vbd_info);
	if ( nr_vbds < 0 )
	{
		dp("============> nr_vbds is ZERO!\n");
		free(vbd_info);
		vbd_info = nil;
		nr_vbds = 0;
		return -1;
	}
	for ( i = 0; i < nr_vbds; i++ ) {
		print("======> init device %d\n", i);
		xlvbd_init_device(&vbd_info[i]);
	}
	return 0;
}
/******************************************************************************
* arch/xen/drivers/blkif/frontend/main.c
*
* Xenolinux virtual block-device driver.
*
* Copyright (c) 2003-2004, Keir Fraser & Steve Hand
* Modifications by Mark A. Williamson are (c) Intel Research Cambridge
*/
typedef unsigned char byte; /* from linux/ide.h */
/* lifecycle of the frontend/backend connection */
#define BLKIF_STATE_CLOSED 0
#define BLKIF_STATE_DISCONNECTED 1
#define BLKIF_STATE_CONNECTED 2
static unsigned int blkif_state = BLKIF_STATE_CLOSED;
static unsigned int blkif_evtchn, blkif_irq; /* event channel and the irq bound to it */
static int blkif_control_rsp_valid; /* set by blkif_int when a PROBE response lands */
static blkif_response_t blkif_control_rsp; /* latest control (PROBE) response */
static blkif_ring_t *blk_ring; /* request/response ring shared with the backend */
static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
static BLKIF_RING_IDX req_prod; /* Private request producer. */
static blkif_ring_t *blk_ring_rec; /* Private copy of requests, used for
* recovery. Responses not stored here. */
static BLKIF_RING_IDX resp_cons_rec; /* Copy of response consumer, used for
* recovery */
static int recovery = 0; /* "Recovery in progress" flag. Protected
* by the io_request_lock */
/* We plug the I/O ring if the driver is suspended or if the ring is full. */
#define RING_PLUGGED (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
(blkif_state != BLKIF_STATE_CONNECTED))
/*
 * Request queues with outstanding work, but ring is currently full.
 * We need no special lock here, as we always access this with the
 * io_request_lock held. We only need a small maximum list.
 */
#define MAX_PENDING 8
/* find a plan 9 equivalent
static request_queue_t *pending_queues[MAX_PENDING];
static int nr_pending;
*/
/* scatter-gather accumulation state; -1 means no run in progress */
static int sg_operation = -1;
static unsigned long sg_next_sect;
#define DISABLE_SCATTERGATHER() (sg_operation = -1)
/*
 * Publish the privately accumulated producer index to the shared ring
 * and kick the backend through its event channel.  Any scatter-gather
 * run in progress is terminated first.
 */
static void flush_requests(void)
{
DISABLE_SCATTERGATHER();
blk_ring->req_prod = req_prod;
notify_via_evtchn(blkif_evtchn);
}
/*
 * blkif_queue_request: queue one block I/O request on the shared ring
 * and notify the backend.
 *
 * id:            opaque to the backend; for READ/WRITE the caller passes
 *                the address of a "done" flag that blkif_int sets on
 *                completion (see sdxenbio).
 * operation:     BLKIF_OP_{READ,WRITE,PROBE}
 * buffer:        kernel virtual address to read/write; must be
 *                sector-aligned and must not cross a page boundary.
 * sector_number: starting sector on the device.
 * nr_sectors:    number of 512-byte sectors (at most 8, i.e. one page).
 * device:        backend device handle from the probe.
 *
 * Returns 0 on success, 1 if the interface is not connected.
 */
static int blkif_queue_request(unsigned long id,
int operation,
unsigned char * buffer,
unsigned long sector_number,
unsigned short nr_sectors,
unsigned long device)
{
unsigned long buffer_ma = xen_va_to_ma(buffer);
blkif_request_t *req;
unsigned int fsect, lsect;
/* first and last 512-byte sector of the transfer within its page */
fsect = (buffer_ma & (BY2PG-1)) >> 9;
lsect = fsect + nr_sectors - 1;
/* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
if (((buffer_ma & ((1<<9)-1)) != 0) )
panic("buffer not sector-aligned\n");
if ( lsect > 7 )
panic("lsect > 7 in blkif_queue_request\n");
buffer_ma = PPN(buffer_ma);
LOG(dp("buffer_ma is 0x%ulx, fsect 0x%x, lsect 0x%x\n",
buffer_ma, fsect, lsect);)
if ((blkif_state != BLKIF_STATE_CONNECTED) )
return 1;
switch ( operation )
{
case BLKIF_OP_WRITE:
// dumpit(buffer, nr_sectors*512);
/* fall through: writes are queued exactly like reads */
case BLKIF_OP_READ:
#ifdef NOT
/* scatter_gather */
if ( (sg_operation == operation) &&
(sg_dev == device) &&
(sg_next_sect == sector_number) )
{
req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req;
req->id = id;
req->frame_and_sects[req->nr_segments] =
buffer_ma | (fsect<<3) | lsect;
if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
sg_next_sect += nr_sectors;
else
DISABLE_SCATTERGATHER();
/* Update the copy of the request in the recovery ring. */
blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod - 1)].req
= *req;
return 0;
}
else
#endif
/* ring plugged, eh? I don't think so in Plan 9 ...
if ( RING_PLUGGED )
{
return 1;
}
*/
#ifdef NOT
else
{
sg_operation = operation;
sg_dev = device;
sg_next_sect = sector_number + nr_sectors;
}
#endif
break;
default:
panic("unknown op %d\n", operation);
}
/* Fill out a communications ring structure. */
LOG(dp("Fill out the ring ...\n");)
req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
req->id = id;
req->operation = operation;
req->sector_number = (blkif_sector_t)sector_number;
req->device = device;
req->nr_segments = 1;
/* machine frame address | first sector << 3 | last sector */
req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
req_prod++;
LOG(dp("req_f_a_s is 0x%ulx\n", req->frame_and_sects[0]);)
/* publish the new producer index and kick the backend */
blk_ring->req_prod = req_prod;
notify_via_evtchn(blkif_evtchn);
LOG(dp("req_prod is now ... %d\n", req_prod);)
/* Keep a private copy so we can reissue requests when recovering. */
/* eh? That can come later!
blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod)].req = *req;
blk_ring_rec->req_prod++;
*/
return 0;
}
/*
 * Interrupt handler for the block event channel.  Walks the shared
 * ring from resp_cons up to the backend's resp_prod and handles each
 * response:
 *  - READ/WRITE: the request id carries the address of the issuer's
 *    "done" flag (see sdxenbio); set it to 1 to release the spinner.
 *  - PROBE: stash the response for blkif_control_send to pick up.
 * Does nothing while closed or during recovery.
 */
static void blkif_int(Ureg *, void *)
{
BLKIF_RING_IDX i;
ilock(&io_request_lock);
if ((blkif_state == BLKIF_STATE_CLOSED || recovery) )
{
LOG( dp("Bailed out\n");)
iunlock(&io_request_lock);
return;
}
LOG(dp(" blkif_int before for loop\n");)
for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
{
blkif_response_t bret;
/*
 * Unpack the response by hand: 4-byte id, 1-byte operation at
 * offset 4, 2-byte status at offset 5.
 * NOTE(review): assumes the wire layout is packed and that an
 * unaligned ushort load at offset 5 is acceptable on this
 * architecture -- confirm against the blkif ABI.
 */
unsigned char *c = (void *)
&blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
LOG(dp("probe looks like this:\n");)
// dumpit(c, 7);
bret.id = *(ulong *) c;
bret.operation = *(uchar *) &c[4];
bret.status = *(ushort *) &c[5];
LOG(dp("bret id %lud op %d status %d\n", bret.id, bret.operation, bret.status);)
switch ( bret.operation )
{
case BLKIF_OP_READ:
case BLKIF_OP_WRITE:
LOG(dp("interrupt for read or write, ...\n");)
LOG( dp("ID is %lud\n", bret.id);)
LOG(dp("Indicate done-ness ...\n");)
/* id is the address of the issuer's completion flag */
*(unsigned long *)bret.id = 1;
LOG(dp("all done I/O intr for blkif\n");)
break;
case BLKIF_OP_PROBE:
memmove(&blkif_control_rsp, &bret, sizeof(bret));
LOG( dp("blkif_int: op probe returns ...\n");)
blkif_control_rsp_valid = 1;
break;
default:
LOG( dp("blkif_in: unkonw op %d\n", bret.operation);)
break;
}
}
resp_cons = i;
resp_cons_rec = i;
// kick_pending_request_queues();
iunlock(&io_request_lock);
}
/*
 * Send a synchronous control request (e.g. BLKIF_OP_PROBE) on the
 * shared ring and spin (via sched()) until blkif_int captures the
 * matching response in blkif_control_rsp, which is then copied into
 * *rsp.
 * NOTE(review): blkif_control_rsp_valid is shared with the interrupt
 * handler but polled here without holding the lock; this is only safe
 * if a single control request is outstanding at a time -- confirm.
 */
void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
retry:
/* wait for a free slot in the ring */
while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
{
// halt();
sched();
/*
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
*/
}
ilock(&io_request_lock);
/* re-check under the lock; another producer may have raced us */
if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
{
iunlock(&io_request_lock);
goto retry;
}
DISABLE_SCATTERGATHER();
/* copy the request into both the live ring and the recovery ring */
memmove(&blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req, req, sizeof(*req));
memmove(&blk_ring_rec->ring[MASK_BLKIF_IDX(blk_ring_rec->req_prod++)].req,
req, sizeof(*req));
req_prod++;
flush_requests();
iunlock(&io_request_lock);
/* spin until the interrupt handler flags the response */
while ( !blkif_control_rsp_valid )
{
sched();
}
memmove(rsp, &blkif_control_rsp, sizeof(*rsp));
blkif_control_rsp_valid = 0;
}
/*
 * Handle an interface-status message from the domain controller,
 * driving the CLOSED -> DISCONNECTED -> CONNECTED state machine.
 * DISCONNECTED: allocate/reset the shared ring and ask the controller
 * to connect it.  CONNECTED: bind the event channel, enable the
 * interrupt, then either replay outstanding requests (recovery) or
 * probe for attached disks.  Only handle 0 is supported.
 */
static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
{
ctrl_msg_t cmsg;
blkif_fe_interface_connect_t diskup;
if ( status->handle != 0 )
{
print( "Status change on unsupported blkif %d\n",
status->handle);
dp("=============> bad status change\n");
return;
}
print("===========> blkif_status_change to %d\n", status->status);
switch ( status->status )
{
case BLKIF_INTERFACE_STATUS_DESTROYED:
print( "Unexpected blkif-DESTROYED message in state %d\n",
blkif_state);
break;
case BLKIF_INTERFACE_STATUS_DISCONNECTED:
if ( blkif_state != BLKIF_STATE_CLOSED )
{
/* backend went away while we were up: enter recovery mode */
print( "Unexpected blkif-DISCONNECTED message"
" in state %d\n", blkif_state);
print( "VBD driver recovery in progress\n");
/* Prevent new requests being issued until we fix things up. */
ilock(&io_request_lock);
recovery = 1;
blkif_state = BLKIF_STATE_DISCONNECTED;
iunlock(&io_request_lock);
/* Free resources associated with old device channel. */
// free_page((unsigned long)blk_ring);
// free_irq(blkif_irq, NULL);
// unbind_evtchn_from_irq(blkif_evtchn);
}
/* Move from CLOSED to DISCONNECTED state. */
blk_ring = (blkif_ring_t *)xspanalloc(BY2PG, BY2PG, 0);
/* avoid xspanalloc bug */
/* NOTE(review): 4096 is presumably BY2PG -- use the macro; confirm */
memset(blk_ring, 0, 4096);
blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
blkif_state = BLKIF_STATE_DISCONNECTED;
/* Construct an interface-CONNECT message for the domain controller. */
cmsg.type = CMSG_BLKIF_FE;
cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT;
cmsg.length = sizeof(blkif_fe_interface_connect_t);
diskup.handle = 0;
/*
 * NOTE(review): treats xen_mm_mfn() as a machine address and shifts
 * it down to a frame number (xlvbd_get_vbd_info uses the unshifted
 * value in frame_and_sects) -- confirm its return semantics.
 */
diskup.shmem_frame = xen_mm_mfn(blk_ring) >> PGSHIFT;
memmove(cmsg.msg, &diskup, sizeof(diskup));
/* Tell the controller to bring up the interface. */
ctrl_if_send_message_block(&cmsg, nil, 0, 0);
break;
case BLKIF_INTERFACE_STATUS_CONNECTED:
if ( blkif_state == BLKIF_STATE_CLOSED )
{
print( "Unexpected blkif-CONNECTED message"
" in state %d\n", blkif_state);
break;
}
/* bind the backend's event channel to an irq and enable it */
blkif_evtchn = status->evtchn;
print("===========> evtchn for blkif is %d\n", blkif_evtchn);
blkif_irq = bind_evtchn_to_irq(blkif_evtchn, 0);
intrenable(blkif_irq, blkif_int, 0, 0, "blkif");
print("====> enable disk interrupt\n");
if ( recovery )
{
int i;
/* Shouldn't need the io_request_lock here - the device is
 * plugged and the recovery flag prevents the interrupt handler
 * changing anything. */
/* Reissue requests from the private block ring. */
for ( i = 0;
resp_cons_rec < blk_ring_rec->req_prod;
resp_cons_rec++, i++ )
{
blk_ring->ring[i].req
= blk_ring_rec->ring[MASK_BLKIF_IDX(resp_cons_rec)].req;
}
/* Reset the private block ring to match the new ring. */
memmove(blk_ring_rec, blk_ring, sizeof(*blk_ring));
resp_cons_rec = 0;
/* blk_ring->req_prod will be set when we flush_requests().*/
blk_ring_rec->req_prod = req_prod = i;
wmb();
/* Switch off recovery mode, using a memory barrier to ensure that
 * it's seen before we flush requests - we don't want to miss any
 * interrupts. */
recovery = 0;
wmb();
/* Kicks things back into life. */
flush_requests();
}
else
{
/* Probe for discs that are attached to the interface. */
dp("======> PROBE\n");
xlvbd_init();
}
blkif_state = BLKIF_STATE_CONNECTED;
/* Kick pending requests. *
ilock(&io_request_lock);
kick_pending_request_queues();
iunlock(&io_request_lock);
*/
print("=========> controller connected\n");
break;
default:
print( "Status change to unknown value %d\n",
status->status);
break;
}
}
/*
 * Receiver for CMSG_BLKIF_FE control messages, registered in
 * xlblk_init.  Dispatches interface-status changes to
 * blkif_status_change and acknowledges every message; malformed
 * messages are acknowledged with length 0.
 */
static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long)
{
dp("================>blkif_ctrlif_rx subtype %d\n", msg->subtype);
switch ( msg->subtype )
{
case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
print("===========> status changed! msg length %d want %d\n",
msg->length, sizeof(blkif_fe_interface_status_changed_t));
/*
 * NOTE(review): the wire message is 12 bytes; sizeof the C struct
 * is apparently larger (padding), hence the hard-coded literal.
 * Confirm against the control-interface ABI.
 */
if ( msg->length != 12 )
goto parse_error;
blkif_status_change((blkif_fe_interface_status_changed_t *)
&msg->msg[0]);
break;
#ifdef NOTNOWAY
case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
update_tq.routine = update_vbds_task;
schedule_task(&update_tq);
break;
#endif
default:
dp("==========> PARSE ERROR!\n");
goto parse_error;
}
ctrl_if_send_response(msg);
return;
parse_error:
dp("============> PARSE ERROR!\n");
msg->length = 0;
ctrl_if_send_response(msg);
}
/*
 * Initialise the Xen virtual block-device frontend: allocate the
 * private recovery ring, register the control-channel receiver for
 * block-interface messages, and notify the domain controller that the
 * driver is up.  The connection completes asynchronously when the
 * controller replies (blkif_ctrlif_rx -> blkif_status_change);
 * sdxenverify spins until the interface reaches CONNECTED.
 * Returns 0; panics if the recovery ring cannot be allocated.
 */
int xlblk_init(void)
{
	ctrl_msg_t cmsg;
	blkif_fe_driver_status_changed_t st;

	LOG( dp("xlblk_init\n");)
	print( "Initialising Xen virtual block device\n");
	/* private copy of outstanding requests, replayed after a reconnect */
	blk_ring_rec = mallocalign(BY2PG, BY2PG, 0, 0);
	if(blk_ring_rec == nil)
		panic("xlblk_init: no memory for recovery ring");
	memset(blk_ring_rec, 0, sizeof(*blk_ring_rec));
	ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
		CALLBACK_IN_BLOCKING_CONTEXT);
	/* Send a driver-UP notification to the domain controller. */
	cmsg.type = CMSG_BLKIF_FE;
	cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
	cmsg.length = sizeof(blkif_fe_driver_status_changed_t);
	st.status = BLKIF_DRIVER_STATUS_UP;
	memmove(cmsg.msg, &st, sizeof(st));
	ctrl_if_send_message_block(&cmsg, nil, 0, 0);
	LOG( dp("============>Sent a message to fire it up\n");)
	/*
	 * We should read 'nr_interfaces' from the response message and wait
	 * for notifications before proceeding.  For now we assume that we
	 * will be notified of exactly one interface.
	 */
	dp("============> DONE xlblk_init\n");
	return 0;
}
/* end of the Xen code */
extern SDifc sdxenifc;
typedef struct Ctlr Ctlr;
/* per-controller state; only records the owning SDev (readonly unused) */
struct Ctlr {
int readonly;
SDev* sdev;
};
/* we don't do more than on page? or do we? */
/*
 * Block I/O entry point: transfer nb 512-byte sectors starting at
 * block bno through a single page-aligned bounce buffer the backend
 * can address.  Each iteration moves at most one page's worth of
 * sectors (8), fewer when bno is not aligned within the page.
 * Blocks (via sched()) until each request completes.
 * Returns the number of bytes transferred; raises an error on
 * allocation failure.
 */
static long
sdxenbio(SDunit* unit, int, int write, void* data, long nb, long bno)
{
	long rlen;
	unsigned char *pages, *datap = data;
	int offset, nsects;

	/* page-aligned bounce buffer; must be checked before waserror() */
	pages = mallocalign(BY2PG, BY2PG, 0, 0);
	if(pages == nil)
		error(Enomem);
	if (waserror()) {
		dp("free pages for error\n");
		free(pages);
		nexterror();
	}
	LOG(dp("sdxenbio, pages %p\n", pages);)
	LOG(dp("sdxenbio: unit %p, write %d, data %p, nb %lud, bno 0x%lux\n",
	unit, write, data, nb, bno);)
	rlen = 0;
	while(nb) {
		int done;

		/* sectors available in the page at this block offset */
		offset = bno & 7;
		nsects = 8 - offset;
		if (nsects > nb)
			nsects = nb;
		if (write)
			memmove(pages + offset*512, datap, nsects * 512);
		LOG(dp("queue page %p, nsects %d, offset 0x%ux, bno 0x%lux\n", pages, nsects, offset, bno);)
		/*
		 * done is set to 1 by blkif_int when the response arrives;
		 * its address travels to the backend in the request id.
		 * NOTE(review): a non-zero return from blkif_queue_request
		 * (interface not connected) would leave done unset and spin
		 * forever; sdxenverify is assumed to have waited for the
		 * CONNECTED state before any bio reaches here.
		 */
		done = 0;
		blkif_queue_request((unsigned long) &done,
			write? BLKIF_OP_WRITE : BLKIF_OP_READ,
			(void *) ((unsigned long)pages +
			offset*512),
			bno,
			nsects,
			vbd_info[unit->subno].device);
		while (! done)
			sched();
		nb -= nsects;
		if (! write)
			memmove(datap, pages+offset * 512, nsects * 512);
		datap += nsects * 512;
		rlen += nsects * 512;
		bno += nsects;
	}
	LOG(dp("ALL DONE sdxen bio!\n");)
	poperror();
	free(pages);
	return rlen;
}
/*
 * Raw SCSI requests are not supported on the Xen virtual disk;
 * always fails.
 */
static int
sdxenrio(SDreq*)
{
dp("sdxenrio will return -1\n");
/*
 * Useful at some point?
 */
return -1;
}
/*
 * Report the unit's geometry from the probe results: 512-byte
 * sectors, capacity as returned by the backend.  A unit with no
 * corresponding probed disk is reported offline.
 */
static int
sdxenonline(SDunit* unit)
{
	LOG(dp("sdxenonline\n");)
	if(nr_vbds <= 0 || unit->subno >= nr_vbds)
		return 0;
	unit->secsize = 512;
	unit->sectors = vbd_info[unit->subno].capacity;
	return 1;
}
/*
 * Enable the controller.  Interrupt binding happens when the backend
 * reports CONNECTED (see blkif_status_change), so nothing to do here.
 */
static int
sdxenenable(SDev*)
{
dp("sdxenenable\n");
/*
 * Stuff here to hook to interrupts, etc.
 */
return 1;
}
/*
 * Verify that a unit is present.  The backend connection is brought
 * up asynchronously by the control-channel handler, so spin here
 * until the interface reports CONNECTED, then check that the probe
 * found a disk for this subunit number.
 */
int
sdxenverify(SDunit* unit)
{
	print("sdxenverify+%d: nr %d\n", unit->subno, nr_vbds);
	while(blkif_state != BLKIF_STATE_CONNECTED){
		dp("========> sched until bllkif is ready\n");
		sched();
	}
	dp("=========> BLKIF ready\n");
	print("sdxenverify-%d: nr %d\n", unit->subno, nr_vbds);
	if(nr_vbds <= 0 || unit->subno >= nr_vbds)
		return 0;
	return 1;
}
/*
 * Assign the unit naming for this controller via the generic scsiid
 * helper, which also caps the number of units at 16 (see sdxenpnp).
 */
static SDev*
sdxenid(SDev* sdev)
{
	dp("sdxenid\n");
	return scsiid(sdev, &sdxenifc);
}
static SDev*
sdxenpnp(void)
{
Ctlr *ctlr;
SDev *sdev;
print("sdxenpnp\n");
/*
* Probably don't need a Ctlr struct at all,
* you may not have any state that isn't all held
* in the SDev and SDunit structs.
*/
if((ctlr = malloc(sizeof(Ctlr))) == nil)
return nil;
if((sdev = malloc(sizeof(SDev))) == nil){
free(ctlr);
return nil;
}
sdev->ifc = &sdxenifc;
sdev->ctlr = ctlr;
sdev->nunit = 16;
xlblk_init();
if(sdev->nunit > 16)
sdev->nunit = 16;
ctlr->sdev = sdev;
print("sdxenpnp returns %p, %d\n", sdev, sdev->nunit);
return sdev;
}
/* interface table plugged into the generic sd driver framework */
SDifc sdxenifc = {
"sdxen", /* name */
sdxenpnp, /* pnp */
nil, /* legacy */
sdxenid, /* id */
sdxenenable, /* enable */
nil, /* disable */
sdxenverify, /* verify */
sdxenonline, /* online */
sdxenrio, /* rio */
nil, /* rctl */
nil, /* wctl */
sdxenbio, /* bio */
nil, /* probe */
nil, /* clear */
nil, /* stat */
};
|