blkback/blktap
static int __init blkif_init(void)
{
int i, mmap_pages;
int rc = 0;
if (!xen_pv_domain())
return -ENODEV;
mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
blkif_reqs, GFP_KERNEL);
pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
mmap_pages, GFP_KERNEL);
pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
if (blkback_pagemap_init(mmap_pages)) {
rc = -ENOMEM;
goto out_of_memory;
}
if (!pending_reqs || !pending_grant_handles || !pending_pages) {
rc = -ENOMEM;
goto out_of_memory;
}
for (i = 0; i < mmap_pages; i++)
pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
rc = blkif_interface_init();
if (rc)
goto failed_init;
memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
INIT_LIST_HEAD(&pending_free);
for (i = 0; i < blkif_reqs; i++)
list_add_tail(&pending_reqs[i].free_list, &pending_free);
rc = blkif_xenbus_init();
if (rc)
goto failed_init;
return 0;
out_of_memory:
printk(KERN_ERR "%s: out of memory\n", __func__);
failed_init:
kfree(pending_reqs);
kfree(pending_grant_handles);
free_empty_pages_and_pagevec(pending_pages, mmap_pages);
return rc;
}
It is said that 4K sectors can greatly improve disk I/O performance, and I first guessed that might be why a total of blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST pages is allocated here, i.e. to support reads and writes of 4K-sized sectors (I was not sure). The more direct reason is that each in-flight request can carry up to BLKIF_MAX_SEGMENTS_PER_REQUEST segments, and every segment needs one backend page into which the corresponding frontend grant gets mapped, so the pool reserves one page for every segment that could be outstanding at once.
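As a rough sanity check on the pool size, here is a small sketch (not driver code; it assumes the usual default of 64 for the blkif_reqs "reqs" module parameter, BLKIF_MAX_SEGMENTS_PER_REQUEST = 11, and 4 KiB pages):

#include <stdio.h>

int main(void)
{
	int blkif_reqs = 64;   /* assumed default of the "reqs" module parameter */
	int segs_per_req = 11; /* BLKIF_MAX_SEGMENTS_PER_REQUEST */
	int mmap_pages = blkif_reqs * segs_per_req;

	/* 64 * 11 = 704 pages, i.e. 704 * 4 KiB = 2816 KiB reserved up front */
	printf("mmap_pages = %d (%d KiB)\n", mmap_pages, mmap_pages * 4);
	return 0;
}

So the up-front cost is modest, under 3 MiB of empty pages, and it is paid once per module rather than per attached VBD, since pending_pages is a global pool shared by all interfaces.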
typedef struct blkif_st {
/* Unique identifier for this interface. */
domid_t domid;
unsigned int handle;
/* Physical parameters of the comms window. */
unsigned int irq;
/* Comms information. */
enum blkif_protocol blk_protocol;
union blkif_back_rings blk_rings;
struct vm_struct *blk_ring_area;
/* The VBD attached to this interface. */
struct vbd vbd;
/* Back pointer to the backend_info. */
struct backend_info *be;
/* Private fields. */
spinlock_t blk_ring_lock;
atomic_t refcnt;
wait_queue_head_t wq;
struct task_struct *xenblkd;
unsigned int waiting_reqs;
struct request_queue *plug;
/* statistics */
unsigned long st_print;
int st_rd_req;
int st_wr_req;
int st_oo_req;
int st_br_req;
int st_rd_sect;
int st_wr_sect;
wait_queue_head_t waiting_to_free;
grant_handle_t shmem_handle;
grant_ref_t shmem_ref;
} blkif_t;
.name = "vbd",
.owner = THIS_MODULE,
.ids = blkback_ids,
.probe = blkback_probe,
.remove = blkback_remove,
.otherend_changed = frontend_changed
};
int blkif_xenbus_init(void)
{
return xenbus_register_backend(&blkback);
}
typedef struct {
blkif_t *blkif;
u64 id;
int nr_pages;
atomic_t pendcnt;
unsigned short operation;
int status;
struct list_head free_list;
} pending_req_t;
.name = "vbd",
.owner = THIS_MODULE,
.ids = blkback_ids,
.probe = blkback_probe,
.remove = blkback_remove,
.otherend_changed = frontend_changed
};
int blkif_xenbus_init(void)
{
return xenbus_register_backend(&blkback);
}
static int blkback_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
int err;
struct backend_info *be = kzalloc(sizeof(struct backend_info),
GFP_KERNEL);
if (!be) {
xenbus_dev_fatal(dev, -ENOMEM,
"allocating backend structure");
return -ENOMEM;
}
be->dev = dev;
dev_set_drvdata(&dev->dev, be);
be->blkif = blkif_alloc(dev->otherend_id);
if (IS_ERR(be->blkif)) {
err = PTR_ERR(be->blkif);
be->blkif = NULL;
xenbus_dev_fatal(dev, err, "creating block interface");
goto fail;
}
/* setup back pointer */
be->blkif->be = be;
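/* Watch the backend's "physical-device" node in xenstore; backend_changed()
runs once the hotplug scripts have written the major:minor pair there. */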
err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
"%s/%s", dev->nodename, "physical-device");
if (err)
goto fail;
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
return 0;
fail:
DPRINTK("failed");
blkback_remove(dev);
return err;
}
static void frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
int err;
DPRINTK("%s", xenbus_strstate(frontend_state));
switch (frontend_state) {
case XenbusStateInitialising:
if (dev->state == XenbusStateClosed) {
printk(KERN_INFO "%s: %s: prepare for reconnect\n",
__FUNCTION__, dev->nodename);
xenbus_switch_state(dev, XenbusStateInitWait);
}
break;
case XenbusStateInitialised:
case XenbusStateConnected:
/* Ensure we connect even when two watches fire in
close succession and we miss the intermediate value
of frontend_state. */
if (dev->state == XenbusStateConnected)
break;
err = connect_ring(be);
if (err)
break;
update_blkif_status(be->blkif);
break;
case XenbusStateClosing:
blkif_disconnect(be->blkif);
xenbus_switch_state(dev, XenbusStateClosing);
break;
case XenbusStateClosed:
xenbus_switch_state(dev, XenbusStateClosed);
if (xenbus_dev_is_online(dev))
break;
/* fall through if not online */
case XenbusStateUnknown:
device_unregister(&dev->dev);
break;
default:
xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
frontend_state);
break;
}
}
/*
 * Callback received when the hotplug scripts have placed the physical-device
* node. Read it and the mode node, and create a vbd. If the frontend is
* ready, connect.
*/
static void backend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
int err;
unsigned major;
unsigned minor;
struct backend_info *be
= container_of(watch, struct backend_info, backend_watch);
struct xenbus_device *dev = be->dev;
int cdrom = 0;
char *device_type;
DPRINTK("");
err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
&major, &minor);
if (XENBUS_EXIST_ERR(err)) {
/* Since this watch will fire once immediately after it is
registered, we expect this. Ignore it, and wait for the
hotplug scripts. */
return;
}
if (err != 2) {
xenbus_dev_fatal(dev, err, "reading physical-device");
return;
}
if ((be->major || be->minor) &&
((be->major != major) || (be->minor != minor))) {
printk(KERN_WARNING
"blkback: changing physical device (from %x:%x to "
"%x:%x) not supported.\n", be->major, be->minor,
major, minor);
return;
}
be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
if (IS_ERR(be->mode)) {
err = PTR_ERR(be->mode);
be->mode = NULL;
xenbus_dev_fatal(dev, err, "reading mode");
return;
}
device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
if (!IS_ERR(device_type)) {
cdrom = strcmp(device_type, "cdrom") == 0;
kfree(device_type);
}
if (be->major == 0 && be->minor == 0) {
/* Front end dir is a number, which is used as the handle. */
char *p = strrchr(dev->otherend, '/') + 1;
long handle = simple_strtoul(p, NULL, 0);
be->major = major;
be->minor = minor;
err = vbd_create(be->blkif, handle, major, minor,
(NULL == strchr(be->mode, 'w')), cdrom);
if (err) {
be->major = be->minor = 0;
xenbus_dev_fatal(dev, err, "creating vbd structure");
return;
}
err = xenvbd_sysfs_addif(dev);
if (err) {
vbd_free(&be->blkif->vbd);
be->major = be->minor = 0;
xenbus_dev_fatal(dev, err, "creating sysfs entries");
return;
}
/* We're potentially connected now */
update_blkif_status(be->blkif);
}
}
static void update_blkif_status(blkif_t *blkif)
{
int err;
char name[TASK_COMM_LEN];
/* Not ready to connect? */
if (!blkif->irq || !blkif->vbd.bdev)
return;
/* Already connected? */
if (blkif->be->dev->state == XenbusStateConnected)
return;
/* Attempt to connect: exit if we fail to. */
connect(blkif->be);
if (blkif->be->dev->state != XenbusStateConnected)
return;
err = blkback_name(blkif, name);
if (err) {
xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
return;
}
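/* Flush dom0's dirty page cache for the backing device, then drop the
cached pages, so the guest starts from what is actually on disk. */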
err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
if (err) {
xenbus_dev_error(blkif->be->dev, err, "block flush");
return;
}
invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
blkif->xenblkd = kthread_run(blkif_schedule, blkif, "%s", name);
if (IS_ERR(blkif->xenblkd)) {
err = PTR_ERR(blkif->xenblkd);
blkif->xenblkd = NULL;
xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
}
}
int blkif_schedule(void *arg)
{
blkif_t *blkif = arg;
struct vbd *vbd = &blkif->vbd;
blkif_get(blkif);
if (debug_lvl)
printk(KERN_DEBUG "%s: started\n", current->comm);
while (!kthread_should_stop()) {
if (try_to_freeze())
continue;
if (unlikely(vbd->size != vbd_size(vbd)))
vbd_resize(blkif);
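/* Two conditions gate each pass of the loop: the frontend must have queued
work for us (waiting_reqs), and a free pending_req must be available in
pending_free to service it. */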
wait_event_interruptible(
blkif->wq,
blkif->waiting_reqs || kthread_should_stop());
wait_event_interruptible(
pending_free_wq,
!list_empty(&pending_free) || kthread_should_stop());
blkif->waiting_reqs = 0;
smp_mb(); /* clear flag *before* checking for work */
if (do_block_io_op(blkif))
blkif->waiting_reqs = 1;
unplug_queue(blkif);
if (log_stats && time_after(jiffies, blkif->st_print))
print_stats(blkif);
}
if (log_stats)
print_stats(blkif);
if (debug_lvl)
printk(KERN_DEBUG "%s: exiting\n", current->comm);
blkif->xenblkd = NULL;
blkif_put(blkif);
return 0;
}