int dm_io_async_bvec(unsigned int num_regions, #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26) struct dm_io_region *where, #else struct io_region *where, #endif int rw, struct bio_vec *bvec, io_notify_fn fn, void *context) { struct dm_io_request iorq; iorq.bi_rw = rw; iorq.mem.type = DM_IO_BVEC; iorq.mem.ptr.bvec = bvec; iorq.notify.fn = fn; iorq.notify.context = context; iorq.client = flashcache_io_client; return dm_io(&iorq, num_regions, where, NULL); }
The user must set up an io_region structure to describe the desired location of the I/O. Each io_region indicates a block-device along with the starting sector and size of the region.
但是不同的内核版本io_region 结构体的表示不同。
2.6.26以后的版本用dm_io_region表示如下:
/*
 * One I/O target region: a block device together with the starting
 * sector and the size of the region.
 */
struct dm_io_region {
	struct block_device *bdev;	/* block device the region belongs to */
	sector_t sector;		/* starting sector of the region */
	sector_t count;			/* If this is zero the region is ignored. */
};
2.6.26之前的版本用io_region表示如下:
/*
 * One I/O target region: a block device together with the starting
 * sector and the size of the region.
 */
struct io_region {
	struct block_device *bdev;	/* block device the region belongs to */
	sector_t sector;		/* starting sector of the region */
	sector_t count;			/* size of the region */
};
虽然形式不同,但是里面的内容是一样的,都含有一个指向block_device的指针,以及区域的起始扇区和区域的大小。
bio_vec结构体如下:
/*
 * One segment of an I/O: a run of bytes located inside a single page.
 */
struct bio_vec {
	struct page *bv_page;	/* page descriptor of the page frame holding the segment */
	unsigned int bv_len;	/* length of the segment in bytes */
	unsigned int bv_offset;	/* offset of the segment's data within the page frame */
};
io_notify_fn是一个回调函数指针的类型,其定义如下:
/*
 * Completion callback type for asynchronous dm-io.
 * 'error' is a bitset rather than a plain error value; 'context' is the
 * opaque pointer supplied by the caller when the I/O was submitted.
 */
typedef void (*io_notify_fn)(unsigned long error, void *context);
The "error" parameter in this callback is a bitset (instead of a simple error value). In the case of a write-I/O to multiple regions, this bitset allows dm-io to indicate success or failure on each individual region.
dm_io_request结构体如下:(通过dm_io_request结构来封装请求的类型,如果设置了dm_io_notify.fn则是异步IO,否则是同步IO。)
/*
 * Describes one dm-io request: the I/O direction, the memory backing the
 * data, the completion notification and the client.
 */
struct dm_io_request {
	int bi_rw;			/* READ|WRITE - not READA */
	struct dm_io_memory mem;	/* Memory to use for io */
	struct dm_io_notify notify;	/* Synchronous if notify.fn is NULL */
	struct dm_io_client *client;	/* Client memory handler */
};
从上面的分析可以看出,dm_io_async_bvec通过io_notify_fn fn来确定是同步操作还是异步操作,通过bio_vec *bvec确定dm_io的服务类型,dm_io有3种服务类型:
//The first I/O service type takes a list of memory pages as the data buffer for the I/O, along with an offset into the first page. struct page_list { struct page_list *next; struct page *page; }; int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw, struct page_list *pl, unsigned int offset, unsigned long *error_bits); int dm_io_async(unsigned int num_regions, struct io_region *where, int rw, struct page_list *pl, unsigned int offset, io_notify_fn fn, void *context);
//The second I/O service type takes an array of bio vectors as the data buffer for the I/O. This service can be handy if the caller has a pre-assembled bio, but wants to direct different portions of the bio to different devices. int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw, struct bio_vec *bvec, unsigned long *error_bits); int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw, struct bio_vec *bvec, io_notify_fn fn, void *context);
//The third I/O service type takes a pointer to a vmalloc'd memory buffer as the data buffer for the I/O. This service can be handy if the caller needs to do I/O to a large region but doesn't want to allocate a large number of individual memory pages. int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw, void *data, unsigned long *error_bits); int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw, void *data, io_notify_fn fn, void *context);
dm_io_async_bvec通过dm_io_request封装请求之后,确定了请求的各种类型,然后由dm_io()函数来完成操作。dm_io函数如下:
/*
 * Entry point for a dm-io request.
 *
 * Sets up the page provider for the request's memory type, then hands the
 * request to async_io() when a notify callback is present and to sync_io()
 * otherwise.
 *
 * Returns the dp_init() error if setup fails, else the result of the
 * chosen I/O routine. sync_error_bits is only passed to the synchronous
 * path.
 */
int dm_io(struct dm_io_request *io_req, unsigned num_regions,
	  struct dm_io_region *where, unsigned long *sync_error_bits)
{
	struct dpages dp;
	int err = dp_init(io_req, &dp);

	if (err)
		return err;

	/* A callback means the caller wants asynchronous completion. */
	if (io_req->notify.fn)
		return async_io(io_req->client, num_regions, where,
				io_req->bi_rw, &dp, io_req->notify.fn,
				io_req->notify.context);

	return sync_io(io_req->client, num_regions, where, io_req->bi_rw,
		       &dp, sync_error_bits);
}
dpages结构体如下:
/*
 * Abstraction for getting a new destination page for io.
 */
struct dpages {
	/*
	 * Fetch the current region through p/len/offset; what kind of
	 * region this is depends on the context.
	 */
	void (*get_page)(struct dpages *dp, struct page **p,
			 unsigned long *len, unsigned *offset);

	/* Advance to the region following the current one. */
	void (*next_page)(struct dpages *dp);

	unsigned context_u;
	void *context_ptr;	/* private data; its actual type depends on the context */
};
dpages结构体提供了一种抽象,用于为io获取新的目标页(it provides an abstraction for getting a new destination page for io)。
dp_init()函数如下:
/*
 * Initialise 'dp' with the page provider that matches the memory type of
 * the request.
 *
 * Returns 0 on success, or -EINVAL when the memory type is not one of the
 * handled cases.
 */
static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
{
	int ret = 0;

	switch (io_req->mem.type) {
	case DM_IO_PAGE_LIST:
		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
		break;

	case DM_IO_BVEC:
		/* This is the memory type our io_req carries. */
		bvec_dp_init(dp, io_req->mem.ptr.bvec);
		break;

	case DM_IO_VMA:
		vm_dp_init(dp, io_req->mem.ptr.vma);
		break;

	case DM_IO_KMEM:
		km_dp_init(dp, io_req->mem.ptr.addr);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}
bvec_dp_init()函数如下:
static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec) { dp->get_page = bvec_get_page;//获取某一个bio_vec dp->next_page = bvec_next_page;//获取下一个bio_vec dp->context_ptr = bvec;//确定区域类型为bio_vec }
bvec_get_page()函数如下:
/*
 * get_page implementation for bio-vector backed memory: report the page,
 * length and offset of the bio_vec that dp->context_ptr points at.
 */
static void bvec_get_page(struct dpages *dp, struct page **p,
			  unsigned long *len, unsigned *offset)
{
	struct bio_vec *cur = dp->context_ptr;

	*p = cur->bv_page;
	*len = cur->bv_len;
	*offset = cur->bv_offset;
}
bvec_next_page()函数如下:
/*
 * next_page implementation for bio-vector backed memory: move
 * dp->context_ptr forward by one bio_vec element.
 */
static void bvec_next_page(struct dpages *dp)
{
	dp->context_ptr = (struct bio_vec *)dp->context_ptr + 1;
}
处理完dm_io的服务类型之后,然后根据io_req->notify.fn是否设置,来确定dm_io的操作类型是同步的还是异步的。
同步操作调用sync_io;异步操作调用async_io。
sync_io()函数如下:
static int sync_io(struct dm_io_client *client, unsigned int num_regions, struct dm_io_region *where, int rw, struct dpages *dp, unsigned long *error_bits) { struct io io;//将dm_io_request请求进一步封装 if (num_regions > 1 && (rw & RW_MASK) != WRITE) {//dm_io不能读多个io_region WARN_ON(1); return -EIO; } retry: io.error_bits = 0; io.eopnotsupp_bits = 0; atomic_set(&io.count, 1); /* see dispatch_io() */ io.sleeper = current; io.client = client; dispatch_io(rw, num_regions, where, dp, &io, 1); while (1) { set_current_state(TASK_UNINTERRUPTIBLE); if (!atomic_read(&io.count)) break; io_schedule(); } set_current_state(TASK_RUNNING); if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) { rw &= ~(1 << BIO_RW_BARRIER); goto retry; } if (error_bits) *error_bits = io.error_bits; return io.error_bits ? -EIO : 0; }
async_io()函数如下:
/*
 * Perform a dm-io request asynchronously.
 *
 * Allocates a struct io from the client's mempool, records the callback
 * and its context in it, and dispatches the I/O without waiting.
 *
 * Returns 0 once the I/O has been dispatched. If a read of more than one
 * region is requested, fn(1, context) is called and -EIO is returned.
 */
static int async_io(struct dm_io_client *client, unsigned int num_regions,
		    struct dm_io_region *where, int rw, struct dpages *dp,
		    io_notify_fn fn, void *context)
{
	/*
	 * struct io re-wraps the dm_io_request so the I/O can be dispatched
	 * and completed.
	 */
	struct io *io;

	/*
	 * Dm-io can read from one io_region or write to one or more
	 * io_regions. Writes to multiple regions are specified by an array
	 * of io_region structures. Reading multiple regions is not allowed.
	 */
	if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
		WARN_ON(1);
		fn(1, context);
		return -EIO;
	}

	/*
	 * NOTE(review): the result is used without a NULL check; presumably
	 * this relies on mempool_alloc() not failing with GFP_NOIO - confirm.
	 */
	io = mempool_alloc(client->pool, GFP_NOIO);
	io->error_bits = 0;
	io->eopnotsupp_bits = 0;
	atomic_set(&io->count, 1); /* see dispatch_io() */
	io->sleeper = NULL;
	io->client = client;
	io->callback = fn;
	io->context = context;

	dispatch_io(rw, num_regions, where, dp, io, 0);
	return 0;
}