Commit aae5e04f authored by Artur Paszkiewicz, committed by Tomasz Zawadzki

module/raid: raid_io handling during rebuild

Handle I/Os for the processed (rebuilt) range differently from the
unprocessed range. Use a copy of the raid_bdev_io_channel corresponding
to the processed range and assign it to I/Os targeting that range. I/Os
that span both ranges are queued by the bdev layer if the process
window range is quiesced. Otherwise, they are split and the two parts
are handled separately.
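
Concretely, with the process window starting at block offset W,
submission works as follows (a sketch of the logic in
raid_bdev_submit_rw_request()):

  I/O end <= W:    submitted on the processed channel copy
  I/O begin >= W:  submitted on the regular channel
  otherwise:       split at W; [W, end) is submitted first, then
                   [begin, W) on the processed channel copy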

Change-Id: I802fd5071d37fec07aa43d5dc8ce353aba41a2fe
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/18741
Reviewed-by: Jim Harris <jim.harris@samsung.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent 1c715097
module/bdev/raid/bdev_raid.c
+162 −4
@@ -39,6 +39,7 @@ struct raid_bdev_io_channel {
	struct {
		uint64_t offset;
		struct spdk_io_channel *target_ch;
		struct raid_bdev_io_channel *ch_processed;
	} process;
};

@@ -121,11 +122,21 @@ raid_bdev_ch_process_cleanup(struct raid_bdev_io_channel *raid_ch)
		spdk_put_io_channel(raid_ch->process.target_ch);
		raid_ch->process.target_ch = NULL;
	}

	if (raid_ch->process.ch_processed != NULL) {
		free(raid_ch->process.ch_processed->base_channel);
		free(raid_ch->process.ch_processed);
		raid_ch->process.ch_processed = NULL;
	}
}

static int
raid_bdev_ch_process_setup(struct raid_bdev_io_channel *raid_ch, struct raid_bdev_process *process)
{
	struct raid_bdev *raid_bdev = process->raid_bdev;
	struct raid_bdev_io_channel *raid_ch_processed;
	struct raid_base_bdev_info *base_info;

	raid_ch->process.offset = process->window_offset;

	/* In the future we may have other types of processes which don't use a target bdev,
@@ -135,11 +146,38 @@ raid_bdev_ch_process_setup(struct raid_bdev_io_channel *raid_ch, struct raid_bde

	raid_ch->process.target_ch = spdk_bdev_get_io_channel(process->target->desc);
	if (raid_ch->process.target_ch == NULL) {
-		raid_bdev_ch_process_cleanup(raid_ch);
-		return -ENOMEM;
+		goto err;
	}

	raid_ch_processed = calloc(1, sizeof(*raid_ch_processed));
	if (raid_ch_processed == NULL) {
		goto err;
	}
	raid_ch->process.ch_processed = raid_ch_processed;

	raid_ch_processed->base_channel = calloc(raid_bdev->num_base_bdevs,
					  sizeof(*raid_ch_processed->base_channel));
	if (raid_ch_processed->base_channel == NULL) {
		goto err;
	}

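	/*
	 * The processed channel mirrors the regular channel's base bdev
	 * channels, except that the rebuild target's slot uses the channel of
	 * the target bdev.
	 */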
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		uint8_t slot = raid_bdev_base_bdev_slot(base_info);

		if (base_info != process->target) {
			raid_ch_processed->base_channel[slot] = raid_ch->base_channel[slot];
		} else {
			raid_ch_processed->base_channel[slot] = raid_ch->process.target_ch;
		}
	}

	raid_ch_processed->module_channel = raid_ch->module_channel;
	raid_ch_processed->process.offset = RAID_OFFSET_BLOCKS_INVALID;

	return 0;
err:
	raid_bdev_ch_process_cleanup(raid_ch);
	return -ENOMEM;
}

/*
@@ -416,6 +454,45 @@ raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status sta
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);

	if (raid_io->split.offset != RAID_OFFSET_BLOCKS_INVALID) {
		struct iovec *split_iov = raid_io->split.iov;
		const struct iovec *split_iov_orig = &raid_io->split.iov_copy;

		/*
		 * Non-zero offset here means that this is the completion of the first part of the
		 * split I/O (the higher LBAs). Then, we submit the second part and set offset to 0.
		 */
		if (raid_io->split.offset != 0) {
			raid_io->offset_blocks = bdev_io->u.bdev.offset_blocks;
			raid_io->md_buf = bdev_io->u.bdev.md_buf;

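			/*
			 * If the first part succeeded, submit the second part (the lower,
			 * already processed LBAs) using the processed channel.
			 */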
			if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
				raid_io->num_blocks = raid_io->split.offset;
				raid_io->iovcnt = raid_io->iovs - bdev_io->u.bdev.iovs;
				raid_io->iovs = bdev_io->u.bdev.iovs;
				if (split_iov != NULL) {
					raid_io->iovcnt++;
					split_iov->iov_len = split_iov->iov_base - split_iov_orig->iov_base;
					split_iov->iov_base = split_iov_orig->iov_base;
				}

				raid_io->split.offset = 0;
				raid_io->base_bdev_io_submitted = 0;
				raid_io->raid_ch = raid_io->raid_ch->process.ch_processed;

				raid_io->raid_bdev->module->submit_rw_request(raid_io);
				return;
			}
		}

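		/*
		 * Restore the original I/O parameters and iovecs before completing
		 * the parent bdev_io.
		 */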
		raid_io->num_blocks = bdev_io->u.bdev.num_blocks;
		raid_io->iovcnt = bdev_io->u.bdev.iovcnt;
		raid_io->iovs = bdev_io->u.bdev.iovs;
		if (split_iov != NULL) {
			*split_iov = *split_iov_orig;
		}
	}

	if (spdk_unlikely(raid_io->completion_cb != NULL)) {
		raid_io->completion_cb(raid_io, status);
	} else {
@@ -553,6 +630,77 @@ raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io)
	}
}

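/*
 * Make raid_io describe only the upper part of the I/O, [split_offset, end).
 * The lower part is restored and submitted from raid_bdev_io_complete() after
 * the upper part completes.
 */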
static void
raid_bdev_io_split(struct raid_bdev_io *raid_io, uint64_t split_offset)
{
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	size_t iov_offset = (split_offset << raid_bdev->blocklen_shift);
	int i;

	assert(split_offset != 0);
	assert(raid_io->split.offset == RAID_OFFSET_BLOCKS_INVALID);
	raid_io->split.offset = split_offset;

	raid_io->offset_blocks += split_offset;
	raid_io->num_blocks -= split_offset;
	if (raid_io->md_buf != NULL) {
		raid_io->md_buf += (split_offset * raid_bdev->bdev.md_len);
	}

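	/*
	 * Advance to the iovec containing the split point. If the split falls
	 * inside an iovec, save a copy of it so that it can be restored on
	 * completion.
	 */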
	for (i = 0; i < raid_io->iovcnt; i++) {
		struct iovec *iov = &raid_io->iovs[i];

		if (iov_offset < iov->iov_len) {
			if (iov_offset == 0) {
				raid_io->split.iov = NULL;
			} else {
				raid_io->split.iov = iov;
				raid_io->split.iov_copy = *iov;
				iov->iov_base += iov_offset;
				iov->iov_len -= iov_offset;
			}
			raid_io->iovs += i;
			raid_io->iovcnt -= i;
			break;
		}

		iov_offset -= iov->iov_len;
	}
}

static void
raid_bdev_submit_rw_request(struct raid_bdev_io *raid_io)
{
	struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;

	if (raid_ch->process.offset != RAID_OFFSET_BLOCKS_INVALID) {
		uint64_t offset_begin = raid_io->offset_blocks;
		uint64_t offset_end = offset_begin + raid_io->num_blocks;

		if (offset_end > raid_ch->process.offset) {
			if (offset_begin < raid_ch->process.offset) {
				/*
				 * If the I/O spans both the processed and unprocessed ranges,
				 * split it and first handle the unprocessed part. After it
				 * completes, the rest will be handled.
				 * This situation occurs when the process thread is not active
				 * or is waiting for the process window range to be locked
				 * (quiesced). When a window is being processed, such I/Os will be
				 * deferred by the bdev layer until the window is unlocked.
				 */
				SPDK_DEBUGLOG(bdev_raid, "split: process_offset: %lu offset_begin: %lu offset_end: %lu\n",
					      raid_ch->process.offset, offset_begin, offset_end);
				raid_bdev_io_split(raid_io, raid_ch->process.offset - offset_begin);
			}
		} else {
			/* Use the child channel, which corresponds to the already processed range */
			raid_io->raid_ch = raid_ch->process.ch_processed;
		}
	}

	raid_io->raid_bdev->module->submit_rw_request(raid_io);
}

/*
 * brief:
 * Callback function to spdk_bdev_io_get_buf.
@@ -574,7 +722,7 @@ raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
		return;
	}

-	raid_io->raid_bdev->module->submit_rw_request(raid_io);
+	raid_bdev_submit_rw_request(raid_io);
}

void
@@ -601,6 +749,7 @@ raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *rai
	raid_io->base_bdev_io_submitted = 0;
	raid_io->base_bdev_io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
	raid_io->completion_cb = NULL;
	raid_io->split.offset = RAID_OFFSET_BLOCKS_INVALID;
}

/*
@@ -630,7 +779,7 @@ raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_i
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
-		raid_io->raid_bdev->module->submit_rw_request(raid_io);
+		raid_bdev_submit_rw_request(raid_io);
		break;

	case SPDK_BDEV_IO_TYPE_RESET:
@@ -639,6 +788,11 @@ raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_i

	case SPDK_BDEV_IO_TYPE_FLUSH:
	case SPDK_BDEV_IO_TYPE_UNMAP:
		if (raid_io->raid_bdev->process != NULL) {
			/* TODO: rebuild support */
			raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
			return;
		}
		raid_io->raid_bdev->module->submit_null_payload_request(raid_io);
		break;

@@ -1539,6 +1693,10 @@ raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i)
		raid_ch->base_channel[idx] = NULL;
	}

	if (raid_ch->process.ch_processed != NULL) {
		raid_ch->process.ch_processed->base_channel[idx] = NULL;
	}

	spdk_for_each_channel_continue(i, 0);
}

module/bdev/raid/bdev_raid.h
+6 −0
@@ -144,6 +144,12 @@ struct raid_bdev_io {

	/* Custom completion callback. Overrides bdev_io completion if set. */
	raid_bdev_io_completion_cb	completion_cb;

	struct {
		uint64_t		offset;
		struct iovec		*iov;
		struct iovec		iov_copy;
	} split;
};

struct raid_bdev_process_request {
test/unit/lib/bdev/raid/bdev_raid.c/bdev_raid_ut.c
+328 −15
@@ -73,6 +73,8 @@ struct raid_io_ranges g_io_ranges[MAX_TEST_IO_RANGE];
uint32_t g_io_range_idx;
uint64_t g_lba_offset;
uint64_t g_bdev_ch_io_device;
bool g_bdev_io_defer_completion;
TAILQ_HEAD(, spdk_bdev_io) g_deferred_ios = TAILQ_HEAD_INITIALIZER(g_deferred_ios);

DEFINE_STUB_V(spdk_bdev_module_examine_done, (struct spdk_bdev_module *module));
DEFINE_STUB_V(spdk_bdev_module_list_add, (struct spdk_bdev_module *bdev_module));
@@ -205,6 +207,7 @@ set_globals(void)
	g_json_decode_obj_err = 0;
	g_json_decode_obj_create = 0;
	g_lba_offset = 0;
	g_bdev_io_defer_completion = false;
}

static void
@@ -281,6 +284,29 @@ set_io_output(struct io_output *output,
	output->iotype = iotype;
}

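/*
 * When g_bdev_io_defer_completion is set, child I/O completions are queued so
 * that a test can complete each part of a split I/O explicitly with
 * complete_deferred_ios().
 */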
static void
child_io_complete(struct spdk_bdev_io *child_io, spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	if (g_bdev_io_defer_completion) {
		child_io->internal.cb = cb;
		child_io->internal.caller_ctx = cb_arg;
		TAILQ_INSERT_TAIL(&g_deferred_ios, child_io, internal.link);
	} else {
		cb(child_io, g_child_io_status_flag, cb_arg);
	}
}

static void
complete_deferred_ios(void)
{
	struct spdk_bdev_io *child_io, *tmp;

	TAILQ_FOREACH_SAFE(child_io, &g_deferred_ios, internal.link, tmp) {
		TAILQ_REMOVE(&g_deferred_ios, child_io, internal.link);
		child_io->internal.cb(child_io, g_child_io_status_flag, child_io->internal.caller_ctx);
	}
}

/* It will cache the split IOs for verification */
int
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
@@ -307,7 +333,7 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,

		child_io = calloc(1, sizeof(struct spdk_bdev_io));
		SPDK_CU_ASSERT_FATAL(child_io != NULL);
-		cb(child_io, g_child_io_status_flag, cb_arg);
+		child_io_complete(child_io, cb, cb_arg);
	}

	return g_bdev_io_submit_status;
@@ -349,7 +375,7 @@ spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,

		child_io = calloc(1, sizeof(struct spdk_bdev_io));
		SPDK_CU_ASSERT_FATAL(child_io != NULL);
-		cb(child_io, g_child_io_status_flag, cb_arg);
+		child_io_complete(child_io, cb, cb_arg);
	}

	return g_bdev_io_submit_status;
@@ -374,7 +400,7 @@ spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,

		child_io = calloc(1, sizeof(struct spdk_bdev_io));
		SPDK_CU_ASSERT_FATAL(child_io != NULL);
-		cb(child_io, g_child_io_status_flag, cb_arg);
+		child_io_complete(child_io, cb, cb_arg);
	}

	return g_bdev_io_submit_status;
@@ -505,7 +531,7 @@ spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,

		child_io = calloc(1, sizeof(struct spdk_bdev_io));
		SPDK_CU_ASSERT_FATAL(child_io != NULL);
-		cb(child_io, g_child_io_status_flag, cb_arg);
+		child_io_complete(child_io, cb, cb_arg);
	}

	return g_bdev_io_submit_status;
@@ -671,8 +697,10 @@ static void
bdev_io_cleanup(struct spdk_bdev_io *bdev_io)
{
	if (bdev_io->u.bdev.iovs) {
-		if (bdev_io->u.bdev.iovs->iov_base) {
-			free(bdev_io->u.bdev.iovs->iov_base);
+		int i;
+
+		for (i = 0; i < bdev_io->u.bdev.iovcnt; i++) {
+			free(bdev_io->u.bdev.iovs[i].iov_base);
		}
		free(bdev_io->u.bdev.iovs);
	}
@@ -680,27 +708,55 @@ bdev_io_cleanup(struct spdk_bdev_io *bdev_io)
}

static void
-bdev_io_initialize(struct spdk_bdev_io *bdev_io, struct spdk_io_channel *ch, struct spdk_bdev *bdev,
-		   uint64_t lba, uint64_t blocks, int16_t iotype)
+_bdev_io_initialize(struct spdk_bdev_io *bdev_io, struct spdk_io_channel *ch,
+		    struct spdk_bdev *bdev, uint64_t lba, uint64_t blocks, int16_t iotype,
+		    int iovcnt, size_t iov_len)
{
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
+	int i;

	bdev_io->bdev = bdev;
	bdev_io->u.bdev.offset_blocks = lba;
	bdev_io->u.bdev.num_blocks = blocks;
	bdev_io->type = iotype;
+	bdev_io->internal.ch = channel;
+	bdev_io->u.bdev.iovcnt = iovcnt;

-	if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP || bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) {
+	if (iovcnt == 0) {
+		bdev_io->u.bdev.iovs = NULL;
		return;
	}

-	bdev_io->u.bdev.iovcnt = 1;
-	bdev_io->u.bdev.iovs = calloc(1, sizeof(struct iovec));
+	SPDK_CU_ASSERT_FATAL(iov_len * iovcnt == blocks * g_block_len);
+
+	bdev_io->u.bdev.iovs = calloc(iovcnt, sizeof(struct iovec));
	SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs != NULL);
-	bdev_io->u.bdev.iovs->iov_base = calloc(1, bdev_io->u.bdev.num_blocks * g_block_len);
-	SPDK_CU_ASSERT_FATAL(bdev_io->u.bdev.iovs->iov_base != NULL);
-	bdev_io->u.bdev.iovs->iov_len = bdev_io->u.bdev.num_blocks * g_block_len;
-	bdev_io->internal.ch = channel;
+
+	for (i = 0; i < iovcnt; i++) {
+		struct iovec *iov = &bdev_io->u.bdev.iovs[i];
+
+		iov->iov_base = calloc(1, iov_len);
+		SPDK_CU_ASSERT_FATAL(iov->iov_base != NULL);
+		iov->iov_len = iov_len;
+	}
}

+static void
+bdev_io_initialize(struct spdk_bdev_io *bdev_io, struct spdk_io_channel *ch, struct spdk_bdev *bdev,
+		   uint64_t lba, uint64_t blocks, int16_t iotype)
+{
+	int iovcnt;
+	size_t iov_len;
+
+	if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP || bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH) {
+		iovcnt = 0;
+		iov_len = 0;
+	} else {
+		iovcnt = 1;
+		iov_len = blocks * g_block_len;
+	}
+
+	_bdev_io_initialize(bdev_io, ch, bdev, lba, blocks, iotype, iovcnt, iov_len);
+}

static void
@@ -2070,6 +2126,262 @@ test_raid_process(void)
	reset_globals();
}

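/*
 * Verify splitting of write I/Os that span the process window offset: a
 * single-iovec payload and a 4-iovec payload split at different iovec
 * boundaries, checking that the original bdev_io buffers are restored on
 * completion.
 */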
static void
test_raid_io_split(void)
{
	struct rpc_bdev_raid_create req;
	struct rpc_bdev_raid_delete destroy_req;
	struct raid_bdev *pbdev;
	struct spdk_io_channel *ch;
	struct raid_bdev_io_channel *raid_ch;
	struct spdk_bdev_io *bdev_io;
	struct raid_bdev_io *raid_io;
	uint64_t split_offset;
	struct iovec iovs_orig[4];
	struct raid_bdev_process process = { };

	set_globals();
	CU_ASSERT(raid_bdev_init() == 0);

	verify_raid_bdev_present("raid1", false);
	create_raid_bdev_create_req(&req, "raid1", 0, true, 0, false);
	rpc_bdev_raid_create(NULL, NULL);
	CU_ASSERT(g_rpc_err == 0);
	verify_raid_bdev(&req, true, RAID_BDEV_STATE_ONLINE);

	TAILQ_FOREACH(pbdev, &g_raid_bdev_list, global_link) {
		if (strcmp(pbdev->bdev.name, "raid1") == 0) {
			break;
		}
	}
	CU_ASSERT(pbdev != NULL);
	pbdev->bdev.md_len = 8;

	process.raid_bdev = pbdev;
	process.target = &pbdev->base_bdev_info[0];
	pbdev->process = &process;
	ch = spdk_get_io_channel(pbdev);
	SPDK_CU_ASSERT_FATAL(ch != NULL);
	raid_ch = spdk_io_channel_get_ctx(ch);
	g_bdev_io_defer_completion = true;

	/* test split of bdev_io with 1 iovec */
	bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
	SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	bdev_io_initialize(bdev_io, ch, &pbdev->bdev, 0, g_strip_size, SPDK_BDEV_IO_TYPE_WRITE);
	memcpy(iovs_orig, bdev_io->u.bdev.iovs, sizeof(*iovs_orig) * bdev_io->u.bdev.iovcnt);
	memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
	bdev_io->u.bdev.md_buf = (void *)0x1000000;
	g_io_output_index = 0;

	split_offset = 1;
	raid_ch->process.offset = split_offset;
	raid_bdev_submit_request(ch, bdev_io);
	CU_ASSERT(raid_io->num_blocks == g_strip_size - split_offset);
	CU_ASSERT(raid_io->offset_blocks == split_offset);
	CU_ASSERT(raid_io->iovcnt == 1);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(raid_io->iovs == raid_io->split.iov);
	CU_ASSERT(raid_io->iovs[0].iov_base == iovs_orig->iov_base + split_offset * g_block_len);
	CU_ASSERT(raid_io->iovs[0].iov_len == iovs_orig->iov_len - split_offset * g_block_len);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf + split_offset * pbdev->bdev.md_len);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == split_offset);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 1);
	CU_ASSERT(raid_io->iovs[0].iov_base == iovs_orig->iov_base);
	CU_ASSERT(raid_io->iovs[0].iov_len == split_offset * g_block_len);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == g_strip_size);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 1);
	CU_ASSERT(raid_io->iovs[0].iov_base == iovs_orig->iov_base);
	CU_ASSERT(raid_io->iovs[0].iov_len == iovs_orig->iov_len);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);

	CU_ASSERT(g_io_comp_status == g_child_io_status_flag);
	CU_ASSERT(g_io_output_index == 2);
	CU_ASSERT(g_io_output[0].offset_blocks == split_offset);
	CU_ASSERT(g_io_output[0].num_blocks == g_strip_size - split_offset);
	CU_ASSERT(g_io_output[1].offset_blocks == 0);
	CU_ASSERT(g_io_output[1].num_blocks == split_offset);
	bdev_io_cleanup(bdev_io);

	/* test split of bdev_io with 4 iovecs */
	bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
	SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	_bdev_io_initialize(bdev_io, ch, &pbdev->bdev, 0, g_strip_size, SPDK_BDEV_IO_TYPE_WRITE,
			    4, g_strip_size / 4 * g_block_len);
	memcpy(iovs_orig, bdev_io->u.bdev.iovs, sizeof(*iovs_orig) * bdev_io->u.bdev.iovcnt);
	memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
	bdev_io->u.bdev.md_buf = (void *)0x1000000;
	g_io_output_index = 0;

	split_offset = 1; /* split at the first iovec */
	raid_ch->process.offset = split_offset;
	raid_bdev_submit_request(ch, bdev_io);
	CU_ASSERT(raid_io->num_blocks == g_strip_size - split_offset);
	CU_ASSERT(raid_io->offset_blocks == split_offset);
	CU_ASSERT(raid_io->iovcnt == 4);
	CU_ASSERT(raid_io->split.iov == &bdev_io->u.bdev.iovs[0]);
	CU_ASSERT(raid_io->iovs == &bdev_io->u.bdev.iovs[0]);
	CU_ASSERT(raid_io->iovs[0].iov_base == iovs_orig[0].iov_base + g_block_len);
	CU_ASSERT(raid_io->iovs[0].iov_len == iovs_orig[0].iov_len - g_block_len);
	CU_ASSERT(memcmp(raid_io->iovs + 1, iovs_orig + 1, sizeof(*iovs_orig) * 3) == 0);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf + split_offset * pbdev->bdev.md_len);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == split_offset);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 1);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(raid_io->iovs[0].iov_base == iovs_orig[0].iov_base);
	CU_ASSERT(raid_io->iovs[0].iov_len == g_block_len);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == g_strip_size);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 4);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(memcmp(raid_io->iovs, iovs_orig, sizeof(*iovs_orig) * raid_io->iovcnt) == 0);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);

	CU_ASSERT(g_io_comp_status == g_child_io_status_flag);
	CU_ASSERT(g_io_output_index == 2);
	CU_ASSERT(g_io_output[0].offset_blocks == split_offset);
	CU_ASSERT(g_io_output[0].num_blocks == g_strip_size - split_offset);
	CU_ASSERT(g_io_output[1].offset_blocks == 0);
	CU_ASSERT(g_io_output[1].num_blocks == split_offset);

	memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
	g_io_output_index = 0;

	split_offset = g_strip_size / 2; /* split exactly between second and third iovec */
	raid_ch->process.offset = split_offset;
	raid_bdev_submit_request(ch, bdev_io);
	CU_ASSERT(raid_io->num_blocks == g_strip_size - split_offset);
	CU_ASSERT(raid_io->offset_blocks == split_offset);
	CU_ASSERT(raid_io->iovcnt == 2);
	CU_ASSERT(raid_io->split.iov == NULL);
	CU_ASSERT(raid_io->iovs == &bdev_io->u.bdev.iovs[2]);
	CU_ASSERT(memcmp(raid_io->iovs, iovs_orig + 2, sizeof(*iovs_orig) * raid_io->iovcnt) == 0);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf + split_offset * pbdev->bdev.md_len);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == split_offset);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 2);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(memcmp(raid_io->iovs, iovs_orig, sizeof(*iovs_orig) * raid_io->iovcnt) == 0);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == g_strip_size);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 4);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(memcmp(raid_io->iovs, iovs_orig, sizeof(*iovs_orig) * raid_io->iovcnt) == 0);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);

	CU_ASSERT(g_io_comp_status == g_child_io_status_flag);
	CU_ASSERT(g_io_output_index == 2);
	CU_ASSERT(g_io_output[0].offset_blocks == split_offset);
	CU_ASSERT(g_io_output[0].num_blocks == g_strip_size - split_offset);
	CU_ASSERT(g_io_output[1].offset_blocks == 0);
	CU_ASSERT(g_io_output[1].num_blocks == split_offset);

	memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
	g_io_output_index = 0;

	split_offset = g_strip_size / 2 + 1; /* split at the third iovec */
	raid_ch->process.offset = split_offset;
	raid_bdev_submit_request(ch, bdev_io);
	CU_ASSERT(raid_io->num_blocks == g_strip_size - split_offset);
	CU_ASSERT(raid_io->offset_blocks == split_offset);
	CU_ASSERT(raid_io->iovcnt == 2);
	CU_ASSERT(raid_io->split.iov == &bdev_io->u.bdev.iovs[2]);
	CU_ASSERT(raid_io->iovs == &bdev_io->u.bdev.iovs[2]);
	CU_ASSERT(raid_io->iovs[0].iov_base == iovs_orig[2].iov_base + g_block_len);
	CU_ASSERT(raid_io->iovs[0].iov_len == iovs_orig[2].iov_len - g_block_len);
	CU_ASSERT(raid_io->iovs[1].iov_base == iovs_orig[3].iov_base);
	CU_ASSERT(raid_io->iovs[1].iov_len == iovs_orig[3].iov_len);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf + split_offset * pbdev->bdev.md_len);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == split_offset);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 3);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(memcmp(raid_io->iovs, iovs_orig, sizeof(*iovs_orig) * 2) == 0);
	CU_ASSERT(raid_io->iovs[2].iov_base == iovs_orig[2].iov_base);
	CU_ASSERT(raid_io->iovs[2].iov_len == g_block_len);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == g_strip_size);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 4);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(memcmp(raid_io->iovs, iovs_orig, sizeof(*iovs_orig) * raid_io->iovcnt) == 0);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);

	CU_ASSERT(g_io_comp_status == g_child_io_status_flag);
	CU_ASSERT(g_io_output_index == 2);
	CU_ASSERT(g_io_output[0].offset_blocks == split_offset);
	CU_ASSERT(g_io_output[0].num_blocks == g_strip_size - split_offset);
	CU_ASSERT(g_io_output[1].offset_blocks == 0);
	CU_ASSERT(g_io_output[1].num_blocks == split_offset);

	memset(g_io_output, 0, ((g_max_io_size / g_strip_size) + 1) * sizeof(struct io_output));
	g_io_output_index = 0;

	split_offset = g_strip_size - 1; /* split at the last iovec */
	raid_ch->process.offset = split_offset;
	raid_bdev_submit_request(ch, bdev_io);
	CU_ASSERT(raid_io->num_blocks == g_strip_size - split_offset);
	CU_ASSERT(raid_io->offset_blocks == split_offset);
	CU_ASSERT(raid_io->iovcnt == 1);
	CU_ASSERT(raid_io->split.iov == &bdev_io->u.bdev.iovs[3]);
	CU_ASSERT(raid_io->iovs == &bdev_io->u.bdev.iovs[3]);
	CU_ASSERT(raid_io->iovs[0].iov_base == iovs_orig[3].iov_base + iovs_orig[3].iov_len - g_block_len);
	CU_ASSERT(raid_io->iovs[0].iov_len == g_block_len);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf + split_offset * pbdev->bdev.md_len);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == split_offset);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 4);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(memcmp(raid_io->iovs, iovs_orig, sizeof(*iovs_orig) * 3) == 0);
	CU_ASSERT(raid_io->iovs[3].iov_base == iovs_orig[3].iov_base);
	CU_ASSERT(raid_io->iovs[3].iov_len == iovs_orig[3].iov_len - g_block_len);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);
	complete_deferred_ios();
	CU_ASSERT(raid_io->num_blocks == g_strip_size);
	CU_ASSERT(raid_io->offset_blocks == 0);
	CU_ASSERT(raid_io->iovcnt == 4);
	CU_ASSERT(raid_io->iovs == bdev_io->u.bdev.iovs);
	CU_ASSERT(memcmp(raid_io->iovs, iovs_orig, sizeof(*iovs_orig) * raid_io->iovcnt) == 0);
	CU_ASSERT(raid_io->md_buf == bdev_io->u.bdev.md_buf);

	CU_ASSERT(g_io_comp_status == g_child_io_status_flag);
	CU_ASSERT(g_io_output_index == 2);
	CU_ASSERT(g_io_output[0].offset_blocks == split_offset);
	CU_ASSERT(g_io_output[0].num_blocks == g_strip_size - split_offset);
	CU_ASSERT(g_io_output[1].offset_blocks == 0);
	CU_ASSERT(g_io_output[1].num_blocks == split_offset);
	bdev_io_cleanup(bdev_io);

	spdk_put_io_channel(ch);
	free_test_req(&req);

	create_raid_bdev_delete_req(&destroy_req, "raid1", 0);
	rpc_bdev_raid_delete(NULL, NULL);
	CU_ASSERT(g_rpc_err == 0);
	verify_raid_bdev_present("raid1", false);

	raid_bdev_exit();
	base_bdevs_cleanup();
	reset_globals();
}

static int
test_bdev_ioch_create(void *io_device, void *ctx_buf)
{
@@ -2109,6 +2421,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, test_context_size);
	CU_ADD_TEST(suite, test_raid_level_conversions);
	CU_ADD_TEST(suite, test_raid_process);
	CU_ADD_TEST(suite, test_raid_io_split);

	allocate_threads(1);
	set_thread(0);