Commit 2e6aac52 authored by Jim Harris
Browse files

bdev/raid: use split_on_optimal_io_boundary



Set the bdev->optimal_io_boundary to the strip size, and
set split_on_optimal_io_boundary = true.  This will ensure
that all I/O submitted to the raid module does not cross
a strip boundary, meaning it does not need to be split
across multiple member disks.

This is a step towards removing the iovcnt == 1
limitation.  Further improvements and simplifications
will be made in future patches before removing this
restriction.

Unit tests need to be adjusted here to not span
boundaries either.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I08943805def673288f552a1b7662a4fbe16f25eb

Reviewed-on: https://review.gerrithub.io/423323


Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent 4f860d7e
Loading
Loading
Loading
Loading
+53 −67
Original line number Diff line number Diff line
@@ -327,32 +327,18 @@ raid_bdev_submit_children(struct spdk_bdev_io *bdev_io,
	uint64_t                      pd_lba;
	uint64_t                      pd_blocks;
	uint32_t                      pd_idx;
	int                           ret;
	int                           ret = 0;

	for (uint64_t strip = cur_strip; strip <= end_strip; strip++) {
		/*
		 * For each strip of parent bdev io, process for each strip and submit
		 * child io to bdev layer. Calculate base bdev level start lba, length
		 * and buffer for this child io
		 */
		pd_strip = strip / raid_bdev->num_base_bdevs;
		pd_idx = strip % raid_bdev->num_base_bdevs;
		if (strip == start_strip) {
	if (start_strip != end_strip) {
		SPDK_ERRLOG("I/O spans strip boundary\n");
		assert(false);
	}

	pd_strip = start_strip / raid_bdev->num_base_bdevs;
	pd_idx = start_strip % raid_bdev->num_base_bdevs;
	offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
	pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
			if (strip == end_strip) {
	pd_blocks = bdev_io->u.bdev.num_blocks;
			} else {
				pd_blocks = raid_bdev->strip_size - offset_in_strip;
			}
		} else if (strip == end_strip) {
			pd_lba = pd_strip << raid_bdev->strip_size_shift;
			pd_blocks = ((bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) &
				     (raid_bdev->strip_size - 1)) + 1;
		} else {
			pd_lba = pd_strip << raid_bdev->strip_size_shift;
			pd_blocks = raid_bdev->strip_size;
		}
	raid_io->splits_comp_outstanding++;
	assert(raid_io->splits_pending);
	raid_io->splits_pending--;
@@ -371,7 +357,6 @@ raid_bdev_submit_children(struct spdk_bdev_io *bdev_io,
					    raid_ch->base_channel[pd_idx],
					    buf, pd_lba, pd_blocks, raid_bdev_io_completion,
					    bdev_io);

	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_write_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
					     raid_ch->base_channel[pd_idx],
@@ -395,8 +380,6 @@ raid_bdev_submit_children(struct spdk_bdev_io *bdev_io,
		raid_io->splits_pending++;
		return ret;
	}
		buf += (pd_blocks << raid_bdev->blocklen_shift);
	}

	return 0;
}
@@ -1257,11 +1240,14 @@ raid_bdev_configure(struct raid_bdev *raid_bdev)
	raid_bdev_gen = &raid_bdev->bdev;
	raid_bdev_gen->write_cache = 0;
	raid_bdev_gen->blocklen = blocklen;
	raid_bdev_gen->optimal_io_boundary = 0;

	raid_bdev_gen->ctxt = raid_bdev;
	raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
	raid_bdev_gen->module = &g_raid_if;
	raid_bdev->strip_size = (raid_bdev->strip_size * 1024) / blocklen;
	raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
	raid_bdev->blocklen_shift = spdk_u32log2(blocklen);
	raid_bdev_gen->optimal_io_boundary = raid_bdev->strip_size;
	raid_bdev_gen->split_on_optimal_io_boundary = true;

	/*
	 * RAID bdev logic is for striping so take the minimum block count based
+14 −13
Original line number Diff line number Diff line
@@ -863,7 +863,8 @@ verify_raid_bdev(struct rpc_construct_raid_bdev *r, bool presence, uint32_t raid
			CU_ASSERT(strcmp(pbdev->bdev.product_name, "Pooled Device") == 0);
			CU_ASSERT(pbdev->bdev.write_cache == 0);
			CU_ASSERT(pbdev->bdev.blocklen == g_block_len);
			CU_ASSERT(pbdev->bdev.optimal_io_boundary == 0);
			CU_ASSERT(pbdev->bdev.optimal_io_boundary == pbdev->strip_size);
			CU_ASSERT(pbdev->bdev.split_on_optimal_io_boundary == true);
			CU_ASSERT(pbdev->bdev.ctxt == pbdev);
			CU_ASSERT(pbdev->bdev.fn_table == &g_raid_bdev_fn_table);
			CU_ASSERT(pbdev->bdev.module == &g_raid_if);
@@ -1375,9 +1376,9 @@ test_write_io(void)
	for (count = 0; count < g_max_qd; count++) {
		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
		io_len = (rand() % g_max_io_size) + 1;
		io_len = (rand() % g_strip_size) + 1;
		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE);
		lba += io_len;
		lba += g_strip_size;
		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
		g_io_output_index = 0;
		raid_bdev_submit_request(ch, bdev_io);
@@ -1454,9 +1455,9 @@ test_read_io(void)
	for (count = 0; count < g_max_qd; count++) {
		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
		io_len = (rand() % g_max_io_size) + 1;
		io_len = (rand() % g_strip_size) + 1;
		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_READ);
		lba += io_len;
		lba += g_strip_size;
		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
		g_io_output_index = 0;
		raid_bdev_submit_request(ch, bdev_io);
@@ -1533,9 +1534,9 @@ test_io_failure(void)
	for (count = 0; count < 1; count++) {
		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
		io_len = (rand() % g_max_io_size) + 1;
		io_len = (rand() % g_strip_size) + 1;
		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_INVALID);
		lba += io_len;
		lba += g_strip_size;
		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
		g_io_output_index = 0;
		raid_bdev_submit_request(ch, bdev_io);
@@ -1551,9 +1552,9 @@ test_io_failure(void)
	for (count = 0; count < 1; count++) {
		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
		io_len = (rand() % g_max_io_size) + 1;
		io_len = (rand() % g_strip_size) + 1;
		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE);
		lba += io_len;
		lba += g_strip_size;
		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
		g_io_output_index = 0;
		raid_bdev_submit_request(ch, bdev_io);
@@ -1635,10 +1636,10 @@ test_io_waitq(void)
		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
		TAILQ_INSERT_TAIL(&head_io, bdev_io, module_link);
		io_len = (rand() % g_max_io_size) + 1;
		io_len = (rand() % g_strip_size) + 1;
		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, SPDK_BDEV_IO_TYPE_WRITE);
		g_bdev_io_submit_status = -ENOMEM;
		lba += io_len;
		lba += g_strip_size;
		raid_bdev_submit_request(ch, bdev_io);
	}

@@ -1869,7 +1870,7 @@ test_multi_raid_with_io(void)
	for (count = 0; count < g_max_qd; count++) {
		bdev_io = calloc(1, sizeof(struct spdk_bdev_io) + sizeof(struct raid_bdev_io));
		SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
		io_len = (rand() % g_max_io_size) + 1;
		io_len = (rand() % g_strip_size) + 1;
		iotype = (rand() % 2) ? SPDK_BDEV_IO_TYPE_WRITE : SPDK_BDEV_IO_TYPE_READ;
		memset(g_io_output, 0, (g_max_io_size / g_strip_size) + 1 * sizeof(struct io_output));
		g_io_output_index = 0;
@@ -1882,7 +1883,7 @@ test_multi_raid_with_io(void)
			}
		}
		bdev_io_initialize(bdev_io, &pbdev->bdev, lba, io_len, iotype);
		lba += io_len;
		lba += g_strip_size;
		CU_ASSERT(pbdev != NULL);
		raid_bdev_submit_request(ch_random, bdev_io);
		verify_io(bdev_io, g_max_base_drives, ch_ctx_random, pbdev,