Commit 4f860d7e authored by Jim Harris
Browse files

bdev: only apply split_on_optimal_io_boundary to R/W



Splitting a 1TB unmap into individual 64KB unmap commands
(for a RAID volume with 64KB strip size) would be awful -
the RAID module can be much smarter about this.

So back out the changes for splitting I/O without payload.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I24fe6d911f4e3c9db4b2cb5d66c7236a5596e0d9

Reviewed-on: https://review.gerrithub.io/424103


Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
parent eb6a2cb8
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -254,8 +254,11 @@ struct spdk_bdev {
	/**
	 * Specifies whether the optimal_io_boundary is mandatory or
	 * only advisory.  If set to true, the bdev layer will split
	 * READ and WRITE I/O that span the optimal_io_boundary before
	 * submitting them to the bdev module.
	 *
	 * Note that this field cannot be used to force splitting of
	 * UNMAP, WRITE_ZEROES or FLUSH I/O.
	 */
	bool split_on_optimal_io_boundary;

+13 −65
Original line number Diff line number Diff line
@@ -1056,20 +1056,21 @@ _spdk_bdev_io_type_can_split(uint8_t type)
	assert(type != SPDK_BDEV_IO_TYPE_INVALID);
	assert(type < SPDK_BDEV_NUM_IO_TYPES);

	switch (type) {
	case SPDK_BDEV_IO_TYPE_RESET:
	case SPDK_BDEV_IO_TYPE_NVME_ADMIN:
	case SPDK_BDEV_IO_TYPE_NVME_IO:
	case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
		/* These types of bdev_io do not specify an LBA offset/length. */
		return false;
	default:
	/* Only split READ and WRITE I/O.  Theoretically other types of I/O like
	 * UNMAP could be split, but these types of I/O are typically much larger
	 * in size (sometimes the size of the entire block device), and the bdev
	 * module can more efficiently split these types of I/O.  Plus those types
	 * of I/O do not have a payload, which makes the splitting process simpler.
	 */
	if (type == SPDK_BDEV_IO_TYPE_READ || type == SPDK_BDEV_IO_TYPE_WRITE) {
		return true;
	} else {
		return false;
	}
}

static bool
_spdk_bdev_io_spans_boundary(struct spdk_bdev_io *bdev_io)
_spdk_bdev_io_should_split(struct spdk_bdev_io *bdev_io)
{
	uint64_t start_stripe, end_stripe;
	uint32_t io_boundary = bdev_io->bdev->optimal_io_boundary;
@@ -1180,51 +1181,6 @@ _spdk_bdev_io_split_with_payload(void *_bdev_io)
	}
}

static void
_spdk_bdev_io_split_no_payload(void *_bdev_io)
{
	/* Split one boundary-sized child out of a payload-less parent I/O
	 * (UNMAP, WRITE_ZEROES or FLUSH) and submit it.  Called initially from
	 * _spdk_bdev_io_split() and again from _spdk_bdev_io_split_done() until
	 * split_remaining_num_blocks reaches zero.
	 */
	struct spdk_bdev_io *bdev_io = _bdev_io;
	uint64_t current_offset, remaining;
	uint32_t to_next_boundary;
	int rc;

	remaining = bdev_io->u.bdev.split_remaining_num_blocks;
	current_offset = bdev_io->u.bdev.split_current_offset_blocks;

	/* Child length is the distance to the next optimal_io_boundary,
	 * clamped to whatever is left of the parent I/O.
	 */
	to_next_boundary = _to_next_boundary(current_offset, bdev_io->bdev->optimal_io_boundary);
	to_next_boundary = spdk_min(remaining, to_next_boundary);

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_UNMAP) {
		rc = spdk_bdev_unmap_blocks(bdev_io->internal.desc,
					    spdk_io_channel_from_ctx(bdev_io->internal.ch),
					    current_offset, to_next_boundary,
					    _spdk_bdev_io_split_done, bdev_io);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE_ZEROES) {
		rc = spdk_bdev_write_zeroes_blocks(bdev_io->internal.desc,
						   spdk_io_channel_from_ctx(bdev_io->internal.ch),
						   current_offset, to_next_boundary,
						   _spdk_bdev_io_split_done, bdev_io);
	} else {
		assert(bdev_io->type == SPDK_BDEV_IO_TYPE_FLUSH);
		rc = spdk_bdev_flush_blocks(bdev_io->internal.desc,
					    spdk_io_channel_from_ctx(bdev_io->internal.ch),
					    current_offset, to_next_boundary,
					    _spdk_bdev_io_split_done, bdev_io);
	}

	if (rc == 0) {
		/* Child submitted; advance the split cursor for the next round. */
		bdev_io->u.bdev.split_current_offset_blocks += to_next_boundary;
		bdev_io->u.bdev.split_remaining_num_blocks -= to_next_boundary;
	} else {
		/* -ENOMEM: no bdev_io available.  Queue a retry that resumes via
		 * this function.  (Bug fix: the retry callback previously pointed
		 * at _spdk_bdev_io_split_with_payload, which would resume a
		 * payload-less I/O through the iovec-based split path.)
		 */
		assert(rc == -ENOMEM);
		bdev_io->internal.waitq_entry.bdev = bdev_io->bdev;
		bdev_io->internal.waitq_entry.cb_fn = _spdk_bdev_io_split_no_payload;
		bdev_io->internal.waitq_entry.cb_arg = bdev_io;
		spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch),
					&bdev_io->internal.waitq_entry);
	}
}

static void
_spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
@@ -1248,11 +1204,7 @@ _spdk_bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_ar
	 * Continue with the splitting process.  This function will complete the parent I/O if the
	 * splitting is done.
	 */
	if (parent_io->type == SPDK_BDEV_IO_TYPE_READ || parent_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
	_spdk_bdev_io_split_with_payload(parent_io);
	} else {
		_spdk_bdev_io_split_no_payload(parent_io);
	}
}

static void
@@ -1263,11 +1215,7 @@ _spdk_bdev_io_split(struct spdk_bdev_io *bdev_io)
	bdev_io->u.bdev.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks;
	bdev_io->u.bdev.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks;

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ || bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
	_spdk_bdev_io_split_with_payload(bdev_io);
	} else {
		_spdk_bdev_io_split_no_payload(bdev_io);
	}
}

static void
@@ -1314,7 +1262,7 @@ spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
	assert(thread != NULL);
	assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);

	if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_spans_boundary(bdev_io)) {
	if (bdev->split_on_optimal_io_boundary && _spdk_bdev_io_should_split(bdev_io)) {
		_spdk_bdev_io_split(bdev_io);
		return;
	}
+11 −43
Original line number Diff line number Diff line
@@ -747,25 +747,25 @@ bdev_io_spans_boundary_test(void)
	bdev_io.bdev = &bdev;

	/* bdev has no optimal_io_boundary set - so this should return false. */
	CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
	CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);

	bdev.optimal_io_boundary = 32;
	bdev_io.type = SPDK_BDEV_IO_TYPE_RESET;

	/* RESETs are not based on LBAs - so this should return false. */
	CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
	CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);

	bdev_io.type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io.u.bdev.offset_blocks = 0;
	bdev_io.u.bdev.num_blocks = 32;

	/* This I/O run right up to, but does not cross, the boundary - so this should return false. */
	CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == false);
	CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == false);

	bdev_io.u.bdev.num_blocks = 33;

	/* This I/O spans a boundary. */
	CU_ASSERT(_spdk_bdev_io_spans_boundary(&bdev_io) == true);
	CU_ASSERT(_spdk_bdev_io_should_split(&bdev_io) == true);
}

static void
@@ -907,81 +907,49 @@ bdev_io_split(void)
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);

	/* Test a WRITE_ZEROES that needs to be split.  This is an I/O type that does not have iovecs.
	 * Have this I/O end right on a boundary.  Use a non-standard optimal_io_boundary to test the
	 * non-power-of-2 path.
	/* Test a WRITE_ZEROES that would span an I/O boundary.  WRITE_ZEROES should not be
	 * split, so test that.
	 */
	bdev->optimal_io_boundary = 15;
	g_io_done = false;
	g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
	g_bdev_ut_channel->expected_offset = 9;
	g_bdev_ut_channel->expected_length = 6;
	g_bdev_ut_channel->expected_length = 36;
	g_bdev_ut_channel->expected_iovcnt = 0;

	rc = spdk_bdev_write_zeroes_blocks(desc, io_ch, 9, 36, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	g_bdev_ut_channel->expected_offset = 15;
	g_bdev_ut_channel->expected_length = 15;

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	g_bdev_ut_channel->expected_offset = 30;
	g_bdev_ut_channel->expected_length = 15;

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);

	/* Test an UNMAP that needs to be split.  This is an I/O type that does not have iovecs. */
	/* Test an UNMAP.  This should also not be split. */
	bdev->optimal_io_boundary = 16;
	g_io_done = false;
	g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_UNMAP;
	g_bdev_ut_channel->expected_offset = 15;
	g_bdev_ut_channel->expected_length = 1;
	g_bdev_ut_channel->expected_length = 2;
	g_bdev_ut_channel->expected_iovcnt = 0;

	rc = spdk_bdev_unmap_blocks(desc, io_ch, 15, 2, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	g_bdev_ut_channel->expected_offset = 16;
	g_bdev_ut_channel->expected_length = 1;

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);

	/* Test an FLUSH that needs to be split.  This is an I/O type that does not have iovecs. */
	/* Test a FLUSH.  This should also not be split. */
	bdev->optimal_io_boundary = 16;
	g_io_done = false;
	g_bdev_ut_channel->expected_iotype = SPDK_BDEV_IO_TYPE_FLUSH;
	g_bdev_ut_channel->expected_offset = 15;
	g_bdev_ut_channel->expected_length = 1;
	g_bdev_ut_channel->expected_length = 2;
	g_bdev_ut_channel->expected_iovcnt = 0;

	rc = spdk_bdev_flush_blocks(desc, io_ch, 15, 2, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	g_bdev_ut_channel->expected_offset = 16;
	g_bdev_ut_channel->expected_length = 1;

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);