Commit 44011493 authored by Jim Harris's avatar Jim Harris
Browse files

Revert "lib/bdev: Prevent write_zeroes from consuming extra IO resources."



This reverts commit d539bb1e.

This patch does not consider case where WRITE_ZEROES did not require
split, but WRITE does.

We could re-calculate the split flag again, but it seems we are then
back to the original problem.

So revert this for now so we can reassess the problem. This also needs
better unit tests for the tricky situations.

Signed-off-by: default avatarJim Harris <jim.harris@nvidia.com>
Change-Id: I8c9b227db2dc55352d1ba45df6667c94e85d5d5e
parent 850b22ff
Loading
Loading
Loading
Loading
+35 −18
Original line number Diff line number Diff line
@@ -408,6 +408,9 @@ static void bdev_io_push_bounce_md_buf(struct spdk_bdev_io *bdev_io);
static void bdev_io_push_bounce_data(struct spdk_bdev_io *bdev_io);
static void _bdev_io_get_accel_buf(struct spdk_bdev_io *bdev_io);

static void bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
static int bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io);

static void bdev_enable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
				struct spdk_io_channel *ch, void *_ctx);
static void bdev_enable_qos_done(struct spdk_bdev *bdev, void *_ctx, int status);
@@ -6684,7 +6687,6 @@ spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channe
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
	void *md_buf = NULL;

	if (!desc->write) {
		return -EBADF;
@@ -6727,25 +6729,9 @@ spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channe
		return 0;
	}

	/* We are now doing an emulated write zeroes, using a write command with a
	 * zeroed buffer. So just change the bdev_io that has already been allocated,
	 * instead of using bdev_write_blocks_with_md() that will allocate a new one.
	 */
	assert(_bdev_get_block_size_with_md(bdev) <= ZERO_BUFFER_SIZE);
	if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
		md_buf = (char *)g_bdev_mgr.zero_buffer +
			 spdk_bdev_get_block_size(bdev_io->bdev) * num_blocks;
	}
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.bdev.iovs = &bdev_io->iov;
	bdev_io->u.bdev.iovs[0].iov_base = g_bdev_mgr.zero_buffer;
	bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev_desc_get_block_size(desc);
	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.md_buf = md_buf;
	bdev_io->u.bdev.dif_check_flags = bdev->dif_check_flags;
	bdev_io_submit(bdev_io);

	return 0;
	return bdev_write_zero_buffer(bdev_io);
}

int
@@ -9728,6 +9714,37 @@ spdk_bdev_module_list_find(const char *name)
	return bdev_module;
}

static int
bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io)
{
	uint64_t num_blocks;
	void *md_buf = NULL;

	num_blocks = bdev_io->u.bdev.num_blocks;

	if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
		md_buf = (char *)g_bdev_mgr.zero_buffer +
			 spdk_bdev_get_block_size(bdev_io->bdev) * num_blocks;
	}

	return bdev_write_blocks_with_md(bdev_io->internal.desc,
					 spdk_io_channel_from_ctx(bdev_io->internal.ch),
					 g_bdev_mgr.zero_buffer, md_buf,
					 bdev_io->u.bdev.offset_blocks, num_blocks,
					 bdev_write_zero_buffer_done, bdev_io);
}

static void
bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *parent_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	parent_io->internal.status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
	parent_io->internal.cb(parent_io, success, parent_io->internal.caller_ctx);
}

static void
bdev_set_qos_limit_done(struct set_qos_limit_ctx *ctx, int status)
{
+0 −29
Original line number Diff line number Diff line
@@ -1164,17 +1164,6 @@ io_wait_cb(void *arg)
	entry->submitted = true;
}

static void
write_zeros_io_wait_cb(void *arg)
{
	struct bdev_ut_io_wait_entry *entry = arg;
	int rc;

	rc = spdk_bdev_write_zeroes_blocks(entry->desc, entry->io_ch, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	entry->submitted = true;
}

static void
bdev_io_types_test(void)
{
@@ -1236,7 +1225,6 @@ bdev_io_wait_test(void)
	struct spdk_bdev_opts bdev_opts = {};
	struct bdev_ut_io_wait_entry io_wait_entry;
	struct bdev_ut_io_wait_entry io_wait_entry2;
	struct bdev_ut_io_wait_entry io_wait_entry3;
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
@@ -1277,11 +1265,6 @@ bdev_io_wait_test(void)
	memcpy(&io_wait_entry2, &io_wait_entry, sizeof(io_wait_entry));
	io_wait_entry2.entry.cb_arg = &io_wait_entry2;

	/* a case about write_zero in wait queue */
	memcpy(&io_wait_entry3, &io_wait_entry, sizeof(io_wait_entry));
	io_wait_entry3.entry.cb_fn = write_zeros_io_wait_cb;
	io_wait_entry3.entry.cb_arg = &io_wait_entry3;

	/* Queue two I/O waits. */
	rc = spdk_bdev_queue_io_wait(bdev, io_ch, &io_wait_entry.entry);
	CU_ASSERT(rc == 0);
@@ -1299,18 +1282,6 @@ bdev_io_wait_test(void)
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
	CU_ASSERT(io_wait_entry2.submitted == true);

	/* Queue a write_zero I/O wait. */
	rc = spdk_bdev_queue_io_wait(bdev, io_ch, &io_wait_entry3.entry);
	CU_ASSERT(rc == 0);
	CU_ASSERT(io_wait_entry3.submitted == false);

	/* make writezero would be convert to write */
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, false);
	stub_complete_io(1);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
	CU_ASSERT(io_wait_entry3.submitted == true);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, true);

	stub_complete_io(4);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);