Commit d539bb1e authored by Yalong Wang's avatar Yalong Wang Committed by Jim Harris
Browse files

lib/bdev: Prevent write_zeroes from consuming extra IO resources.



When write_zeroes enters io_wait_queue due to resource exhaustion,
it may lead to all io resource leaked and IO hangs.

The issue occurs because spdk_bdev_write_zeroes_blocks acquires an IO resource,
but bdev_write_blocks_with_md requests another, fails to obtain it,
and re-enters the wait queue without releasing the previously acquired resource.

Change-Id: I8c9b227db2dc55352d1ba45df6667c94e85d5d5f
Signed-off-by: default avatarYalong Wang <yalong9@staff.sina.com.cn>
Reviewed-on: https://review.spdk.io/c/spdk/spdk/+/25663


Tested-by: default avatarSPDK Automated Test System <spdkbot@gmail.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: default avatarChangpeng Liu <changpeliu@tencent.com>
Reviewed-by: default avatarJim Harris <jim.harris@nvidia.com>
parent 27a3d248
Loading
Loading
Loading
Loading
+18 −35
Original line number Diff line number Diff line
@@ -408,9 +408,6 @@ static void bdev_io_push_bounce_md_buf(struct spdk_bdev_io *bdev_io);
static void bdev_io_push_bounce_data(struct spdk_bdev_io *bdev_io);
static void _bdev_io_get_accel_buf(struct spdk_bdev_io *bdev_io);

static void bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
static int bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io);

static void bdev_enable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
				struct spdk_io_channel *ch, void *_ctx);
static void bdev_enable_qos_done(struct spdk_bdev *bdev, void *_ctx, int status);
@@ -6687,6 +6684,7 @@ spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channe
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
	void *md_buf = NULL;

	if (!desc->write) {
		return -EBADF;
@@ -6729,9 +6727,25 @@ spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channe
		return 0;
	}

	/* We are now doing an emulated write zeroes, using a write command with a
	 * zeroed buffer. So just change the bdev_io that has already been allocated,
	 * instead of using bdev_write_blocks_with_md() that will allocate a new one.
	 */
	assert(_bdev_get_block_size_with_md(bdev) <= ZERO_BUFFER_SIZE);
	if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
		md_buf = (char *)g_bdev_mgr.zero_buffer +
			 spdk_bdev_get_block_size(bdev_io->bdev) * num_blocks;
	}
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.bdev.iovs = &bdev_io->iov;
	bdev_io->u.bdev.iovs[0].iov_base = g_bdev_mgr.zero_buffer;
	bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev_desc_get_block_size(desc);
	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.md_buf = md_buf;
	bdev_io->u.bdev.dif_check_flags = bdev->dif_check_flags;
	bdev_io_submit(bdev_io);

	return bdev_write_zero_buffer(bdev_io);
	return 0;
}

int
@@ -9714,37 +9728,6 @@ spdk_bdev_module_list_find(const char *name)
	return bdev_module;
}

static int
bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io)
{
	uint64_t num_blocks;
	void *md_buf = NULL;

	num_blocks = bdev_io->u.bdev.num_blocks;

	if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
		md_buf = (char *)g_bdev_mgr.zero_buffer +
			 spdk_bdev_get_block_size(bdev_io->bdev) * num_blocks;
	}

	return bdev_write_blocks_with_md(bdev_io->internal.desc,
					 spdk_io_channel_from_ctx(bdev_io->internal.ch),
					 g_bdev_mgr.zero_buffer, md_buf,
					 bdev_io->u.bdev.offset_blocks, num_blocks,
					 bdev_write_zero_buffer_done, bdev_io);
}

static void
bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *parent_io = cb_arg;

	spdk_bdev_free_io(bdev_io);

	parent_io->internal.status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
	parent_io->internal.cb(parent_io, success, parent_io->internal.caller_ctx);
}

static void
bdev_set_qos_limit_done(struct set_qos_limit_ctx *ctx, int status)
{
+29 −0
Original line number Diff line number Diff line
@@ -1164,6 +1164,17 @@ io_wait_cb(void *arg)
	entry->submitted = true;
}

static void
write_zeros_io_wait_cb(void *arg)
{
	struct bdev_ut_io_wait_entry *entry = arg;
	int rc;

	rc = spdk_bdev_write_zeroes_blocks(entry->desc, entry->io_ch, 0, 1, io_done, NULL);
	CU_ASSERT(rc == 0);
	entry->submitted = true;
}

static void
bdev_io_types_test(void)
{
@@ -1225,6 +1236,7 @@ bdev_io_wait_test(void)
	struct spdk_bdev_opts bdev_opts = {};
	struct bdev_ut_io_wait_entry io_wait_entry;
	struct bdev_ut_io_wait_entry io_wait_entry2;
	struct bdev_ut_io_wait_entry io_wait_entry3;
	int rc;

	spdk_bdev_get_opts(&bdev_opts, sizeof(bdev_opts));
@@ -1265,6 +1277,11 @@ bdev_io_wait_test(void)
	memcpy(&io_wait_entry2, &io_wait_entry, sizeof(io_wait_entry));
	io_wait_entry2.entry.cb_arg = &io_wait_entry2;

	/* a case about write_zero in wait queue */
	memcpy(&io_wait_entry3, &io_wait_entry, sizeof(io_wait_entry));
	io_wait_entry3.entry.cb_fn = write_zeros_io_wait_cb;
	io_wait_entry3.entry.cb_arg = &io_wait_entry3;

	/* Queue two I/O waits. */
	rc = spdk_bdev_queue_io_wait(bdev, io_ch, &io_wait_entry.entry);
	CU_ASSERT(rc == 0);
@@ -1282,6 +1299,18 @@ bdev_io_wait_test(void)
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
	CU_ASSERT(io_wait_entry2.submitted == true);

	/* Queue a write_zero I/O wait. */
	rc = spdk_bdev_queue_io_wait(bdev, io_ch, &io_wait_entry3.entry);
	CU_ASSERT(rc == 0);
	CU_ASSERT(io_wait_entry3.submitted == false);

	/* make writezero would be convert to write */
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, false);
	stub_complete_io(1);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 4);
	CU_ASSERT(io_wait_entry3.submitted == true);
	ut_enable_io_type(SPDK_BDEV_IO_TYPE_WRITE_ZEROES, true);

	stub_complete_io(4);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);