Commit 52a41348 authored by Jinlong Chen's avatar Jinlong Chen Committed by Jim Harris
Browse files

bdev: do not retry nomem I/Os during aborting them



If a bdev module reports ENOMEM for the first I/O in an I/O channel, all
subsequent I/Os will be queued in the nomem list. In this case,
io_outstanding and nomem_threshold remain 0, allowing nomem I/Os
to be resubmitted unconditionally.

Now, an incoming reset could trigger nomem I/O resubmission when aborting
nomem I/Os via the following path:

```
bdev_reset_freeze_channel
    -> bdev_abort_all_queued_io
        -> spdk_bdev_io_complete
            -> _bdev_io_handle_no_mem
                -> bdev_ch_retry_io
```

Both bdev_abort_all_queued_io and bdev_ch_retry_io modify the nomem_io
list in this path. Thus, there might be I/Os that are first submitted
to the underlying device by bdev_ch_retry_io and then aborted by
bdev_abort_all_queued_io, resulting in double completion of these I/Os
later.

To fix this, just do not resubmit nomem I/Os when aborting is in
progress.

Change-Id: I1f66262216885779d1a883ec9250d58a13d8c228
Signed-off-by: default avatarJinlong Chen <chenjinlong.cjl@alibaba-inc.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/25522


Community-CI: Mellanox Build Bot
Reviewed-by: default avatarJim Harris <jim.harris@nvidia.com>
Community-CI: Community CI Samsung <spdk.community.ci.samsung@gmail.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
parent d1394291
Loading
Loading
Loading
Loading
+29 −2
Original line number Diff line number Diff line
@@ -251,6 +251,12 @@ struct spdk_bdev_shared_resource {
	 */
	uint64_t		nomem_threshold;

	/*
	 * Indicate whether aborting nomem I/Os is in progress.
	 * If true, we should not touch the nomem_io list on I/O completions.
	 */
	bool			nomem_abort_in_progress;

	/* I/O channel allocated by a bdev module */
	struct spdk_io_channel	*shared_ch;

@@ -1623,6 +1629,13 @@ bdev_shared_ch_retry_io(struct spdk_bdev_shared_resource *shared_resource)
{
	struct spdk_bdev_io *bdev_io;

	if (shared_resource->nomem_abort_in_progress) {
		/**
		 * We are aborting nomem I/Os, do not touch nomem_io list now.
		 */
		return;
	}

	if (shared_resource->io_outstanding > shared_resource->nomem_threshold) {
		/*
		 * Allow some more I/O to complete before retrying the nomem_io queue.
@@ -4592,6 +4605,16 @@ bdev_abort_all_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
	}
}

/*
 * Abort every I/O queued on this channel's shared-resource nomem_io list.
 *
 * nomem_abort_in_progress is raised for the duration of the abort so that
 * completion processing triggered by the aborts does not touch the
 * nomem_io list (bdev_shared_ch_retry_io returns early while the flag is
 * set), preventing an I/O from being resubmitted while we are in the
 * middle of aborting it.
 */
static inline void
bdev_abort_all_nomem_io(struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;

	/* Block nomem retries while the list is walked and aborted. */
	shared_resource->nomem_abort_in_progress = true;
	bdev_abort_all_queued_io(&shared_resource->nomem_io, ch);
	shared_resource->nomem_abort_in_progress = false;
}

static bool
bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_io *bio_to_abort)
{
@@ -4880,7 +4903,7 @@ bdev_channel_abort_queued_ios(struct spdk_bdev_channel *ch)
	struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;
	struct spdk_bdev_mgmt_channel *mgmt_ch = shared_resource->mgmt_ch;

	bdev_abort_all_queued_io(&shared_resource->nomem_io, ch);
	bdev_abort_all_nomem_io(ch);
	bdev_abort_all_buf_io(mgmt_ch, ch);
}

@@ -6924,7 +6947,11 @@ bdev_reset_freeze_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bd

	channel->flags |= BDEV_CH_RESET_IN_PROGRESS;

	bdev_abort_all_queued_io(&shared_resource->nomem_io, channel);
	/**
	 * Abort nomem I/Os first so that aborting other queued I/Os won't resubmit
	 * nomem I/Os of this channel.
	 */
	bdev_abort_all_nomem_io(channel);
	bdev_abort_all_buf_io(mgmt_channel, channel);

	if ((channel->flags & BDEV_CH_QOS_ENABLED) != 0) {
+61 −0
Original line number Diff line number Diff line
@@ -1664,6 +1664,66 @@ enomem_multi_io_target(void)
	teardown_test();
}

/*
 * Regression test: a reset that aborts queued nomem I/Os must not cause
 * those I/Os to be resubmitted while the abort is in progress. Without the
 * nomem_abort_in_progress guard, an I/O could be resubmitted to the device
 * by the retry path and then aborted from the nomem list as well, leading
 * to a double completion.
 */
static void
enomem_retry_during_abort(void)
{
	struct spdk_io_channel *io_ch;
	struct spdk_bdev_channel *bdev_ch;
	struct spdk_bdev_shared_resource *shared_resource;
	struct ut_bdev_channel *ut_ch;
	const uint32_t IO_ARRAY_SIZE = 16;
	enum spdk_bdev_io_status status[IO_ARRAY_SIZE], status_reset;
	uint32_t i;
	int rc;

	setup_test();

	set_thread(0);
	io_ch = spdk_bdev_get_io_channel(g_desc);
	bdev_ch = spdk_io_channel_get_ctx(io_ch);
	shared_resource = bdev_ch->shared_resource;
	ut_ch = spdk_io_channel_get_ctx(bdev_ch->channel);
	/* Make the stub device report ENOMEM for every submission. */
	ut_ch->avail_cnt = 0;

	/**
	 * Submit a number of IOs.
	 * All of these I/Os will queue in nomem_io list due to ut_ch->avail_cnt == 0.
	 */
	for (i = 0; i < IO_ARRAY_SIZE; i++) {
		status[i] = SPDK_BDEV_IO_STATUS_PENDING;
		rc = spdk_bdev_read_blocks(g_desc, io_ch, NULL, 0, 1, enomem_done, &status[i]);
		CU_ASSERT(rc == 0);
	}
	/* Nothing reached the device: everything sits on nomem_io. */
	CU_ASSERT(bdev_io_tailq_cnt(&shared_resource->nomem_io) == IO_ARRAY_SIZE);
	CU_ASSERT(shared_resource->io_outstanding == 0);

	/* Allow some I/Os to be submitted. */
	ut_ch->avail_cnt = IO_ARRAY_SIZE / 2;

	/* Submit a reset to abort the I/Os. */
	status_reset = SPDK_BDEV_IO_STATUS_PENDING;
	rc = spdk_bdev_reset(g_desc, io_ch, enomem_done, &status_reset);
	poll_threads();
	CU_ASSERT(rc == 0);

	/* Complete the reset. */
	stub_complete_io(g_bdev.io_target, 1);
	poll_threads();
	CU_ASSERT(status_reset == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* All I/Os are aborted. */
	for (i = 0; i < IO_ARRAY_SIZE; i++) {
		CU_ASSERT(status[i] == SPDK_BDEV_IO_STATUS_FAILED);
	}

	/* No I/O was resubmitted or left queued by the abort. */
	CU_ASSERT(bdev_io_tailq_cnt(&shared_resource->nomem_io) == 0);
	CU_ASSERT(shared_resource->io_outstanding == 0);

	spdk_put_io_channel(io_ch);
	poll_threads();
	teardown_test();
}

static void
qos_dynamic_enable_done(void *cb_arg, int status)
{
@@ -2830,6 +2890,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, enomem_multi_bdev);
	CU_ADD_TEST(suite, enomem_multi_bdev_unregister);
	CU_ADD_TEST(suite, enomem_multi_io_target);
	CU_ADD_TEST(suite, enomem_retry_during_abort);
	CU_ADD_TEST(suite, qos_dynamic_enable);
	CU_ADD_TEST(suite, bdev_histograms_mt);
	CU_ADD_TEST(suite, bdev_set_io_timeout_mt);