Commit a3bc7320 authored by Ben Walker, committed by Tomasz Zawadzki

bdev/nvme: Replace spdk_bdev_io module_link with per-io ctx retry_link



We're removing module_link from spdk_bdev_io, so add a list element to
this module's per-io area.
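
The pattern at work: the module embeds a TAILQ_ENTRY in its own per-io
context (the driver_ctx area of the generic spdk_bdev_io) and converts
back with spdk_bdev_io_from_ctx() when it needs the bdev_io again. A
minimal sketch, assuming a simplified context struct; all example_*
names are illustrative, not this module's real definitions:

	#include <sys/queue.h>

	#include "spdk/bdev_module.h"

	/* Hypothetical per-io context; it must fit in the ctx size the
	 * module reports to the bdev layer. Illustrative only. */
	struct example_io_ctx {
		/* Intrusive link living in the per-io area, standing in
		 * for the old module_link inside struct spdk_bdev_io. */
		TAILQ_ENTRY(example_io_ctx) link;
	};

	static TAILQ_HEAD(, example_io_ctx) g_queued_ios =
		TAILQ_HEAD_INITIALIZER(g_queued_ios);

	/* Queue an I/O by linking its per-io context, not the bdev_io. */
	static void
	example_queue_io(struct spdk_bdev_io *bdev_io)
	{
		struct example_io_ctx *ctx =
			(struct example_io_ctx *)bdev_io->driver_ctx;

		TAILQ_INSERT_TAIL(&g_queued_ios, ctx, link);
	}

	/* Dequeue and recover the owning spdk_bdev_io from its context. */
	static struct spdk_bdev_io *
	example_dequeue_io(void)
	{
		struct example_io_ctx *ctx = TAILQ_FIRST(&g_queued_ios);

		if (ctx == NULL) {
			return NULL;
		}
		TAILQ_REMOVE(&g_queued_ios, ctx, link);
		return spdk_bdev_io_from_ctx(ctx);
	}

The conversion is cheap pointer arithmetic in both directions, so the
lists can hold nvme_bdev_io pointers throughout and only materialize the
spdk_bdev_io at completion or resubmission time.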

Change-Id: I771c6d415107cacd797b51b89f9d10b52250d39d
Signed-off-by: Ben Walker <ben@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/21937


Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
parent 5fa0d850
+28 −37
@@ -96,6 +96,9 @@ struct nvme_bdev_io {
 
 	/* Current tsc at submit time. */
 	uint64_t submit_tsc;
+
+	/* Used to put nvme_bdev_io into the list */
+	TAILQ_ENTRY(nvme_bdev_io) retry_link;
 };
 
 struct nvme_probe_skip_entry {
@@ -677,11 +680,9 @@ static void
 bdev_nvme_clear_retry_io_path(struct nvme_bdev_channel *nbdev_ch,
 			      struct nvme_io_path *io_path)
 {
-	struct spdk_bdev_io *bdev_io;
 	struct nvme_bdev_io *bio;
 
-	TAILQ_FOREACH(bdev_io, &nbdev_ch->retry_io_list, module_link) {
-		bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
+	TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
 		if (bio->io_path == io_path) {
 			bio->io_path = NULL;
 		}
@@ -1085,29 +1086,25 @@ static int
 bdev_nvme_retry_ios(void *arg)
 {
 	struct nvme_bdev_channel *nbdev_ch = arg;
-	struct spdk_bdev_io *bdev_io, *tmp_bdev_io;
-	struct nvme_bdev_io *bio;
+	struct nvme_bdev_io *bio, *tmp_bio;
 	uint64_t now, delay_us;
 
 	now = spdk_get_ticks();
 
-	TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_bdev_io) {
-		bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
+	TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
 		if (bio->retry_ticks > now) {
 			break;
 		}
 
-		TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
+		TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
 
-		bdev_nvme_retry_io(nbdev_ch, bdev_io);
+		bdev_nvme_retry_io(nbdev_ch, spdk_bdev_io_from_ctx(bio));
 	}
 
 	spdk_poller_unregister(&nbdev_ch->retry_io_poller);
 
-	bdev_io = TAILQ_FIRST(&nbdev_ch->retry_io_list);
-	if (bdev_io != NULL) {
-		bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
-
+	bio = TAILQ_FIRST(&nbdev_ch->retry_io_list);
+	if (bio != NULL) {
 		delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
 
 		nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
@@ -1121,24 +1118,20 @@ static void
 bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
 			 struct nvme_bdev_io *bio, uint64_t delay_ms)
 {
-	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
-	struct spdk_bdev_io *tmp_bdev_io;
 	struct nvme_bdev_io *tmp_bio;
 
 	bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
 
-	TAILQ_FOREACH_REVERSE(tmp_bdev_io, &nbdev_ch->retry_io_list, retry_io_head, module_link) {
-		tmp_bio = (struct nvme_bdev_io *)tmp_bdev_io->driver_ctx;
-
+	TAILQ_FOREACH_REVERSE(tmp_bio, &nbdev_ch->retry_io_list, retry_io_head, retry_link) {
 		if (tmp_bio->retry_ticks <= bio->retry_ticks) {
-			TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bdev_io, bdev_io,
-					   module_link);
+			TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bio, bio,
+					   retry_link);
 			return;
 		}
 	}
 
 	/* No earlier I/Os were found. This I/O must be the new head. */
-	TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bdev_io, module_link);
+	TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bio, retry_link);
 
 	spdk_poller_unregister(&nbdev_ch->retry_io_poller);
 
@@ -1149,11 +1142,11 @@ bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
 static void
 bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
 {
-	struct spdk_bdev_io *bdev_io, *tmp_io;
+	struct nvme_bdev_io *bio, *tmp_bio;
 
-	TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_io) {
-		TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
-		__bdev_nvme_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
+	TAILQ_FOREACH_SAFE(bio, &nbdev_ch->retry_io_list, retry_link, tmp_bio) {
+		TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
+		__bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
 	}
 
 	spdk_poller_unregister(&nbdev_ch->retry_io_poller);
@@ -1163,12 +1156,12 @@ static int
 bdev_nvme_abort_retry_io(struct nvme_bdev_channel *nbdev_ch,
 			 struct nvme_bdev_io *bio_to_abort)
 {
-	struct spdk_bdev_io *bdev_io_to_abort;
+	struct nvme_bdev_io *bio;
 
-	TAILQ_FOREACH(bdev_io_to_abort, &nbdev_ch->retry_io_list, module_link) {
-		if ((struct nvme_bdev_io *)bdev_io_to_abort->driver_ctx == bio_to_abort) {
-			TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io_to_abort, module_link);
-			__bdev_nvme_io_complete(bdev_io_to_abort, SPDK_BDEV_IO_STATUS_ABORTED, NULL);
+	TAILQ_FOREACH(bio, &nbdev_ch->retry_io_list, retry_link) {
+		if (bio == bio_to_abort) {
+			TAILQ_REMOVE(&nbdev_ch->retry_io_list, bio, retry_link);
+			__bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), SPDK_BDEV_IO_STATUS_ABORTED, NULL);
 			return 0;
 		}
 	}
@@ -1777,16 +1770,16 @@ bdev_nvme_complete_pending_resets(struct spdk_io_channel_iter *i)
 	struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
 	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(_ch);
 	enum spdk_bdev_io_status status = SPDK_BDEV_IO_STATUS_SUCCESS;
-	struct spdk_bdev_io *bdev_io;
+	struct nvme_bdev_io *bio;
 
 	if (spdk_io_channel_iter_get_ctx(i) != NULL) {
 		status = SPDK_BDEV_IO_STATUS_FAILED;
 	}
 
 	while (!TAILQ_EMPTY(&ctrlr_ch->pending_resets)) {
-		bdev_io = TAILQ_FIRST(&ctrlr_ch->pending_resets);
-		TAILQ_REMOVE(&ctrlr_ch->pending_resets, bdev_io, module_link);
-		__bdev_nvme_io_complete(bdev_io, status, NULL);
+		bio = TAILQ_FIRST(&ctrlr_ch->pending_resets);
+		TAILQ_REMOVE(&ctrlr_ch->pending_resets, bio, retry_link);
+		__bdev_nvme_io_complete(spdk_bdev_io_from_ctx(bio), status, NULL);
 	}
 
 	spdk_for_each_channel_continue(i, 0);
@@ -2767,7 +2760,6 @@ static int
 _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
 {
 	struct nvme_ctrlr_channel *ctrlr_ch;
-	struct spdk_bdev_io *bdev_io;
 	int rc;
 
 	rc = nvme_ctrlr_op(io_path->qpair->ctrlr, NVME_CTRLR_OP_RESET,
@@ -2783,8 +2775,7 @@ _bdev_nvme_reset_io(struct nvme_io_path *io_path, struct nvme_bdev_io *bio)
 		 * we don't interfere with the app framework reset strategy. i.e. we are deferring to the
 		 * upper level. If they are in the middle of a reset, we won't try to schedule another one.
 		 */
-		bdev_io = spdk_bdev_io_from_ctx(bio);
-		TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bdev_io, module_link);
+		TAILQ_INSERT_TAIL(&ctrlr_ch->pending_resets, bio, retry_link);
 		rc = 0;
 	}
 
+2 −2
@@ -199,7 +199,7 @@ struct nvme_qpair {
 
 struct nvme_ctrlr_channel {
 	struct nvme_qpair		*qpair;
-	TAILQ_HEAD(, spdk_bdev_io)	pending_resets;
+	TAILQ_HEAD(, nvme_bdev_io)	pending_resets;
 
 	struct spdk_io_channel_iter	*reset_iter;
 	struct spdk_poller		*connect_poller;
@@ -225,7 +225,7 @@ struct nvme_bdev_channel {
 	uint32_t				rr_min_io;
 	uint32_t				rr_counter;
 	STAILQ_HEAD(, nvme_io_path)		io_path_list;
-	TAILQ_HEAD(retry_io_head, spdk_bdev_io)	retry_io_list;
+	TAILQ_HEAD(retry_io_head, nvme_bdev_io)	retry_io_list;
 	struct spdk_poller			*retry_io_poller;
 };
 
+22 −19
@@ -1955,7 +1955,7 @@ test_pending_reset(void)
 	set_thread(0);
 
 	bdev_nvme_submit_request(ch1, second_bdev_io);
-	CU_ASSERT(TAILQ_FIRST(&ctrlr_ch1->pending_resets) == second_bdev_io);
+	CU_ASSERT(spdk_bdev_io_from_ctx(TAILQ_FIRST(&ctrlr_ch1->pending_resets)) == second_bdev_io);
 
 	poll_threads();
 	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
@@ -1980,7 +1980,7 @@ test_pending_reset(void)
 	set_thread(0);
 
 	bdev_nvme_submit_request(ch1, second_bdev_io);
-	CU_ASSERT(TAILQ_FIRST(&ctrlr_ch1->pending_resets) == second_bdev_io);
+	CU_ASSERT(spdk_bdev_io_from_ctx(TAILQ_FIRST(&ctrlr_ch1->pending_resets)) == second_bdev_io);
 
 	ctrlr->fail_reset = true;
 
@@ -2838,7 +2838,7 @@ test_abort(void)
 	bdev_nvme_submit_request(ch1, write_io);
 
 	CU_ASSERT(write_io->internal.in_submit_request == true);
-	CU_ASSERT(write_io == TAILQ_FIRST(&nbdev_ch1->retry_io_list));
+	CU_ASSERT(write_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch1->retry_io_list)));
 
 	/* Aborting the queued write request should succeed immediately. */
 	abort_io->internal.ch = (struct spdk_bdev_channel *)ch1;
@@ -4211,7 +4211,8 @@ test_reset_bdev_ctrlr(void)
 
 	CU_ASSERT(nvme_ctrlr1->resetting == true);
 	CU_ASSERT(nvme_ctrlr1->ctrlr_op_cb_arg == first_bio);
-	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) == second_bdev_io);
+	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) ==
+		  (struct nvme_bdev_io *)second_bdev_io->driver_ctx);
 
 	poll_threads();
 	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
@@ -4405,7 +4406,7 @@ test_retry_io_if_ana_state_is_updating(void)
 
 	CU_ASSERT(nvme_qpair->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io1 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io1 == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	/* ANA state became accessible while I/O was queued. */
 	nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
@@ -4558,7 +4559,7 @@ test_retry_io_for_io_path_error(void)
 
 	CU_ASSERT(nvme_qpair1->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	poll_threads();
 
@@ -4619,7 +4620,7 @@ test_retry_io_for_io_path_error(void)
 	CU_ASSERT(nvme_qpair1->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(nvme_qpair2->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	spdk_nvme_ctrlr_free_io_qpair(nvme_qpair1->qpair);
 	nvme_qpair1->qpair = NULL;
@@ -4785,7 +4786,7 @@ test_retry_io_count(void)
 
 	CU_ASSERT(nvme_qpair->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	poll_threads();
 
@@ -4816,7 +4817,7 @@ test_retry_io_count(void)
 
 	CU_ASSERT(nvme_qpair->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	poll_threads();
 
@@ -5029,7 +5030,7 @@ test_retry_io_for_ana_error(void)
 
 	CU_ASSERT(nvme_qpair->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 	/* I/O should be retried immediately. */
 	CU_ASSERT(bio->retry_ticks == now);
 	CU_ASSERT(nvme_ns->ana_state_updating == true);
@@ -5040,7 +5041,7 @@ test_retry_io_for_ana_error(void)
 	/* Namespace is inaccessible, and hence I/O should be queued again. */
 	CU_ASSERT(nvme_qpair->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 	/* I/O should be retried after a second if no I/O path was found but
 	 * any I/O path may become available.
 	 */
@@ -5217,8 +5218,10 @@ test_retry_io_if_ctrlr_is_resetting(void)
 
 	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
 	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io1 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
-	CU_ASSERT(bdev_io2 == TAILQ_NEXT(bdev_io1, module_link));
+	CU_ASSERT(bdev_io1 == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
+	CU_ASSERT(bdev_io2 == spdk_bdev_io_from_ctx(
+			  TAILQ_NEXT((struct nvme_bdev_io *)bdev_io1->driver_ctx,
+				     retry_link)));
 
 	poll_threads();
 	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
@@ -5234,7 +5237,7 @@ test_retry_io_if_ctrlr_is_resetting(void)
 	CU_ASSERT(nvme_qpair->qpair->num_outstanding_reqs == 1);
 	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
 	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io2 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io2 == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	poll_threads();
 
@@ -5242,7 +5245,7 @@ test_retry_io_if_ctrlr_is_resetting(void)
 	CU_ASSERT(bdev_io1->internal.in_submit_request == false);
 	CU_ASSERT(bdev_io1->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
 	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io2 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io2 == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	spdk_delay_us(1);
 
@@ -5669,7 +5672,7 @@ test_fail_path(void)
 	bdev_nvme_submit_request(ch, bdev_io);
 
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	/* After a second, the I/O should be still queued and the ctrlr should be
 	 * still recovering.
@@ -5678,7 +5681,7 @@ test_fail_path(void)
 	poll_threads();
 
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	CU_ASSERT(nvme_ctrlr->resetting == false);
 	CU_ASSERT(ctrlr->is_failed == false);
@@ -6530,7 +6533,7 @@ test_retry_io_to_same_path(void)
 
 	CU_ASSERT(io_path2->qpair->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	/* The 2nd I/O should keep caching io_path2. */
 	CU_ASSERT(bio->io_path == io_path2);
@@ -6558,7 +6561,7 @@ test_retry_io_to_same_path(void)
 
 	CU_ASSERT(io_path2->qpair->qpair->num_outstanding_reqs == 0);
 	CU_ASSERT(bdev_io->internal.in_submit_request == true);
-	CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io == spdk_bdev_io_from_ctx(TAILQ_FIRST(&nbdev_ch->retry_io_list)));
 
 	/* The 2nd I/O should keep caching io_path2. */
 	CU_ASSERT(bio->io_path == io_path2);