Commit 3950cd1b authored by Evgeniy Kochetov's avatar Evgeniy Kochetov Committed by Konrad Sztyber
Browse files

bdev/nvme: Change spdk_bdev_reset() to succeed if at least one nvme_ctrlr is reconnected



This patch fixes issue with spdk_bdev_reset() on multipath nvme bdev
with at least one but not all failed nvme_ctrlrs. For multipath nvme bdev
reset sequentially reconnects all nvme_ctrlrs. Before
this patch, if at least one of nvme_ctrlrs can't be reconnected, the whole
reset request is completed with errors. It doesn't make sense because
bdev may still be operational. This patch changes behaviour to
complete reset request successfully if at least one nvme_ctrlr was
reconnected. Reset will still be failed if all nvme_ctrlrs failed to
reconnect.

Change-Id: I992a9a44f26f45970f9daa120a93be9bb4604091
Signed-off-by: default avatarEvgeniy Kochetov <evgeniik@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/24974


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
parent f9141d27
Loading
Loading
Loading
Loading
+10 −8
Original line number Diff line number Diff line
@@ -2905,10 +2905,6 @@ _bdev_nvme_reset_io_continue(void *ctx)
	prev_io_path = bio->io_path;
	bio->io_path = NULL;

	if (bio->cpl.cdw0 != 0) {
		goto complete;
	}

	next_io_path = STAILQ_NEXT(prev_io_path, stailq);
	if (next_io_path == NULL) {
		goto complete;
@@ -2919,8 +2915,6 @@ _bdev_nvme_reset_io_continue(void *ctx)
		return;
	}

	bio->cpl.cdw0 = 1;

complete:
	bdev_nvme_reset_io_complete(bio);
}
@@ -2931,7 +2925,12 @@ bdev_nvme_reset_io_continue(void *cb_arg, int rc)
	struct nvme_bdev_io *bio = cb_arg;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);

	bio->cpl.cdw0 = (rc == 0) ? 0 : 1;
	/* Reset status is initialized as "failed". Set to "success" once we have at least one
	 * successfully reset nvme_ctrlr.
	 */
	if (rc == 0) {
		bio->cpl.cdw0 = 0;
	}

	spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io), _bdev_nvme_reset_io_continue, bio);
}
@@ -2976,7 +2975,10 @@ bdev_nvme_freeze_bdev_channel_done(struct nvme_bdev *nbdev, void *ctx, int statu

	nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));

	bio->cpl.cdw0 = 0;
	/* Initialize with failed status. With multipath it is enough to have at least one successful
	 * nvme_ctrlr reset. If there is none, reset status will remain failed.
	 */
	bio->cpl.cdw0 = 1;

	/* Reset all nvme_ctrlrs of a bdev controller sequentially. */
	io_path = STAILQ_FIRST(&nbdev_ch->io_path_list);
+160 −0
Original line number Diff line number Diff line
@@ -4304,6 +4304,166 @@ test_reset_bdev_ctrlr(void)
	CU_ASSERT(first_bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
	CU_ASSERT(second_bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* Reset of the first path succeeds, reset of the second path fails.
	 * Since we have at least one working path we should not fail RESET IO.
	 */
	ctrlr1->is_failed = true;
	curr_path1->last_failed_tsc = spdk_get_ticks();
	ctrlr2->is_failed = true;
	curr_path2->last_failed_tsc = spdk_get_ticks();
	first_bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	second_bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;

	set_thread(0);
	bdev_nvme_submit_request(ch1, first_bdev_io);

	set_thread(1);
	bdev_nvme_submit_request(ch2, second_bdev_io);

	poll_thread_times(0, 1);
	poll_thread_times(1, 1);
	poll_thread_times(0, 2);
	poll_thread_times(1, 1);
	poll_thread_times(0, 1);
	poll_thread_times(1, 1);

	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(nvme_ctrlr1->ctrlr_op_cb_arg == first_bio);
	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) ==
		  (struct nvme_bdev_io *)second_bdev_io->driver_ctx);

	ctrlr2->fail_reset = true;

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr1->is_failed == false);
	CU_ASSERT(curr_path1->last_failed_tsc == 0);
	CU_ASSERT(ctrlr2->is_failed == true);
	CU_ASSERT(curr_path2->last_failed_tsc != 0);
	CU_ASSERT(first_bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
	CU_ASSERT(second_bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* Path 2 recovers */
	ctrlr2->fail_reset = false;
	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr2->is_failed == false);
	CU_ASSERT(curr_path2->last_failed_tsc == 0);

	/* Reset of the first path fails, reset of the second path succeeds.
	 * Since we have at least one working path we should not fail RESET IO.
	 */
	ctrlr1->is_failed = true;
	curr_path1->last_failed_tsc = spdk_get_ticks();
	ctrlr2->is_failed = true;
	curr_path2->last_failed_tsc = spdk_get_ticks();
	first_bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	second_bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;

	set_thread(0);
	bdev_nvme_submit_request(ch1, first_bdev_io);

	set_thread(1);
	bdev_nvme_submit_request(ch2, second_bdev_io);

	poll_thread_times(0, 1);
	poll_thread_times(1, 1);
	poll_thread_times(0, 2);
	poll_thread_times(1, 1);
	poll_thread_times(0, 1);
	poll_thread_times(1, 1);

	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(nvme_ctrlr1->ctrlr_op_cb_arg == first_bio);
	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) ==
		  (struct nvme_bdev_io *)second_bdev_io->driver_ctx);

	ctrlr1->fail_reset = true;

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr1->is_failed == true);
	CU_ASSERT(curr_path1->last_failed_tsc != 0);
	CU_ASSERT(ctrlr2->is_failed == false);
	CU_ASSERT(curr_path2->last_failed_tsc == 0);
	CU_ASSERT(first_bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
	CU_ASSERT(second_bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* Path 1 recovers */
	ctrlr1->fail_reset = false;
	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr1->is_failed == false);
	CU_ASSERT(curr_path1->last_failed_tsc == 0);

	/* Reset of both paths fail.
	 * Since we have no working paths we should fail RESET IO.
	 */
	ctrlr1->is_failed = true;
	curr_path1->last_failed_tsc = spdk_get_ticks();
	ctrlr2->is_failed = true;
	curr_path2->last_failed_tsc = spdk_get_ticks();
	first_bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
	second_bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;

	set_thread(0);
	bdev_nvme_submit_request(ch1, first_bdev_io);

	set_thread(1);
	bdev_nvme_submit_request(ch2, second_bdev_io);

	poll_thread_times(0, 1);
	poll_thread_times(1, 1);
	poll_thread_times(0, 2);
	poll_thread_times(1, 1);
	poll_thread_times(0, 1);
	poll_thread_times(1, 1);

	CU_ASSERT(nvme_ctrlr1->resetting == true);
	CU_ASSERT(nvme_ctrlr1->ctrlr_op_cb_arg == first_bio);
	CU_ASSERT(TAILQ_FIRST(&io_path21->qpair->ctrlr_ch->pending_resets) ==
		  (struct nvme_bdev_io *)second_bdev_io->driver_ctx);

	ctrlr1->fail_reset = true;
	ctrlr2->fail_reset = true;

	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr1->is_failed == true);
	CU_ASSERT(curr_path1->last_failed_tsc != 0);
	CU_ASSERT(ctrlr2->is_failed == true);
	CU_ASSERT(curr_path2->last_failed_tsc != 0);
	CU_ASSERT(first_bdev_io->internal.status == SPDK_BDEV_IO_STATUS_FAILED);
	CU_ASSERT(second_bdev_io->internal.status == SPDK_BDEV_IO_STATUS_FAILED);

	/* Paths 1 and 2 recover */
	ctrlr1->fail_reset = false;
	ctrlr2->fail_reset = false;
	poll_threads();
	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr1->is_failed == false);
	CU_ASSERT(curr_path1->last_failed_tsc == 0);
	CU_ASSERT(ctrlr2->is_failed == false);
	CU_ASSERT(curr_path2->last_failed_tsc == 0);

	set_thread(0);

	spdk_put_io_channel(ch1);