Commit 494eb6e5 authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Tomasz Zawadzki
Browse files

bdev: Fix race among bdev_reset(), bdev_close(), and bdev_unregister()



There is a race condition when a bdev is unregistered while reset is
submitted from the upper layer very frequently.

spdk_io_device_unregister() may fail because it is called while
spdk_for_each_channel() is processed.

    spdk_io_device_unregister io_device bdev_Nvme0n1 (0x7f4be8053aa1)
    has 1 for_each calls outstanding

To avoid this failure, defer calling spdk_io_device_unregister() until
reset completes if reset is in progress when unregistration is ready
to do, and then reset completion calls spdk_io_device_unregister()
later.

A bdev cannot be opened if it is already deleting. So we do not need
to hold mutex.

Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Ida1681ba9f3096670ff62274b35bb3e4fd69398a
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12222


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarMichael Haeuptle <michaelhaeuptle@gmail.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarPaul Luse <paul.e.luse@intel.com>
parent 2a6a6448
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -5760,10 +5760,13 @@ bdev_io_complete(void *ctx)
			     bdev_io->internal.caller_ctx);
}

static void bdev_destroy_cb(void *io_device);

static void
bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
{
	struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i);
	struct spdk_bdev *bdev = bdev_io->bdev;

	if (bdev_io->u.reset.ch_ref != NULL) {
		spdk_put_io_channel(bdev_io->u.reset.ch_ref);
@@ -5771,6 +5774,11 @@ bdev_reset_complete(struct spdk_io_channel_iter *i, int status)
	}

	bdev_io_complete(bdev_io);

	if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING &&
	    TAILQ_EMPTY(&bdev->internal.open_descs)) {
		spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
	}
}

static void
@@ -6243,6 +6251,13 @@ bdev_unregister_unsafe(struct spdk_bdev *bdev)
		bdev_alias_del(bdev, uuid, bdev_name_del_unsafe);

		spdk_notify_send("bdev_unregister", spdk_bdev_get_name(bdev));

		if (bdev->internal.reset_in_progress != NULL) {
			/* If reset is in progress, let the completion callback for reset
			 * unregister the bdev.
			 */
			rc = -EBUSY;
		}
	}

	return rc;
+68 −1
Original line number Diff line number Diff line
@@ -204,7 +204,6 @@ stub_complete_io(void *io_target, uint32_t num_to_complete)
		ch->avail_cnt++;
		num_completed++;
	}

	spdk_put_io_channel(_ch);
	return num_completed;
}
@@ -1983,6 +1982,73 @@ lock_lba_range_then_submit_io(void)
	teardown_test();
}

/* spdk_bdev_reset() freezes and unfreezes I/O channels by using spdk_for_each_channel().
 * spdk_bdev_unregister() calls spdk_io_device_unregister() in the end. However
 * spdk_io_device_unregister() fails if it is called while executing spdk_for_each_channel().
 * Hence, in this case, spdk_io_device_unregister() is deferred until spdk_bdev_reset()
 * completes. Test this behavior.
 */
static void
unregister_during_reset(void)
{
	struct spdk_io_channel *io_ch[2];
	bool done_reset = false, done_unregister = false;
	int rc;

	setup_test();
	set_thread(0);

	io_ch[0] = spdk_bdev_get_io_channel(g_desc);
	SPDK_CU_ASSERT_FATAL(io_ch[0] != NULL);

	set_thread(1);

	io_ch[1] = spdk_bdev_get_io_channel(g_desc);
	SPDK_CU_ASSERT_FATAL(io_ch[1] != NULL);

	set_thread(0);

	CU_ASSERT(g_bdev.bdev.internal.reset_in_progress == NULL);

	rc = spdk_bdev_reset(g_desc, io_ch[0], reset_done, &done_reset);
	CU_ASSERT(rc == 0);

	set_thread(0);

	poll_thread_times(0, 1);

	spdk_bdev_close(g_desc);
	spdk_bdev_unregister(&g_bdev.bdev, _bdev_unregistered, &done_unregister);

	CU_ASSERT(done_reset == false);
	CU_ASSERT(done_unregister == false);

	poll_threads();

	stub_complete_io(g_bdev.io_target, 0);

	poll_threads();

	CU_ASSERT(done_reset == true);
	CU_ASSERT(done_unregister == false);

	spdk_put_io_channel(io_ch[0]);

	set_thread(1);

	spdk_put_io_channel(io_ch[1]);

	poll_threads();

	CU_ASSERT(done_unregister == true);

	/* Restore the original g_bdev so that we can use teardown_test(). */
	set_thread(0);
	register_bdev(&g_bdev, "ut_bdev", &g_io_device);
	spdk_bdev_open_ext("ut_bdev", true, _bdev_event_cb, NULL, &g_desc);
	teardown_test();
}

int
main(int argc, char **argv)
{
@@ -2009,6 +2075,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, bdev_histograms_mt);
	CU_ADD_TEST(suite, bdev_set_io_timeout_mt);
	CU_ADD_TEST(suite, lock_lba_range_then_submit_io);
	CU_ADD_TEST(suite, unregister_during_reset);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();