Commit 6fdc71ec authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Tomasz Zawadzki
Browse files

lib/thread: Defer exiting thread if thread is unregistering io_device



Current SPDK thread library has a issue which occurs if there is
a race between exiting thread and unregistering io_device.

For example, there are two threads. Thread 1 registers a device
and thread 2 gets a channel of the device. Then if thread 1 starts
exiting and unregisters the device, and then thread 2 puts the channel,
thread 2 sends a message to thread 1 to complete releasing the device,
thread 1 already moved exited. Hence thread 2 failed to send the
message.

This patch fixes the race issue. The code is verified by adding
a unit test case.

In detail, add a count, unregistering_dev, to struct spdk_thread,
increment it if a callback is specified to spdk_io_device_unregister(),
and then decrement it in _finish_unregister(), and thread_exit()
checks if it is zero.

The contents of struct spdk_thread is changed but it is not public
data structure, and hence suppress it for ABI testing.

Signed-off-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: Idf5faa55335c3ea89f47ccce32687a6be2e26c68
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5796


Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatar <dongx.yi@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
parent ad583422
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -119,6 +119,7 @@ struct spdk_thread {
	spdk_msg_fn			critical_msg;
	uint64_t			id;
	enum spdk_thread_state		state;
	int				pending_unregister_count;

	TAILQ_HEAD(, spdk_io_channel)	io_channels;
	TAILQ_ENTRY(spdk_thread)	tailq;
+19 −1
Original line number Diff line number Diff line
@@ -393,6 +393,13 @@ thread_exit(struct spdk_thread *thread, uint64_t now)
		return;
	}

	if (thread->pending_unregister_count > 0) {
		SPDK_INFOLOG(thread,
			     "thread %s is still unregistering io_devices\n",
			     thread->name);
		return;
	}

exited:
	thread->state = SPDK_THREAD_STATE_EXITED;
}
@@ -1394,9 +1401,16 @@ static void
_finish_unregister(void *arg)
{
	struct io_device *dev = arg;
	struct spdk_thread *thread;

	thread = spdk_get_thread();
	assert(thread == dev->unregister_thread);

	SPDK_DEBUGLOG(thread, "Finishing unregistration of io_device %s (%p) on thread %s\n",
		      dev->name, dev->io_device, dev->unregister_thread->name);
		      dev->name, dev->io_device, thread->name);

	assert(thread->pending_unregister_count > 0);
	thread->pending_unregister_count--;

	dev->unregister_cb(dev->io_device);
	free(dev);
@@ -1463,6 +1477,10 @@ spdk_io_device_unregister(void *io_device, spdk_io_device_unregister_cb unregist
	SPDK_DEBUGLOG(thread, "Unregistering io_device %s (%p) from thread %s\n",
		      dev->name, dev->io_device, thread->name);

	if (unregister_cb) {
		thread->pending_unregister_count++;
	}

	if (refcnt > 0) {
		/* defer deletion */
		return;
+2 −0
Original line number Diff line number Diff line
@@ -58,6 +58,8 @@ function confirm_abi_deps() {
	name = spdk_env_opts
[suppress_type]
	name = spdk_app_opts
[suppress_type]
	name = spdk_thread
EOF

	for object in "$libdir"/libspdk_*.so; do
+93 −0
Original line number Diff line number Diff line
@@ -1237,6 +1237,98 @@ nested_channel(void)
	CU_ASSERT(TAILQ_EMPTY(&g_threads));
}

static int
create_cb2(void *io_device, void *ctx_buf)
{
	uint64_t *devcnt = (uint64_t *)io_device;

	*devcnt += 1;

	return 0;
}

static void
destroy_cb2(void *io_device, void *ctx_buf)
{
	uint64_t *devcnt = (uint64_t *)io_device;

	CU_ASSERT(*devcnt > 0);
	*devcnt -= 1;
}

static void
unregister_cb2(void *io_device)
{
	uint64_t *devcnt = (uint64_t *)io_device;

	CU_ASSERT(*devcnt == 0);
}

static void
device_unregister_and_thread_exit_race(void)
{
	uint64_t device = 0;
	struct spdk_io_channel *ch1, *ch2;
	struct spdk_thread *thread1, *thread2;

	/* Create two threads and each thread gets a channel from the same device. */
	allocate_threads(2);
	set_thread(0);

	thread1 = spdk_get_thread();
	SPDK_CU_ASSERT_FATAL(thread1 != NULL);

	spdk_io_device_register(&device, create_cb2, destroy_cb2, sizeof(uint64_t), NULL);

	ch1 = spdk_get_io_channel(&device);
	SPDK_CU_ASSERT_FATAL(ch1 != NULL);

	set_thread(1);

	thread2 = spdk_get_thread();
	SPDK_CU_ASSERT_FATAL(thread2 != NULL);

	ch2 = spdk_get_io_channel(&device);
	SPDK_CU_ASSERT_FATAL(ch2 != NULL);

	set_thread(0);

	/* Move thread 0 to the exiting state, but it should keep exiting until two channels
	 * and a device are released.
	 */
	spdk_thread_exit(thread1);
	poll_thread(0);

	spdk_put_io_channel(ch1);

	spdk_io_device_unregister(&device, unregister_cb2);
	poll_thread(0);

	CU_ASSERT(spdk_thread_is_exited(thread1) == false);

	set_thread(1);

	/* Move thread 1 to the exiting state, but it should keep exiting until its channel
	 * is released.
	 */
	spdk_thread_exit(thread2);
	poll_thread(1);

	CU_ASSERT(spdk_thread_is_exited(thread2) == false);

	spdk_put_io_channel(ch2);
	poll_thread(1);

	CU_ASSERT(spdk_thread_is_exited(thread1) == false);
	CU_ASSERT(spdk_thread_is_exited(thread2) == true);

	poll_thread(0);

	CU_ASSERT(spdk_thread_is_exited(thread1) == true);

	free_threads();
}

int
main(int argc, char **argv)
{
@@ -1261,6 +1353,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, thread_exit_test);
	CU_ADD_TEST(suite, thread_update_stats_test);
	CU_ADD_TEST(suite, nested_channel);
	CU_ADD_TEST(suite, device_unregister_and_thread_exit_race);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();