Commit 9d2b7b41 authored by Artur Paszkiewicz's avatar Artur Paszkiewicz Committed by Tomasz Zawadzki
Browse files

module/raid: stop rebuild before the raid bdev is unregistered



Open the raid bdev internally to receive SPDK_BDEV_EVENT_REMOVE and
stop the rebuild process from the event callback. Unregistering will
be performed after the descriptor is closed. The process needs to be
stopped before the destruct callback, because stopping the rebuild
process may require unquiescing a range, which requires the bdev
spin lock, which is destroyed before the bdev module's destruct()
callback is called.

We will look later at trying to fix this problem at bdev layer, to
avoid bdev modules having to open themselves to avoid these kinds of
problems.

Change-Id: Ic9c5bf14f785aaf7629bf1c9d2e1889e254cec5f
Signed-off-by: default avatarArtur Paszkiewicz <artur.paszkiewicz@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/20830


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Reviewed-by: default avatarTomasz Zawadzki <tomasz.zawadzki@intel.com>
Tested-by: default avatarTomasz Zawadzki <tomasz.zawadzki@intel.com>
parent 0773385e
Loading
Loading
Loading
Loading
+120 −7
Original line number Diff line number Diff line
@@ -65,6 +65,13 @@ struct raid_bdev_process {
	bool				window_range_locked;
	struct raid_base_bdev_info	*target;
	int				status;
	TAILQ_HEAD(, raid_process_finish_action) finish_actions;
};

struct raid_process_finish_action {
	spdk_msg_fn cb;
	void *cb_ctx;
	TAILQ_ENTRY(raid_process_finish_action) link;
};

static struct raid_bdev_module *
@@ -418,6 +425,8 @@ _raid_bdev_destruct(void *ctxt)

	SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n");

	assert(raid_bdev->process == NULL);

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		/*
		 * Close all base bdev descriptors for which call has come from below
@@ -1444,6 +1453,81 @@ raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
	return 0;
}

static void
_raid_bdev_unregistering_cont(void *ctx)
{
	struct raid_bdev *raid_bdev = ctx;

	spdk_bdev_close(raid_bdev->self_desc);
	raid_bdev->self_desc = NULL;
}

static void
raid_bdev_unregistering_cont(void *ctx)
{
	spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_unregistering_cont, ctx);
}

static int
raid_bdev_process_add_finish_action(struct raid_bdev_process *process, spdk_msg_fn cb, void *cb_ctx)
{
	struct raid_process_finish_action *finish_action;

	assert(spdk_get_thread() == process->thread);
	assert(process->state < RAID_PROCESS_STATE_STOPPED);

	finish_action = calloc(1, sizeof(*finish_action));
	if (finish_action == NULL) {
		return -ENOMEM;
	}

	finish_action->cb = cb;
	finish_action->cb_ctx = cb_ctx;

	TAILQ_INSERT_TAIL(&process->finish_actions, finish_action, link);

	return 0;
}

static void
raid_bdev_unregistering_stop_process(void *ctx)
{
	struct raid_bdev_process *process = ctx;
	struct raid_bdev *raid_bdev = process->raid_bdev;
	int rc;

	process->state = RAID_PROCESS_STATE_STOPPING;
	if (process->status == 0) {
		process->status = -ECANCELED;
	}

	rc = raid_bdev_process_add_finish_action(process, raid_bdev_unregistering_cont, raid_bdev);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to add raid bdev '%s' process finish action: %s\n",
			    raid_bdev->bdev.name, spdk_strerror(-rc));
	}
}

static void
raid_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct raid_bdev *raid_bdev = event_ctx;

	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		if (raid_bdev->process != NULL) {
			spdk_thread_send_msg(raid_bdev->process->thread, raid_bdev_unregistering_stop_process,
					     raid_bdev->process);
		} else {
			raid_bdev_unregistering_cont(raid_bdev);
		}
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}

static void
raid_bdev_configure_cont(struct raid_bdev *raid_bdev)
{
@@ -1459,17 +1543,38 @@ raid_bdev_configure_cont(struct raid_bdev *raid_bdev)
				raid_bdev_gen->name);
	rc = spdk_bdev_register(raid_bdev_gen);
	if (rc != 0) {
		SPDK_ERRLOG("Unable to register raid bdev and stay at configuring state\n");
		if (raid_bdev->module->stop != NULL) {
			raid_bdev->module->stop(raid_bdev);
		SPDK_ERRLOG("Failed to register raid bdev '%s': %s\n",
			    raid_bdev_gen->name, spdk_strerror(-rc));
		goto err;
	}
		spdk_io_device_unregister(raid_bdev, NULL);
		raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
		return;

	/*
	 * Open the bdev internally to delay unregistering if we need to stop a background process
	 * first. The process may still need to unquiesce a range but it will fail because the
	 * bdev's internal.spinlock is destroyed by the time the destruct callback is reached.
	 * During application shutdown, bdevs automatically get unregistered by the bdev layer
	 * so this is the only way currently to do this correctly.
	 * TODO: try to handle this correctly in bdev layer instead.
	 */
	rc = spdk_bdev_open_ext(raid_bdev_gen->name, false, raid_bdev_event_cb, raid_bdev,
				&raid_bdev->self_desc);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to open raid bdev '%s': %s\n",
			    raid_bdev_gen->name, spdk_strerror(-rc));
		spdk_bdev_unregister(raid_bdev_gen, NULL, NULL);
		goto err;
	}

	SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen);
	SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n",
		      raid_bdev_gen->name, raid_bdev);
	return;
err:
	if (raid_bdev->module->stop != NULL) {
		raid_bdev->module->stop(raid_bdev);
	}
	spdk_io_device_unregister(raid_bdev, NULL);
	raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
}

static void
@@ -1966,6 +2071,13 @@ static void
_raid_bdev_process_finish_done(void *ctx)
{
	struct raid_bdev_process *process = ctx;
	struct raid_process_finish_action *finish_action;

	while ((finish_action = TAILQ_FIRST(&process->finish_actions)) != NULL) {
		TAILQ_REMOVE(&process->finish_actions, finish_action, link);
		finish_action->cb(finish_action->cb_ctx);
		free(finish_action);
	}

	raid_bdev_process_free(process);

@@ -2507,6 +2619,7 @@ raid_bdev_process_alloc(struct raid_bdev *raid_bdev, enum raid_process_type type
	process->max_window_size = spdk_max(RAID_BDEV_PROCESS_WINDOW_SIZE / raid_bdev->bdev.blocklen,
					    raid_bdev->bdev.write_unit_size);
	TAILQ_INIT(&process->requests);
	TAILQ_INIT(&process->finish_actions);

	for (i = 0; i < RAID_BDEV_PROCESS_MAX_QD; i++) {
		process_req = raid_bdev_process_alloc_request(process);
+3 −0
Original line number Diff line number Diff line
@@ -176,6 +176,9 @@ struct raid_bdev {
	/* raid bdev device, this will get registered in bdev layer */
	struct spdk_bdev		bdev;

	/* the raid bdev descriptor, opened for internal use */
	struct spdk_bdev_desc		*self_desc;

	/* link of raid bdev to link it to global raid bdev list */
	TAILQ_ENTRY(raid_bdev)		global_link;

+25 −0
Original line number Diff line number Diff line
@@ -603,6 +603,31 @@ function raid_rebuild_test() {
	# Check if rebuild started
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	if [ $superblock = true ] && [ $with_io = false ]; then
		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		[[ $($rpc_py bdev_raid_get_bdevs all | jq 'length') == 0 ]]

		# Remove the passthru base bdevs, then re-add them to assemble the raid bdev again
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_delete ${base_bdevs[$i]}
		done
		for ((i = 0; i < num_base_bdevs; i++)); do
			$rpc_py bdev_passthru_create -b ${base_bdevs[$i]}_malloc -p ${base_bdevs[$i]}
		done

		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		# Check if rebuild is not started
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Again, start the rebuild
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	# Wait for rebuild to finish
	local timeout=$((SECONDS + 30))
	while ((SECONDS < timeout)); do
+11 −1
Original line number Diff line number Diff line
@@ -78,7 +78,6 @@ TAILQ_HEAD(, spdk_bdev_io) g_deferred_ios = TAILQ_HEAD_INITIALIZER(g_deferred_io

DEFINE_STUB_V(spdk_bdev_module_examine_done, (struct spdk_bdev_module *module));
DEFINE_STUB_V(spdk_bdev_module_list_add, (struct spdk_bdev_module *bdev_module));
DEFINE_STUB(spdk_bdev_register, int, (struct spdk_bdev *bdev), 0);
DEFINE_STUB(spdk_bdev_io_type_supported, bool, (struct spdk_bdev *bdev,
		enum spdk_bdev_io_type io_type), true);
DEFINE_STUB_V(spdk_bdev_close, (struct spdk_bdev_desc *desc));
@@ -414,11 +413,21 @@ spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno)
	bdev->internal.unregister_cb(bdev->internal.unregister_ctx, bdeverrno);
}

int
spdk_bdev_register(struct spdk_bdev *bdev)
{
	TAILQ_INSERT_TAIL(&g_bdev_list, bdev, internal.link);
	return 0;
}

void
spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
	int ret;

	SPDK_CU_ASSERT_FATAL(spdk_bdev_get_by_name(bdev->name) == bdev);
	TAILQ_REMOVE(&g_bdev_list, bdev, internal.link);

	bdev->internal.unregister_cb = cb_fn;
	bdev->internal.unregister_ctx = cb_arg;

@@ -2371,6 +2380,7 @@ test_raid_io_split(void)

	spdk_put_io_channel(ch);
	free_test_req(&req);
	pbdev->process = NULL;

	create_raid_bdev_delete_req(&destroy_req, "raid1", 0);
	rpc_bdev_raid_delete(NULL, NULL);