Commit d6fa87ae authored by Artur Paszkiewicz, committed by Tomasz Zawadzki
Browse files

module/raid: stop rebuild when a base bdev is removed



Change-Id: Idaa594b65b36e4d80349e1fe27a77055addcfb26
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/20642


Tested-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Jim Harris <jim.harris@samsung.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Community-CI: Mellanox Build Bot
parent 9d2b7b41
Loading
Loading
Loading
Loading
+113 −20
Original line number Diff line number Diff line
@@ -1758,27 +1758,22 @@ raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status)

	if (raid_bdev->sb) {
		struct raid_bdev_superblock *sb = raid_bdev->sb;
		struct raid_bdev_sb_base_bdev *sb_base_bdev = NULL;
		uint8_t slot = raid_bdev_base_bdev_slot(base_info);
		uint8_t i;

		for (i = 0; i < sb->base_bdevs_size; i++) {
			sb_base_bdev = &sb->base_bdevs[i];
			struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i];

			if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED &&
			    sb_base_bdev->slot == slot) {
				break;
			}
		}

		assert(i < sb->base_bdevs_size);

				/* TODO: distinguish between failure and intentional removal */
				sb_base_bdev->state = RAID_SB_BASE_BDEV_FAILED;

				raid_bdev_write_superblock(raid_bdev, raid_bdev_remove_base_bdev_write_sb_cb, base_info);
				return;
			}
		}
	}
out:
	raid_bdev_remove_base_bdev_done(base_info, status);
}
@@ -1832,18 +1827,117 @@ raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status)
			      raid_bdev_channels_remove_base_bdev_done);
}

static int
raid_bdev_remove_base_bdev_quiesce(struct raid_base_bdev_info *base_info)
{
	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	return spdk_bdev_quiesce(&base_info->raid_bdev->bdev, &g_raid_if,
				 raid_bdev_remove_base_bdev_on_quiesced, base_info);
}

/*
 * Context carried in the message sent to the process thread when a base bdev
 * is removed while a background process is attached to the raid bdev.
 * It is allocated by raid_bdev_process_base_bdev_remove() and freed either by
 * raid_bdev_process_base_bdev_remove_cont() or on the error path of
 * _raid_bdev_process_base_bdev_remove().
 */
struct raid_bdev_process_base_bdev_remove_ctx {
	/* The process that was attached to the raid bdev when the removal was requested */
	struct raid_bdev_process *process;
	/* The base bdev being removed */
	struct raid_base_bdev_info *base_info;
	/* Copy of raid_bdev->num_base_bdevs_operational taken when the ctx was created */
	uint8_t num_base_bdevs_operational;
};

/*
 * Message handler that resumes the base bdev removal by starting the quiesce
 * of the raid bdev. It is sent to the app thread by
 * raid_bdev_process_base_bdev_remove_cont().
 *
 * ctx is the struct raid_base_bdev_info being removed. If the quiesce cannot
 * be started, the removal is completed immediately with the error code.
 */
static void
_raid_bdev_process_base_bdev_remove_cont(void *ctx)
{
	struct raid_base_bdev_info *base_info = ctx;
	int rc = raid_bdev_remove_base_bdev_quiesce(base_info);

	if (rc == 0) {
		/* Quiesce started - the removal continues in its completion callback. */
		return;
	}

	raid_bdev_remove_base_bdev_done(base_info, rc);
}

/*
 * Release the removal context and hand the base bdev removal back to the app
 * thread, where _raid_bdev_process_base_bdev_remove_cont() continues it.
 *
 * _ctx is a struct raid_bdev_process_base_bdev_remove_ctx; it is freed here,
 * so only the base_info pointer taken from it is passed on.
 */
static void
raid_bdev_process_base_bdev_remove_cont(void *_ctx)
{
	struct raid_bdev_process_base_bdev_remove_ctx *remove_ctx = _ctx;
	struct raid_base_bdev_info *removed = remove_ctx->base_info;
	struct spdk_thread *app_thread;

	free(remove_ctx);

	app_thread = spdk_thread_get_app_thread();
	spdk_thread_send_msg(app_thread, _raid_bdev_process_base_bdev_remove_cont, removed);
}

/*
 * Message handler sent to the process thread by
 * raid_bdev_process_base_bdev_remove(). It decides whether the running process
 * has to be stopped before the base bdev removal can continue.
 *
 * The process is stopped when the removed base bdev is the process target, or
 * when the operational base bdev count captured in the ctx is not above the
 * raid bdev's min_base_bdevs_operational. Otherwise the removal continues
 * right away and the process keeps running.
 *
 * _ctx is a struct raid_bdev_process_base_bdev_remove_ctx. Ownership passes to
 * raid_bdev_process_base_bdev_remove_cont() unless registering the finish
 * action fails, in which case it is freed here.
 */
static void
_raid_bdev_process_base_bdev_remove(void *_ctx)
{
	struct raid_bdev_process_base_bdev_remove_ctx *remove_ctx = _ctx;
	struct raid_bdev_process *process = remove_ctx->process;
	int rc;

	if (remove_ctx->base_info == process->target ||
	    remove_ctx->num_base_bdevs_operational <= process->raid_bdev->min_base_bdevs_operational) {
		assert(process->state > RAID_PROCESS_STATE_INIT &&
		       process->state < RAID_PROCESS_STATE_STOPPED);

		/* Continue the removal only after the process has finished stopping. */
		rc = raid_bdev_process_add_finish_action(process,
				raid_bdev_process_base_bdev_remove_cont,
				remove_ctx);
		if (rc != 0) {
			raid_bdev_remove_base_bdev_done(remove_ctx->base_info, rc);
			free(remove_ctx);
			return;
		}

		process->state = RAID_PROCESS_STATE_STOPPING;

		/* Keep an already recorded status; otherwise report -ENODEV. */
		if (process->status == 0) {
			process->status = -ENODEV;
		}
	} else {
		/* process doesn't need to be stopped */
		raid_bdev_process_base_bdev_remove_cont(remove_ctx);
	}
}

/*
 * Notify a running process that a base bdev is being removed.
 *
 * Allocates a removal context and sends it to the process thread, where
 * _raid_bdev_process_base_bdev_remove() decides whether the process has to be
 * stopped before the removal continues.
 *
 * Asserts that it is running on the app thread.
 *
 * Returns 0 if the message was sent, -ENOMEM if the context could not be
 * allocated, or the error returned by spdk_thread_send_msg(). On any error
 * nothing is left pending and the context has been released.
 */
static int
raid_bdev_process_base_bdev_remove(struct raid_bdev_process *process,
				   struct raid_base_bdev_info *base_info)
{
	struct raid_bdev_process_base_bdev_remove_ctx *ctx;
	int ret;

	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		return -ENOMEM;
	}

	/*
	 * We have to send the process and num_base_bdevs_operational in the message ctx
	 * because the process thread should not access raid_bdev's properties. Particularly,
	 * raid_bdev->process may be cleared by the time the message is handled, but ctx->process
	 * will still be valid until the process is fully stopped.
	 */
	ctx->base_info = base_info;
	ctx->process = process;
	ctx->num_base_bdevs_operational = process->raid_bdev->num_base_bdevs_operational;

	ret = spdk_thread_send_msg(process->thread, _raid_bdev_process_base_bdev_remove, ctx);
	if (ret != 0) {
		/*
		 * The message was not queued, so the handler that would consume ctx
		 * will never run. Free it here and report the failure to the caller
		 * instead of pretending the removal is in progress.
		 */
		free(ctx);
	}

	return ret;
}

static int
_raid_bdev_remove_base_bdev(struct raid_base_bdev_info *base_info,
			    raid_base_bdev_cb cb_fn, void *cb_ctx)
{
	struct raid_bdev *raid_bdev = base_info->raid_bdev;
	int ret = 0;

	SPDK_DEBUGLOG(bdev_raid, "%s\n", base_info->name);

	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	if (base_info->remove_scheduled) {
		return 0;
		return -ENODEV;
	}

	assert(base_info->desc);
@@ -1870,17 +1964,16 @@ _raid_bdev_remove_base_bdev(struct raid_base_bdev_info *base_info,
		 * to keep the raid bdev operational.
		 */
		raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx);
	} else if (raid_bdev->process != NULL) {
		ret = raid_bdev_process_base_bdev_remove(raid_bdev->process, base_info);
	} else {
		int ret;
		ret = raid_bdev_remove_base_bdev_quiesce(base_info);
	}

		ret = spdk_bdev_quiesce(&raid_bdev->bdev, &g_raid_if,
					raid_bdev_remove_base_bdev_on_quiesced, base_info);
	if (ret != 0) {
		base_info->remove_scheduled = false;
	}
	}

	return 0;
	return ret;
}

/*
+36 −2
Original line number Diff line number Diff line
@@ -603,6 +603,20 @@ function raid_rebuild_test() {
	# Check if rebuild started
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	# Remove the rebuild target bdev
	$rpc_py bdev_raid_remove_base_bdev "spare"

	# Check if the RAID bdev is in online state (degraded)
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

	# Check if rebuild was stopped
	verify_raid_bdev_process $raid_bdev_name "none" "none"

	# Again, start the rebuild
	$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
	sleep 1
	verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

	if [ $superblock = true ] && [ $with_io = false ]; then
		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
@@ -628,6 +642,20 @@ function raid_rebuild_test() {
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	local num_base_bdevs_operational=$num_base_bdevs

	if [ $raid_level = "raid1" ] && [ $num_base_bdevs -gt 2 ]; then
		# Remove one more base bdev (not rebuild target)
		$rpc_py bdev_raid_remove_base_bdev ${base_bdevs[1]}

		# Ignore this bdev later when comparing data
		base_bdevs[1]=""
		((num_base_bdevs_operational--))

		# Check if rebuild is still running
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"
	fi

	# Wait for rebuild to finish
	local timeout=$((SECONDS + 30))
	while ((SECONDS < timeout)); do
@@ -639,7 +667,7 @@ function raid_rebuild_test() {

	# Check if rebuild is not running and the RAID bdev has the correct number of operational devices
	verify_raid_bdev_process $raid_bdev_name "none" "none"
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
	verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational

	# Stop the RAID bdev
	$rpc_py bdev_raid_delete $raid_bdev_name
@@ -649,6 +677,9 @@ function raid_rebuild_test() {
		# Compare data on the rebuilt and other base bdevs
		nbd_start_disks $rpc_server "spare" "/dev/nbd0"
		for bdev in "${base_bdevs[@]:1}"; do
			if [ -z "$bdev" ]; then
				continue
			fi
			nbd_start_disks $rpc_server $bdev "/dev/nbd1"
			cmp -i $((data_offset * 512)) /dev/nbd0 /dev/nbd1
			nbd_stop_disks $rpc_server "/dev/nbd1"
@@ -664,13 +695,16 @@ function raid_rebuild_test() {
	if [ $superblock = true ]; then
		# Remove the passthru base bdevs, then re-add them to assemble the raid bdev again
		for bdev in "${base_bdevs[@]}"; do
			if [ -z "$bdev" ]; then
				continue
			fi
			$rpc_py bdev_passthru_delete $bdev
			$rpc_py bdev_passthru_create -b ${bdev}_malloc -p $bdev
		done
		$rpc_py bdev_passthru_delete "spare"
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"

		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		[[ $($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].name') == "spare" ]]
	fi