Commit f9cccaa8 authored by Artur Paszkiewicz, committed by Jim Harris
Browse files

raid: examine other bdevs when starting from superblock



When a bdev containing a superblock is examined, search for other base
bdevs and claim them, without having to examine every base bdev
separately.

For example, if a raid bdev is stopped and we remove one base bdev, then
add it back, this will now cause the raid bdev to be brought back up.
Earlier, we would have to re-add every base bdev.

Related to #3306

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: I71193de1cdb8b3a451aa8490255d4414d81da159
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/22641


Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <jim.harris@samsung.com>
parent 688de1b9
Loading
Loading
Loading
Loading
+100 −3
Original line number Diff line number Diff line
@@ -3376,6 +3376,89 @@ raid_bdev_examine_no_sb(struct spdk_bdev *bdev)
	}
}

/*
 * State carried across the asynchronous steps that examine the remaining
 * base bdevs of a raid bdev after it was created from a superblock.
 */
struct raid_bdev_examine_others_ctx {
	/* UUID of the raid bdev being assembled; used to look the raid bdev up again on each step */
	struct spdk_uuid raid_bdev_uuid;
	/* base bdev slot from which the next scan for unconfigured base bdevs starts */
	uint8_t current_base_bdev_idx;
	/* caller's completion callback, invoked once when the sequence finishes (may be NULL) */
	raid_base_bdev_cb cb_fn;
	/* argument passed to cb_fn */
	void *cb_ctx;
};

/*
 * Terminate the "examine others" sequence.
 *
 * Reports the final status to the saved completion callback, if one was
 * provided, and then releases the context.
 *
 * _ctx   - struct raid_bdev_examine_others_ctx owned by this sequence
 * status - result handed to the saved callback
 */
static void
raid_bdev_examine_others_done(void *_ctx, int status)
{
	struct raid_bdev_examine_others_ctx *others = _ctx;

	/* the callback is optional */
	if (others->cb_fn) {
		others->cb_fn(others->cb_ctx, status);
	}

	/* the context is freed only after the callback has run */
	free(others);
}

/*
 * Callback type handed to raid_bdev_examine_load_sb(). It receives the bdev,
 * the superblock, a status code and the caller's context pointer.
 * NOTE(review): presumably invoked when the superblock load completes - confirm
 * against the definition of raid_bdev_examine_load_sb().
 */
typedef void (*raid_bdev_examine_load_sb_cb)(struct spdk_bdev *bdev,
		const struct raid_bdev_superblock *sb, int status, void *ctx);
/*
 * Forward declarations: the examine-others helpers below reference these
 * functions before their definitions appear.
 */
static int raid_bdev_examine_load_sb(const char *bdev_name, raid_bdev_examine_load_sb_cb cb,
				     void *cb_ctx);
static void raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
				 raid_base_bdev_cb cb_fn, void *cb_ctx);
static void raid_bdev_examine_others(void *_ctx, int status);

/*
 * Superblock-load callback used while examining the other base bdevs.
 *
 * On success the loaded superblock is passed to raid_bdev_examine_sb(), with
 * raid_bdev_examine_others() as its continuation so the scan resumes
 * afterwards. On failure the whole sequence is finished with that status.
 *
 * bdev   - bdev the superblock was loaded from
 * sb     - loaded superblock
 * status - 0 on success, otherwise the load error
 * _ctx   - struct raid_bdev_examine_others_ctx for this sequence
 */
static void
raid_bdev_examine_others_load_cb(struct spdk_bdev *bdev, const struct raid_bdev_superblock *sb,
				 int status, void *_ctx)
{
	struct raid_bdev_examine_others_ctx *others = _ctx;

	if (status == 0) {
		raid_bdev_examine_sb(sb, bdev, raid_bdev_examine_others, others);
	} else {
		raid_bdev_examine_others_done(others, status);
	}
}

/*
 * Examine the next not-yet-configured base bdev of a raid bdev.
 *
 * Used as the completion callback of the previous examine step. Starting at
 * ctx->current_base_bdev_idx, it scans the raid bdev's base bdev slots for one
 * that is not configured, has a non-null UUID, and for which a bdev can be
 * found under the lowercase UUID string. For the first such slot whose
 * superblock load is started successfully, the function returns and the
 * sequence continues from raid_bdev_examine_others_load_cb().
 *
 * The sequence is finished through raid_bdev_examine_others_done() when:
 *  - the previous step failed (its status is passed on),
 *  - the raid bdev can no longer be found by UUID (-ENODEV),
 *  - no further slot could be started (status is 0 if no load was attempted,
 *    otherwise the error of the last load attempt that failed to start).
 *
 * _ctx   - struct raid_bdev_examine_others_ctx for this sequence
 * status - result of the previous step
 */
static void
raid_bdev_examine_others(void *_ctx, int status)
{
	struct raid_bdev_examine_others_ctx *others = _ctx;
	struct raid_base_bdev_info *slot;
	struct raid_bdev *raid;
	char bdev_name[SPDK_UUID_STRING_LEN];

	if (status != 0) {
		raid_bdev_examine_others_done(others, status);
		return;
	}

	raid = raid_bdev_find_by_uuid(&others->raid_bdev_uuid);
	if (raid == NULL) {
		raid_bdev_examine_others_done(others, -ENODEV);
		return;
	}

	for (slot = &raid->base_bdev_info[others->current_base_bdev_idx];
	     slot < &raid->base_bdev_info[raid->num_base_bdevs];
	     slot++) {
		/* nothing to do for slots already configured or without a recorded UUID */
		if (slot->is_configured || spdk_uuid_is_null(&slot->uuid)) {
			continue;
		}

		/* the base bdev is looked up by its UUID string */
		spdk_uuid_fmt_lower(bdev_name, sizeof(bdev_name), &slot->uuid);

		if (spdk_bdev_get_by_name(bdev_name) == NULL) {
			continue;
		}

		/* remember where to resume when this function is called again */
		others->current_base_bdev_idx = raid_bdev_base_bdev_slot(slot);

		status = raid_bdev_examine_load_sb(bdev_name, raid_bdev_examine_others_load_cb, others);
		if (status == 0) {
			/* load started; raid_bdev_examine_others_load_cb() takes over */
			return;
		}
		/* could not start the load for this bdev; try the next slot */
	}

	raid_bdev_examine_others_done(others, status);
}

static void
raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
		     raid_base_bdev_cb cb_fn, void *cb_ctx)
@@ -3443,12 +3526,29 @@ raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bd
	}

	if (!raid_bdev) {
		struct raid_bdev_examine_others_ctx *ctx;

		ctx = calloc(1, sizeof(*ctx));
		if (ctx == NULL) {
			rc = -ENOMEM;
			goto out;
		}

		rc = raid_bdev_create_from_sb(sb, &raid_bdev);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to create raid bdev %s: %s\n",
				    sb->name, spdk_strerror(-rc));
			free(ctx);
			goto out;
		}

		/* after this base bdev is configured, examine other base bdevs that may be present */
		spdk_uuid_copy(&ctx->raid_bdev_uuid, &sb->uuid);
		ctx->cb_fn = cb_fn;
		ctx->cb_ctx = cb_ctx;

		cb_fn = raid_bdev_examine_others;
		cb_ctx = ctx;
	}

	if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
@@ -3503,9 +3603,6 @@ out:
	}
}

/*
 * Callback type for superblock loading during examine. It receives the bdev,
 * the superblock, a status code and the caller's context pointer.
 */
typedef void (*raid_bdev_examine_load_sb_cb)(struct spdk_bdev *bdev,
		const struct raid_bdev_superblock *sb, int status, void *ctx);

struct raid_bdev_examine_ctx {
	struct spdk_bdev_desc *desc;
	struct spdk_io_channel *ch;
+24 −36
Original line number Diff line number Diff line
@@ -522,7 +522,6 @@ function raid_superblock_test() {
		# Check if the RAID bdev is in online state (degraded)
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))

		if [ $num_base_bdevs -gt 2 ]; then
		# Stop the RAID bdev
		$rpc_py bdev_raid_delete $raid_bdev_name
		raid_bdev=$($rpc_py bdev_raid_get_bdevs all | jq -r '.[]')
@@ -530,34 +529,30 @@ function raid_superblock_test() {
			return 1
		fi

		if [ $num_base_bdevs -gt 2 ]; then
			# Delete the last base bdev
			i=$((num_base_bdevs - 1))
			$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
		fi

		# Re-add first base bdev
		# This is the "failed" device and contains the "old" version of the superblock
		$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[0]} -p ${base_bdevs_pt[0]} -u ${base_bdevs_pt_uuid[0]}

			# Check if the RAID bdev is in configuring state
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $num_base_bdevs

			# Delete remaining base bdevs
			for ((i = 1; i < num_base_bdevs; i++)); do
				$rpc_py bdev_passthru_delete ${base_bdevs_pt[$i]}
			done

			# Re-add the last base bdev
			i=$((num_base_bdevs - 1))
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}

		if [ $num_base_bdevs -gt 2 ]; then
			# Check if the RAID bdev is in configuring state
			# This should use the newer superblock version and have n-1 online base bdevs
			verify_raid_bdev_state $raid_bdev_name "configuring" $raid_level $strip_size $((num_base_bdevs - 1))
			[[ $($rpc_py bdev_raid_get_bdevs configuring | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

			# Re-add remaining base bdevs
			for ((i = 1; i < num_base_bdevs - 1; i++)); do
			# Re-add the last base bdev
			$rpc_py bdev_passthru_create -b ${base_bdevs_malloc[$i]} -p ${base_bdevs_pt[$i]} -u ${base_bdevs_pt_uuid[$i]}
			done
		fi

		# Check if the RAID bdev is in online state (degraded)
		# This should use the newer superblock version and have n-1 online base bdevs
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs - 1))
		fi
		[[ $($rpc_py bdev_raid_get_bdevs online | jq -r '.[].base_bdevs_list[0].is_configured') == "false" ]]

		# Check if the RAID bdev has the same UUID as when first created
		if [ "$($rpc_py bdev_get_bdevs -b $raid_bdev_name | jq -r '.[] | .uuid')" != "$raid_bdev_uuid" ]; then
@@ -746,14 +741,7 @@ function raid_rebuild_test() {
	fi

	if [ $superblock = true ]; then
		# Remove the passthru base bdevs, then re-add them to assemble the raid bdev again
		for bdev in "${base_bdevs[@]}"; do
			if [ -z "$bdev" ]; then
				continue
			fi
			$rpc_py bdev_passthru_delete $bdev
			$rpc_py bdev_passthru_create -b ${bdev}_malloc -p $bdev
		done
		# Remove then re-add a base bdev to assemble the raid bdev again
		$rpc_py bdev_passthru_delete "spare"
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"