Commit dafdb289 authored by Artur Paszkiewicz, committed by Jim Harris
Browse files

raid: allow re-adding a base bdev with superblock



If a raid bdev is in ONLINE state and a base bdev is added that was a
member of this array, rebuild should start automatically.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: I30583ced59f351658d1c5029faea49fbb2ee9367
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/22501


Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Jim Harris <jim.harris@samsung.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
parent b694ff86
Loading
Loading
Loading
Loading
+30 −1
Original line number Diff line number Diff line
@@ -3008,6 +3008,9 @@ raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info)
	}
}

/*
 * Forward declaration: raid_bdev_examine_sb() is defined further down in this
 * file, but raid_bdev_configure_base_bdev_check_sb_cb() below already calls it
 * when the superblock found on a base bdev carries the UUID of the raid bdev
 * it is being added to.
 */
static void raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
				 raid_base_bdev_cb cb_fn, void *cb_ctx);

static void
raid_bdev_configure_base_bdev_check_sb_cb(const struct raid_bdev_superblock *sb, int status,
		void *ctx)
@@ -3017,7 +3020,14 @@ raid_bdev_configure_base_bdev_check_sb_cb(const struct raid_bdev_superblock *sb,
	switch (status) {
	case 0:
		/* valid superblock found */
		SPDK_ERRLOG("Existing raid superblock found on bdev %s\n", base_info->name);
		if (spdk_uuid_compare(&base_info->raid_bdev->bdev.uuid, &sb->uuid) == 0) {
			struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(base_info->desc);

			raid_bdev_free_base_bdev_resource(base_info);
			raid_bdev_examine_sb(sb, bdev, base_info->configure_cb, base_info->configure_cb_ctx);
			return;
		}
		SPDK_ERRLOG("Superblock of a different raid bdev found on bdev %s\n", base_info->name);
		status = -EEXIST;
		raid_bdev_free_base_bdev_resource(base_info);
		break;
@@ -3431,6 +3441,25 @@ raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bd
		}
	}

	if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
		assert(sb_base_bdev->slot < raid_bdev->num_base_bdevs);
		base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];
		assert(base_info->desc == NULL);
		assert(sb_base_bdev->state == RAID_SB_BASE_BDEV_MISSING ||
		       sb_base_bdev->state == RAID_SB_BASE_BDEV_FAILED);
		assert(spdk_uuid_is_null(&base_info->uuid));
		spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid);
		SPDK_NOTICELOG("Re-adding bdev %s to raid bdev %s.\n", bdev->name, raid_bdev->bdev.name);
		spdk_spin_lock(&raid_bdev->base_bdev_lock);
		rc = raid_bdev_configure_base_bdev(base_info, true, cb_fn, cb_ctx);
		spdk_spin_unlock(&raid_bdev->base_bdev_lock);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n",
				    bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc));
		}
		goto out;
	}

	if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED) {
		SPDK_NOTICELOG("Bdev %s is not an active member of raid bdev %s. Ignoring.\n",
			       bdev->name, raid_bdev->bdev.name);
+30 −0
Original line number Diff line number Diff line
@@ -760,6 +760,36 @@ function raid_rebuild_test() {
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $num_base_bdevs_operational
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		[[ $($rpc_py bdev_raid_get_bdevs all | jq -r '.[].base_bdevs_list[0].name') == "spare" ]]

		# Remove and re-add a base bdev - rebuild should start automatically
		$rpc_py bdev_raid_remove_base_bdev "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_raid_add_base_bdev $raid_bdev_name "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Same as above but re-add through examine
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		$rpc_py bdev_passthru_create -b "spare_delay" -p "spare"
		sleep 1
		verify_raid_bdev_process $raid_bdev_name "rebuild" "spare"

		# Stop the rebuild
		$rpc_py bdev_passthru_delete "spare"
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"

		# Re-adding a base bdev that was replaced (no longer is a member of the array) should not be allowed
		$rpc_py bdev_passthru_delete ${base_bdevs[0]}
		$rpc_py bdev_passthru_create -b ${base_bdevs[0]}_malloc -p ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
		NOT $rpc_py bdev_raid_add_base_bdev $raid_bdev_name ${base_bdevs[0]}
		sleep 1
		verify_raid_bdev_state $raid_bdev_name "online" $raid_level $strip_size $((num_base_bdevs_operational - 1))
		verify_raid_bdev_process $raid_bdev_name "none" "none"
	fi

	killprocess $raid_pid