Commit f1a03e33 authored by Artur Paszkiewicz, committed by Jim Harris
Browse files

raid: add support for failing a base bdev due to io error



Add a function to be used by the raid modules to fail a base bdev when
handling a failed I/O.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: I5db039772f42be88768450f3bce5602c435e7fad
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/22844


Community-CI: Mellanox Build Bot
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <jim.harris@samsung.com>
parent 35a054db
Loading
Loading
Loading
Loading
+43 −2
Original line number Diff line number Diff line
@@ -424,6 +424,7 @@ raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info)
	if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) {
		spdk_uuid_set_null(&base_info->uuid);
	}
	base_info->is_failed = false;

	if (base_info->desc == NULL) {
		return;
@@ -1987,8 +1988,11 @@ raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status)

			if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED &&
			    sb_base_bdev->slot == slot) {
				/* TODO: distinguish between failure and intentional removal */
				if (base_info->is_failed) {
					sb_base_bdev->state = RAID_SB_BASE_BDEV_FAILED;
				} else {
					sb_base_bdev->state = RAID_SB_BASE_BDEV_MISSING;
				}

				raid_bdev_write_superblock(raid_bdev, raid_bdev_remove_base_bdev_write_sb_cb, base_info);
				return;
@@ -2191,6 +2195,43 @@ raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_base_bdev_cb cb_fn,
	return _raid_bdev_remove_base_bdev(base_info, cb_fn, cb_ctx);
}

/*
 * Completion callback for a base bdev removal that was started because the
 * base bdev was marked as failed.
 *
 * ctx    - the struct raid_base_bdev_info whose removal was requested
 * status - 0 if the removal succeeded, non-zero otherwise
 *
 * A successful removal needs no further action here. If the removal failed,
 * a warning is logged and the is_failed mark is cleared, so that a later
 * failure report for the same base bdev is not ignored as a duplicate.
 */
static void
raid_bdev_fail_base_remove_cb(void *ctx, int status)
{
	struct raid_base_bdev_info *failed_base = ctx;

	if (status == 0) {
		return;
	}

	SPDK_WARNLOG("Failed to remove base bdev %s\n", failed_base->name);
	failed_base->is_failed = false;
}

static void
_raid_bdev_fail_base_bdev(void *ctx)
{
	struct raid_base_bdev_info *base_info = ctx;
	int rc;

	if (base_info->is_failed) {
		return;
	}
	base_info->is_failed = true;

	SPDK_NOTICELOG("Failing base bdev in slot %d ('%s') of raid bdev '%s'\n",
		       raid_bdev_base_bdev_slot(base_info), base_info->name, base_info->raid_bdev->bdev.name);

	rc = _raid_bdev_remove_base_bdev(base_info, raid_bdev_fail_base_remove_cb, base_info);
	if (rc != 0) {
		raid_bdev_fail_base_remove_cb(base_info, rc);
	}
}

void
raid_bdev_fail_base_bdev(struct raid_base_bdev_info *base_info)
{
	spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_fail_base_bdev, base_info);
}

static void
raid_bdev_resize_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx)
{
+4 −0
Original line number Diff line number Diff line
@@ -103,6 +103,9 @@ struct raid_base_bdev_info {
	/* Set to true if this base bdev is the target of a background process */
	bool			is_process_target;

	/* Set to true to indicate that the base bdev is being removed because of a failure */
	bool			is_failed;

	/* callback for base bdev configuration */
	raid_base_bdev_cb	configure_cb;

@@ -369,6 +372,7 @@ void raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel
		       enum spdk_bdev_io_type type, uint64_t offset_blocks,
		       uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf,
		       struct spdk_memory_domain *memory_domain, void *memory_domain_ctx);
/*
 * Fail a base bdev. Intended for raid modules to call when handling a failed
 * I/O. The request is passed to the app thread via spdk_thread_exec_msg().
 */
void raid_bdev_fail_base_bdev(struct raid_base_bdev_info *base_info);

static inline uint8_t
raid_bdev_base_bdev_slot(struct raid_base_bdev_info *base_info)