Commit c164db9f authored by Jinlong Chen's avatar Jinlong Chen Committed by Konrad Sztyber
Browse files

vbdev_error: use per-channel pending_ios list



Putting all pending I/Os on the same list is wrong. It leads to
concurrent accessing to the list and results in corrupted data
structure.

Move the pending_ios list to io channel to fix it.

Change-Id: I5854b6242e1a954257a64f45eb64809da33664a7
Signed-off-by: default avatarJinlong Chen <chenjinlong.cjl@alibaba-inc.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/25373


Reviewed-by: default avatarChangpeng Liu <changpeliu@tencent.com>
Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Community-CI: Community CI Samsung <spdk.community.ci.samsung@gmail.com>
Community-CI: Mellanox Build Bot
parent 3428322b
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -138,7 +138,6 @@ DEPDIRS-bdev_ftl := $(BDEV_DEPS) ftl
endif
DEPDIRS-bdev_gpt := bdev json log thread util

DEPDIRS-bdev_error := $(BDEV_DEPS)
DEPDIRS-bdev_lvol := $(BDEV_DEPS) lvol blob blob_bdev
DEPDIRS-bdev_rpc := $(BDEV_DEPS)
DEPDIRS-bdev_split := $(BDEV_DEPS)
@@ -147,6 +146,7 @@ DEPDIRS-bdev_aio := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_compress := $(BDEV_DEPS_THREAD) reduce accel
DEPDIRS-bdev_crypto := $(BDEV_DEPS_THREAD) accel
DEPDIRS-bdev_delay := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_error := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_iscsi := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_malloc := $(BDEV_DEPS_THREAD) accel dma
DEPDIRS-bdev_null := $(BDEV_DEPS_THREAD)
+49 −11
Original line number Diff line number Diff line
@@ -45,12 +45,12 @@ struct error_io {
struct error_disk {
	struct spdk_bdev_part		part;
	struct vbdev_error_info		error_vector[SPDK_BDEV_IO_TYPE_RESET];
	TAILQ_HEAD(, error_io)	pending_ios;
};

struct error_channel {
	struct spdk_bdev_part_channel	part_ch;
	uint64_t			io_inflight;
	TAILQ_HEAD(, error_io)		pending_ios;
};

static pthread_mutex_t g_vbdev_error_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -157,15 +157,38 @@ exit:
}

static void
vbdev_error_reset(struct error_disk *error_disk, struct spdk_bdev_io *bdev_io)
vbdev_error_ch_abort_ios(struct spdk_io_channel_iter *i)
{
	struct error_channel *ch = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));
	struct error_io *error_io, *tmp;

	TAILQ_FOREACH_SAFE(error_io, &ch->pending_ios, link, tmp) {
		TAILQ_REMOVE(&ch->pending_ios, error_io, link);
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(error_io), SPDK_BDEV_IO_STATUS_ABORTED);
	}

	spdk_for_each_channel_continue(i, 0);
}

static void
vbdev_error_ch_abort_ios_done(struct spdk_io_channel_iter *i, int status)
{
	struct error_io *pending_io, *tmp;
	struct spdk_bdev_io *reset_io = spdk_io_channel_iter_get_ctx(i);

	TAILQ_FOREACH_SAFE(pending_io, &error_disk->pending_ios, link, tmp) {
		TAILQ_REMOVE(&error_disk->pending_ios, pending_io, link);
		spdk_bdev_io_complete(spdk_bdev_io_from_ctx(pending_io), SPDK_BDEV_IO_STATUS_FAILED);
	if (status != 0) {
		SPDK_ERRLOG("Failed to abort pending I/Os on bdev %s, status = %d\n",
			    reset_io->bdev->name, status);
		spdk_bdev_io_complete(reset_io, SPDK_BDEV_IO_STATUS_FAILED);
	} else {
		spdk_bdev_io_complete(reset_io, SPDK_BDEV_IO_STATUS_SUCCESS);
	}
	spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
}

static void
vbdev_error_reset(struct error_disk *error_disk, struct spdk_bdev_io *bdev_io)
{
	spdk_for_each_channel(&error_disk->part, vbdev_error_ch_abort_ios, bdev_io,
			      vbdev_error_ch_abort_ios_done);
}

static uint32_t
@@ -271,7 +294,7 @@ vbdev_error_submit_request(struct spdk_io_channel *_ch, struct spdk_bdev_io *bde
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		break;
	case VBDEV_IO_PENDING:
		TAILQ_INSERT_TAIL(&error_disk->pending_ios, error_io, link);
		TAILQ_INSERT_TAIL(&ch->pending_ios, error_io, link);
		break;
	case VBDEV_IO_CORRUPT_DATA:
		if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
@@ -348,6 +371,22 @@ vbdev_error_base_bdev_hotremove_cb(void *_part_base)
	spdk_bdev_part_base_hotremove(part_base, &g_error_disks);
}

static int
vbdev_error_ch_create_cb(void *io_device, void *ctx_buf)
{
	struct error_channel *ch = ctx_buf;

	ch->io_inflight = 0;
	TAILQ_INIT(&ch->pending_ios);

	return 0;
}

static void
vbdev_error_ch_destroy_cb(void *io_device, void *ctx_buf)
{
}

static int
_vbdev_error_create(const char *base_bdev_name, const struct spdk_uuid *uuid)
{
@@ -361,7 +400,8 @@ _vbdev_error_create(const char *base_bdev_name, const struct spdk_uuid *uuid)
					       vbdev_error_base_bdev_hotremove_cb,
					       &error_if, &vbdev_error_fn_table, &g_error_disks,
					       NULL, NULL, sizeof(struct error_channel),
					       NULL, NULL, &base);
					       vbdev_error_ch_create_cb, vbdev_error_ch_destroy_cb,
					       &base);
	if (rc != 0) {
		if (rc != -ENODEV) {
			SPDK_ERRLOG("could not construct part base for bdev %s\n", base_bdev_name);
@@ -402,8 +442,6 @@ _vbdev_error_create(const char *base_bdev_name, const struct spdk_uuid *uuid)
		return rc;
	}

	TAILQ_INIT(&disk->pending_ios);

	return 0;
}