Commit 43adb646 authored by Shuhei Matsumoto, committed by Tomasz Zawadzki
Browse files

bdev/nvme: Retry failed I/O up to retry_count times



Add bdev_retry_count to spdk_bdev_nvme_opts and retry_count to
nvme_bdev_io, respectively.

Set the type of both to int because we want to use -1 for infinite retry.

Set the default value of bdev_retry_count to zero for backward
compatibility.

bdev_retry_count is configurable by the RPC bdev_nvme_set_options.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: I9bc746fcea54aa8722c76f79c70c2ae2b375aa53
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9864


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
parent 4495bda4
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -5,7 +5,9 @@
### bdev

The parameter `retry_count` of the RPC `bdev_nvme_set_options` was deprecated and will be
removed in SPDK 22.01, and the parameter `transport_retry_count` is added and used instead.
removed in SPDK 22.04, and the parameter `transport_retry_count` is added and used instead.

A new parameter `bdev_retry_count` is added to the RPC `bdev_nvme_set_options`.

## v21.10

+1 −0
Original line number Diff line number Diff line
@@ -2787,6 +2787,7 @@ nvme_ioq_poll_period_us | Optional | number | How often I/O queues are p
io_queue_requests          | Optional | number      | The number of requests allocated for each NVMe I/O queue. Default: 512.
delay_cmd_submit           | Optional | boolean     | Enable delaying NVMe command submission to allow batching of multiple commands. Default: `true`.
transport_retry_count      | Optional | number      | The number of attempts per I/O in the transport layer before an I/O fails.
bdev_retry_count           | Optional | number      | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries.

#### Example

+35 −4
Original line number Diff line number Diff line
@@ -107,6 +107,9 @@ struct nvme_bdev_io {

	/** Expiration value in ticks to retry the current I/O. */
	uint64_t retry_ticks;

	/* How many times the current I/O was retried. */
	int32_t retry_count;
};

struct nvme_probe_ctx {
@@ -140,6 +143,7 @@ static struct spdk_bdev_nvme_opts g_opts = {
	.nvme_ioq_poll_period_us = 0,
	.io_queue_requests = 0,
	.delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
	.bdev_retry_count = 0,
};

#define NVME_HOTPLUG_POLL_PERIOD_MAX			10000000ULL
@@ -833,12 +837,16 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
{
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
	struct nvme_bdev_channel *nbdev_ch;
	struct nvme_ctrlr *nvme_ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;
	uint64_t delay_ms;

	if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) {
		goto complete;
	}

	if (cpl->status.dnr != 0 || bdev_nvme_io_type_is_admin(bdev_io->type)) {
	if (cpl->status.dnr != 0 || bdev_nvme_io_type_is_admin(bdev_io->type) ||
	    (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) {
		goto complete;
	}

@@ -850,13 +858,29 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
	    spdk_nvme_cpl_is_aborted_sq_deletion(cpl) ||
	    !nvme_io_path_is_available(bio->io_path) ||
	    nvme_io_path_is_failed(bio->io_path)) {
		delay_ms = 0;
	} else if (spdk_nvme_cpl_is_aborted_by_request(cpl)) {
		goto complete;
	} else {
		bio->retry_count++;

		nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(bio->io_path->ctrlr_ch);
		cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr);

		if (cpl->status.crd != 0) {
			delay_ms = cdata->crdt[cpl->status.crd] * 100;
		} else {
			delay_ms = 0;
		}
	}

	if (any_io_path_may_become_available(nbdev_ch)) {
			bdev_nvme_queue_retry_io(nbdev_ch, bio, 0);
		bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms);
		return;
	}
	}

complete:
	bio->retry_count = 0;
	spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc);
}

@@ -889,6 +913,7 @@ bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
		break;
	}

	bio->retry_count = 0;
	spdk_bdev_io_complete(bdev_io, io_status);
}

@@ -3191,6 +3216,11 @@ bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
		return -EINVAL;
	}

	if (opts->bdev_retry_count < -1) {
		SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n");
		return -EINVAL;
	}

	return 0;
}

@@ -4662,6 +4692,7 @@ bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
	spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us);
	spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests);
	spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
	spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
+2 −0
Original line number Diff line number Diff line
@@ -223,6 +223,8 @@ struct spdk_bdev_nvme_opts {
	uint64_t nvme_ioq_poll_period_us;
	uint32_t io_queue_requests;
	bool delay_cmd_submit;
	/* The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries. */
	int32_t bdev_retry_count;
};

struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
+1 −0
Original line number Diff line number Diff line
@@ -89,6 +89,7 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] =
	{"io_queue_requests", offsetof(struct spdk_bdev_nvme_opts, io_queue_requests), spdk_json_decode_uint32, true},
	{"delay_cmd_submit", offsetof(struct spdk_bdev_nvme_opts, delay_cmd_submit), spdk_json_decode_bool, true},
	{"transport_retry_count", offsetof(struct spdk_bdev_nvme_opts, transport_retry_count), spdk_json_decode_uint32, true},
	{"bdev_retry_count", offsetof(struct spdk_bdev_nvme_opts, bdev_retry_count), spdk_json_decode_int32, true},
};

static void
Loading