Commit 0fba8dc8 authored by Shuhei Matsumoto, committed by Tomasz Zawadzki
Browse files

bdev/nvme: I/O error resiliency can be configured by global options



Add three options for I/O error resiliency to spdk_bdev_nvme_opts so that
the RPC bdev_nvme_set_options can configure them.
These global values can be overridden when the same options are given to the RPC bdev_nvme_attach_controller.

Change-Id: If3ee23aeef8b7585fe0fb5ec4695df5866fc1e74
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11830


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 6fb6716d
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -31,6 +31,10 @@ A new flag `ACCEL_FLAG_PERSISTENT` was added to indicate the target memory is PM
Added `bdev_nvme_add_error_injection` and `bdev_nvme_remove_error_injection` RPCs to add and
remove NVMe error injections.

Added new parameters `ctrlr_loss_timeout_sec`, `reconnect_delay_sec`, and `fast_io_fail_timeout_sec`
to the RPC `bdev_nvme_set_options`. These global values can be overridden when the same parameters
are given to the RPC `bdev_nvme_attach_controller`.

### event

Added `msg_mempool_size` parameter to `spdk_reactors_init` and `spdk_thread_lib_init_ext`.
+6 −0
Original line number Diff line number Diff line
@@ -2888,6 +2888,9 @@ Example response:
Set global parameters for all bdev NVMe. This RPC may only be called before SPDK subsystems have been initialized
or any bdev NVMe has been created.

The parameters `ctrlr_loss_timeout_sec`, `reconnect_delay_sec`, and `fast_io_fail_timeout_sec` configure I/O error resiliency.
These global values can be overridden when the same parameters are given to the RPC `bdev_nvme_attach_controller`.

#### Parameters

Name                       | Optional | Type        | Description
@@ -2908,6 +2911,9 @@ delay_cmd_submit | Optional | boolean | Enable delaying NVMe comma
transport_retry_count      | Optional | number      | The number of attempts per I/O in the transport layer before an I/O fails.
bdev_retry_count           | Optional | number      | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries.
transport_ack_timeout      | Optional | number      | Time to wait ack until packet retransmission. RDMA specific. Range 0-31 where 0 is driver-specific default value.
ctrlr_loss_timeout_sec     | Optional | number      | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect.
reconnect_delay_sec        | Optional | number      | Time to delay a reconnect attempt. 0 means no reconnect.
fast_io_fail_timeout_sec   | Optional | number      | Time to wait until ctrlr is reconnected before failing I/O to ctrlr. 0 means no such timeout.

#### Example

+27 −0
Original line number Diff line number Diff line
@@ -139,6 +139,9 @@ static struct spdk_bdev_nvme_opts g_opts = {
	.delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT,
	.bdev_retry_count = 3,
	.transport_ack_timeout = 0,
	.ctrlr_loss_timeout_sec = 0,
	.reconnect_delay_sec = 0,
	.fast_io_fail_timeout_sec = 0,
};

#define NVME_HOTPLUG_POLL_PERIOD_MAX			10000000ULL
@@ -3474,6 +3477,15 @@ err:
	return rc;
}

/*
 * Populate opts with the default per-controller options.
 *
 * The three I/O error resiliency timeouts are copied from the global
 * bdev_nvme options (g_opts); prchk_flags is reset to 0.
 */
void
bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts)
{
	/* Inherit the resiliency settings from the global options. */
	opts->fast_io_fail_timeout_sec = g_opts.fast_io_fail_timeout_sec;
	opts->reconnect_delay_sec = g_opts.reconnect_delay_sec;
	opts->ctrlr_loss_timeout_sec = g_opts.ctrlr_loss_timeout_sec;

	/* prchk_flags has no global counterpart here; it defaults to 0. */
	opts->prchk_flags = 0;
}

static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *drv_opts)
@@ -3587,6 +3599,10 @@ bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts)
	*opts = g_opts;
}

/*
 * Forward declaration so that bdev_nvme_validate_opts() can call this helper
 * before its definition, which lives elsewhere in this file.
 * bdev_nvme_validate_opts() treats a false return as invalid parameters and
 * fails with -EINVAL.
 */
static bool bdev_nvme_check_multipath_params(int32_t ctrlr_loss_timeout_sec,
		uint32_t reconnect_delay_sec,
		uint32_t fast_io_fail_timeout_sec);

static int
bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
{
@@ -3601,6 +3617,12 @@ bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts)
		return -EINVAL;
	}

	if (!bdev_nvme_check_multipath_params(opts->ctrlr_loss_timeout_sec,
					      opts->reconnect_delay_sec,
					      opts->fast_io_fail_timeout_sec)) {
		return -EINVAL;
	}

	return 0;
}

@@ -3979,6 +4001,8 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid,

	if (bdev_opts) {
		memcpy(&ctx->bdev_opts, bdev_opts, sizeof(*bdev_opts));
	} else {
		bdev_nvme_get_default_ctrlr_opts(&ctx->bdev_opts);
	}

	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
@@ -5678,6 +5702,9 @@ bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w)
	spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit);
	spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count);
	spdk_json_write_named_uint8(w, "transport_ack_timeout", g_opts.transport_ack_timeout);
	spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", g_opts.ctrlr_loss_timeout_sec);
	spdk_json_write_named_uint32(w, "reconnect_delay_sec", g_opts.reconnect_delay_sec);
	spdk_json_write_named_uint32(w, "fast_io_fail_timeout_sec", g_opts.fast_io_fail_timeout_sec);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
+5 −0
Original line number Diff line number Diff line
@@ -251,6 +251,9 @@ struct spdk_bdev_nvme_opts {
	/* The number of attempts per I/O in the bdev layer before an I/O fails. */
	int32_t bdev_retry_count;
	uint8_t transport_ack_timeout;
	int32_t ctrlr_loss_timeout_sec;
	uint32_t reconnect_delay_sec;
	uint32_t fast_io_fail_timeout_sec;
};

struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch);
@@ -258,6 +261,8 @@ void bdev_nvme_get_opts(struct spdk_bdev_nvme_opts *opts);
int bdev_nvme_set_opts(const struct spdk_bdev_nvme_opts *opts);
int bdev_nvme_set_hotplug(bool enabled, uint64_t period_us, spdk_msg_fn cb, void *cb_ctx);

/*
 * Fill opts with the default per-controller options: prchk_flags is set to 0 and
 * ctrlr_loss_timeout_sec, reconnect_delay_sec, and fast_io_fail_timeout_sec are
 * copied from the current global bdev_nvme options.
 */
void bdev_nvme_get_default_ctrlr_opts(struct nvme_ctrlr_opts *opts);

int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
		     const char *base_name,
		     const char **names,
+4 −0
Original line number Diff line number Diff line
@@ -92,6 +92,9 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] =
	{"transport_retry_count", offsetof(struct spdk_bdev_nvme_opts, transport_retry_count), spdk_json_decode_uint32, true},
	{"bdev_retry_count", offsetof(struct spdk_bdev_nvme_opts, bdev_retry_count), spdk_json_decode_int32, true},
	{"transport_ack_timeout", offsetof(struct spdk_bdev_nvme_opts, transport_ack_timeout), spdk_json_decode_uint8, true},
	{"ctrlr_loss_timeout_sec", offsetof(struct spdk_bdev_nvme_opts, ctrlr_loss_timeout_sec), spdk_json_decode_int32, true},
	{"reconnect_delay_sec", offsetof(struct spdk_bdev_nvme_opts, reconnect_delay_sec), spdk_json_decode_uint32, true},
	{"fast_io_fail_timeout_sec", offsetof(struct spdk_bdev_nvme_opts, fast_io_fail_timeout_sec), spdk_json_decode_uint32, true},
};

static void
@@ -324,6 +327,7 @@ rpc_bdev_nvme_attach_controller(struct spdk_jsonrpc_request *request,
	}

	spdk_nvme_ctrlr_get_default_ctrlr_opts(&ctx->req.drv_opts, sizeof(ctx->req.drv_opts));
	bdev_nvme_get_default_ctrlr_opts(&ctx->req.bdev_opts);

	if (spdk_json_decode_object(params, rpc_bdev_nvme_attach_controller_decoders,
				    SPDK_COUNTOF(rpc_bdev_nvme_attach_controller_decoders),
Loading