Commit cec5ba28 authored by Alexey Marchuk, committed by Ben Walker
Browse files

nvme/rdma: Register UMR per IO request



If accel sequence is supported, append a copy
task even if there is no accel sequence. NVME RDMA
driver expects that accel framework registers UMR
for the data buffer. This UMR allows to represent
fragmented payload as a virtually contiguous one.

Signed-off-by: Alexey Marchuk <alexeymar@nvidia.com>
Change-Id: I410f991959b08eab033105a7dbb4a9aaba491567
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/24709


Reviewed-by: Ben Walker <ben@nvidia.com>
Community-CI: Community CI Samsung <spdk.community.ci.samsung@gmail.com>
Reviewed-by: Jim Harris <jim.harris@nvidia.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent 7219bd1a
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -4097,6 +4097,7 @@ rdma_max_cq_size | Optional | number | Set the maximum size of a
rdma_cm_event_timeout_ms   | Optional | number      | Time to wait for RDMA CM events. Default: 0 (0 means using default value of driver).
dhchap_digests             | Optional | list        | List of allowed DH-HMAC-CHAP digests.
dhchap_dhgroups            | Optional | list        | List of allowed DH-HMAC-CHAP DH groups.
rdma_umr_per_io            | Optional | boolean     | Enable/disable scatter-gather UMR per IO in RDMA transport if supported by system

#### Example

@@ -4125,7 +4126,8 @@ request:
    "dhchap_dhgroups": [
      "ffdhe6144",
      "ffdhe8192"
    ]
    ],
    "rdma_umr_per_io": false
  },
  "jsonrpc": "2.0",
  "method": "bdev_nvme_set_options",
+3 −1
Original line number Diff line number Diff line
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2024 Nutanix Inc. All rights reserved.
 *   Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/** \file
@@ -100,8 +101,9 @@ struct spdk_bdev_nvme_opts {
	uint8_t reserved110[2];
	uint32_t dhchap_digests;
	uint32_t dhchap_dhgroups;
	bool rdma_umr_per_io;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_nvme_opts) == 120, "Incorrect size");
SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_nvme_opts) == 128, "Incorrect size");

/**
 * Connect to the NVMe controller and populate namespaces as bdevs.
+7 −0
Original line number Diff line number Diff line
@@ -4755,6 +4755,13 @@ struct spdk_nvme_transport_opts {
	 * RDMA CM event timeout in milliseconds.
	 */
	uint16_t rdma_cm_event_timeout_ms;

	/**
	 * It is used for RDMA transport.
	 *
	 * Configure UMR per IO request if supported by the system
	 */
	bool rdma_umr_per_io;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvme_transport_opts) == 24, "Incorrect size");

+17 −5
Original line number Diff line number Diff line
@@ -201,6 +201,9 @@ struct nvme_rdma_qpair {
	uint16_t				num_entries;

	bool					delay_cmd_submit;
	/* Append copy task even if no accel sequence is attached to IO.
	 * Result is UMR configured per IO data buffer */
	bool					append_copy;

	uint32_t				num_completions;
	uint32_t				num_outstanding_reqs;
@@ -1776,11 +1779,16 @@ nvme_rdma_accel_completion_cb(void *cb_arg, int status)
	/* nvme_rdma driver may fail data transfer on WC_FLUSH error completion which is expected.
	 * To prevent false errors from accel, first check if qpair is in the process of disconnect */
	if (spdk_unlikely(!spdk_nvme_qpair_is_connected(&rqpair->qpair))) {
		struct spdk_nvmf_fabric_connect_cmd *cmd = (struct spdk_nvmf_fabric_connect_cmd *)
				&rdma_req->req->cmd;

		if (cmd->opcode != SPDK_NVME_OPC_FABRIC && cmd->fctype != SPDK_NVMF_FABRIC_COMMAND_CONNECT) {
			SPDK_DEBUGLOG(nvme, "qpair %p, req %p accel cpl in disconnecting, outstanding %u\n",
				      rqpair, rdma_req, rqpair->qpair.num_outstanding_reqs);
			sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
			goto fail_req;
		}
	}
	if (spdk_unlikely(status)) {
		SPDK_ERRLOG("qpair %p, req %p, accel sequence status %d\n", rdma_req->req->qpair, rdma_req, status);
		sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
@@ -2004,6 +2012,10 @@ nvme_rdma_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
	rqpair->num_entries = qsize - 1;
	rqpair->delay_cmd_submit = delay_cmd_submit;
	rqpair->state = NVME_RDMA_QPAIR_STATE_INVALID;
	rqpair->append_copy = g_spdk_nvme_transport_opts.rdma_umr_per_io &&
			      spdk_rdma_provider_accel_sequence_supported() && qid != 0;
	SPDK_DEBUGLOG(nvme, "rqpair %p, append_copy %s\n", rqpair,
		      rqpair->append_copy ? "enabled" : "disabled");
	qpair = &rqpair->qpair;
	rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
	if (rc != 0) {
@@ -2535,7 +2547,7 @@ nvme_rdma_qpair_submit_request(struct spdk_nvme_qpair *qpair,
	assert(rdma_req->req == NULL);
	rdma_req->req = req;
	req->cmd.cid = rdma_req->id;
	if (req->accel_sequence) {
	if (req->accel_sequence || rqpair->append_copy) {
		assert(spdk_rdma_provider_accel_sequence_supported());
		assert(rqpair->qpair.poll_group->group);
		assert(rqpair->qpair.poll_group->group->accel_fn_table.append_copy);
+4 −1
Original line number Diff line number Diff line
@@ -28,7 +28,8 @@ static int g_current_transport_index = 0;
struct spdk_nvme_transport_opts g_spdk_nvme_transport_opts = {
	.rdma_srq_size = 0,
	.rdma_max_cq_size = 0,
	.rdma_cm_event_timeout_ms = 1000
	.rdma_cm_event_timeout_ms = 1000,
	.rdma_umr_per_io = false,
};

const struct spdk_nvme_transport *
@@ -899,6 +900,7 @@ spdk_nvme_transport_get_opts(struct spdk_nvme_transport_opts *opts, size_t opts_
	SET_FIELD(rdma_srq_size);
	SET_FIELD(rdma_max_cq_size);
	SET_FIELD(rdma_cm_event_timeout_ms);
	SET_FIELD(rdma_umr_per_io);

	/* Do not remove this statement, you should always update this statement when you adding a new field,
	 * and do not forget to add the SET_FIELD statement for your added field. */
@@ -928,6 +930,7 @@ spdk_nvme_transport_set_opts(const struct spdk_nvme_transport_opts *opts, size_t
	SET_FIELD(rdma_srq_size);
	SET_FIELD(rdma_max_cq_size);
	SET_FIELD(rdma_cm_event_timeout_ms);
	SET_FIELD(rdma_umr_per_io);

	g_spdk_nvme_transport_opts.opts_size = opts->opts_size;

Loading