Commit 935cdbe4 authored by Seth Howell, committed by Jim Harris
Browse files

lib/nvme: Enable multi SGE support in the NVMe-oF host.



Change-Id: Icbbea0d586ae086314085e682fc13aa63fa3c167
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/427543


Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent 16f9d58a
Loading
Loading
Loading
Loading
+81 −36
Original line number Diff line number Diff line
@@ -58,11 +58,19 @@
#define NVME_RDMA_RW_BUFFER_SIZE 131072

/*
NVME RDMA qpair Resource Defaults
 * NVME RDMA qpair Resource Defaults
 */
#define NVME_RDMA_DEFAULT_TX_SGE		2
#define NVME_RDMA_DEFAULT_RX_SGE		1


/* Max number of NVMe-oF SGL descriptors supported by the host */
#define NVME_RDMA_MAX_SGL_DESCRIPTORS		16
/*
 * An NVMe-oF command capsule as registered for the RDMA send: the NVMe
 * command optionally followed by in-capsule SGL descriptors. When a request
 * needs more than one data descriptor, sgl1 inside the command is set to a
 * last-segment descriptor (offset subtype) and the trailing sgl[] entries
 * are transmitted along with the command in the same RDMA SEND, by extending
 * send_sgl[0].length past sizeof(struct spdk_nvme_cmd).
 */
struct spdk_nvmf_cmd {
	struct spdk_nvme_cmd cmd;	/* base NVMe submission queue entry */
	struct spdk_nvme_sgl_descriptor sgl[NVME_RDMA_MAX_SGL_DESCRIPTORS];	/* optional trailing keyed SGL descriptor list */
};

/* Mapping from virtual address to ibv_mr pointer for a protection domain */
struct spdk_nvme_rdma_mr_map {
	struct ibv_pd				*pd;
@@ -103,7 +111,7 @@ struct nvme_rdma_qpair {
	 * Array of num_entries NVMe commands registered as RDMA message buffers.
	 * Indexed by rdma_req->id.
	 */
	struct spdk_nvme_cmd			*cmds;
	struct spdk_nvmf_cmd			*cmds;

	/* Memory region describing all cmds for this qpair */
	struct ibv_mr				*cmd_mr;
@@ -409,12 +417,11 @@ nvme_rdma_alloc_reqs(struct nvme_rdma_qpair *rqpair)
		struct spdk_nvme_cmd		*cmd;

		rdma_req = &rqpair->rdma_reqs[i];
		cmd = &rqpair->cmds[i];
		cmd = &rqpair->cmds[i].cmd;

		rdma_req->id = i;

		rdma_req->send_sgl[0].addr = (uint64_t)cmd;
		rdma_req->send_sgl[0].length = sizeof(*cmd);
		rdma_req->send_sgl[0].lkey = rqpair->cmd_mr->lkey;

		rdma_req->send_wr.wr_id = (uint64_t)rdma_req;
@@ -817,12 +824,14 @@ nvme_rdma_qpair_connect(struct nvme_rdma_qpair *rqpair)
 * Build SGL describing empty payload.
 */
static int
nvme_rdma_build_null_request(struct nvme_request *req)
nvme_rdma_build_null_request(struct spdk_nvme_rdma_req *rdma_req)
{
	struct nvme_request *req = rdma_req->req;
	struct spdk_nvme_sgl_descriptor *nvme_sgl;

	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;

	rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
	nvme_sgl = &req->cmd.dptr.sgl1;
	nvme_sgl->keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	nvme_sgl->keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
@@ -863,6 +872,7 @@ nvme_rdma_build_contig_inline_request(struct nvme_rdma_qpair *rqpair,
	sge_inline->lkey = mr->lkey;

	rdma_req->send_wr.num_sge = 2;
	rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
	req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
@@ -897,6 +907,7 @@ nvme_rdma_build_contig_request(struct nvme_rdma_qpair *rqpair,
	}

	rdma_req->send_wr.num_sge = 1;
	rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
@@ -915,11 +926,12 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
			    struct spdk_nvme_rdma_req *rdma_req)
{
	struct nvme_request *req = rdma_req->req;
	struct ibv_mr *mr;
	struct spdk_nvmf_cmd *cmd = &rqpair->cmds[rdma_req->id];
	struct ibv_mr *mr = NULL;
	void *virt_addr;
	uint64_t requested_size;
	uint32_t length;
	int rc;
	uint64_t remaining_size;
	uint32_t sge_length, mr_length;
	int rc, max_num_sgl, num_sgl_desc;

	assert(req->payload_size != 0);
	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
@@ -927,30 +939,68 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair,
	assert(req->payload.next_sge_fn != NULL);
	req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);

	/* TODO: for now, we only support a single SGL entry */
	rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &length);
	max_num_sgl = req->qpair->ctrlr->max_sges;

	remaining_size = req->payload_size;
	num_sgl_desc = 0;
	do {
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &virt_addr, &sge_length);
		if (rc) {
			return -1;
		}

	if (length < req->payload_size) {
		SPDK_ERRLOG("multi-element SGL currently not supported for RDMA\n");
		sge_length = spdk_min(remaining_size, sge_length);
		mr_length = sge_length;

		mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr,
				(uint64_t *)&mr_length);

		if (mr == NULL || mr_length < sge_length) {
			return -1;
		}
	requested_size = req->payload_size;
	mr = (struct ibv_mr *)spdk_mem_map_translate(rqpair->mr_map->map, (uint64_t)virt_addr,
			&requested_size);
	if (mr == NULL || requested_size < req->payload_size) {

		cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
		cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
		cmd->sgl[num_sgl_desc].keyed.length = sge_length;
		cmd->sgl[num_sgl_desc].keyed.key = mr->rkey;
		cmd->sgl[num_sgl_desc].address = (uint64_t)virt_addr;

		remaining_size -= sge_length;
		num_sgl_desc++;
	} while (remaining_size > 0 && num_sgl_desc < max_num_sgl);


	/* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
	if (remaining_size > 0) {
		return -1;
	}

	rdma_req->send_wr.num_sge = 1;
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	rdma_req->send_wr.num_sge = 1;

	/*
	 * If only one SGL descriptor is required, it can be embedded directly in the command
	 * as a data block descriptor.
	 */
	if (num_sgl_desc == 1) {
		rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
		req->cmd.dptr.sgl1.keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
		req->cmd.dptr.sgl1.keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
		req->cmd.dptr.sgl1.keyed.length = req->payload_size;
		req->cmd.dptr.sgl1.keyed.key = mr->rkey;
	req->cmd.dptr.sgl1.address = (uint64_t)virt_addr;
		req->cmd.dptr.sgl1.address = rqpair->cmds[rdma_req->id].sgl[0].address;
	} else {
		/*
		 * Otherwise, The SGL descriptor embedded in the command must point to the list of
		 * SGL descriptors used to describe the operation. In that case it is a last segment descriptor.
		 */
		rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd) + sizeof(struct
					       spdk_nvme_sgl_descriptor) * num_sgl_desc;
		req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
		req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
		req->cmd.dptr.sgl1.unkeyed.length = num_sgl_desc * sizeof(struct spdk_nvme_sgl_descriptor);
		req->cmd.dptr.sgl1.address = (uint64_t)0;
	}

	return 0;
}
@@ -999,6 +1049,7 @@ nvme_rdma_build_sgl_inline_request(struct nvme_rdma_qpair *rqpair,
	sge_inline->lkey = mr->lkey;

	rdma_req->send_wr.num_sge = 2;
	rdma_req->send_sgl[0].length = sizeof(struct spdk_nvme_cmd);
	req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
	req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
	req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
@@ -1027,7 +1078,7 @@ nvme_rdma_req_init(struct nvme_rdma_qpair *rqpair, struct nvme_request *req,
	req->cmd.cid = rdma_req->id;

	if (req->payload_size == 0) {
		rc = nvme_rdma_build_null_request(req);
		rc = nvme_rdma_build_null_request(rdma_req);
	} else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
		/*
		 * Check if icdoff is non zero, to avoid interop conflicts with
@@ -1525,13 +1576,7 @@ nvme_rdma_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
uint16_t
nvme_rdma_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
{
	/*
	 * We do not support >1 SGE in the initiator currently,
	 *  so we can only return 1 here.  Once that support is
	 *  added, this should return ctrlr->cdata.nvmf_specific.msdbd
	 *  instead.
	 */
	return 1;
	return spdk_min(ctrlr->cdata.nvmf_specific.msdbd, NVME_RDMA_MAX_SGL_DESCRIPTORS);
}

void *