Commit 62700dac authored by Seth Howell, committed by Jim Harris

nvmf/rdma: Add support for multiple sgl descriptors to sgl parser



Enable parsing an nvmf request that contains an inline
nvme_sgl_last_segment_descriptor element. This is the next step
towards NVMe-oF SGL support in the NVMe-oF target.
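
For context, the capsule layout this parser now accepts: SGL1 in the command is a
last-segment descriptor whose offset subtype points at an inline array of keyed
data block descriptors. Below is a minimal sketch, assuming only the public
spdk_nvme_sgl_descriptor definition from include/spdk/nvme_spec.h; the helper
itself is illustrative and not part of this commit.

```c
#include <assert.h>
#include "spdk/nvme_spec.h"

/* Illustrative only: count the descriptors in the inline SGL segment that
 * SGL1 describes. Mirrors the arithmetic in
 * nvmf_rdma_request_fill_iovs_multi_sgl below. */
static uint32_t
count_inline_sgl_descriptors(const struct spdk_nvme_cmd *cmd)
{
	const struct spdk_nvme_sgl_descriptor *sgl1 = &cmd->dptr.sgl1;

	/* A last-segment descriptor with the offset subtype means the rest of
	 * the SGL sits in the capsule, sgl1->address bytes into the
	 * in-capsule data. */
	assert(sgl1->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT);
	assert(sgl1->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET);

	/* Each entry in the segment is one fixed-size 16-byte descriptor. */
	return sgl1->unkeyed.length / sizeof(struct spdk_nvme_sgl_descriptor);
}
```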

Change-Id: Ia2f1f7054e0de8a9e2bfe4dabe6af4085e3f12c4
Signed-off-by: Seth Howell <seth.howell@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/428745
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent 1e6cdc19
lib/nvmf/rdma.c  +175 −3
@@ -584,6 +584,46 @@ spdk_nvmf_rdma_set_ibv_state(struct spdk_nvmf_rdma_qpair *rqpair,
	return 0;
}

static void
nvmf_rdma_request_free_data(struct spdk_nvmf_rdma_request *rdma_req,
			    struct spdk_nvmf_rdma_transport *rtransport)
{
	struct spdk_nvmf_rdma_request_data	*current_data_wr = NULL, *next_data_wr = NULL;
	struct ibv_send_wr			*send_wr;
	int					i;

	rdma_req->num_outstanding_data_wr = 0;
	/* First scrub the SGEs of the data WR embedded in the request itself. */
	current_data_wr = &rdma_req->data;
	for (i = 0; i < current_data_wr->wr.num_sge; i++) {
		current_data_wr->wr.sg_list[i].addr = 0;
		current_data_wr->wr.sg_list[i].length = 0;
		current_data_wr->wr.sg_list[i].lkey = 0;
	}
	current_data_wr->wr.num_sge = 0;

	/* Chained WRs that are not the response WR came from the transport's
	 * data_wr_pool and must be scrubbed and returned to it. */
	send_wr = current_data_wr->wr.next;
	if (send_wr != NULL && send_wr != &rdma_req->rsp.wr) {
		next_data_wr = SPDK_CONTAINEROF(send_wr, struct spdk_nvmf_rdma_request_data, wr);
	}
	while (next_data_wr) {
		current_data_wr = next_data_wr;
		send_wr = current_data_wr->wr.next;
		if (send_wr != NULL && send_wr != &rdma_req->rsp.wr) {
			next_data_wr = SPDK_CONTAINEROF(send_wr, struct spdk_nvmf_rdma_request_data, wr);
		} else {
			next_data_wr = NULL;
		}

		for (i = 0; i < current_data_wr->wr.num_sge; i++) {
			current_data_wr->wr.sg_list[i].addr = 0;
			current_data_wr->wr.sg_list[i].length = 0;
			current_data_wr->wr.sg_list[i].lkey = 0;
		}
		current_data_wr->wr.num_sge = 0;
		spdk_mempool_put(rtransport->data_wr_pool, current_data_wr);
	}
}
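
The unwind above leans on SPDK_CONTAINEROF (include/spdk/util.h) to step from an
embedded ibv_send_wr back to its pooled wrapper. A minimal sketch of the pattern,
assuming the spdk_nvmf_rdma_request_data layout defined earlier in lib/nvmf/rdma.c:

```c
#include <infiniband/verbs.h>
#include "spdk/util.h"

/* Sketch: recover the pooled wrapper from a pointer to its embedded WR.
 * Valid only for WRs that really live inside a spdk_nvmf_rdma_request_data;
 * the loop above filters out &rdma_req->rsp.wr before doing this. */
static struct spdk_nvmf_rdma_request_data *
wr_to_data_wr(struct ibv_send_wr *wr)
{
	return SPDK_CONTAINEROF(wr, struct spdk_nvmf_rdma_request_data, wr);
}
```

Note that only the chained wrappers go back to data_wr_pool; the first data WR is
embedded in the request itself and is merely scrubbed.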

static void
nvmf_rdma_dump_request(struct spdk_nvmf_rdma_request *req)
{
@@ -1032,8 +1072,8 @@ request_transfer_out(struct spdk_nvmf_request *req, int *data_posted)
	assert(rqpair->current_recv_depth > 0);
	rqpair->current_recv_depth--;

	/* Build the response which consists of optional
	 * RDMA WRITEs to transfer data, plus an RDMA SEND
	 * containing the response.
	 */
	send_wr = &rdma_req->rsp.wr;
@@ -1326,6 +1366,46 @@ spdk_nvmf_rdma_request_get_xfer(struct spdk_nvmf_rdma_request *rdma_req)
	return xfer;
}

static int
nvmf_request_alloc_wrs(struct spdk_nvmf_rdma_transport *rtransport,
		       struct spdk_nvmf_rdma_request *rdma_req,
		       uint32_t num_sgl_descriptors)
{
	struct spdk_nvmf_rdma_request_data	*work_requests[SPDK_NVMF_MAX_SGL_ENTRIES];
	struct spdk_nvmf_rdma_request_data	*current_data_wr;
	uint32_t				i;

	if (spdk_mempool_get_bulk(rtransport->data_wr_pool, (void **)work_requests, num_sgl_descriptors)) {
		return -ENOMEM;
	}

	current_data_wr = &rdma_req->data;

	for (i = 0; i < num_sgl_descriptors; i++) {
		if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
			current_data_wr->wr.opcode = IBV_WR_RDMA_WRITE;
		} else if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
			current_data_wr->wr.opcode = IBV_WR_RDMA_READ;
		} else {
			assert(false);
		}
		work_requests[i]->wr.send_flags = IBV_SEND_SIGNALED;
		work_requests[i]->wr.sg_list = work_requests[i]->sgl;
		/* Every chained WR shares the request's wr_id so completions map back to it. */
		work_requests[i]->wr.wr_id = rdma_req->data.wr.wr_id;
		current_data_wr->wr.next = &work_requests[i]->wr;
		current_data_wr = work_requests[i];
	}

	if (rdma_req->req.xfer == SPDK_NVME_DATA_CONTROLLER_TO_HOST) {
		current_data_wr->wr.opcode = IBV_WR_RDMA_WRITE;
		current_data_wr->wr.next = &rdma_req->rsp.wr;
	} else if (rdma_req->req.xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		current_data_wr->wr.opcode = IBV_WR_RDMA_READ;
		current_data_wr->wr.next = NULL;
	}
	return 0;
}
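
The chain this builds differs by transfer direction: for controller-to-host the
response SEND is linked onto the tail, so the data WRs and the response go out in
a single ibv_post_send(); for host-to-controller the chain ends in NULL, since the
response may only be sent after the RDMA READs complete. Schematically, for a call
with num_sgl_descriptors == n:

```c
/*
 * C2H (RDMA WRITE): data.wr -> wr[0] -> ... -> wr[n-1] -> rsp.wr
 * H2C (RDMA READ):  data.wr -> wr[0] -> ... -> wr[n-1] -> NULL
 */
```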

static int
nvmf_rdma_fill_buffers(struct spdk_nvmf_rdma_transport *rtransport,
		       struct spdk_nvmf_rdma_poll_group *rgroup,
@@ -1423,6 +1503,83 @@ err_exit:
	return rc;
}

static int
nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtransport,
				      struct spdk_nvmf_rdma_device *device,
				      struct spdk_nvmf_rdma_request *rdma_req)
{
	struct spdk_nvmf_rdma_qpair		*rqpair;
	struct spdk_nvmf_rdma_poll_group	*rgroup;
	struct ibv_send_wr			*current_wr;
	struct spdk_nvmf_request		*req = &rdma_req->req;
	struct spdk_nvme_sgl_descriptor		*inline_segment, *desc;
	uint32_t				num_sgl_descriptors;
	uint32_t				i;
	int					rc;

	rqpair = SPDK_CONTAINEROF(rdma_req->req.qpair, struct spdk_nvmf_rdma_qpair, qpair);
	rgroup = rqpair->poller->group;

	inline_segment = &req->cmd->nvme_cmd.dptr.sgl1;
	assert(inline_segment->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT);
	assert(inline_segment->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET);

	desc = (struct spdk_nvme_sgl_descriptor *)(rdma_req->recv->buf + inline_segment->address);
	num_sgl_descriptors = inline_segment->unkeyed.length / sizeof(struct spdk_nvme_sgl_descriptor);
	assert(num_sgl_descriptors <= SPDK_NVMF_MAX_SGL_ENTRIES);

	if (nvmf_request_alloc_wrs(rtransport, rdma_req, num_sgl_descriptors - 1) != 0) {
		return -ENOMEM;
	}

	/* The first WR must always be the embedded data WR. This is how we unwind them later. */
	current_wr = &rdma_req->data.wr;

	req->iovcnt = 0;
	for (i = 0; i < num_sgl_descriptors; i++) {
		/* The descriptors must be keyed data block descriptors with an address, not an offset. */
		if (spdk_unlikely(desc->generic.type != SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK ||
				  desc->keyed.subtype != SPDK_NVME_SGL_SUBTYPE_ADDRESS)) {
			rc = -EINVAL;
			goto err_exit;
		}

		current_wr->num_sge = 0;
		req->length += desc->keyed.length;

		rc = nvmf_rdma_fill_buffers(rtransport, rgroup, device, rdma_req, current_wr,
					    desc->keyed.length);
		if (rc != 0) {
			rc = -ENOMEM;
			goto err_exit;
		}

		current_wr->wr.rdma.rkey = desc->keyed.key;
		current_wr->wr.rdma.remote_addr = desc->address;
		current_wr = current_wr->next;
		desc++;
	}

#ifdef SPDK_CONFIG_RDMA_SEND_WITH_INVAL
	if ((device->attr.device_cap_flags & IBV_DEVICE_MEM_MGT_EXTENSIONS) != 0) {
		if (desc->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_INVALIDATE_KEY) {
			rdma_req->rsp.wr.opcode = IBV_WR_SEND_WITH_INV;
			rdma_req->rsp.wr.imm_data = desc->keyed.key;
		}
	}
#endif

	rdma_req->num_outstanding_data_wr = num_sgl_descriptors;
	rdma_req->data_from_pool = true;

	return 0;

err_exit:
	spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport);
	nvmf_rdma_request_free_data(rdma_req, rtransport);
	return rc;
}
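
The loop above accepts only keyed data block descriptors with the address subtype;
anything else fails the request, and err_exit releases both the data buffers and
the pooled WRs. A hypothetical helper (name invented for illustration) restating
that per-descriptor rule:

```c
#include <stdbool.h>
#include "spdk/nvme_spec.h"

/* Hypothetical helper, not in the commit: the check applied to each inline
 * descriptor before it is turned into an RDMA READ/WRITE work request. */
static bool
nvmf_rdma_inline_desc_is_valid(const struct spdk_nvme_sgl_descriptor *desc)
{
	return desc->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
	       desc->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS;
}
```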

static int
spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
				 struct spdk_nvmf_rdma_device *device,
@@ -1517,6 +1674,20 @@ spdk_nvmf_rdma_request_parse_sgl(struct spdk_nvmf_rdma_transport *rtransport,
		rdma_req->req.iov[0].iov_len = rdma_req->req.length;
		rdma_req->req.iovcnt = 1;

		return 0;
	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT &&
		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
		if (nvmf_rdma_request_fill_iovs_multi_sgl(rtransport, device, rdma_req) < 0) {
			SPDK_DEBUGLOG(SPDK_LOG_RDMA, "No available large data buffers. Queueing request %p\n", rdma_req);
			return 0;
		}

		/* backward compatible */
		rdma_req->req.data = rdma_req->req.iov[0].iov_base;

		SPDK_DEBUGLOG(SPDK_LOG_RDMA, "Request %p took %d buffer/s from central pool\n", rdma_req,
			      rdma_req->req.iovcnt);

		return 0;
	}
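
With this branch in place, spdk_nvmf_rdma_request_parse_sgl distinguishes three
capsule SGL shapes. A condensed, illustrative view (the function below is
invented for clarity; the real branches carry the full logic):

```c
#include "spdk/nvme_spec.h"

/* Condensed, illustrative dispatch only. */
static const char *
sgl_shape(const struct spdk_nvme_sgl_descriptor *sgl)
{
	if (sgl->generic.type == SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK &&
	    sgl->keyed.subtype == SPDK_NVME_SGL_SUBTYPE_ADDRESS) {
		return "single remote buffer: one data WR, as before";
	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_DATA_BLOCK &&
		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
		return "in-capsule data: no RDMA data transfer";
	} else if (sgl->generic.type == SPDK_NVME_SGL_TYPE_LAST_SEGMENT &&
		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
		return "inline descriptor list: one data WR per entry (new)";
	}
	return "unsupported";
}
```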

@@ -1539,10 +1710,11 @@ nvmf_rdma_request_free(struct spdk_nvmf_rdma_request *rdma_req,

		spdk_nvmf_rdma_request_free_buffers(rdma_req, &rgroup->group, &rtransport->transport);
	}
	nvmf_rdma_request_free_data(rdma_req, rtransport);
	rdma_req->req.length = 0;
	rdma_req->req.iovcnt = 0;
	rdma_req->req.data = NULL;
	rdma_req->data.wr.next = NULL;
	rqpair->qd--;

	STAILQ_INSERT_HEAD(&rqpair->resources->free_queue, rdma_req, state_link);
test/unit/lib/nvmf/rdma.c/rdma_ut.c  +2 −0
@@ -139,6 +139,8 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)
	rdma_req.req.xfer = SPDK_NVME_DATA_CONTROLLER_TO_HOST;

	rtransport.transport.opts = g_rdma_ut_transport_opts;
	rtransport.data_wr_pool = NULL;
	rtransport.transport.data_buf_pool = NULL;

	device.attr.device_cap_flags = 0;
	g_rdma_mr.lkey = 0xABCD;
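
A hypothetical follow-on sketch (not part of this commit) of how the new path could
be driven from this test: build a last-segment SGL1 whose inline list holds two
keyed descriptors. Descriptor field names are real; the fixture plumbing (cmd,
rdma_req, recv buffer) is schematic, the constants are arbitrary, and the mempool
stubs would need to hand back usable spdk_nvmf_rdma_request_data wrappers.

```c
struct spdk_nvme_sgl_descriptor descs[2] = {};
struct spdk_nvme_sgl_descriptor *sgl = &rdma_req.req.cmd->nvme_cmd.dptr.sgl1;
uint32_t i;

sgl->generic.type = SPDK_NVME_SGL_TYPE_LAST_SEGMENT;
sgl->unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
sgl->address = 0;                    /* list begins at in-capsule offset 0 */
sgl->unkeyed.length = sizeof(descs); /* two 16-byte keyed descriptors */

for (i = 0; i < 2; i++) {
	descs[i].generic.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK;
	descs[i].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS;
	descs[i].keyed.length = 0x1000;  /* arbitrary transfer size */
	descs[i].address = i * 0x1000;   /* arbitrary remote offsets */
	descs[i].keyed.key = 0x44;       /* arbitrary rkey */
}
```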