Commit 6a77723e authored by Alexey Marchuk, committed by Tomasz Zawadzki

rdma: Use WRs chaining when DIF is enabled

This patch adds the following:
1. Change the signature of nvmf_rdma_fill_wr_sge: pass ibv_send_wr ** so that the
caller's WR pointer can be advanced, and add a pointer to the number of extra WRs
2. Add a check of the number of requested WRs to nvmf_request_alloc_wrs
3. Add a function that updates the remote address offset across chained WRs
(a conceptual sketch of WR chaining is included before the diff below)

Change-Id: I26f6567211b3ebfdb4981a7499f6df25e32cbb3a
Signed-off-by: Alexey Marchuk <alexeymar@mellanox.com>
Signed-off-by: Sasha Kotchubievsky <sashakot@mellanox.com>
Signed-off-by: Evgenii Kochetov <evgeniik@mellanox.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/470475
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent 653496d2
+51 −8
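Before the diff, a conceptual sketch of what WR chaining means at the verbs level may help: several ibv_send_wr structures are linked through their next pointers, and each WR's remote address is advanced by the number of bytes covered by the WRs preceding it in the chain, which is the bookkeeping the new nvmf_rdma_update_remote_addr helper performs. The sketch below is not SPDK code; the addresses, keys, and block sizes are made-up assumptions, and it only builds the structures in memory, so no RDMA device is required.

/* Conceptual sketch only: two chained WRs, two SGEs each, remote addresses
 * computed from the running byte offset. Addresses, keys, and sizes below are
 * arbitrary example values, not anything SPDK uses.
 * Build: gcc -Wall wr_chain_sketch.c -o wr_chain_sketch (libibverbs headers
 * only, nothing is posted to a device). */
#include <infiniband/verbs.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Pretend each WR carries two 512-byte data blocks (DIF metadata already stripped). */
	struct ibv_sge sges[2][2] = {
		{ { .addr = 0x1000, .length = 512, .lkey = 0x11 },
		  { .addr = 0x2000, .length = 512, .lkey = 0x11 } },
		{ { .addr = 0x3000, .length = 512, .lkey = 0x11 },
		  { .addr = 0x4000, .length = 512, .lkey = 0x11 } },
	};
	struct ibv_send_wr wrs[2];
	uint64_t remote_base = 0xd0000000;	/* hypothetical peer address from the keyed SGL */
	uint32_t rkey = 0x22;			/* hypothetical remote key */
	uint64_t offset = 0;
	int i, j;

	memset(wrs, 0, sizeof(wrs));

	/* Chain the WRs through ->next so they form a single list. */
	for (i = 0; i < 2; i++) {
		wrs[i].opcode = IBV_WR_RDMA_WRITE;
		wrs[i].sg_list = sges[i];
		wrs[i].num_sge = 2;
		wrs[i].next = (i + 1 < 2) ? &wrs[i + 1] : NULL;
	}

	/* Walk the chain: each WR targets the remote base address plus the bytes
	 * already covered by the SGEs of all preceding WRs. */
	for (i = 0; i < 2; i++) {
		wrs[i].wr.rdma.rkey = rkey;
		wrs[i].wr.rdma.remote_addr = remote_base + offset;
		for (j = 0; j < wrs[i].num_sge; j++) {
			offset += wrs[i].sg_list[j].length;
		}
	}

	for (i = 0; i < 2; i++) {
		printf("WR %d -> remote_addr 0x%llx, %d SGEs\n",
		       i, (unsigned long long)wrs[i].wr.rdma.remote_addr, wrs[i].num_sge);
	}
	return 0;
}

At the verbs level, the benefit of such a chain is that the whole list can be submitted to the NIC with a single ibv_post_send() call rather than one call per WR.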
@@ -1483,6 +1483,12 @@ nvmf_request_alloc_wrs(struct spdk_nvmf_rdma_transport *rtransport,
 	struct spdk_nvmf_rdma_request_data	*current_data_wr;
 	uint32_t				i;
 
+	if (num_sgl_descriptors > SPDK_NVMF_MAX_SGL_ENTRIES) {
+		SPDK_ERRLOG("Requested too many entries (%u), the limit is %u\n",
+			    num_sgl_descriptors, SPDK_NVMF_MAX_SGL_ENTRIES);
+		return -EINVAL;
+	}
+
 	if (spdk_mempool_get_bulk(rtransport->data_wr_pool, (void **)work_requests, num_sgl_descriptors)) {
 		return -ENOMEM;
 	}
@@ -1513,6 +1519,25 @@ nvmf_rdma_setup_request(struct spdk_nvmf_rdma_request *rdma_req)
 	nvmf_rdma_setup_wr(wr, &rdma_req->rsp.wr, rdma_req->req.xfer);
 }
 
+static inline void
+nvmf_rdma_update_remote_addr(struct spdk_nvmf_rdma_request *rdma_req, uint32_t num_wrs)
+{
+	struct ibv_send_wr		*wr = &rdma_req->data.wr;
+	struct spdk_nvme_sgl_descriptor	*sgl = &rdma_req->req.cmd->nvme_cmd.dptr.sgl1;
+	uint32_t			i;
+	int				j;
+	uint64_t			remote_addr_offset = 0;
+
+	for (i = 0; i < num_wrs; ++i) {
+		wr->wr.rdma.rkey = sgl->keyed.key;
+		wr->wr.rdma.remote_addr = sgl->address + remote_addr_offset;
+		for (j = 0; j < wr->num_sge; ++j) {
+			remote_addr_offset += wr->sg_list[j].length;
+		}
+		wr = wr->next;
+	}
+}
+
 /* This function is used in the rare case that we have a buffer split over multiple memory regions. */
 static int
 nvmf_rdma_replace_buffer(struct spdk_nvmf_rdma_poll_group *rgroup, void **buf)
@@ -1568,10 +1593,12 @@ nvmf_rdma_get_lkey(struct spdk_nvmf_rdma_device *device, struct iovec *iov,

 static bool
 nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
-		      struct iovec *iov, struct ibv_send_wr *wr,
+		      struct iovec *iov, struct ibv_send_wr **_wr,
 		      uint32_t *_remaining_data_block, uint32_t *_offset,
+		      uint32_t *_num_extra_wrs,
 		      const struct spdk_dif_ctx *dif_ctx)
 {
+	struct ibv_send_wr *wr = *_wr;
 	struct ibv_sge	*sg_ele = &wr->sg_list[wr->num_sge];
 	uint32_t	lkey = 0;
 	uint32_t	remaining, data_block_size, md_size, sge_len;
@@ -1592,7 +1619,18 @@ nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
 		data_block_size = dif_ctx->block_size - dif_ctx->md_size;
 		md_size = dif_ctx->md_size;
 
-		while (remaining && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
+		while (remaining) {
+			if (wr->num_sge >= SPDK_NVMF_MAX_SGL_ENTRIES) {
+				if (*_num_extra_wrs > 0 && wr->next) {
+					*_wr = wr->next;
+					wr = *_wr;
+					wr->num_sge = 0;
+					sg_ele = &wr->sg_list[wr->num_sge];
+					(*_num_extra_wrs)--;
+				} else {
+					break;
+				}
+			}
 			sg_ele->lkey = lkey;
 			sg_ele->addr = (uintptr_t)((char *)iov->iov_base + *_offset);
 			sge_len = spdk_min(remaining, *_remaining_data_block);
@@ -1628,7 +1666,8 @@ nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,
 		      struct spdk_nvmf_rdma_device *device,
 		      struct spdk_nvmf_rdma_request *rdma_req,
 		      struct ibv_send_wr *wr,
-		      uint32_t length)
+		      uint32_t length,
+		      uint32_t num_extra_wrs)
 {
 	struct spdk_nvmf_request *req = &rdma_req->req;
 	struct spdk_dif_ctx *dif_ctx = NULL;
@@ -1642,9 +1681,9 @@ nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,

 	wr->num_sge = 0;
 
-	while (length && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
-		while (spdk_unlikely(!nvmf_rdma_fill_wr_sge(device, &req->iov[rdma_req->iovpos], wr,
-				     &remaining_data_block, &offset, dif_ctx))) {
+	while (length && (num_extra_wrs || wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES)) {
+		while (spdk_unlikely(!nvmf_rdma_fill_wr_sge(device, &req->iov[rdma_req->iovpos], &wr,
+				     &remaining_data_block, &offset, &num_extra_wrs, dif_ctx))) {
 			if (nvmf_rdma_replace_buffer(rgroup, &req->buffers[rdma_req->iovpos]) == -ENOMEM) {
 				return -ENOMEM;
 			}
@@ -1726,11 +1765,15 @@ spdk_nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport,
 		}
 	}
 
-	rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length);
+	rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length, num_wrs - 1);
 	if (spdk_unlikely(rc != 0)) {
 		goto err_exit;
 	}
 
+	if (spdk_unlikely(num_wrs > 1)) {
+		nvmf_rdma_update_remote_addr(rdma_req, num_wrs);
+	}
+
 	/* set the number of outstanding data WRs for this request. */
 	rdma_req->num_outstanding_data_wr = num_wrs;

@@ -1808,7 +1851,7 @@ nvmf_rdma_request_fill_iovs_multi_sgl(struct spdk_nvmf_rdma_transport *rtranspor

 		current_wr->num_sge = 0;
 
-		rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i]);
+		rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, current_wr, lengths[i], 0);
 		if (rc != 0) {
 			rc = -ENOMEM;
 			goto err_exit;