Commit 16365fd8 authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Jim Harris
Browse files

nvmf/rdma: Merge filling wr->sg_list of non DIF case and DIF case



This patch merges nvmf_rdma_fill_wr_sgl_with_md_interleave()
into nvmf_rdma_fill_wr_sge(), and then removes
nvmf_rdma_fill_wr_sgl_with_md_interleave().

In nvmf_rdma_fill_wr_sgl(), pass DIF context, remaining data block
size, and offset to nvmf_rdma_fill_wr_sge() in the while loop.
For the non-DIF case, initialize all of them to zero.

In nvmf_rdma_fill_wr_sge(), distinguish the non-DIF case from the
DIF case by checking whether the DIF context is NULL.

As a minor wording change, "remaining" is sufficiently descriptive
and simpler than "remaining_io_buffer_length", so use "remaining".

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: I55ed749c540ef34b9a328dca7fd3b4694e669bfe
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/469350


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom SPDK FC-NVMe CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Alexey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent b48a97d4
Loading
Loading
Loading
Loading
+59 −107
Original line number Diff line number Diff line
@@ -1559,21 +1559,16 @@ nvmf_rdma_get_lkey(struct spdk_nvmf_rdma_device *device, struct iovec *iov,
}

static bool
nvmf_rdma_fill_wr_sge_with_md_interleave(struct spdk_nvmf_rdma_device *device,
		struct spdk_nvmf_request *req,
		struct ibv_send_wr *wr,
nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
		      struct spdk_nvmf_request *req, struct ibv_send_wr *wr,
		      int iovpos,
		uint32_t *_remaining_data_block,
		uint32_t *_offset,
		      uint32_t *_remaining_data_block, uint32_t *_offset,
		      const struct spdk_dif_ctx *dif_ctx)
{
	struct iovec	*iov = &req->iov[iovpos];
	struct ibv_sge *sg_ele;
	struct ibv_sge	*sg_ele = &wr->sg_list[wr->num_sge];
	uint32_t	lkey = 0;
	uint32_t sge_len;
	uint32_t remaining_io_buffer_length;
	uint32_t data_block_size = dif_ctx->block_size - dif_ctx->md_size;
	uint32_t md_size = dif_ctx->md_size;
	uint32_t	remaining, data_block_size, md_size, sge_len;

	if (spdk_unlikely(!nvmf_rdma_get_lkey(device, iov, &lkey))) {
		/* This is a very rare case that can occur when using DPDK version < 19.05 */
@@ -1581,59 +1576,69 @@ nvmf_rdma_fill_wr_sge_with_md_interleave(struct spdk_nvmf_rdma_device *device,
		return false;
	}

	remaining_io_buffer_length = iov->iov_len - *_offset;
	if (spdk_likely(!dif_ctx)) {
		sg_ele->lkey = lkey;
		sg_ele->addr = (uintptr_t)(iov->iov_base);
		sg_ele->length = iov->iov_len;
		wr->num_sge++;
	} else {
		remaining = iov->iov_len - *_offset;
		data_block_size = dif_ctx->block_size - dif_ctx->md_size;
		md_size = dif_ctx->md_size;

	while (remaining_io_buffer_length && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
		sg_ele = &wr->sg_list[wr->num_sge];
		while (remaining && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
			sg_ele->lkey = lkey;
			sg_ele->addr = (uintptr_t)((char *)iov->iov_base + *_offset);
		sge_len = spdk_min(remaining_io_buffer_length, *_remaining_data_block);
			sge_len = spdk_min(remaining, *_remaining_data_block);
			sg_ele->length = sge_len;
		remaining_io_buffer_length -= sge_len;
			remaining -= sge_len;
			*_remaining_data_block -= sge_len;
			*_offset += sge_len;

			sg_ele++;
			wr->num_sge++;

			if (*_remaining_data_block == 0) {
				/* skip metadata */
				*_offset += md_size;
				/* Metadata that do not fit this IO buffer will be included in the next IO buffer */
			remaining_io_buffer_length -= spdk_min(remaining_io_buffer_length, md_size);
				remaining -= spdk_min(remaining, md_size);
				*_remaining_data_block = data_block_size;
			}

		if (remaining_io_buffer_length == 0) {
			if (remaining == 0) {
				/* By subtracting the size of the last IOV from the offset, we ensure that we skip
				   the remaining metadata bits at the beginning of the next buffer */
				*_offset -= iov->iov_len;
			}
		}
	}

	return true;
}

/*
 * Fills iov and SGL, iov[i] points to buffer[i], SGE[i] is limited in length to data block size
 * and points to part of buffer
 */
static int
nvmf_rdma_fill_wr_sgl_with_md_interleave(struct spdk_nvmf_rdma_poll_group *rgroup,
nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,
		      struct spdk_nvmf_rdma_device *device,
		      struct spdk_nvmf_rdma_request *rdma_req,
		      struct ibv_send_wr *wr,
		uint32_t length,
		const struct spdk_dif_ctx *dif_ctx)
		      uint32_t length)
{
	struct spdk_nvmf_request *req = &rdma_req->req;
	uint32_t remaining_length = length;
	uint32_t remaining_data_block = dif_ctx->block_size - dif_ctx->md_size;
	struct spdk_dif_ctx *dif_ctx = NULL;
	uint32_t remaining_data_block = 0;
	uint32_t offset = 0;

	if (spdk_unlikely(rdma_req->dif_insert_or_strip)) {
		dif_ctx = &rdma_req->dif_ctx;
		remaining_data_block = dif_ctx->block_size - dif_ctx->md_size;
	}

	wr->num_sge = 0;

	while (remaining_length && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
		while (spdk_unlikely(!nvmf_rdma_fill_wr_sge_with_md_interleave(device, req, wr,
				     rdma_req->iovpos, &remaining_data_block, &offset, dif_ctx))) {
	while (length && wr->num_sge < SPDK_NVMF_MAX_SGL_ENTRIES) {
		while (spdk_unlikely(!nvmf_rdma_fill_wr_sge(device, req, wr, rdma_req->iovpos,
				     &remaining_data_block, &offset, dif_ctx))) {
			if (nvmf_rdma_replace_buffer(rgroup, &req->buffers[rdma_req->iovpos]) == -ENOMEM) {
				return -ENOMEM;
			}
@@ -1642,11 +1647,11 @@ nvmf_rdma_fill_wr_sgl_with_md_interleave(struct spdk_nvmf_rdma_poll_group *rgrou
							      ~NVMF_DATA_BUFFER_MASK);
		}

		remaining_length -= req->iov[rdma_req->iovpos].iov_len;
		length -= req->iov[rdma_req->iovpos].iov_len;
		rdma_req->iovpos++;
	}

	if (remaining_length) {
	if (length) {
		SPDK_ERRLOG("Not enough SG entries to hold data buffer\n");
		return -EINVAL;
	}
@@ -1654,54 +1659,6 @@ nvmf_rdma_fill_wr_sgl_with_md_interleave(struct spdk_nvmf_rdma_poll_group *rgrou
	return 0;
}

static bool
nvmf_rdma_fill_wr_sge(struct spdk_nvmf_rdma_device *device,
		      struct spdk_nvmf_request *req, struct ibv_send_wr *wr,
		      int iovpos)
{
	struct iovec	*iov = &req->iov[iovpos];
	struct ibv_sge	*sg_ele = &wr->sg_list[wr->num_sge];

	if (spdk_unlikely(!nvmf_rdma_get_lkey(device, iov, &sg_ele->lkey))) {
		/* This is a very rare case that can occur when using DPDK version < 19.05 */
		SPDK_ERRLOG("Data buffer split over multiple RDMA Memory Regions. Removing it from circulation.\n");
		return false;
	}

	sg_ele->addr = (uintptr_t)(iov->iov_base);
	sg_ele->length = iov->iov_len;
	wr->num_sge++;

	return true;
}

static int
nvmf_rdma_fill_wr_sgl(struct spdk_nvmf_rdma_poll_group *rgroup,
		      struct spdk_nvmf_rdma_device *device,
		      struct spdk_nvmf_rdma_request *rdma_req,
		      struct ibv_send_wr *wr,
		      uint32_t length)
{
	struct spdk_nvmf_request *req = &rdma_req->req;

	wr->num_sge = 0;
	while (length) {
		while (spdk_unlikely(!nvmf_rdma_fill_wr_sge(device, req, wr, rdma_req->iovpos))) {
			if (nvmf_rdma_replace_buffer(rgroup, &req->buffers[rdma_req->iovpos]) == -ENOMEM) {
				return -ENOMEM;
			}
			req->iov[rdma_req->iovpos].iov_base = (void *)((uintptr_t)(req->buffers[rdma_req->iovpos] +
							      NVMF_DATA_BUFFER_MASK) &
							      ~NVMF_DATA_BUFFER_MASK);
		}

		length -= req->iov[rdma_req->iovpos].iov_len;
		rdma_req->iovpos++;
	}

	return 0;
}

static void
nvmf_rdma_fill_buffers(struct spdk_nvmf_rdma_transport *rtransport,
		       struct spdk_nvmf_request *req,
@@ -1743,12 +1700,7 @@ spdk_nvmf_rdma_request_fill_iovs(struct spdk_nvmf_rdma_transport *rtransport,

	nvmf_rdma_fill_buffers(rtransport, req, length);

	if (spdk_unlikely(rdma_req->dif_insert_or_strip)) {
		rc = nvmf_rdma_fill_wr_sgl_with_md_interleave(rgroup, device, rdma_req,
				wr, length, &rdma_req->dif_ctx);
	} else {
	rc = nvmf_rdma_fill_wr_sgl(rgroup, device, rdma_req, wr, length);
	}
	if (rc != 0) {
		goto err_exit;
	}