Commit 08c8ab2b authored by Jacek Kalwas, committed by Jim Harris
Browse files

nvme/tcp: support memory domain translate/invalidate



The invalidate operation allows the data buffer to be released sooner, which
increases the chance of re-using it and is therefore more cache friendly.

Change-Id: I6c883efd2d00f856841eb2a7a4e77acb35fd8677
Signed-off-by: Ben Walker <ben@nvidia.com>
Signed-off-by: Jacek Kalwas <jacek.kalwas@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/21916


Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <jim.harris@samsung.com>
parent d6ad7ba4
Loading
Loading
Loading
Loading
+71 −5
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include "spdk/trace.h"
#include "spdk/util.h"
#include "spdk/nvmf.h"
#include "spdk/dma.h"

#include "spdk_internal/nvme_tcp.h"
#include "spdk_internal/trace_defs.h"
@@ -720,6 +721,33 @@ nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
	return 0;
}

static int
nvme_tcp_try_memory_translation(struct nvme_tcp_req *tcp_req, void **addr, uint32_t length)
{
	struct nvme_request *req = tcp_req->req;
	struct spdk_memory_domain_translation_result translation = {
		.iov_count = 0,
		.size = sizeof(translation)
	};
	int rc;

	if (!(req->payload.opts && req->payload.opts->memory_domain)) {
		return 0;
	}

	rc = spdk_memory_domain_translate_data(req->payload.opts->memory_domain,
					       req->payload.opts->memory_domain_ctx, spdk_memory_domain_get_system_domain(), NULL, *addr, length,
					       &translation);
	if (spdk_unlikely(rc || translation.iov_count != 1)) {
		SPDK_ERRLOG("DMA memory translation failed, rc %d, iov_count %u\n", rc, translation.iov_count);
		return -EFAULT;
	}

	assert(length == translation.iov.iov_len);
	*addr = translation.iov.iov_base;
	return 0;
}

/*
 * Build SGL describing contiguous payload buffer.
 */
@@ -730,14 +758,21 @@ nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req

	/* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL,
	 * so just double cast it to make it go away */
	tcp_req->iov[0].iov_base = (void *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset);
	tcp_req->iov[0].iov_len = req->payload_size;
	tcp_req->iovcnt = 1;
	void *addr = (void *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset);
	size_t length = req->payload_size;
	int rc;

	SPDK_DEBUGLOG(nvme, "enter\n");

	assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
	rc = nvme_tcp_try_memory_translation(tcp_req, &addr, length);
	if (spdk_unlikely(rc)) {
		return rc;
	}

	tcp_req->iov[0].iov_base = addr;
	tcp_req->iov[0].iov_len = length;
	tcp_req->iovcnt = 1;
	return 0;
}

@@ -763,13 +798,20 @@ nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *t
	remaining_size = req->payload_size;

	do {
		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base,
					      &length);
		void *addr;

		rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &addr, &length);
		if (rc) {
			return -1;
		}

		rc = nvme_tcp_try_memory_translation(tcp_req, &addr, length);
		if (spdk_unlikely(rc)) {
			return rc;
		}

		length = spdk_min(length, remaining_size);
		tcp_req->iov[iovcnt].iov_base = addr;
		tcp_req->iov[iovcnt].iov_len = length;
		remaining_size -= length;
		iovcnt++;
@@ -883,6 +925,12 @@ nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
		SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req);
		nvme_tcp_send_h2c_data(tcp_req);
	} else {
		if (tcp_req->in_capsule_data && tcp_req->req->payload.opts &&
		    tcp_req->req->payload.opts->memory_domain) {
			spdk_memory_domain_invalidate_data(tcp_req->req->payload.opts->memory_domain,
							   tcp_req->req->payload.opts->memory_domain_ctx, tcp_req->iov, tcp_req->iovcnt);
		}

		nvme_tcp_req_complete_safe(tcp_req);
	}
}
@@ -1751,6 +1799,11 @@ nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg)
			return;
		}

		if (tcp_req->req->payload.opts && tcp_req->req->payload.opts->memory_domain) {
			spdk_memory_domain_invalidate_data(tcp_req->req->payload.opts->memory_domain,
							   tcp_req->req->payload.opts->memory_domain_ctx, tcp_req->iov, tcp_req->iovcnt);
		}

		/* Need also call this function to free the resource */
		nvme_tcp_req_complete_safe(tcp_req);
	}
@@ -2889,6 +2942,17 @@ nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
	free(stats);
}

static int
nvme_tcp_ctrlr_get_memory_domains(const struct spdk_nvme_ctrlr *ctrlr,
				  struct spdk_memory_domain **domains, int array_size)
{
	if (domains && array_size > 0) {
		domains[0] = spdk_memory_domain_get_system_domain();
	}

	return 1;
}

const struct spdk_nvme_transport_ops tcp_ops = {
	.name = "TCP",
	.type = SPDK_NVME_TRANSPORT_TCP,
@@ -2914,6 +2978,8 @@ const struct spdk_nvme_transport_ops tcp_ops = {
	.ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
	.ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,

	.ctrlr_get_memory_domains = nvme_tcp_ctrlr_get_memory_domains,

	.qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
	.qpair_reset = nvme_tcp_qpair_reset,
	.qpair_submit_request = nvme_tcp_qpair_submit_request,
+2 −2
Original line number Diff line number Diff line
@@ -39,12 +39,12 @@ DEPDIRS-rdma := log util
DEPDIRS-reduce := log util
DEPDIRS-thread := log util trace

DEPDIRS-nvme := log sock util trace
DEPDIRS-nvme := log sock util trace dma
ifeq ($(OS),Linux)
DEPDIRS-nvme += vfio_user
endif
ifeq ($(CONFIG_RDMA),y)
DEPDIRS-nvme += rdma dma
DEPDIRS-nvme += rdma
endif

DEPDIRS-blob := log util thread dma
+8 −0
Original line number Diff line number Diff line
@@ -49,6 +49,14 @@ DEFINE_STUB_V(spdk_nvme_qpair_print_command, (struct spdk_nvme_qpair *qpair,
DEFINE_STUB_V(spdk_nvme_qpair_print_completion, (struct spdk_nvme_qpair *qpair,
		struct spdk_nvme_cpl *cpl));

DEFINE_STUB(spdk_memory_domain_get_system_domain, struct spdk_memory_domain *, (void), NULL);
DEFINE_STUB(spdk_memory_domain_translate_data, int,
	    (struct spdk_memory_domain *src_domain, void *src_domain_ctx,
	     struct spdk_memory_domain *dst_domain, struct spdk_memory_domain_translation_ctx *dst_domain_ctx,
	     void *addr, size_t len, struct spdk_memory_domain_translation_result *result), 0);
DEFINE_STUB_V(spdk_memory_domain_invalidate_data, (struct spdk_memory_domain *domain,
		void *domain_ctx, struct iovec *iov, uint32_t iovcnt));

static void
nvme_transport_ctrlr_disconnect_qpair_done_mocked(struct spdk_nvme_qpair *qpair)
{