Commit 85fa4324 authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Tomasz Zawadzki
Browse files

nvmf/tcp: Support ICD for fabric/admin commands



According to the SPEC we should support up to 8192 bytes
of ICD for admin and fabric commands. Transport configuration
parameter in_capsule_data_size is applied to all qpair types -
admin and IO. Also we allocate resources when we get a connection
request, so we don't know qpair type at this moment.
Create a list of buffer in TCP poll group to support ICD up
to 8192 bytes when configuration ICD is less than this value.
The number of elements in this pool is hardcoded, it is planned
to add a new configuration parameter later.

Fixes issue #1569
Change-Id: I8589e3e2ea95d515f5503c6de7c1ee40aaf7b6da
Signed-off-by: default avatarAlexey Marchuk <alexeymar@mellanox.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4754


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent c31ad668
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -43,7 +43,7 @@
#define SPDK_NVME_TCP_DIGEST_ALIGNMENT		4
#define SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT	30
#define SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR	8

#define SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE	8192u
/*
 * Maximum number of SGL elements.
 */
+1 −2
Original line number Diff line number Diff line
@@ -57,7 +57,6 @@
#define NVME_TCP_HPDA_DEFAULT			0
#define NVME_TCP_MAX_R2T_DEFAULT		1
#define NVME_TCP_PDU_H2C_MIN_DATA_SIZE		4096
#define NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE	8192

/* NVMe TCP transport extensions for spdk_nvme_ctrlr */
struct nvme_tcp_ctrlr {
@@ -518,7 +517,7 @@ nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req,
	if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		max_incapsule_data_size = ctrlr->ioccsz_bytes;
		if ((req->cmd.opc == SPDK_NVME_OPC_FABRIC) || nvme_qpair_is_admin_queue(&tqpair->qpair)) {
			max_incapsule_data_size = NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE;
			max_incapsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE;
		}

		if (req->payload_size <= max_incapsule_data_size) {
+127 −8
Original line number Diff line number Diff line
@@ -254,12 +254,23 @@ struct spdk_nvmf_tcp_qpair {
	TAILQ_ENTRY(spdk_nvmf_tcp_qpair)	link;
};

struct spdk_nvmf_tcp_control_msg {
	STAILQ_ENTRY(spdk_nvmf_tcp_control_msg) link;
};

struct spdk_nvmf_tcp_control_msg_list {
	void *msg_buf;
	STAILQ_HEAD(, spdk_nvmf_tcp_control_msg) free_msgs;
};

struct spdk_nvmf_tcp_poll_group {
	struct spdk_nvmf_transport_poll_group	group;
	struct spdk_sock_group			*sock_group;

	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	qpairs;
	TAILQ_HEAD(, spdk_nvmf_tcp_qpair)	await_req;

	struct spdk_nvmf_tcp_control_msg_list	*control_msg_list;
};

struct spdk_nvmf_tcp_port {
@@ -295,6 +306,7 @@ static const struct spdk_json_object_decoder tcp_transport_opts_decoder[] = {

static bool nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
				 struct spdk_nvmf_tcp_req *tcp_req);
static void nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);

static void
nvmf_tcp_req_set_state(struct spdk_nvmf_tcp_req *tcp_req,
@@ -1001,6 +1013,49 @@ nvmf_tcp_discover(struct spdk_nvmf_transport *transport,
	entry->tsas.tcp.sectype = SPDK_NVME_TCP_SECURITY_NONE;
}

static struct spdk_nvmf_tcp_control_msg_list *
nvmf_tcp_control_msg_list_create(uint16_t num_messages)
{
	struct spdk_nvmf_tcp_control_msg_list *list;
	struct spdk_nvmf_tcp_control_msg *msg;
	uint16_t i;

	list = calloc(1, sizeof(*list));
	if (!list) {
		SPDK_ERRLOG("Failed to allocate memory for list structure\n");
		return NULL;
	}

	list->msg_buf = spdk_zmalloc(num_messages * SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE,
				     NVMF_DATA_BUFFER_ALIGNMENT, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (!list->msg_buf) {
		SPDK_ERRLOG("Failed to allocate memory for control message buffers\n");
		free(list);
		return NULL;
	}

	STAILQ_INIT(&list->free_msgs);

	for (i = 0; i < num_messages; i++) {
		msg = (struct spdk_nvmf_tcp_control_msg *)((char *)list->msg_buf + i *
				SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
		STAILQ_INSERT_TAIL(&list->free_msgs, msg, link);
	}

	return list;
}

static void
nvmf_tcp_control_msg_list_free(struct spdk_nvmf_tcp_control_msg_list *list)
{
	if (!list) {
		return;
	}

	spdk_free(list->msg_buf);
	free(list);
}

static struct spdk_nvmf_transport_poll_group *
nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
{
@@ -1019,10 +1074,20 @@ nvmf_tcp_poll_group_create(struct spdk_nvmf_transport *transport)
	TAILQ_INIT(&tgroup->qpairs);
	TAILQ_INIT(&tgroup->await_req);

	if (transport->opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) {
		SPDK_DEBUGLOG(nvmf_tcp, "ICD %u is less than min required for admin/fabric commands (%u). "
			      "Creating control messages list\n", transport->opts.in_capsule_data_size,
			      SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE);
		tgroup->control_msg_list = nvmf_tcp_control_msg_list_create(32);
		if (!tgroup->control_msg_list) {
			goto cleanup;
		}
	}

	return &tgroup->group;

cleanup:
	free(tgroup);
	nvmf_tcp_poll_group_destroy(&tgroup->group);
	return NULL;
}

@@ -1049,6 +1114,9 @@ nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)

	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
	spdk_sock_group_close(&tgroup->sock_group);
	if (tgroup->control_msg_list) {
		nvmf_tcp_control_msg_list_free(tgroup->control_msg_list);
	}

	free(tgroup);
}
@@ -1867,6 +1935,31 @@ nvmf_tcp_sock_process(struct spdk_nvmf_tcp_qpair *tqpair)
	return rc;
}

static inline void *
nvmf_tcp_control_msg_get(struct spdk_nvmf_tcp_control_msg_list *list)
{
	struct spdk_nvmf_tcp_control_msg *msg;

	assert(list);

	msg = STAILQ_FIRST(&list->free_msgs);
	if (!msg) {
		SPDK_DEBUGLOG(nvmf_tcp, "Out of control messages\n");
		return NULL;
	}
	STAILQ_REMOVE_HEAD(&list->free_msgs, link);
	return msg;
}

static inline void
nvmf_tcp_control_msg_put(struct spdk_nvmf_tcp_control_msg_list *list, void *_msg)
{
	struct spdk_nvmf_tcp_control_msg *msg = _msg;

	assert(list);
	STAILQ_INSERT_HEAD(&list->free_msgs, msg, link);
}

static int
nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
		       struct spdk_nvmf_transport *transport,
@@ -1876,6 +1969,7 @@ nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
	struct spdk_nvme_cmd			*cmd;
	struct spdk_nvme_cpl			*rsp;
	struct spdk_nvme_sgl_descriptor		*sgl;
	struct spdk_nvmf_tcp_poll_group		*tgroup;
	uint32_t				length;

	cmd = &req->cmd->nvme_cmd;
@@ -1922,6 +2016,7 @@ nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
		   sgl->unkeyed.subtype == SPDK_NVME_SGL_SUBTYPE_OFFSET) {
		uint64_t offset = sgl->address;
		uint32_t max_len = transport->opts.in_capsule_data_size;
		assert(tcp_req->has_incapsule_data);

		SPDK_DEBUGLOG(nvmf_tcp, "In-capsule data: offset 0x%" PRIx64 ", length 0x%x\n",
			      offset, length);
@@ -1934,16 +2029,33 @@ nvmf_tcp_req_parse_sgl(struct spdk_nvmf_tcp_req *tcp_req,
		}
		max_len -= (uint32_t)offset;

		if (length > max_len) {
		if (spdk_unlikely(length > max_len)) {
			/* According to the SPEC we should support ICD up to 8192 bytes for admin and fabric commands */
			if (length <= SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE &&
			    (cmd->opc == SPDK_NVME_OPC_FABRIC || req->qpair->qid == 0)) {

				/* Get a buffer from dedicated list */
				SPDK_DEBUGLOG(nvmf_tcp, "Getting a buffer from control msg list\n");
				tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
				assert(tgroup->control_msg_list);
				req->data = nvmf_tcp_control_msg_get(tgroup->control_msg_list);
				if (!req->data) {
					/* No available buffers. Queue this request up. */
					SPDK_DEBUGLOG(nvmf_tcp, "No available ICD buffers. Queueing request %p\n", tcp_req);
					return 0;
				}
			} else {
				SPDK_ERRLOG("In-capsule data length 0x%x exceeds capsule length 0x%x\n",
					    length, max_len);
				rsp->status.sc = SPDK_NVME_SC_DATA_SGL_LENGTH_INVALID;
				return -1;
			}
		} else {
			req->data = tcp_req->buf;
		}

		req->data = tcp_req->buf + offset;
		req->data_from_pool = false;
		req->length = length;
		req->data_from_pool = false;

		if (spdk_unlikely(req->dif.dif_insert_or_strip)) {
			length = spdk_dif_get_length_with_md(length, &req->dif.dif_ctx);
@@ -2130,6 +2242,7 @@ nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
	bool					progress = false;
	struct spdk_nvmf_transport		*transport = &ttransport->transport;
	struct spdk_nvmf_transport_poll_group	*group;
	struct spdk_nvmf_tcp_poll_group		*tgroup;

	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct spdk_nvmf_tcp_qpair, qpair);
	group = &tqpair->group->group;
@@ -2301,6 +2414,12 @@ nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
			if (tcp_req->req.data_from_pool) {
				spdk_nvmf_request_free_buffers(&tcp_req->req, group, transport);
			} else if (spdk_unlikely(tcp_req->has_incapsule_data && (tcp_req->cmd.opc == SPDK_NVME_OPC_FABRIC ||
						 tqpair->qpair.qid == 0) && tcp_req->req.length > transport->opts.in_capsule_data_size)) {
				tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
				assert(tgroup->control_msg_list);
				SPDK_DEBUGLOG(nvmf_tcp, "Put buf to control msg list\n");
				nvmf_tcp_control_msg_put(tgroup->control_msg_list, tcp_req->req.data);
			}
			tcp_req->req.length = 0;
			tcp_req->req.iovcnt = 0;
+5 −0
Original line number Diff line number Diff line
@@ -459,6 +459,7 @@ test_nvmf_tcp_poll_group_create(void)
{
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_transport_poll_group *group;
	struct spdk_nvmf_tcp_poll_group *tgroup;
	struct spdk_thread *thread;
	struct spdk_nvmf_transport_opts opts;
	struct spdk_sock_group grp = {};
@@ -482,6 +483,10 @@ test_nvmf_tcp_poll_group_create(void)
	group = nvmf_tcp_poll_group_create(transport);
	MOCK_CLEAR_P(spdk_sock_group_create);
	SPDK_CU_ASSERT_FATAL(group);
	if (opts.in_capsule_data_size < SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE) {
		tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
		SPDK_CU_ASSERT_FATAL(tgroup->control_msg_list);
	}
	group->transport = transport;
	nvmf_tcp_poll_group_destroy(group);
	nvmf_tcp_destroy(transport);