Commit 3c88819b authored by Ziye Yang, committed by Jim Harris

nvmf/tcp: Use the common buffer cache for each polling group

Purpose: Avoid buffer contention between polling groups when the
NVMe-oF TCP transport is configured to run on multiple cores.
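
The core of the change is easy to see in isolation: each polling group keeps a
small, bounded STAILQ of buffers in front of the shared spdk_mempool, so the
I/O path touches the contended pool only on a cache miss, or on release when
the local cache is already full. Below is a minimal standalone sketch of that
pattern, not SPDK code: poll_group_cache, pg_buf_get/pg_buf_put, and the
shared_pool_* stand-ins (for spdk_mempool_get/spdk_mempool_put) are all
hypothetical names.

/* Minimal standalone model of the per-poll-group buffer cache added by
 * this commit. shared_pool_get()/shared_pool_put() stand in for the
 * shared, contended buffer pool; all names here are hypothetical. */
#include <stdlib.h>
#include <stdint.h>
#include <sys/queue.h>

/* As in the commit, a free buffer's first bytes are reused to hold the
 * list linkage (the cast to struct spdk_nvmf_transport_pg_cache_buf in
 * the diff), so the cache costs no extra memory per buffer. */
struct pg_cache_buf {
	STAILQ_ENTRY(pg_cache_buf) link;
};

struct poll_group_cache {
	STAILQ_HEAD(, pg_cache_buf) buf_cache;
	uint32_t buf_cache_count;
	uint32_t buf_cache_size;   /* upper bound on cached buffers */
};

/* Stand-ins for the shared buffer pool. */
static void *shared_pool_get(size_t size) { return malloc(size); }
static void shared_pool_put(void *buf) { free(buf); }

#define BUF_SIZE 4096

/* Cache-first allocation: fall back to the shared pool only on a miss. */
static void *
pg_buf_get(struct poll_group_cache *pg)
{
	struct pg_cache_buf *buf;

	if (!STAILQ_EMPTY(&pg->buf_cache)) {
		buf = STAILQ_FIRST(&pg->buf_cache);
		STAILQ_REMOVE_HEAD(&pg->buf_cache, link);
		pg->buf_cache_count--;
		return buf;
	}
	return shared_pool_get(BUF_SIZE);
}

/* Release: refill the local cache up to its bound, then spill to the pool. */
static void
pg_buf_put(struct poll_group_cache *pg, void *buf)
{
	if (pg->buf_cache_count < pg->buf_cache_size) {
		STAILQ_INSERT_HEAD(&pg->buf_cache, (struct pg_cache_buf *)buf, link);
		pg->buf_cache_count++;
	} else {
		shared_pool_put(buf);
	}
}

int main(void)
{
	struct poll_group_cache pg = { .buf_cache_size = 32 };
	STAILQ_INIT(&pg.buf_cache);

	void *b = pg_buf_get(&pg);   /* miss: comes from the shared pool */
	pg_buf_put(&pg, b);          /* cached for the next request */
	b = pg_buf_get(&pg);         /* hit: no shared-pool access */
	shared_pool_put(b);
	return 0;
}

Because a polling group is serviced by a single thread, the local cache needs
no locking; cross-core synchronization is paid only on the shared-pool
fallback, which is what removes the contention the commit message describes.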

Change-Id: I1c1b0126f3aad28f339ec8bf9355282e08cfa8db
Signed-off-by: Ziye Yang <optimistyzy@gmail.com>
Reviewed-on: https://review.gerrithub.io/c/440444
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Seth Howell <seth.howell5141@gmail.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 3bb815ae
+41 −18
@@ -297,6 +297,10 @@ struct spdk_nvmf_tcp_transport {
struct spdk_nvmf_tcp_mgmt_channel {
	/* Requests that are waiting to obtain a data buffer */
	TAILQ_HEAD(, nvme_tcp_req)	pending_data_buf_queue;

	/* Point to the transport polling group */
	struct spdk_nvmf_tcp_poll_group	*tgroup;

};

static void spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport,
@@ -1301,6 +1305,7 @@ spdk_nvmf_tcp_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group)
	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);
	spdk_sock_group_close(&tgroup->sock_group);
	spdk_poller_unregister(&tgroup->timeout_poller);

	free(tgroup);
}

@@ -2150,6 +2155,26 @@ spdk_nvmf_tcp_req_get_xfer(struct nvme_tcp_req *tcp_req) {
	return xfer;
}

static void
spdk_nvmf_tcp_request_free_buffers(struct nvme_tcp_req *tcp_req,
				   struct spdk_nvmf_transport_poll_group *group, struct spdk_nvmf_transport *transport)
{
	for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) {
		assert(tcp_req->buffers[i] != NULL);
		if (group->buf_cache_count < group->buf_cache_size) {
			STAILQ_INSERT_HEAD(&group->buf_cache,
					   (struct spdk_nvmf_transport_pg_cache_buf *)tcp_req->buffers[i], link);
			group->buf_cache_count++;
		} else {
			spdk_mempool_put(transport->data_buf_pool, tcp_req->buffers[i]);
		}
		tcp_req->req.iov[i].iov_base = NULL;
		tcp_req->buffers[i] = NULL;
		tcp_req->req.iov[i].iov_len = 0;
	}
	tcp_req->data_from_pool = false;
}

static int
spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
			    struct nvme_tcp_req *tcp_req)
@@ -2157,13 +2182,21 @@ spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
	void		*buf = NULL;
	uint32_t	length = tcp_req->req.length;
	uint32_t	i = 0;
	struct nvme_tcp_qpair	*tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct nvme_tcp_qpair, qpair);
	struct spdk_nvmf_transport_poll_group  *group = &tqpair->ch->tgroup->group;

	tcp_req->req.iovcnt = 0;
	while (length) {
		if (!(STAILQ_EMPTY(&group->buf_cache))) {
			group->buf_cache_count--;
			buf = STAILQ_FIRST(&group->buf_cache);
			STAILQ_REMOVE_HEAD(&group->buf_cache, link);
		} else {
			buf = spdk_mempool_get(ttransport->transport.data_buf_pool);
			if (!buf) {
				goto nomem;
			}
		}

		tcp_req->req.iov[i].iov_base = (void *)((uintptr_t)(buf + NVMF_DATA_BUFFER_MASK) &
							~NVMF_DATA_BUFFER_MASK);
@@ -2175,17 +2208,10 @@ spdk_nvmf_tcp_req_fill_iovs(struct spdk_nvmf_tcp_transport *ttransport,
	}

	tcp_req->data_from_pool = true;

	return 0;

nomem:
	while (i) {
		i--;
		spdk_mempool_put(ttransport->transport.data_buf_pool, tcp_req->buffers[i]);
		tcp_req->req.iov[i].iov_base = NULL;
		tcp_req->req.iov[i].iov_len = 0;

	}
	spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
	tcp_req->req.iovcnt = 0;
	return -ENOMEM;
}
@@ -2465,8 +2491,10 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
	int				rc;
	enum spdk_nvmf_tcp_req_state prev_state;
	bool				progress = false;
	struct spdk_nvmf_transport_poll_group *group;

	tqpair = SPDK_CONTAINEROF(tcp_req->req.qpair, struct nvme_tcp_qpair, qpair);
	group = &tqpair->ch->tgroup->group;
	assert(tcp_req->state != TCP_REQUEST_STATE_FREE);

	/* The loop here is to allow for several back-to-back state changes. */
@@ -2590,13 +2618,7 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
		case TCP_REQUEST_STATE_COMPLETED:
			spdk_trace_record(TRACE_TCP_REQUEST_STATE_COMPLETED, 0, 0, (uintptr_t)tcp_req, 0);
			if (tcp_req->data_from_pool) {
				/* Put the buffer/s back in the pool */
				for (uint32_t i = 0; i < tcp_req->req.iovcnt; i++) {
					spdk_mempool_put(ttransport->transport.data_buf_pool, tcp_req->buffers[i]);
					tcp_req->req.iov[i].iov_base = NULL;
					tcp_req->buffers[i] = NULL;
				}
				tcp_req->data_from_pool = false;
				spdk_nvmf_tcp_request_free_buffers(tcp_req, group, &ttransport->transport);
			}
			tcp_req->req.length = 0;
			tcp_req->req.iovcnt = 0;
@@ -2697,6 +2719,7 @@ spdk_nvmf_tcp_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
		return -1;
	}

	tqpair->ch->tgroup = tgroup;
	tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
	tqpair->timeout = SPDK_NVME_TCP_QPAIR_EXIT_TIMEOUT;
	tqpair->last_pdu_time = spdk_get_ticks();
+20 −4
@@ -51,6 +51,7 @@
#define UT_IO_UNIT_SIZE 1024
#define UT_MAX_AQ_DEPTH 64
#define UT_SQ_HEAD_MAX 128
#define UT_NUM_SHARED_BUFFERS 128

SPDK_LOG_REGISTER_COMPONENT("nvmf", SPDK_LOG_NVMF)
SPDK_LOG_REGISTER_COMPONENT("nvme", SPDK_LOG_NVME)
@@ -216,6 +217,7 @@ test_nvmf_tcp_create(void)
	opts.max_io_size = UT_MAX_IO_SIZE;
	opts.io_unit_size = UT_IO_UNIT_SIZE;
	opts.max_aq_depth = UT_MAX_AQ_DEPTH;
	opts.num_shared_buffers = UT_NUM_SHARED_BUFFERS;
	/* expect success */
	transport = spdk_nvmf_tcp_create(&opts);
	CU_ASSERT_PTR_NOT_NULL(transport);
@@ -239,6 +241,7 @@ test_nvmf_tcp_create(void)
	opts.max_io_size = UT_MAX_IO_SIZE;
	opts.io_unit_size = UT_MAX_IO_SIZE + 1;
	opts.max_aq_depth = UT_MAX_AQ_DEPTH;
	opts.num_shared_buffers = UT_NUM_SHARED_BUFFERS;
	/* expect success */
	transport = spdk_nvmf_tcp_create(&opts);
	CU_ASSERT_PTR_NOT_NULL(transport);
@@ -288,6 +291,7 @@ test_nvmf_tcp_destroy(void)
	opts.max_io_size = UT_MAX_IO_SIZE;
	opts.io_unit_size = UT_IO_UNIT_SIZE;
	opts.max_aq_depth = UT_MAX_AQ_DEPTH;
	opts.num_shared_buffers = UT_NUM_SHARED_BUFFERS;
	transport = spdk_nvmf_tcp_create(&opts);
	CU_ASSERT_PTR_NOT_NULL(transport);
	transport->opts = opts;
@@ -300,17 +304,29 @@ test_nvmf_tcp_destroy(void)
static void
test_nvmf_tcp_poll_group_create(void)
{
	struct spdk_nvmf_tcp_transport ttransport;
	struct spdk_nvmf_transport *transport;
	struct spdk_nvmf_transport_poll_group *group;
	struct spdk_thread *thread;
	struct spdk_nvmf_transport_opts opts;

	thread = spdk_thread_create(NULL);
	SPDK_CU_ASSERT_FATAL(thread != NULL);
	spdk_set_thread(thread);

	memset(&ttransport, 0, sizeof(ttransport));
	group = spdk_nvmf_tcp_poll_group_create(&ttransport.transport);
	CU_ASSERT_PTR_NOT_NULL(group);
	memset(&opts, 0, sizeof(opts));
	opts.max_queue_depth = UT_MAX_QUEUE_DEPTH;
	opts.max_qpairs_per_ctrlr = UT_MAX_QPAIRS_PER_CTRLR;
	opts.in_capsule_data_size = UT_IN_CAPSULE_DATA_SIZE;
	opts.max_io_size = UT_MAX_IO_SIZE;
	opts.io_unit_size = UT_IO_UNIT_SIZE;
	opts.max_aq_depth = UT_MAX_AQ_DEPTH;
	opts.num_shared_buffers = UT_NUM_SHARED_BUFFERS;
	transport = spdk_nvmf_tcp_create(&opts);
	CU_ASSERT_PTR_NOT_NULL(transport);
	transport->opts = opts;
	group = spdk_nvmf_tcp_poll_group_create(transport);
	SPDK_CU_ASSERT_FATAL(group);
	group->transport = transport;
	spdk_nvmf_tcp_poll_group_destroy(group);

	spdk_thread_exit(thread);