Commit 18ede8d3 authored by Krzysztof Goreczny's avatar Krzysztof Goreczny Committed by Konrad Sztyber
Browse files

nvmf: enable iobuf based queuing for nvmf requests



iobuf can internally queue requests that await free buffers.
Currently all nvmf transports are handling pending buffers internally.

With this change spdk_nvmf_request_get_buffers() API works as follows:
- Whenever all iovecs are allocated immediately then nothing changes
  compared to the previous implementation.
- If iobuf does not have enough buffers then there are two flows:
  - if req_get_buffers_done is not set in the spdk_nvmf_transport_ops
    then again, nothing changes. All (if there were any) iovecs are
    released and caller must try again later.
  - if callback was set then caller will be notified once all iovecs are
    allocated.

To use iobuf queueing, the following changes are needed:
- spdk_nvmf_transport_ops has now a callback function pointer used to
  notify that previously interrupted buffer allocation is now complete.
- spdk_nvmf_request has fields to store spdk_iobuf_entry and remaining
  buffer length. Both are not in use when buffer allocation succeeds
  immediately so there is no impact on the number of hot cache lines
  within spdk_nvmf_request struct.
- transport.c contains all the logic to track buffers allocation
  progress and notifies requester when it is done.

As requests waiting for buffers might get aborted, another API to
remove such a request from the iobuf queue is also added.

This patch brings only API change. Usage in transport implementations
will come in next patches.

Also, there is no support yet for the stripped buffers.

Change-Id: I95e30750dd83c27bb3be2f77b7635168a2b63e19
Signed-off-by: default avatarKrzysztof Goreczny <krzysztof.goreczny@dell.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/23214


Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
parent a48eba16
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -2,6 +2,20 @@

## v24.09: (Upcoming Release)

### nvmf

Enable iobuf based queuing for nvmf requests when there are not enough free buffers available.
Perspective from the user of the spdk_nvmf_request_get_buffers() API is that whenever all iovecs
are allocated immediately then nothing changes compared to the previous implementation.
If iobuf does not have enough buffers then there are two flows now:

- if req_get_buffers_done is not set in the spdk_nvmf_transport_ops then again, nothing changes. All
  (if there were any) iovecs are released and the caller must try again later.
- if callback was set then caller will be notified once all iovecs are allocated.

As requests waiting for buffers might get aborted, another API to remove such a request from
the iobuf queue is also added.

### sock

New functions that allow registering an interrupt for a given socket group:
+17 −1
Original line number Diff line number Diff line
@@ -119,10 +119,16 @@ struct spdk_nvmf_request {
	struct spdk_poller		*poller;
	struct spdk_bdev_io		*zcopy_bdev_io; /* Contains the bdev_io when using ZCOPY */

	/* Internal state that keeps track of the iobuf allocation progress */
	struct {
		struct spdk_iobuf_entry	entry;
		uint32_t		remaining_length;
	} iobuf;

	/* Timeout tracked for connect and abort flows. */
	uint64_t timeout_tsc;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_request) == 776, "Incorrect size");
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_request) == 808, "Incorrect size");

enum spdk_nvmf_qpair_state {
	SPDK_NVMF_QPAIR_UNINITIALIZED = 0,
@@ -398,6 +404,16 @@ struct spdk_nvmf_transport_ops {
	 */
	int (*req_complete)(struct spdk_nvmf_request *req);

	/**
	 * Callback for the iobuf based queuing of requests awaiting free buffers.
	 * Called when all requested buffers are allocated for the given request.
	 * Used only if initial spdk_iobuf_get() call didn't allocate all buffers at once
	 * and request was queued internally in the iobuf until free buffers become available.
	 * This callback is optional and not all transports need to implement it.
	 * If not set then transport implementation must queue such requests internally.
	 */
	void (*req_get_buffers_done)(struct spdk_nvmf_request *req);

	/*
	 * Deinitialize a connection.
	 */
+80 −13
Original line number Diff line number Diff line
@@ -836,8 +836,6 @@ spdk_nvmf_request_free_buffers(struct spdk_nvmf_request *req,
	req->data_from_pool = false;
}

typedef int (*set_buffer_callback)(struct spdk_nvmf_request *req, void *buf,
				   uint32_t length,	uint32_t io_unit_size);
static int
nvmf_request_set_buffer(struct spdk_nvmf_request *req, void *buf, uint32_t length,
			uint32_t io_unit_size)
@@ -864,13 +862,16 @@ nvmf_request_set_stripped_buffer(struct spdk_nvmf_request *req, void *buf, uint3
	return length;
}

static void nvmf_request_iobuf_get_cb(struct spdk_iobuf_entry *entry, void *buf);

static int
nvmf_request_get_buffers(struct spdk_nvmf_request *req,
			 struct spdk_nvmf_transport_poll_group *group,
			 struct spdk_nvmf_transport *transport,
			 uint32_t length, uint32_t io_unit_size,
			 set_buffer_callback cb_func)
			 bool stripped_buffers)
{
	struct spdk_iobuf_entry *entry = NULL;
	uint32_t num_buffers;
	uint32_t i = 0;
	void *buffer;
@@ -883,20 +884,52 @@ nvmf_request_get_buffers(struct spdk_nvmf_request *req,
		return -EINVAL;
	}

	/* Use iobuf queuing only if transport supports it */
	if (transport->ops->req_get_buffers_done != NULL) {
		entry = &req->iobuf.entry;
	}

	while (i < num_buffers) {
		buffer = spdk_iobuf_get(group->buf_cache, spdk_min(io_unit_size, length), NULL, NULL);
		buffer = spdk_iobuf_get(group->buf_cache, spdk_min(io_unit_size, length), entry,
					nvmf_request_iobuf_get_cb);
		if (spdk_unlikely(buffer == NULL)) {
			req->iobuf.remaining_length = length;
			return -ENOMEM;
		}
		length = cb_func(req, buffer, length, io_unit_size);
		if (stripped_buffers) {
			length = nvmf_request_set_stripped_buffer(req, buffer, length, io_unit_size);
		} else {
			length = nvmf_request_set_buffer(req, buffer, length, io_unit_size);
		}
		i++;
	}

	assert(length == 0);
	req->data_from_pool = true;

	return 0;
}

/*
 * iobuf completion callback, invoked when a buffer requested via
 * spdk_iobuf_get() in nvmf_request_get_buffers() becomes available after the
 * request was queued inside the iobuf layer.  Attaches the newly delivered
 * buffer to the request, retries allocation of any remaining buffers, and —
 * once the whole request is satisfied — notifies the transport through its
 * req_get_buffers_done callback.  Only the non-stripped buffer path is handled
 * here (iobuf queueing with stripped buffers is not supported yet).
 */
static void
nvmf_request_iobuf_get_cb(struct spdk_iobuf_entry *entry, void *buf)
{
	/* The iobuf entry is embedded in the request, so recover the request from it. */
	struct spdk_nvmf_request *req = SPDK_CONTAINEROF(entry, struct spdk_nvmf_request, iobuf.entry);
	struct spdk_nvmf_transport *transport = req->qpair->transport;
	struct spdk_nvmf_poll_group *group = req->qpair->group;
	struct spdk_nvmf_transport_poll_group *tgroup = nvmf_get_transport_poll_group(group, transport);
	/* Length still unsatisfied, recorded when the original allocation hit -ENOMEM. */
	uint32_t length = req->iobuf.remaining_length;
	uint32_t io_unit_size = transport->opts.io_unit_size;
	int rc;

	assert(tgroup != NULL);

	/* Consume the buffer we were just handed, then try to allocate the rest. */
	length = nvmf_request_set_buffer(req, buf, length, io_unit_size);
	rc = nvmf_request_get_buffers(req, tgroup, transport, length, io_unit_size, false);
	if (rc == 0) {
		/*
		 * All buffers are now in place.  This callback is only reachable when the
		 * transport registered req_get_buffers_done (see nvmf_request_get_buffers()),
		 * so the pointer is non-NULL here.  On -ENOMEM the request simply stays
		 * queued in the iobuf and this callback will fire again later.
		 */
		transport->ops->req_get_buffers_done(req);
	}
}

int
spdk_nvmf_request_get_buffers(struct spdk_nvmf_request *req,
			      struct spdk_nvmf_transport_poll_group *group,
@@ -908,18 +941,50 @@ spdk_nvmf_request_get_buffers(struct spdk_nvmf_request *req,
	assert(nvmf_transport_use_iobuf(transport));

	req->iovcnt = 0;
	rc = nvmf_request_get_buffers(req, group, transport, length,
				      transport->opts.io_unit_size,
				      nvmf_request_set_buffer);
	if (spdk_likely(rc == 0)) {
		req->data_from_pool = true;
	} else if (rc == -ENOMEM) {
	rc = nvmf_request_get_buffers(req, group, transport, length, transport->opts.io_unit_size, false);
	if (spdk_unlikely(rc == -ENOMEM && transport->ops->req_get_buffers_done == NULL)) {
		spdk_nvmf_request_free_buffers(req, group, transport);
	}

	return rc;
}

/*
 * spdk_iobuf_for_each_entry() callback used to find and abort a request that
 * is queued in the iobuf waiting for a buffer.  cb_ctx is the request being
 * aborted.  Returns 1 (stop iteration) once the matching entry has been
 * aborted, 0 to keep scanning.
 */
static int
nvmf_request_get_buffers_abort_cb(struct spdk_iobuf_channel *ch, struct spdk_iobuf_entry *entry,
				  void *cb_ctx)
{
	struct spdk_nvmf_request *req, *req_to_abort = cb_ctx;

	req = SPDK_CONTAINEROF(entry, struct spdk_nvmf_request, iobuf.entry);
	if (req != req_to_abort) {
		return 0;
	}

	/*
	 * The abort length must match the size used by the pending spdk_iobuf_get()
	 * call: a single buffer of at most io_unit_size, capped by what is still
	 * outstanding (see nvmf_request_get_buffers()).
	 */
	spdk_iobuf_entry_abort(ch, entry, spdk_min(req->iobuf.remaining_length,
			       req->qpair->transport->opts.io_unit_size));
	return 1;
}

/*
 * Remove a request from the iobuf wait queue, e.g. when the request is being
 * aborted before its buffer allocation completed.  Scans both the small and
 * the large buffer queues of the transport poll group's iobuf channel, since
 * the pending spdk_iobuf_get() may be waiting on either pool.
 *
 * Returns true if the request was found and removed, false if it was not
 * queued in the iobuf.
 */
bool
nvmf_request_get_buffers_abort(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_transport_poll_group *tgroup = nvmf_get_transport_poll_group(req->qpair->group,
			req->qpair->transport);
	int rc;

	assert(tgroup != NULL);

	/* The abort callback returns 1 as soon as the matching entry was aborted. */
	rc = spdk_iobuf_for_each_entry(tgroup->buf_cache, &tgroup->buf_cache->small,
				       nvmf_request_get_buffers_abort_cb, req);
	if (rc == 1) {
		return true;
	}

	rc = spdk_iobuf_for_each_entry(tgroup->buf_cache, &tgroup->buf_cache->large,
				       nvmf_request_get_buffers_abort_cb, req);
	return rc == 1;
}

void
nvmf_request_free_stripped_buffers(struct spdk_nvmf_request *req,
				   struct spdk_nvmf_transport_poll_group *group,
@@ -948,6 +1013,9 @@ nvmf_request_get_stripped_buffers(struct spdk_nvmf_request *req,
	uint32_t i;
	int rc;

	/* We don't support iobuf queueing with stripped buffers yet */
	assert(transport->ops->req_get_buffers_done == NULL);

	/* Data blocks must be block aligned */
	for (i = 0; i < req->iovcnt; i++) {
		if (req->iov[i].iov_len % block_size) {
@@ -963,8 +1031,7 @@ nvmf_request_get_stripped_buffers(struct spdk_nvmf_request *req,
	req->stripped_data = data;
	req->stripped_data->iovcnt = 0;

	rc = nvmf_request_get_buffers(req, group, transport, length, io_unit_size,
				      nvmf_request_set_stripped_buffer);
	rc = nvmf_request_get_buffers(req, group, transport, length, io_unit_size, true);
	if (rc == -ENOMEM) {
		nvmf_request_free_stripped_buffers(req, group, transport);
		return rc;
+2 −0
Original line number Diff line number Diff line
@@ -59,4 +59,6 @@ int nvmf_request_get_stripped_buffers(struct spdk_nvmf_request *req,
				      struct spdk_nvmf_transport *transport,
				      uint32_t length);

bool nvmf_request_get_buffers_abort(struct spdk_nvmf_request *req);

#endif /* SPDK_NVMF_TRANSPORT_H */
+8 −2
Original line number Diff line number Diff line
@@ -154,7 +154,8 @@ reset_nvmf_rdma_request(struct spdk_nvmf_rdma_request *rdma_req)
static void
test_spdk_nvmf_rdma_request_parse_sgl(void)
{
	struct spdk_nvmf_rdma_transport rtransport;
	struct spdk_nvmf_rdma_transport rtransport = {};
	struct spdk_nvmf_transport_ops ops = {};
	struct spdk_nvmf_rdma_device device;
	struct spdk_nvmf_rdma_request rdma_req = {};
	struct spdk_nvmf_rdma_recv recv;
@@ -185,6 +186,7 @@ test_spdk_nvmf_rdma_request_parse_sgl(void)

	rtransport.transport.opts = g_rdma_ut_transport_opts;
	rtransport.data_wr_pool = NULL;
	rtransport.transport.ops = &ops;

	device.attr.device_cap_flags = 0;
	sgl->keyed.key = 0xEEEE;
@@ -513,6 +515,7 @@ static void
test_spdk_nvmf_rdma_request_process(void)
{
	struct spdk_nvmf_rdma_transport rtransport = {};
	struct spdk_nvmf_transport_ops ops = {};
	struct spdk_nvmf_rdma_poll_group group = {};
	struct spdk_nvmf_rdma_poller poller = {};
	struct spdk_nvmf_rdma_device device = {};
@@ -533,6 +536,7 @@ test_spdk_nvmf_rdma_request_process(void)
	rtransport.data_wr_pool = spdk_mempool_create("test_wr_pool", 128,
				  sizeof(struct spdk_nvmf_rdma_request_data),
				  0, 0);
	rtransport.transport.ops = &ops;
	MOCK_CLEAR(spdk_iobuf_get);

	device.attr.device_cap_flags = 0;
@@ -822,7 +826,8 @@ test_nvmf_rdma_get_optimal_poll_group(void)
static void
test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)
{
	struct spdk_nvmf_rdma_transport rtransport;
	struct spdk_nvmf_rdma_transport rtransport = {};
	struct spdk_nvmf_transport_ops ops = {};
	struct spdk_nvmf_rdma_device device;
	struct spdk_nvmf_rdma_request rdma_req = {};
	struct spdk_nvmf_rdma_recv recv;
@@ -861,6 +866,7 @@ test_spdk_nvmf_rdma_request_parse_sgl_with_md(void)

	rtransport.transport.opts = g_rdma_ut_transport_opts;
	rtransport.data_wr_pool = NULL;
	rtransport.transport.ops = &ops;

	device.attr.device_cap_flags = 0;
	device.map = NULL;
Loading