Commit a1f20e78 authored by Maciej Szulik, committed by Tomasz Zawadzki
Browse files

sock: fix premature completion of zcopy req



Fixes #3610.

Consider a case when zero-copy with some threshold is enabled.
For some large requests we can run into calling sendmsg multiple times.
If the last chunk was sent without MSG_ZEROCOPY (the threshold was not
met), but some previous chunks were sent with it, then we don't wait for
the zero-copy completion from the kernel and complete the socket request
prematurely.

This can lead, for example, to data integrity issues when the upper
layer reuses buffers soon enough.

To fix it, we need to keep the zcopy flag active until the notification is
received. The notification may arrive several sendmsg calls later, so cache
the index of the last zcopy chunk for a given req in the new zcopy_idx field
(note this new field breaks ABI).

However, such solution creates a new bug, consider a case:
1. Upper layer calls spdk_sock_flush, we have some chunk of a request
   sent with zcopy.
2. We call flush again, in the meantime zcopy notification arrived, but
   we don't check it, because the request is not on the pending list.
3. We call sendmsg with the last chunk of the request, without zcopy.
   The req could be completed now, but we ignored the notification,
   so we can't, because pending_zcopy flag is still active.

To fix it, clear the flag in check_zcopy for the first queued request
(the only request that can be partially sent) by comparing its cached
zcopy_idx with the last notification index (a partially sent request must
hold the last index). This way, if the next flush sends the last chunk,
the request can be completed.

Add two new unit tests for these cases.

Signed-off-by: Maciej Szulik <maciej.szulik@intel.com>
Change-Id: I6d8b96e8b9d49a860a48ddaebce4ee62437a20ba
Reviewed-on: https://review.spdk.io/c/spdk/spdk/+/25645


Reviewed-by: Jacek Kalwas <jacek.kalwas@nutanix.com>
Tested-by: SPDK Automated Test System <spdkbot@gmail.com>
Reviewed-by: Aleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: Konrad Sztyber <ksztyber@nvidia.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
parent 21fa6a73
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -67,6 +67,11 @@ in milliseconds.

Add `spdk_reduce_vol_get_info()` to get the information for the compressed volume.

### sock

A new internal field was added to `spdk_sock_request`, increasing its size and moving `iovcnt` and
user provided `iov[]` offsets by 8 bytes, which breaks ABI.

### thread

Added `spdk_interrupt_register_ext()` API which can receive `spdk_event_handler_opts` structure.
+7 −6
Original line number Diff line number Diff line
@@ -44,9 +44,7 @@ struct spdk_sock_request {
	void	(*cb_fn)(void *cb_arg, int err);
	void				*cb_arg;

	/**
	 * These fields are used by the socket layer and should not be modified
	 */
	/* These fields are used by the socket layer and should not be modified. */
	struct __sock_request_internal {
		TAILQ_ENTRY(spdk_sock_request)	link;

@@ -59,15 +57,18 @@ struct spdk_sock_request {

		uint32_t			offset;

		/* Indicate if the whole req or part of it is sent with zerocopy */
		bool				is_zcopy;
		/* Last zero-copy sendmsg index. */
		uint32_t			zcopy_idx;

		/* Indicate if the whole req or part of it is pending zerocopy completion. */
		bool pending_zcopy;
	} internal;

	int				iovcnt;
	/* struct iovec			iov[]; */
};

SPDK_STATIC_ASSERT(sizeof(struct spdk_sock_request) == 56, "Incorrect size.");
SPDK_STATIC_ASSERT(sizeof(struct spdk_sock_request) == 64, "Incorrect size.");

#define SPDK_SOCK_REQUEST_IOV(req, i) ((struct iovec *)(((uint8_t *)req + sizeof(struct spdk_sock_request)) + (sizeof(struct iovec) * i)))

+2 −1
Original line number Diff line number Diff line
@@ -180,7 +180,8 @@ spdk_sock_request_complete(struct spdk_sock *sock, struct spdk_sock_request *req

	spdk_trace_record(TRACE_SOCK_REQ_COMPLETE, 0, 0, (uintptr_t)req, (uintptr_t)req->cb_arg);
	req->internal.offset = 0;
	req->internal.is_zcopy = 0;
	req->internal.zcopy_idx = 0;
	req->internal.pending_zcopy = false;

	closed = sock->flags.closed;
	sock->cb_cnt++;
+24 −9
Original line number Diff line number Diff line
@@ -1172,13 +1172,14 @@ _sock_check_zcopy(struct spdk_sock *sock)
		while (true) {
			found = false;
			TAILQ_FOREACH_SAFE(req, &sock->pending_reqs, internal.link, treq) {
				if (!req->internal.is_zcopy) {
					/* This wasn't a zcopy request. It was just waiting in line to complete */
				if (!req->internal.pending_zcopy) {
					/* This wasn't a zcopy request. It was just waiting in line
					 * to complete. */
					rc = spdk_sock_request_put(sock, req, 0);
					if (rc < 0) {
						return rc;
					}
				} else if (req->internal.offset == idx) {
				} else if (req->internal.zcopy_idx == idx) {
					found = true;
					rc = spdk_sock_request_put(sock, req, 0);
					if (rc < 0) {
@@ -1195,6 +1196,17 @@ _sock_check_zcopy(struct spdk_sock *sock)

			idx++;
		}

		/* If the req is sent partially (still queued) and we just received its zcopy
		 * notification, next chunk may be sent without zcopy and should result in the req
		 * completion if it is the last chunk. Clear the pending flag to allow it.
		 * Checking the first queued req and the last index is enough, because only one req
		 * can be partially sent and it is the last one we can get notification for. */
		req = TAILQ_FIRST(&sock->queued_reqs);
		if (req && req->internal.pending_zcopy &&
		    req->internal.zcopy_idx == serr->ee_data) {
			req->internal.pending_zcopy = false;
		}
	}

	return 0;
@@ -1268,8 +1280,12 @@ _sock_flush(struct spdk_sock *sock)
	while (req) {
		offset = req->internal.offset;

		/* req->internal.is_zcopy is true when the whole req or part of it is sent with zerocopy */
		req->internal.is_zcopy = is_zcopy;
		if (is_zcopy) {
			/* Cache sendmsg_idx because full request might not be handled and next
			 * chunk may be sent without zero copy. */
			req->internal.pending_zcopy = true;
			req->internal.zcopy_idx = psock->sendmsg_idx;
		}

		for (i = 0; i < req->iovcnt; i++) {
			/* Advance by the offset first */
@@ -1295,16 +1311,15 @@ _sock_flush(struct spdk_sock *sock)
		/* Handled a full request. */
		spdk_sock_request_pend(sock, req);

		if (!req->internal.is_zcopy && req == TAILQ_FIRST(&sock->pending_reqs)) {
		/* We can't put the req if zero-copy is not completed or it is not first
		 * in the line. */
		if (!req->internal.pending_zcopy && req == TAILQ_FIRST(&sock->pending_reqs)) {
			/* The sendmsg syscall above isn't currently asynchronous,
			* so it's already done. */
			retval = spdk_sock_request_put(sock, req, 0);
			if (retval) {
				break;
			}
		} else {
			/* Re-use the offset field to hold the sendmsg call index. */
			req->internal.offset = psock->sendmsg_idx;
		}

		if (rc == 0) {
+23 −9
Original line number Diff line number Diff line
@@ -969,8 +969,12 @@ sock_complete_write_reqs(struct spdk_sock *_sock, ssize_t rc, bool is_zcopy)
	/* Consume the requests that were actually written */
	req = TAILQ_FIRST(&_sock->queued_reqs);
	while (req) {
		/* req->internal.is_zcopy is true when the whole req or part of it is sent with zerocopy */
		req->internal.is_zcopy = is_zcopy;
		if (is_zcopy) {
			/* Cache sendmsg_idx because full request might not be handled and next
			 * chunk may be sent without zero copy. */
			req->internal.pending_zcopy = true;
			req->internal.zcopy_idx = sock->sendmsg_idx;
		}

		rc = sock_request_advance_offset(req, rc);
		if (rc < 0) {
@@ -981,14 +985,12 @@ sock_complete_write_reqs(struct spdk_sock *_sock, ssize_t rc, bool is_zcopy)
		/* Handled a full request. */
		spdk_sock_request_pend(_sock, req);

		if (!req->internal.is_zcopy && req == TAILQ_FIRST(&_sock->pending_reqs)) {
		if (!req->internal.pending_zcopy &&
		    req == TAILQ_FIRST(&_sock->pending_reqs)) {
			retval = spdk_sock_request_put(_sock, req, 0);
			if (retval) {
				return retval;
			}
		} else {
			/* Re-use the offset field to hold the sendmsg call index. */
			req->internal.offset = sock->sendmsg_idx;
		}

		if (rc == 0) {
@@ -1048,13 +1050,14 @@ _sock_check_zcopy(struct spdk_sock *_sock, int status)
	while (true) {
		found = false;
		TAILQ_FOREACH_SAFE(req, &_sock->pending_reqs, internal.link, treq) {
			if (!req->internal.is_zcopy) {
				/* This wasn't a zcopy request. It was just waiting in line to complete */
			if (!req->internal.pending_zcopy) {
				/* This wasn't a zcopy request. It was just waiting in line
				 * to complete. */
				rc = spdk_sock_request_put(_sock, req, 0);
				if (rc < 0) {
					return rc;
				}
			} else if (req->internal.offset == idx) {
			} else if (req->internal.zcopy_idx == idx) {
				found = true;
				rc = spdk_sock_request_put(_sock, req, 0);
				if (rc < 0) {
@@ -1072,6 +1075,17 @@ _sock_check_zcopy(struct spdk_sock *_sock, int status)
		idx++;
	}

	/* If the req is sent partially (still queued) and we just received its zcopy
	 * notification, next chunk may be sent without zcopy and should result in the req
	 * completion if it is the last chunk. Clear the pending flag to allow it.
	 * Checking the first queued req and the last index is enough, because only one req
	 * can be partially sent and it is the last one we can get notification for. */
	req = TAILQ_FIRST(&_sock->queued_reqs);
	if (req && req->internal.pending_zcopy &&
	    req->internal.zcopy_idx == serr->ee_data) {
		req->internal.pending_zcopy = false;
	}

	return 0;
}

Loading