Commit 6b86039f authored by Ben Walker, committed by Tomasz Zawadzki

nvme/tcp: Ensure qpair is polled when it gets a writev_async completion

An earlier fix for this went into the posix layer, but the underlying
problem is in the logic of the nvme/tcp transport. Fix it there instead.
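
For illustration, a minimal sketch of the needs_poll pattern this change
introduces. The types below are simplified stand-ins, not the actual SPDK
structs, reduced to just the fields the pattern requires:

/* Minimal sketch of the needs_poll pattern; tcp_qpair and tcp_poll_group
 * are hypothetical stand-ins for nvme_tcp_qpair/nvme_tcp_poll_group. */
#include <stdbool.h>
#include <stddef.h>
#include <sys/queue.h>

struct tcp_qpair {
	TAILQ_ENTRY(tcp_qpair)	link;
	bool			needs_poll;
	bool			has_queued_reqs; /* stand-in for !STAILQ_EMPTY(&qpair.queued_req) */
};

struct tcp_poll_group {
	TAILQ_HEAD(, tcp_qpair)	needs_poll;
};

/* A PDU write just completed: if requests are queued waiting for the
 * resources that completion frees, mark the qpair so the next group
 * poll visits it even if no further network event arrives. */
static void
pdu_write_done(struct tcp_poll_group *pg, struct tcp_qpair *qp)
{
	if (pg != NULL && qp->has_queued_reqs && !qp->needs_poll) {
		TAILQ_INSERT_TAIL(&pg->needs_poll, qp, link);
		qp->needs_poll = true;
	}
}

/* Every path that polls or tears down the qpair clears the mark first,
 * so a qpair never lingers on the list after it has been serviced. */
static void
qpair_serviced(struct tcp_poll_group *pg, struct tcp_qpair *qp)
{
	if (qp->needs_poll) {
		TAILQ_REMOVE(&pg->needs_poll, qp, link);
		qp->needs_poll = false;
	}
}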

Change-Id: I04dd850bb201641d441c8c1f88c7bb8ba1d09e58
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6751
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
parent 6d6959e9
+44 −0
@@ -68,6 +68,8 @@ struct nvme_tcp_poll_group {
 	struct spdk_sock_group *sock_group;
 	uint32_t completions_per_qpair;
 	int64_t num_completions;
+
+	TAILQ_HEAD(, nvme_tcp_qpair) needs_poll;
 };
 
 /* NVMe TCP qpair extensions for spdk_nvme_qpair */
@@ -105,6 +107,9 @@ struct nvme_tcp_qpair {
 	uint8_t					cpda;
 
 	enum nvme_tcp_qpair_state		state;
+
+	TAILQ_ENTRY(nvme_tcp_qpair)		link;
+	bool					needs_poll;
 };
 
 enum nvme_tcp_req_state {
@@ -301,6 +306,13 @@ nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_
 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
 	struct nvme_tcp_pdu *pdu;
 	int rc;
+	struct nvme_tcp_poll_group *group;
+
+	if (tqpair->needs_poll) {
+		group = nvme_tcp_poll_group(qpair->poll_group);
+		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
+		tqpair->needs_poll = false;
+	}
 
 	rc = spdk_sock_close(&tqpair->sock);
 
@@ -365,6 +377,18 @@ _pdu_write_done(void *cb_arg, int err)
 {
 	struct nvme_tcp_pdu *pdu = cb_arg;
 	struct nvme_tcp_qpair *tqpair = pdu->qpair;
+	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
+
+	/* If there are queued requests, we assume they are queued because they are waiting
+	 * for resources to be released. Those resources are almost certainly released in
+	 * response to a PDU completing here. However, to attempt to make forward progress
+	 * the qpair needs to be polled and we can't rely on another network event to make
+	 * that happen. Add it to a list of qpairs to poll regardless of network activity
+	 * here. */
+	if (pgroup && !STAILQ_EMPTY(&tqpair->qpair.queued_req) && !tqpair->needs_poll) {
+		TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
+		tqpair->needs_poll = true;
+	}
 
 	TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
 
@@ -1690,6 +1714,12 @@ nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_soc
 	struct spdk_nvme_qpair *qpair = ctx;
 	struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
 	int32_t num_completions;
+	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
+
+	if (tqpair->needs_poll) {
+		TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
+		tqpair->needs_poll = false;
+	}
 
 	num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);
 
@@ -2046,6 +2076,8 @@ nvme_tcp_poll_group_create(void)
 		return NULL;
 	}
 
+	TAILQ_INIT(&group->needs_poll);
+
 	group->sock_group = spdk_sock_group_create(group);
 	if (group->sock_group == NULL) {
 		free(group);
@@ -2089,6 +2121,11 @@ nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
 	struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
 
+	if (tqpair->needs_poll) {
+		TAILQ_REMOVE(&group->needs_poll, tqpair, link);
+		tqpair->needs_poll = false;
+	}
+
 	if (tqpair->sock && group->sock_group) {
 		if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
 			return -EPROTO;
@@ -2131,6 +2168,7 @@ nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *t
 {
 	struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
 	struct spdk_nvme_qpair *qpair, *tmp_qpair;
+	struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
 
 	group->completions_per_qpair = completions_per_qpair;
 	group->num_completions = 0;
@@ -2141,6 +2179,12 @@ nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *t
 		disconnected_qpair_cb(qpair, tgroup->group->ctx);
 	}
 
+	/* If any qpairs were marked as needing to be polled due to an asynchronous write completion
+	 * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
+	TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
+		nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
+	}
+
 	return group->num_completions;
 }
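
The drain loop above uses the _SAFE iterator because nvme_tcp_qpair_sock_cb
unlinks the qpair it is servicing; a plain TAILQ_FOREACH would walk a
just-unlinked entry. A sketch of that step, continuing the hypothetical
stand-in types from the earlier sketch (the fallback macro is only needed
because glibc's sys/queue.h lacks the _SAFE iterators that BSD headers and
SPDK provide):

#ifndef TAILQ_FOREACH_SAFE /* not provided by glibc's sys/queue.h */
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)			\
	for ((var) = TAILQ_FIRST((head));				\
	     (var) != NULL && ((tvar) = TAILQ_NEXT((var), field), 1);	\
	     (var) = (tvar))
#endif

/* Drain qpairs that asked to be polled but saw no socket event. The
 * safe iterator matters: qpair_serviced() removes the current element
 * from the list while we are iterating over it. */
static void
poll_group_drain(struct tcp_poll_group *pg)
{
	struct tcp_qpair *qp, *tmp;

	TAILQ_FOREACH_SAFE(qp, &pg->needs_poll, link, tmp) {
		qpair_serviced(pg, qp);	/* unlink first, as nvme_tcp_qpair_sock_cb does */
		/* ... process completions for qp here ... */
	}
}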

+0 −8
@@ -663,7 +663,6 @@ static int
 _sock_check_zcopy(struct spdk_sock *sock)
 {
 	struct spdk_posix_sock *psock = __posix_sock(sock);
-	struct spdk_posix_sock_group_impl *group = __posix_group_impl(sock->group_impl);
 	struct msghdr msgh = {};
 	uint8_t buf[sizeof(struct cmsghdr) + sizeof(struct sock_extended_err)];
 	ssize_t rc;
@@ -726,13 +725,6 @@ _sock_check_zcopy(struct spdk_sock *sock)
 					break;
 				}
 			}
-
-			/* If we reaped buffer reclaim notification and sock is not in pending_events list yet,
-			 * add it now. It allows to call socket callback and process completions */
-			if (found && !psock->pending_events && group) {
-				psock->pending_events = true;
-				TAILQ_INSERT_TAIL(&group->pending_events, psock, link);
-			}
 		}
 	}
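
For context on the removed posix-layer workaround: with zero-copy sends,
the kernel reports buffer reclamation on the socket's error queue rather
than as readable data, so a send can complete without any POLLIN event.
That is what the deleted pending_events bookkeeping compensated for. A
hedged sketch of that reaping on Linux (general MSG_ZEROCOPY mechanism,
not SPDK's exact implementation; error handling simplified):

#include <linux/errqueue.h>
#include <stdbool.h>
#include <string.h>
#include <sys/socket.h>

/* Sketch: reap Linux MSG_ZEROCOPY completions from the error queue.
 * Real code loops until EAGAIN and records which send sequence
 * numbers [ee_info, ee_data] have completed. */
static bool
reap_zcopy_completions(int fd)
{
	struct msghdr msgh = {0};
	char buf[CMSG_SPACE(sizeof(struct sock_extended_err))];
	struct cmsghdr *cm;
	bool reaped = false;

	msgh.msg_control = buf;
	msgh.msg_controllen = sizeof(buf);

	if (recvmsg(fd, &msgh, MSG_ERRQUEUE) < 0) {
		return false; /* typically EAGAIN: nothing pending */
	}

	for (cm = CMSG_FIRSTHDR(&msgh); cm != NULL; cm = CMSG_NXTHDR(&msgh, cm)) {
		struct sock_extended_err serr;

		memcpy(&serr, CMSG_DATA(cm), sizeof(serr)); /* avoid unaligned access */
		if (serr.ee_errno == 0 && serr.ee_origin == SO_EE_ORIGIN_ZEROCOPY) {
			/* Buffers for sends [ee_info, ee_data] are reclaimed;
			 * the matching writev_async calls are now complete. */
			reaped = true;
		}
	}

	return reaped;
}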