Commit 6d4f580e authored by Ziye Yang's avatar Ziye Yang Committed by Jim Harris
Browse files

nvmf/tcp: Remove spdk_nvmf_tcp_qpair_process_pending



Phenomenon:
Test case:  Using the following command to test
./test/nvmf/target/shutdown.sh --iso --transport=tcp
without this patch, it will cause coredump.
The error is that the NVMe/TCP request in data buffer
waiting list has "FREE" state.

We do not need call this function in
spdk_nvmf_tcp_qpair_flush_pdus_internal, it causes the
bug during shutdown test since it will call the function
recursively, and it does not work for the shutdown path.

There are two possible recursive calls:

(1)spdk_nvmf_tcp_qpair_flush_pdus_internal ->
spdk_nvmf_tcp_qpair_process_pending ->
spdk_nvmf_tcp_qpair_flush_pdus_internal ->
>..
(2) spdk_nvmf_tcp_qpair_flush_pdus_internal->
pdu completion (pdu->cb)
->..
-> spdk_nvmf_tcp_qpair_flush_pdus_internal.

And we need to move the processing for NVMe/TCP requests
which are waiting buffer in another function to handle
in order to avoid the complicated possbile recursive
function calls. (Previously, we found the simliar
issue in spdk_nvmf_tcp_qpair_flush_pdus_internal for
pdu sending handling)

But we cannot remove this feature,
otherwise, the initiator will hang for waiting the
I/O. So we add the same functionality in spdk_nvmf_tcp_poll_group_poll
function.

Purpose: To fix the NVMe/TCP shutdown issue.
And this patch also reables the test for shutdown and bdevio.

Change-Id: Ifa193faa3f685429dcba7557df5b311bd566e297
Signed-off-by: default avatarZiye Yang <ziye.yang@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/462658


Reviewed-by: default avatarSeth Howell <seth.howell@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent fbe8f804
Loading
Loading
Loading
Loading
+9 −30
Original line number Diff line number Diff line
@@ -288,8 +288,6 @@ struct spdk_nvmf_tcp_transport {
	TAILQ_HEAD(, spdk_nvmf_tcp_port)	ports;
};

static void spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport,
		struct spdk_nvmf_tcp_qpair *tqpair);
static bool spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,
				      struct spdk_nvmf_tcp_req *tcp_req);
static void spdk_nvmf_tcp_handle_pending_c2h_data_queue(struct spdk_nvmf_tcp_qpair *tqpair);
@@ -782,7 +780,6 @@ spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair)
	struct nvme_tcp_pdu *pdu;
	int pdu_length;
	TAILQ_HEAD(, nvme_tcp_pdu) completed_pdus_list;
	struct spdk_nvmf_tcp_transport *ttransport;

	pdu = TAILQ_FIRST(&tqpair->send_queue);

@@ -850,9 +847,6 @@ spdk_nvmf_tcp_qpair_flush_pdus_internal(struct spdk_nvmf_tcp_qpair *tqpair)
		spdk_nvmf_tcp_pdu_put(tqpair, pdu);
	}

	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
	spdk_nvmf_tcp_qpair_process_pending(ttransport, tqpair);

	return TAILQ_EMPTY(&tqpair->send_queue) ? 0 : 1;
}

@@ -2641,37 +2635,13 @@ spdk_nvmf_tcp_req_process(struct spdk_nvmf_tcp_transport *ttransport,

	return progress;
}

static void
spdk_nvmf_tcp_qpair_process_pending(struct spdk_nvmf_tcp_transport *ttransport,
				    struct spdk_nvmf_tcp_qpair *tqpair)
{
	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;

	/* Tqpair is not in a good state, so return it */
	if (spdk_unlikely(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_ERROR)) {
		return;
	}


	TAILQ_FOREACH_SAFE(tcp_req, &tqpair->group->pending_data_buf_queue, link, req_tmp) {
		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
			break;
		}
	}
}

static void
spdk_nvmf_tcp_sock_cb(void *arg, struct spdk_sock_group *group, struct spdk_sock *sock)
{
	struct spdk_nvmf_tcp_qpair *tqpair = arg;
	struct spdk_nvmf_tcp_transport *ttransport;
	int rc;

	assert(tqpair != NULL);

	ttransport = SPDK_CONTAINEROF(tqpair->qpair.transport, struct spdk_nvmf_tcp_transport, transport);
	spdk_nvmf_tcp_qpair_process_pending(ttransport, tqpair);
	rc = spdk_nvmf_tcp_sock_process(tqpair);

	/* check the following two factors:
@@ -2787,6 +2757,9 @@ spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
{
	struct spdk_nvmf_tcp_poll_group *tgroup;
	int rc;
	struct spdk_nvmf_tcp_req *tcp_req, *req_tmp;
	struct spdk_nvmf_tcp_transport *ttransport = SPDK_CONTAINEROF(group->transport,
			struct spdk_nvmf_tcp_transport, transport);

	tgroup = SPDK_CONTAINEROF(group, struct spdk_nvmf_tcp_poll_group, group);

@@ -2794,6 +2767,12 @@ spdk_nvmf_tcp_poll_group_poll(struct spdk_nvmf_transport_poll_group *group)
		return 0;
	}

	TAILQ_FOREACH_SAFE(tcp_req, &tgroup->pending_data_buf_queue, link, req_tmp) {
		if (spdk_nvmf_tcp_req_process(ttransport, tcp_req) == false) {
			break;
		}
	}

	rc = spdk_sock_group_poll(tgroup->sock_group);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to poll sock_group=%p\n", tgroup->sock_group);
+2 −5
Original line number Diff line number Diff line
@@ -36,11 +36,8 @@ fi
run_test suite test/nvmf/target/nmic.sh $TEST_ARGS
run_test suite test/nvmf/target/rpc.sh $TEST_ARGS
run_test suite test/nvmf/target/fio.sh $TEST_ARGS
# bdevio currently fails with tcp transport - see issue #808
if [ "$TEST_TRANSPORT" == "rdma" ]; then
run_test suite test/nvmf/target/shutdown.sh $TEST_ARGS
run_test suite test/nvmf/target/bdevio.sh $TEST_ARGS
fi

timing_enter host