Commit bb4be4dc authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Tomasz Zawadzki
Browse files

bdev/nvme: Ctrlr reset establishes connection for qpairs sequentially



A full controller reset sequence disconnects qpairs sequentially, but
connects qpairs in parallel. It moves to the next ctrlr_channel after
starting connection establishment for the previous ctrlr_channel.
Conneciton establishment for qpairs are done in parallel.

However, this design caused one very difficult race issue. As
highlighted in the previous patch, a full controller reset sequence
returned success but connection establishment was failed. It will be
better if we confirm connection establishment one by one and return
success after all connection establishments are actually done.

Controller reset sequence does not require performance. This will reduce
resource contention for connection establishment on the target side.

Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Ifa2836f06865bcce6bc528719a51119522c8f43b
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/18252


Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
parent 6cdc0f25
Loading
Loading
Loading
Loading
+53 −7
Original line number Diff line number Diff line
@@ -1501,6 +1501,7 @@ bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_
	struct nvme_poll_group *group = poll_group_ctx;
	struct nvme_qpair *nvme_qpair;
	struct nvme_ctrlr_channel *ctrlr_ch;
	int status;

	nvme_qpair = nvme_poll_group_get_qpair(group, qpair);
	if (nvme_qpair == NULL) {
@@ -1518,12 +1519,20 @@ bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_

	if (ctrlr_ch != NULL) {
		if (ctrlr_ch->reset_iter != NULL) {
			/* If we are already in a full reset sequence, we do not have
			 * to restart it. Just move to the next ctrlr_channel.
			 */
			/* We are in a full reset sequence. */
			if (ctrlr_ch->connect_poller != NULL) {
				/* qpair was failed to connect. Abort the reset sequence. */
				SPDK_DEBUGLOG(bdev_nvme, "qpair %p was failed to connect. abort the reset ctrlr sequence.\n",
					      qpair);
				spdk_poller_unregister(&ctrlr_ch->connect_poller);
				status = -1;
			} else {
				/* qpair was completed to disconnect. Just move to the next ctrlr_channel. */
				SPDK_DEBUGLOG(bdev_nvme, "qpair %p was disconnected and freed in a reset ctrlr sequence.\n",
					      qpair);
			spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
				status = 0;
			}
			spdk_for_each_channel_continue(ctrlr_ch->reset_iter, status);
			ctrlr_ch->reset_iter = NULL;
		} else {
			/* qpair was disconnected unexpectedly. Reset controller for recovery. */
@@ -2098,6 +2107,33 @@ bdev_nvme_reset_create_qpairs_done(struct spdk_io_channel_iter *i, int status)
	}
}

static int
bdev_nvme_reset_check_qpair_connected(void *ctx)
{
	struct nvme_ctrlr_channel *ctrlr_ch = ctx;

	if (ctrlr_ch->reset_iter == NULL) {
		/* qpair was already failed to connect and the reset sequence is being aborted. */
		assert(ctrlr_ch->connect_poller == NULL);
		assert(ctrlr_ch->qpair->qpair == NULL);
		return SPDK_POLLER_BUSY;
	}

	assert(ctrlr_ch->qpair->qpair != NULL);

	if (!spdk_nvme_qpair_is_connected(ctrlr_ch->qpair->qpair)) {
		return SPDK_POLLER_BUSY;
	}

	spdk_poller_unregister(&ctrlr_ch->connect_poller);

	/* qpair was completed to connect. Move to the next ctrlr_channel */
	spdk_for_each_channel_continue(ctrlr_ch->reset_iter, 0);
	ctrlr_ch->reset_iter = NULL;

	return SPDK_POLLER_BUSY;
}

static void
bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
{
@@ -2106,9 +2142,19 @@ bdev_nvme_reset_create_qpair(struct spdk_io_channel_iter *i)
	int rc;

	rc = bdev_nvme_create_qpair(ctrlr_ch->qpair);
	if (rc == 0) {
		ctrlr_ch->connect_poller = SPDK_POLLER_REGISTER(bdev_nvme_reset_check_qpair_connected,
					   ctrlr_ch, 0);

		/* The current full reset sequence will move to the next
		 * ctrlr_channel after the qpair is actually connected.
		 */
		assert(ctrlr_ch->reset_iter == NULL);
		ctrlr_ch->reset_iter = i;
	} else {
		spdk_for_each_channel_continue(i, rc);
	}
}

static int
bdev_nvme_reconnect_ctrlr_poll(void *arg)
+1 −0
Original line number Diff line number Diff line
@@ -198,6 +198,7 @@ struct nvme_ctrlr_channel {
	TAILQ_HEAD(, spdk_bdev_io)	pending_resets;

	struct spdk_io_channel_iter	*reset_iter;
	struct spdk_poller		*connect_poller;
};

struct nvme_io_path {
+6 −0
Original line number Diff line number Diff line
@@ -1136,6 +1136,12 @@ spdk_nvme_qpair_get_failure_reason(struct spdk_nvme_qpair *qpair)
	return qpair->failure_reason;
}

bool
spdk_nvme_qpair_is_connected(struct spdk_nvme_qpair *qpair)
{
	return qpair->is_connected;
}

int32_t
spdk_nvme_qpair_process_completions(struct spdk_nvme_qpair *qpair,
				    uint32_t max_completions)