Commit 7a0a2800 authored by Shuhei Matsumoto, committed by Tomasz Zawadzki
Browse files

nvme: Add three APIs for disconnect, start re-enable, and poll re-enable ctrlr



The NVMe bdev module will support two features, delayed reconnect and
delete after multiple failures of reconnect to improve error recovery.

The recently added two APIs, spdk_nvme_ctrlr_reset_async() and
spdk_nvme_ctrlr_reset_poll_async(), were not good enough.

spdk_nvme_ctrlr_reset_ctx was not necessary. It had only a pointer to ctrlr.
Using a pointer to ctrlr directly saves us from undesirable malloc error
processing.

Separate spdk_nvme_ctrlr_reset_async() into spdk_nvme_ctrlr_disconnect()
and spdk_nvme_ctrlr_reconnect_async(). spdk_nvme_ctrlr_disconnect()
disconnects ctrlr including disconnecting adminq.
spdk_nvme_ctrlr_reconnect_async() moves the ctrlr state to INIT.

Then rename spdk_nvme_ctrlr_reset_poll_async() to
spdk_nvme_ctrlr_reconnect_poll_async().

Finally deprecate spdk_nvme_ctrlr_reset_async() and
spdk_nvme_ctrlr_reset_poll_async().

The following patches will change the NVMe bdev module to use these new APIs.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: Id1d6858dcdc5fc2e9db0a6ebf3f79cab4f9bbcb7
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10091


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent dbc1c48a
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -19,6 +19,13 @@ removed in SPDK 22.04, and the parameter `transport_retry_count` is added and us

A new parameter `bdev_retry_count` is added to the RPC `bdev_nvme_set_options`.

### nvme

New APIs, `spdk_nvme_ctrlr_disconnect`, `spdk_nvme_ctrlr_reconnect_async`, and
`spdk_nvme_ctrlr_reconnect_poll_async`, have been added to improve error recovery, and
the existing APIs, `spdk_nvme_ctrlr_reset_async` and `spdk_nvme_ctrlr_reset_poll_async`,
have been deprecated.

## v21.10

Structure `spdk_nvmf_target_opts` has been extended with new member `discovery_filter` which allows to specify
+6 −0
Original line number Diff line number Diff line
@@ -18,3 +18,9 @@ ABI cannot be removed without providing deprecation notice for at least single S

Deprecated `spdk_bdev_module_finish_done()` API, which will be removed in SPDK 22.01.
Bdev modules should use `spdk_bdev_module_fini_done()` instead.

### nvme

Deprecated `spdk_nvme_ctrlr_reset_async` and `spdk_nvme_ctrlr_reset_poll_async` APIs,
which will be removed in SPDK 22.01. `spdk_nvme_ctrlr_disconnect`, `spdk_nvme_ctrlr_reconnect_async`,
and `spdk_nvme_ctrlr_reconnect_poll_async` should be used instead.
+35 −0
Original line number Diff line number Diff line
@@ -1079,6 +1079,8 @@ struct spdk_nvme_ctrlr_reset_ctx;

/**
 * Create a context object that can be polled to perform a full hardware reset of the NVMe controller.
 * (Deprecated, please use spdk_nvme_ctrlr_disconnect(), spdk_nvme_ctrlr_reconnect_async(), and
 * spdk_nvme_ctrlr_reconnect_poll_async() instead.)
 *
 * The function will set the controller reset context on success, user must call
 * spdk_nvme_ctrlr_reset_poll_async() until it returns a value other than -EAGAIN.
@@ -1097,6 +1099,8 @@ int spdk_nvme_ctrlr_reset_async(struct spdk_nvme_ctrlr *ctrlr,

/**
 * Proceed with resetting controller associated with the controller reset context.
 * (Deprecated, please use spdk_nvme_ctrlr_disconnect(), spdk_nvme_ctrlr_reconnect_async(), and
 * spdk_nvme_ctrlr_reconnect_poll_async() instead.)
 *
 * The controller reset context is one returned from a previous call to
 * spdk_nvme_ctrlr_reset_async().  Users must call this function on the
@@ -1111,6 +1115,37 @@ int spdk_nvme_ctrlr_reset_async(struct spdk_nvme_ctrlr *ctrlr,
 */
int spdk_nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx);

/**
 * Disconnect the given NVMe controller.
 *
 * This function is used as the first operation of a full reset sequence of the given NVMe
 * controller. The NVMe controller is ready to reconnect after completing this function.
 *
 * \param ctrlr Opaque handle to NVMe controller.
 *
 * \return 0 on success, -EBUSY if controller is already resetting, or -ENXIO if controller
 * has been removed.
 */
int spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr);

/**
 * Start re-enabling the given NVMe controller in a full reset sequence.
 *
 * \param ctrlr Opaque handle to NVMe controller.
 */
void spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr);

/**
 * Proceed with re-enabling the given NVMe controller.
 *
 * Users must call this function in a full reset sequence until it returns a value other
 * than -EAGAIN.
 *
 * \param ctrlr Opaque handle to NVMe controller.
 *
 * \return 0 if the given NVMe controller is enabled, or -EAGAIN if there are still
 * pending operations to enable it.
 */
int spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr);

/**
 * Perform a NVMe subsystem reset.
 *
+1 −1
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

SO_VER := 7
SO_MINOR := 0
SO_MINOR := 1

C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie_common.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c \
	nvme_ctrlr_ocssd_cmd.c nvme_ns_ocssd_cmd.c nvme_tcp.c nvme_opal.c nvme_io_msg.c nvme_poll_group.c nvme_zns.c
+30 −6
Original line number Diff line number Diff line
@@ -1610,8 +1610,8 @@ nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr)
	}
}

static int
nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
int
spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_qpair	*qpair;

@@ -1657,10 +1657,34 @@ nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)

	spdk_bit_array_free(&ctrlr->free_io_qids);

	nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
	return 0;
}

/*
 * Start re-enabling the controller: take ctrlr_lock and move the controller
 * state back to NVME_CTRLR_STATE_INIT. The lock is intentionally still held
 * when this function returns.
 */
void
spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);

	/* Set the state back to INIT to cause a full hardware reset. */
	nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);

	/* Return without releasing ctrlr_lock. ctrlr_lock will be released when
	 * spdk_nvme_ctrlr_reconnect_poll_async() (or the deprecated
	 * spdk_nvme_ctrlr_reset_poll_async() wrapper, which forwards to it) returns 0.
	 * NOTE(review): the unlock itself is not visible in this hunk - confirm
	 * whether it also happens on failure return codes.
	 */
}

/*
 * First half of the legacy reset path: disconnect the controller and, only if
 * that succeeded, kick off the asynchronous reconnect.
 *
 * Returns 0 when both steps were started, otherwise the non-zero code reported
 * by spdk_nvme_ctrlr_disconnect() (in which case no reconnect is attempted).
 */
static int
nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
{
	int status = spdk_nvme_ctrlr_disconnect(ctrlr);

	if (status == 0) {
		spdk_nvme_ctrlr_reconnect_async(ctrlr);
	}

	return status;
}

@@ -1668,8 +1692,8 @@ nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
 * This function will be called when the controller is being reinitialized.
 * Note: the ctrlr_lock must be held when calling this function.
 */
static int
nvme_ctrlr_reinit_on_reset(struct spdk_nvme_ctrlr *ctrlr)
int
spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr)
{
	struct spdk_nvme_qpair	*qpair;
	int rc = 0, rc_tmp = 0;
@@ -1742,7 +1766,7 @@ nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx)
{
	struct spdk_nvme_ctrlr *ctrlr = ctrlr_reset_ctx->ctrlr;

	return nvme_ctrlr_reinit_on_reset(ctrlr);
	return spdk_nvme_ctrlr_reconnect_poll_async(ctrlr);
}

int
Loading