Commit 312a9d60 authored by Ben Walker's avatar Ben Walker Committed by Tomasz Zawadzki
Browse files

nvmf: Subsystem pauses only pause admin queues



Additionally, the user can specify a namespace to also pause during the
operation.

This allows for the management of hosts, listeners, and the addition of
namespaces all while I/O to other namespaces is occurring. Pausing a
specific namespace also allows for the removal of that namespace without
impacting I/O to other namespaces in the subsystem.

Change-Id: I364336df16df92fe2069114674cb7a68076de6fb
Signed-off-by: default avatarBen Walker <benjamin.walker@intel.com>
Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/4997


Community-CI: Broadcom CI
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarTomasz Zawadzki <tomasz.zawadzki@intel.com>
parent 7665710c
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -154,6 +154,13 @@ that allowed transport specific options within subsystem.

The `trsvcid` in `nvmf_subsystem_add_listener` RPC is now optional.

Pausing a subsystem now only pauses all admin queues. This allows for the
management of hosts and listeners, plus the addition of namespaces without a
full subsystem pause. Additionally, the target now allows for pausing
individual namespaces within a subsystem. To remove a namespace from a
subsystem, only the specific namespace must be paused. I/O will continue to
other namespaces while these operations execute.

### rpc

A new optional parameter `wait` was added to the RPC `iscsi_create_portal_group`,
+11 −0
Original line number Diff line number Diff line
@@ -447,7 +447,12 @@ int spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
/**
 * Transition an NVMe-oF subsystem from Active to Paused state.
 *
 * In a paused state, all admin queues are frozen across the whole subsystem. If
 * a namespace ID is provided, all commands to that namespace are quiesced and incoming
 * commands for that namespace are queued until the subsystem is resumed.
 *
 * \param subsystem The NVMe-oF subsystem.
 * \param nsid The namespace to pause. If 0, pause no namespaces.
 * \param cb_fn A function that will be called once the subsystem has changed state.
 * \param cb_arg Argument passed to cb_fn.
 *
@@ -455,12 +460,15 @@ int spdk_nvmf_subsystem_stop(struct spdk_nvmf_subsystem *subsystem,
 * be called on success.
 */
int spdk_nvmf_subsystem_pause(struct spdk_nvmf_subsystem *subsystem,
			      uint32_t nsid,
			      spdk_nvmf_subsystem_state_change_done cb_fn,
			      void *cb_arg);

/**
 * Transition an NVMe-oF subsystem from Paused to Active state.
 *
 * This resumes the entire subsystem, including any paused namespaces.
 *
 * \param subsystem The NVMe-oF subsystem.
 * \param cb_fn A function that will be called once the subsystem has changed state.
 * \param cb_arg Argument passed to cb_fn.
@@ -613,6 +621,7 @@ const char *spdk_nvmf_host_get_nqn(const struct spdk_nvmf_host *host);
 * This does not start the listener. Use spdk_nvmf_tgt_listen() for that.
 *
 * May only be performed on subsystems in the PAUSED or INACTIVE states.
 * No namespaces are required to be paused.
 *
 * \param subsystem Subsystem to add listener to.
 * \param trid The address to accept connections from.
@@ -632,6 +641,7 @@ void spdk_nvmf_subsystem_add_listener(struct spdk_nvmf_subsystem *subsystem,
 * spdk_nvmf_tgt_stop_listen().
 *
 * May only be performed on subsystems in the PAUSED or INACTIVE states.
 * No namespaces are required to be paused.
 *
 * \param subsystem Subsystem to remove listener from.
 * \param trid The address to no longer accept connections from.
@@ -801,6 +811,7 @@ uint32_t spdk_nvmf_subsystem_add_ns_ext(struct spdk_nvmf_subsystem *subsystem,
 * Remove a namespace from a subsystem.
 *
 * May only be performed on subsystems in the PAUSED or INACTIVE states.
 * Additionally, the namespace must be paused.
 *
 * \param subsystem Subsystem the namespace belongs to.
 * \param nsid Namespace ID to be removed.
+83 −24
Original line number Diff line number Diff line
@@ -771,7 +771,7 @@ spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
		goto out;
	}

	sgroup->io_outstanding++;
	sgroup->mgmt_io_outstanding++;
	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);

	status = _nvmf_ctrlr_connect(req);
@@ -1672,7 +1672,7 @@ nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
	/* AER cmd is an exception */
	sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
	assert(sgroup != NULL);
	sgroup->io_outstanding--;
	sgroup->mgmt_io_outstanding--;

	/* Four asynchronous events are supported for now */
	if (ctrlr->nr_aer_reqs >= NVMF_MAX_ASYNC_EVENTS) {
@@ -3463,7 +3463,10 @@ _nvmf_request_complete(void *ctx)
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	struct spdk_nvmf_qpair *qpair;
	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
	bool is_aer = false;
	uint32_t nsid;
	bool paused;

	rsp->sqid = 0;
	rsp->status.p = 0;
@@ -3489,10 +3492,33 @@ _nvmf_request_complete(void *ctx)

	/* AER cmd is an exception */
	if (sgroup && !is_aer) {
		assert(sgroup->io_outstanding > 0);
		sgroup->io_outstanding--;
		if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
		    sgroup->io_outstanding == 0) {
		if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC ||
				  nvmf_qpair_is_admin_queue(qpair))) {
			assert(sgroup->mgmt_io_outstanding > 0);
			sgroup->mgmt_io_outstanding--;
		} else {
			nsid = req->cmd->nvme_cmd.nsid;

			/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
			if (spdk_likely(nsid - 1 < sgroup->num_ns)) {
				sgroup->ns_info[nsid - 1].io_outstanding--;
			}
		}

		if (spdk_unlikely(sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
				  sgroup->mgmt_io_outstanding == 0)) {
			paused = true;
			for (nsid = 0; nsid < sgroup->num_ns; nsid++) {
				ns_info = &sgroup->ns_info[nsid];

				if (ns_info->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
				    ns_info->io_outstanding > 0) {
					paused = false;
					break;
				}
			}

			if (paused) {
				sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
				sgroup->cb_fn(sgroup->cb_arg, 0);
				sgroup->cb_fn = NULL;
@@ -3500,6 +3526,8 @@ _nvmf_request_complete(void *ctx)
			}
		}

	}

	nvmf_qpair_request_cleanup(qpair);
}

@@ -3532,7 +3560,7 @@ spdk_nvmf_request_exec_fabrics(struct spdk_nvmf_request *req)
	}

	assert(sgroup != NULL);
	sgroup->io_outstanding++;
	sgroup->mgmt_io_outstanding++;

	/* Place the request on the outstanding list so we can keep track of it */
	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
@@ -3550,7 +3578,9 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_qpair *qpair = req->qpair;
	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;
	struct spdk_nvmf_subsystem_pg_ns_info *ns_info;
	enum spdk_nvmf_request_exec_status status;
	uint32_t nsid;

	if (qpair->ctrlr) {
		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
@@ -3561,22 +3591,55 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)

	/* Check if the subsystem is paused (if there is a subsystem) */
	if (sgroup != NULL) {
		if (spdk_unlikely(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC ||
				  nvmf_qpair_is_admin_queue(qpair))) {
			if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
				/* The subsystem is not currently active. Queue this request. */
				TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
				return;
			}
			sgroup->mgmt_io_outstanding++;
		} else {
			nsid = req->cmd->nvme_cmd.nsid;

			/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
			if (spdk_unlikely(nsid - 1 >= sgroup->num_ns)) {
				req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
				req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
				req->rsp->nvme_cpl.status.dnr = 1;
				TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
				_nvmf_request_complete(req);
				return;
			}

			ns_info = &sgroup->ns_info[nsid - 1];
			if (ns_info->channel == NULL) {
				/* This can can happen if host sends I/O to a namespace that is
				 * in the process of being added, but before the full addition
				 * process is complete.  Report invalid namespace in that case.
				 */
				req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
				req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_INVALID_NAMESPACE_OR_FORMAT;
				req->rsp->nvme_cpl.status.dnr = 1;
				TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
				ns_info->io_outstanding++;
				_nvmf_request_complete(req);
				return;
			}

			if (ns_info->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
				/* The namespace is not currently active. Queue this request. */
				TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
				return;
			}
			ns_info->io_outstanding++;
		}
	}

	if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
		/* Place the request on the outstanding list so we can keep track of it */
		TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
		/* Still increment io_outstanding because request_complete decrements it */
		if (sgroup != NULL) {
			sgroup->io_outstanding++;
		}
		_nvmf_request_complete(req);
		return;
	}
@@ -3585,10 +3648,6 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
		spdk_nvme_print_command(qpair->qid, &req->cmd->nvme_cmd);
	}

	if (sgroup) {
		sgroup->io_outstanding++;
	}

	/* Place the request on the outstanding list so we can keep track of it */
	TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);

+1 −0
Original line number Diff line number Diff line
@@ -3151,6 +3151,7 @@ nvmf_fc_adm_add_rem_nport_listener(struct spdk_nvmf_fc_nport *nport, bool add)
						    ctx->trid.traddr);
					free(ctx);
				} else if (spdk_nvmf_subsystem_pause(subsystem,
								     0,
								     nvmf_fc_adm_subsystem_paused_cb,
								     ctx)) {
					SPDK_ERRLOG("Failed to pause subsystem: %s\n",
+34 −2
Original line number Diff line number Diff line
@@ -1308,6 +1308,7 @@ nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
{
	int rc = 0;
	struct spdk_nvmf_subsystem_poll_group *sgroup = &group->sgroups[subsystem->id];
	uint32_t i;

	TAILQ_INIT(&sgroup->queued);

@@ -1318,6 +1319,11 @@ nvmf_poll_group_add_subsystem(struct spdk_nvmf_poll_group *group,
	}

	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;

	for (i = 0; i < sgroup->num_ns; i++) {
		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
	}

fini:
	if (cb_fn) {
		cb_fn(cb_arg, rc);
@@ -1401,6 +1407,7 @@ nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
	struct spdk_nvmf_subsystem_poll_group *sgroup;
	struct nvmf_qpair_disconnect_many_ctx *ctx;
	int rc = 0;
	uint32_t i;

	ctx = calloc(1, sizeof(struct nvmf_qpair_disconnect_many_ctx));

@@ -1417,6 +1424,10 @@ nvmf_poll_group_remove_subsystem(struct spdk_nvmf_poll_group *group,
	sgroup = &group->sgroups[subsystem->id];
	sgroup->state = SPDK_NVMF_SUBSYSTEM_INACTIVE;

	for (i = 0; i < sgroup->num_ns; i++) {
		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_INACTIVE;
	}

	TAILQ_FOREACH(qpair, &group->qpairs, link) {
		if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) {
			break;
@@ -1445,9 +1456,11 @@ fini:
void
nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
				struct spdk_nvmf_subsystem *subsystem,
				uint32_t nsid,
				spdk_nvmf_poll_group_mod_done cb_fn, void *cb_arg)
{
	struct spdk_nvmf_subsystem_poll_group *sgroup;
	struct spdk_nvmf_subsystem_pg_ns_info *ns_info = NULL;
	int rc = 0;

	if (subsystem->id >= group->num_sgroups) {
@@ -1466,7 +1479,13 @@ nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
	}
	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;

	if (sgroup->io_outstanding > 0) {
	/* NOTE: This implicitly also checks for 0, since 0 - 1 wraps around to UINT32_MAX. */
	if (nsid - 1 < sgroup->num_ns) {
		ns_info  = &sgroup->ns_info[nsid - 1];
		ns_info->state = SPDK_NVMF_SUBSYSTEM_PAUSING;
	}

	if (sgroup->mgmt_io_outstanding > 0) {
		assert(sgroup->cb_fn == NULL);
		sgroup->cb_fn = cb_fn;
		assert(sgroup->cb_arg == NULL);
@@ -1474,7 +1493,15 @@ nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
		return;
	}

	assert(sgroup->io_outstanding == 0);
	if (ns_info != NULL && ns_info->io_outstanding > 0) {
		assert(sgroup->cb_fn == NULL);
		sgroup->cb_fn = cb_fn;
		assert(sgroup->cb_arg == NULL);
		sgroup->cb_arg = cb_arg;
		return;
	}

	assert(sgroup->mgmt_io_outstanding == 0);
	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
fini:
	if (cb_fn) {
@@ -1490,6 +1517,7 @@ nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
	struct spdk_nvmf_request *req, *tmp;
	struct spdk_nvmf_subsystem_poll_group *sgroup;
	int rc = 0;
	uint32_t i;

	if (subsystem->id >= group->num_sgroups) {
		rc = -1;
@@ -1507,6 +1535,10 @@ nvmf_poll_group_resume_subsystem(struct spdk_nvmf_poll_group *group,
		goto fini;
	}

	for (i = 0; i < sgroup->num_ns; i++) {
		sgroup->ns_info[i].state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
	}

	sgroup->state = SPDK_NVMF_SUBSYSTEM_ACTIVE;

	/* Release all queued requests */
Loading