Commit 8fc9ac7b authored by JinYu, committed by Changpeng Liu
Browse files

nvmf: complete all I/Os before changing sgroup to PAUSED



For NVMe devices, I/Os are completed asynchronously. So we
need to check the outstanding I/Os before putting IO channel
when we hot remove the device. We should be sure that all the
I/Os have been completed when we change the sgroup->state to
PAUSED, so that we can update the subsystem.

Fixes #615, #755

Change-Id: I0f727a7bd0734fa9be1193e1f574892ab3e68b55
Signed-off-by: JinYu <jin.yu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/452038


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent 17e0283d
Loading
Loading
Loading
Loading
+37 −2
Original line number Diff line number Diff line
@@ -516,6 +516,8 @@ spdk_nvmf_ctrlr_connect(struct spdk_nvmf_request *req)
	}

	if ((subsystem->state == SPDK_NVMF_SUBSYSTEM_INACTIVE) ||
	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSING) ||
	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_PAUSED) ||
	    (subsystem->state == SPDK_NVMF_SUBSYSTEM_DEACTIVATING)) {
		SPDK_ERRLOG("Subsystem '%s' is not ready\n", subnqn);
		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
@@ -1232,6 +1234,7 @@ spdk_nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	struct spdk_nvmf_subsystem_poll_group *sgroup;

	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Async Event Request\n");

@@ -1257,6 +1260,10 @@ spdk_nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	/* AER cmd is an exception */
	sgroup = &req->qpair->group->sgroups[ctrlr->subsys->id];
	sgroup->io_outstanding--;

	ctrlr->aer_req = req;
	return SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS;
}
@@ -2396,6 +2403,8 @@ spdk_nvmf_ctrlr_process_io_cmd(struct spdk_nvmf_request *req)
		return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
	}

	/* scan-build falsely reporting dereference of null pointer */
	assert(group != NULL && group->sgroups != NULL);
	ns_info = &group->sgroups[ctrlr->subsys->id].ns_info[nsid - 1];
	if (nvmf_ns_reservation_request_check(ns_info, ctrlr, req)) {
		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Reservation Conflict for nsid %u, opcode %u\n",
@@ -2462,12 +2471,16 @@ spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
{
	struct spdk_nvme_cpl *rsp = &req->rsp->nvme_cpl;
	struct spdk_nvmf_qpair *qpair;
	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;

	rsp->sqid = 0;
	rsp->status.p = 0;
	rsp->cid = req->cmd->nvme_cmd.cid;

	qpair = req->qpair;
	if (qpair->ctrlr) {
		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVMF,
		      "cpl: cid=%u cdw0=0x%08x rsvd1=%u status=0x%04x\n",
@@ -2479,6 +2492,19 @@ spdk_nvmf_request_complete(struct spdk_nvmf_request *req)
		SPDK_ERRLOG("Transport request completion error!\n");
	}

	/* AER cmd and fabric connect are exceptions */
	if (sgroup != NULL && qpair->ctrlr->aer_req != req &&
	    !(req->cmd->nvmf_cmd.opcode == SPDK_NVME_OPC_FABRIC &&
	      req->cmd->nvmf_cmd.fctype == SPDK_NVMF_FABRIC_COMMAND_CONNECT)) {
		assert(sgroup->io_outstanding > 0);
		sgroup->io_outstanding--;
		if (sgroup->state == SPDK_NVMF_SUBSYSTEM_PAUSING &&
		    sgroup->io_outstanding == 0) {
			sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
			sgroup->cb_fn(sgroup->cb_arg, 0);
		}
	}

	spdk_nvmf_qpair_request_cleanup(qpair);

	return 0;
@@ -2533,27 +2559,36 @@ spdk_nvmf_request_exec(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_qpair *qpair = req->qpair;
	spdk_nvmf_request_exec_status status;
	struct spdk_nvmf_subsystem_poll_group *sgroup = NULL;

	nvmf_trace_command(req->cmd, spdk_nvmf_qpair_is_admin_queue(qpair));

	if (qpair->ctrlr) {
		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
	}

	if (qpair->state != SPDK_NVMF_QPAIR_ACTIVE) {
		req->rsp->nvme_cpl.status.sct = SPDK_NVME_SCT_GENERIC;
		req->rsp->nvme_cpl.status.sc = SPDK_NVME_SC_COMMAND_SEQUENCE_ERROR;
		/* Place the request on the outstanding list so we can keep track of it */
		TAILQ_INSERT_TAIL(&qpair->outstanding, req, link);
		/* Still increment io_outstanding because request_complete decrements it */
		if (sgroup != NULL) {
			sgroup->io_outstanding++;
		}
		spdk_nvmf_request_complete(req);
		return;
	}

	/* Check if the subsystem is paused (if there is a subsystem) */
	if (qpair->ctrlr) {
		struct spdk_nvmf_subsystem_poll_group *sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
	if (sgroup != NULL) {
		if (sgroup->state != SPDK_NVMF_SUBSYSTEM_ACTIVE) {
			/* The subsystem is not currently active. Queue this request. */
			TAILQ_INSERT_TAIL(&sgroup->queued, req, link);
			return;
		}

		sgroup->io_outstanding++;
	}

	/* Place the request on the outstanding list so we can keep track of it */
+9 −1
Original line number Diff line number Diff line
@@ -1142,7 +1142,15 @@ spdk_nvmf_poll_group_pause_subsystem(struct spdk_nvmf_poll_group *group,
	}

	assert(sgroup->state == SPDK_NVMF_SUBSYSTEM_ACTIVE);
	/* TODO: This currently does not quiesce I/O */
	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSING;

	if (sgroup->io_outstanding > 0) {
		sgroup->cb_fn = cb_fn;
		sgroup->cb_arg = cb_arg;
		return;
	}

	assert(sgroup->io_outstanding == 0);
	sgroup->state = SPDK_NVMF_SUBSYSTEM_PAUSED;
fini:
	if (cb_fn) {
+6 −1
Original line number Diff line number Diff line
@@ -127,11 +127,17 @@ struct spdk_nvmf_subsystem_pg_ns_info {
	struct spdk_uuid		reg_hostid[SPDK_NVMF_MAX_NUM_REGISTRANTS];
};

typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);

struct spdk_nvmf_subsystem_poll_group {
	/* Array of namespace information for each namespace indexed by nsid - 1 */
	struct spdk_nvmf_subsystem_pg_ns_info	*ns_info;
	uint32_t				num_ns;

	uint64_t				io_outstanding;
	spdk_nvmf_poll_group_mod_done		cb_fn;
	void					*cb_arg;

	enum spdk_nvmf_subsystem_state		state;

	TAILQ_HEAD(, spdk_nvmf_request)		queued;
@@ -319,7 +325,6 @@ struct spdk_nvmf_subsystem {
	TAILQ_ENTRY(spdk_nvmf_subsystem)	entries;
};

typedef void(*spdk_nvmf_poll_group_mod_done)(void *cb_arg, int status);

struct spdk_nvmf_transport *spdk_nvmf_tgt_get_transport(struct spdk_nvmf_tgt *tgt,
		enum spdk_nvme_transport_type);
+7 −0
Original line number Diff line number Diff line
@@ -279,6 +279,7 @@ test_connect(void)
{
	struct spdk_nvmf_fabric_connect_data connect_data;
	struct spdk_nvmf_poll_group group;
	struct spdk_nvmf_subsystem_poll_group *sgroups;
	struct spdk_nvmf_transport transport;
	struct spdk_nvmf_subsystem subsystem;
	struct spdk_nvmf_request req;
@@ -339,6 +340,10 @@ test_connect(void)
	subsystem.state = SPDK_NVMF_SUBSYSTEM_ACTIVE;
	snprintf(subsystem.subnqn, sizeof(subsystem.subnqn), "%s", subnqn);

	sgroups = calloc(subsystem.id + 1, sizeof(struct spdk_nvmf_subsystem_poll_group));
	sgroups[subsystem.id].io_outstanding = 5;
	group.sgroups = sgroups;

	memset(&cmd, 0, sizeof(cmd));
	cmd.connect_cmd.opcode = SPDK_NVME_OPC_FABRIC;
	cmd.connect_cmd.cid = 1;
@@ -643,6 +648,7 @@ test_connect(void)
	MOCK_CLEAR(spdk_nvmf_poll_group_create);

	spdk_bit_array_free(&ctrlr.qpair_mask);
	free(sgroups);
}

static void
@@ -1108,6 +1114,7 @@ test_reservation_notification_log_page(void)
	ctrlr.aer_req = &req;
	req.qpair = &qpair;
	TAILQ_INIT(&qpair.outstanding);
	qpair.ctrlr = NULL;
	qpair.state = SPDK_NVMF_QPAIR_ACTIVE;
	TAILQ_INSERT_TAIL(&qpair.outstanding, &req, link);