Commit ba6f6c5e authored by Jim Harris's avatar Jim Harris
Browse files

nvmf: wait for qpair disconnect cbs when removing subsys



nvmf_poll_group_remove_subsystem_msg() disconnects all
qpairs associated with controllers in the specified
subsystem.  If it finds any controllers that need to
be disconnected, it sends a message to the running
thread to execute the same function again later.

But when it runs again later, the qpair may no longer
be in the poll group, but there could still be
outstanding messages being sent between threads.  For
example, _nvmf_qpair_destroy() needs to send a message
to the ctrlr->thread to clear the qpair mask bit.

All of this could result in the nvmf target starting
to destroy poll groups prematurely.  Destroy poll
groups results in the nvmf spdk_threads exiting. If
there are still messages being processed from
the STOP_SUBSYSTEMS target state, we can get
use-after-free errors since processing of those
messages could access freed memory associated with
the exited thread.

Fixes issue #1850.

Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: I1e63b9addb2956495a69b5108a41e029f6f9a85d
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7275


Community-CI: Broadcom CI
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: default avatar <dongx.yi@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent 530646f6
Loading
Loading
Loading
Loading
+23 −9
Original line number Diff line number Diff line
@@ -76,6 +76,7 @@ struct nvmf_qpair_disconnect_many_ctx {
	struct spdk_nvmf_poll_group *group;
	spdk_nvmf_poll_group_mod_done cpl_fn;
	void *cpl_ctx;
	uint32_t count;
};

static void
@@ -1360,6 +1361,18 @@ fini:
	}
}

static void
remove_subsystem_qpair_cb(void *ctx)
{
	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;

	assert(qpair_ctx->count > 0);
	qpair_ctx->count--;
	if (qpair_ctx->count == 0) {
		_nvmf_poll_group_remove_subsystem_cb(ctx, 0);
	}
}

static void
nvmf_poll_group_remove_subsystem_msg(void *ctx)
{
@@ -1368,29 +1381,30 @@ nvmf_poll_group_remove_subsystem_msg(void *ctx)
	struct spdk_nvmf_poll_group *group;
	struct nvmf_qpair_disconnect_many_ctx *qpair_ctx = ctx;
	int rc = 0;
	bool have_qpairs = false;

	group = qpair_ctx->group;
	subsystem = qpair_ctx->subsystem;

	/* Initialize count to 1.  This acts like a ref count, to ensure that if spdk_nvmf_qpair_disconnect
	 * immediately invokes the callback (i.e. the qpairs is already in process of being disconnected)
	 * that we don't prematurely call _nvmf_poll_group_remove_subsystem_cb() before we've
	 * iterated the full list of qpairs.
	 */
	qpair_ctx->count = 1;
	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, qpair_tmp) {
		if ((qpair->ctrlr != NULL) && (qpair->ctrlr->subsys == subsystem)) {
			/* Use another variable to check if there were any qpairs disconnected in this call since
			 * we can loop over all qpairs and iterator will be NULL in the end */
			have_qpairs = true;
			rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
			qpair_ctx->count++;
			rc = spdk_nvmf_qpair_disconnect(qpair, remove_subsystem_qpair_cb, ctx);
			if (rc) {
				break;
			}
		}
	}
	qpair_ctx->count--;

	if (!have_qpairs || rc) {
	if (qpair_ctx->count == 0 || rc) {
		_nvmf_poll_group_remove_subsystem_cb(ctx, rc);
		return;
	}

	spdk_thread_send_msg(spdk_get_thread(), nvmf_poll_group_remove_subsystem_msg, ctx);
}

void