Commit 8ff8bf07 authored by Seth Howell's avatar Seth Howell Committed by Tomasz Zawadzki
Browse files

lib/nvmf: add synchronization to subsystem state change.



This is important to avoid doubling up on state changes
and hitting asserts.

Signed-off-by: default avatarSeth Howell <seth.howell@intel.com>
Change-Id: If8797ea13a5c224cee85e53e9b2542012423b37f
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3759


Community-CI: Broadcom CI
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
parent bd16f574
Loading
Loading
Loading
Loading
+34 −11
Original line number Diff line number Diff line
@@ -488,11 +488,17 @@ static void
nvmf_tgt_subsystem_stop_next(struct spdk_nvmf_subsystem *subsystem,
			     void *cb_arg, int status)
{
	int rc;

	subsystem = spdk_nvmf_subsystem_get_next(subsystem);
	if (subsystem) {
		spdk_nvmf_subsystem_stop(subsystem,
		rc = spdk_nvmf_subsystem_stop(subsystem,
					      nvmf_tgt_subsystem_stop_next,
					      cb_arg);
		if (rc) {
			nvmf_tgt_subsystem_stop_next(subsystem, cb_arg, 0);
			fprintf(stderr, "Unable to stop NVMe-oF subsystem. Trying others.\n");
		}
		return;
	}

@@ -506,12 +512,17 @@ static void
nvmf_tgt_stop_subsystems(struct nvmf_target *nvmf_tgt)
{
	struct spdk_nvmf_subsystem *subsystem;
	int rc;

	subsystem = spdk_nvmf_subsystem_get_first(nvmf_tgt->tgt);
	if (spdk_likely(subsystem)) {
		spdk_nvmf_subsystem_stop(subsystem,
		rc = spdk_nvmf_subsystem_stop(subsystem,
					      nvmf_tgt_subsystem_stop_next,
					      NULL);
		if (rc) {
			nvmf_tgt_subsystem_stop_next(subsystem, NULL, 0);
			fprintf(stderr, "Unable to stop NVMe-oF subsystem. Trying others.\n");
		}
	} else {
		g_target_state = NVMF_FINI_POLL_GROUPS;
	}
@@ -531,10 +542,17 @@ static void
nvmf_tgt_subsystem_start_next(struct spdk_nvmf_subsystem *subsystem,
			      void *cb_arg, int status)
{
	int rc;

	subsystem = spdk_nvmf_subsystem_get_next(subsystem);
	if (subsystem) {
		spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_start_next,
		rc = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_start_next,
					       cb_arg);
		if (rc) {
			g_target_state = NVMF_FINI_STOP_SUBSYSTEMS;
			fprintf(stderr, "Unable to start NVMe-oF subsystem. shutting down app.\n");
			nvmf_target_advance_state();
		}
		return;
	}

@@ -548,6 +566,7 @@ static void
nvmf_tgt_start_subsystems(struct nvmf_target *nvmf_tgt)
{
	struct spdk_nvmf_subsystem *subsystem;
	int rc;

	/* Subsystem is the NVM subsystem which is a combine of namespaces
	 * except the discovery subsystem which is used for discovery service.
@@ -560,9 +579,13 @@ nvmf_tgt_start_subsystems(struct nvmf_target *nvmf_tgt)
		 * Start subsystem means make it from inactive to active that means
		 * subsystem start to work or it can be accessed.
		 */
		spdk_nvmf_subsystem_start(subsystem,
		rc = spdk_nvmf_subsystem_start(subsystem,
					       nvmf_tgt_subsystem_start_next,
					       NULL);
		if (rc) {
			fprintf(stderr, "Unable to start NVMe-oF subsystem. shutting down app.\n");
			g_target_state = NVMF_FINI_STOP_SUBSYSTEMS;
		}
	} else {
		g_target_state = NVMF_INIT_START_ACCEPTOR;
	}
+3 −0
Original line number Diff line number Diff line
@@ -260,6 +260,9 @@ struct spdk_nvmf_subsystem {
	char sn[SPDK_NVME_CTRLR_SN_LEN + 1];
	char mn[SPDK_NVME_CTRLR_MN_LEN + 1];

	/* boolean for state change synchronization. */
	bool changing_state;

	/* Array of pointers to namespaces of size max_nsid indexed by nsid - 1 */
	struct spdk_nvmf_ns			**ns;
	uint32_t				max_nsid;
+76 −17
Original line number Diff line number Diff line
@@ -496,6 +496,7 @@ rpc_nvmf_delete_subsystem(struct spdk_jsonrpc_request *request,
	struct rpc_delete_subsystem req = { 0 };
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_tgt *tgt;
	int rc;

	if (spdk_json_decode_object(params, rpc_delete_subsystem_decoders,
				    SPDK_COUNTOF(rpc_delete_subsystem_decoders),
@@ -523,9 +524,18 @@ rpc_nvmf_delete_subsystem(struct spdk_jsonrpc_request *request,

	free_rpc_delete_subsystem(&req);

	spdk_nvmf_subsystem_stop(subsystem,
	rc = spdk_nvmf_subsystem_stop(subsystem,
				      rpc_nvmf_subsystem_stopped,
				      request);
	if (rc == -EBUSY) {
		SPDK_ERRLOG("Subsystem currently in another state change try again later.\n");
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
						 "Subsystem currently in another state change try again later.");
	} else if (rc != 0) {
		SPDK_ERRLOG("Unable to change state on subsystem. rc=%d\n", rc);
		spdk_jsonrpc_send_error_response_fmt(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
						     "Unable to change state on subsystem. rc=%d", rc);
	}

	return;

@@ -770,6 +780,7 @@ rpc_nvmf_subsystem_add_listener(struct spdk_jsonrpc_request *request,
	struct nvmf_rpc_listener_ctx *ctx;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_tgt *tgt;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
@@ -817,8 +828,14 @@ rpc_nvmf_subsystem_add_listener(struct spdk_jsonrpc_request *request,

	ctx->op = NVMF_RPC_LISTEN_ADD;

	if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx)) {
	rc = spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx);
	if (rc != 0) {
		if (rc == -EBUSY) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
							 "subsystem busy, retry later.\n");
		} else {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
		}
		nvmf_rpc_listener_ctx_free(ctx);
	}
}
@@ -832,6 +849,7 @@ rpc_nvmf_subsystem_remove_listener(struct spdk_jsonrpc_request *request,
	struct nvmf_rpc_listener_ctx *ctx;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_tgt *tgt;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
@@ -887,8 +905,14 @@ rpc_nvmf_subsystem_remove_listener(struct spdk_jsonrpc_request *request,

	ctx->op = NVMF_RPC_LISTEN_REMOVE;

	if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx)) {
	rc = spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_listen_paused, ctx);
	if (rc != 0) {
		if (rc == -EBUSY) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
							 "subsystem busy, retry later.\n");
		} else {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
		}
		nvmf_rpc_listener_ctx_free(ctx);
	}
}
@@ -1062,6 +1086,7 @@ rpc_nvmf_subsystem_add_ns(struct spdk_jsonrpc_request *request,
	struct nvmf_rpc_ns_ctx *ctx;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_tgt *tgt;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
@@ -1098,8 +1123,14 @@ rpc_nvmf_subsystem_add_ns(struct spdk_jsonrpc_request *request,
		return;
	}

	if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_ns_paused, ctx)) {
	rc = spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_ns_paused, ctx);
	if (rc != 0) {
		if (rc == -EBUSY) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
							 "subsystem busy, retry later.\n");
		} else {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
		}
		nvmf_rpc_ns_ctx_free(ctx);
	}
}
@@ -1178,6 +1209,7 @@ rpc_nvmf_subsystem_remove_ns(struct spdk_jsonrpc_request *request,
	struct nvmf_rpc_remove_ns_ctx *ctx;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_tgt *tgt;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
@@ -1214,8 +1246,14 @@ rpc_nvmf_subsystem_remove_ns(struct spdk_jsonrpc_request *request,
		return;
	}

	if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_remove_ns_paused, ctx)) {
	rc = spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_remove_ns_paused, ctx);
	if (rc != 0) {
		if (rc == -EBUSY) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
							 "subsystem busy, retry later.\n");
		} else {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
		}
		nvmf_rpc_remove_ns_ctx_free(ctx);
	}
}
@@ -1316,6 +1354,7 @@ rpc_nvmf_subsystem_add_host(struct spdk_jsonrpc_request *request,
	struct nvmf_rpc_host_ctx *ctx;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_tgt *tgt;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
@@ -1353,8 +1392,14 @@ rpc_nvmf_subsystem_add_host(struct spdk_jsonrpc_request *request,
		return;
	}

	if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) {
	rc = spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx);
	if (rc != 0) {
		if (rc == -EBUSY) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
							 "subsystem busy, retry later.\n");
		} else {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
		}
		nvmf_rpc_host_ctx_free(ctx);
	}
}
@@ -1367,6 +1412,7 @@ rpc_nvmf_subsystem_remove_host(struct spdk_jsonrpc_request *request,
	struct nvmf_rpc_host_ctx *ctx;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_tgt *tgt;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
@@ -1404,8 +1450,14 @@ rpc_nvmf_subsystem_remove_host(struct spdk_jsonrpc_request *request,
		return;
	}

	if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) {
	rc = spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx);
	if (rc != 0) {
		if (rc == -EBUSY) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
							 "subsystem busy, retry later.\n");
		} else {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
		}
		nvmf_rpc_host_ctx_free(ctx);
	}
}
@@ -1426,6 +1478,7 @@ rpc_nvmf_subsystem_allow_any_host(struct spdk_jsonrpc_request *request,
	struct nvmf_rpc_host_ctx *ctx;
	struct spdk_nvmf_subsystem *subsystem;
	struct spdk_nvmf_tgt *tgt;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
@@ -1463,8 +1516,14 @@ rpc_nvmf_subsystem_allow_any_host(struct spdk_jsonrpc_request *request,
		return;
	}

	if (spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx)) {
	rc = spdk_nvmf_subsystem_pause(subsystem, nvmf_rpc_host_paused, ctx);
	if (rc != 0) {
		if (rc == -EBUSY) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR,
							 "subsystem busy, retry later.\n");
		} else {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INTERNAL_ERROR, "Internal error");
		}
		nvmf_rpc_host_ctx_free(ctx);
	}
}
+82 −7
Original line number Diff line number Diff line
@@ -478,6 +478,7 @@ subsystem_state_change_revert_done(struct spdk_io_channel_iter *i, int status)
		SPDK_ERRLOG("Unable to revert the subsystem state after operation failure.\n");
	}

	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		/* return a failure here. This function only exists in an error path. */
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, -1);
@@ -515,6 +516,7 @@ subsystem_state_change_done(struct spdk_io_channel_iter *i, int status)
	}

out:
	ctx->subsystem->changing_state = false;
	if (ctx->cb_fn) {
		ctx->cb_fn(ctx->subsystem, ctx->cb_arg, status);
	}
@@ -569,11 +571,16 @@ nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
	enum spdk_nvmf_subsystem_state intermediate_state;
	int rc;

	if (__sync_val_compare_and_swap(&subsystem->changing_state, false, true)) {
		return -EBUSY;
	}

	intermediate_state = nvmf_subsystem_get_intermediate_state(subsystem->state, requested_state);
	assert(intermediate_state != SPDK_NVMF_SUBSYSTEM_NUM_STATES);

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		subsystem->changing_state = false;
		return -ENOMEM;
	}

@@ -581,6 +588,7 @@ nvmf_subsystem_state_change(struct spdk_nvmf_subsystem *subsystem,
	rc = nvmf_subsystem_set_state(subsystem, intermediate_state);
	if (rc) {
		free(ctx);
		subsystem->changing_state = false;
		return rc;
	}

@@ -1060,51 +1068,118 @@ spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t ns
	return 0;
}

struct subsystem_ns_change_ctx {
	struct spdk_nvmf_subsystem		*subsystem;
	spdk_nvmf_subsystem_state_change_done	cb_fn;
	uint32_t				nsid;
};

static void
_nvmf_ns_hot_remove(struct spdk_nvmf_subsystem *subsystem,
		    void *cb_arg, int status)
{
	struct spdk_nvmf_ns *ns = cb_arg;
	struct subsystem_ns_change_ctx *ctx = cb_arg;
	int rc;

	rc = spdk_nvmf_subsystem_remove_ns(subsystem, ns->opts.nsid);
	rc = spdk_nvmf_subsystem_remove_ns(subsystem, ctx->nsid);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to make changes to NVME-oF subsystem with id: %u\n", subsystem->id);
	}

	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);

	free(ctx);
}

static void
nvmf_ns_change_msg(void *ns_ctx)
{
	struct subsystem_ns_change_ctx *ctx = ns_ctx;
	int rc;

	rc = spdk_nvmf_subsystem_pause(ctx->subsystem, ctx->cb_fn, ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ctx);
		} else {
			free(ctx);
			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
		}
	}
}

static void
nvmf_ns_hot_remove(void *remove_ctx)
{
	struct spdk_nvmf_ns *ns = remove_ctx;
	struct subsystem_ns_change_ctx *ns_ctx;
	int rc;

	rc = spdk_nvmf_subsystem_pause(ns->subsystem, _nvmf_ns_hot_remove, ns);
	/* We have to allocate a new context because this op
	 * is asynchronous and we could lose the ns in the middle.
	 */
	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
	if (!ns_ctx) {
		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
		return;
	}

	ns_ctx->subsystem = ns->subsystem;
	ns_ctx->nsid = ns->opts.nsid;
	ns_ctx->cb_fn = _nvmf_ns_hot_remove;

	rc = spdk_nvmf_subsystem_pause(ns->subsystem, _nvmf_ns_hot_remove, ns_ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
		} else {
			SPDK_ERRLOG("Unable to pause subsystem to process namespace removal!\n");
			free(ns_ctx);
		}
	}
}

static void
_nvmf_ns_resize(struct spdk_nvmf_subsystem *subsystem, void *cb_arg, int status)
{
	struct spdk_nvmf_ns *ns = cb_arg;
	struct subsystem_ns_change_ctx *ctx = cb_arg;

	nvmf_subsystem_ns_changed(subsystem, ns->opts.nsid);
	nvmf_subsystem_ns_changed(subsystem, ctx->nsid);
	spdk_nvmf_subsystem_resume(subsystem, NULL, NULL);

	free(ctx);
}

static void
nvmf_ns_resize(void *event_ctx)
{
	struct spdk_nvmf_ns *ns = event_ctx;
	struct subsystem_ns_change_ctx *ns_ctx;
	int rc;

	rc = spdk_nvmf_subsystem_pause(ns->subsystem, _nvmf_ns_resize, ns);
	/* We have to allocate a new context because this op
	 * is asynchronous and we could lose the ns in the middle.
	 */
	ns_ctx = calloc(1, sizeof(struct subsystem_ns_change_ctx));
	if (!ns_ctx) {
		SPDK_ERRLOG("Unable to allocate context to process namespace removal!\n");
		return;
	}

	ns_ctx->subsystem = ns->subsystem;
	ns_ctx->nsid = ns->opts.nsid;
	ns_ctx->cb_fn = _nvmf_ns_resize;

	rc = spdk_nvmf_subsystem_pause(ns->subsystem, _nvmf_ns_resize, ns_ctx);
	if (rc) {
		if (rc == -EBUSY) {
			/* Try again, this is not a permanent situation. */
			spdk_thread_send_msg(spdk_get_thread(), nvmf_ns_change_msg, ns_ctx);
		}
		SPDK_ERRLOG("Unable to pause subsystem to process namespace resize!\n");
		free(ns_ctx);
	}
}

+23 −4
Original line number Diff line number Diff line
@@ -224,9 +224,15 @@ nvmf_tgt_subsystem_started(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	subsystem = spdk_nvmf_subsystem_get_next(subsystem);
	int rc;

	if (subsystem) {
		spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
		rc = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
		if (rc) {
			g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
			SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
			nvmf_tgt_advance_state();
		}
		return;
	}

@@ -239,9 +245,14 @@ nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
			   void *cb_arg, int status)
{
	subsystem = spdk_nvmf_subsystem_get_next(subsystem);
	int rc;

	if (subsystem) {
		spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
		rc = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
		if (rc) {
			SPDK_ERRLOG("Unable to stop NVMe-oF subsystem. Trying others.\n");
			nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
		}
		return;
	}

@@ -356,6 +367,7 @@ nvmf_tgt_advance_state(void)
{
	enum nvmf_tgt_state prev_state;
	int rc = -1;
	int ret;

	do {
		prev_state = g_tgt_state;
@@ -388,7 +400,11 @@ nvmf_tgt_advance_state(void)
			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

			if (subsystem) {
				spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
				ret = spdk_nvmf_subsystem_start(subsystem, nvmf_tgt_subsystem_started, NULL);
				if (ret) {
					SPDK_ERRLOG("Unable to start NVMe-oF subsystem. Stopping app.\n");
					g_tgt_state = NVMF_TGT_FINI_STOP_SUBSYSTEMS;
				}
			} else {
				g_tgt_state = NVMF_TGT_INIT_START_ACCEPTOR;
			}
@@ -408,7 +424,10 @@ nvmf_tgt_advance_state(void)
			subsystem = spdk_nvmf_subsystem_get_first(g_spdk_nvmf_tgt);

			if (subsystem) {
				spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
				ret = spdk_nvmf_subsystem_stop(subsystem, nvmf_tgt_subsystem_stopped, NULL);
				if (ret) {
					nvmf_tgt_subsystem_stopped(subsystem, NULL, 0);
				}
			} else {
				g_tgt_state = NVMF_TGT_FINI_DESTROY_POLL_GROUPS;
			}