Commit e6ed5c2d authored by Joel Cunningham's avatar Joel Cunningham Committed by Tomasz Zawadzki
Browse files

lib/nvmf: serialize reservation commands



Add support to queue multiple reservation commands that modify state
into a singlely linked TAILQ. HEAD is always the in-progress command and
subsequent received commands are appened to the tail.

Updating namespace reservation state is a two step process:
  1) Update struct spdk_nvmf_ns on subsystem->thread
  2) Trigger poll group update of their state

The existing solution could start step 1 of a subsequent received
command before step 2 of the previous command finishes, leaving a
possibility of poll groups reading spdk_nvmf_ns while a subsequent
command is modifying it.

Serializing the reservation commands closes this gap.

Basic validation in subsystem unit test.

Change-Id: I49ca4050338957748ee65face1bf1d2d089cdf0e
Signed-off-by: default avatarJoel Cunningham <joel.cunningham@oracle.com>
Reviewed-on: https://review.spdk.io/c/spdk/spdk/+/26467


Reviewed-by: default avatarJim Harris <jim.harris@nvidia.com>
Tested-by: default avatarSPDK Automated Test System <spdkbot@gmail.com>
Reviewed-by: default avatarChangpeng Liu <changpeliu@tencent.com>
Community-CI: Mellanox Build Bot
parent bc4b2d51
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -89,7 +89,8 @@ struct spdk_nvmf_request {
			uint8_t data_from_pool		: 1;
			uint8_t dif_enabled		: 1;
			uint8_t first_fused		: 1;
			uint8_t rsvd			: 5;
			uint8_t reservation_queued	: 1;
			uint8_t rsvd			: 4;
		};
	};
	uint8_t				zcopy_phase; /* type enum spdk_nvmf_zcopy_phase */
@@ -128,8 +129,9 @@ struct spdk_nvmf_request {
	/* Timeout tracked for connect and abort flows. */
	uint64_t timeout_tsc;
	uint32_t			orig_nsid;
	STAILQ_ENTRY(spdk_nvmf_request)	reservation_link;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_request) == 824, "Incorrect size");
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_request) == 832, "Incorrect size");

enum spdk_nvmf_qpair_state {
	SPDK_NVMF_QPAIR_UNINITIALIZED = 0,
+3 −0
Original line number Diff line number Diff line
@@ -4700,6 +4700,9 @@ _nvmf_request_complete(void *ctx)
	opcode = req->cmd->nvmf_cmd.opcode;
	qpair = req->qpair;

	/* request should not be on a ns reservations list */
	assert(req->reservation_queued == false);

	if (spdk_likely(qpair->ctrlr)) {
		sgroup = &qpair->group->sgroups[qpair->ctrlr->subsys->id];
		assert(sgroup != NULL);
+2 −0
Original line number Diff line number Diff line
@@ -180,6 +180,8 @@ struct spdk_nvmf_ns {
	uint32_t gen;
	/* registrants head */
	TAILQ_HEAD(, spdk_nvmf_registrant) registrants;
	/* Queued reservation requests: head is in-progress, rest are pending */
	STAILQ_HEAD(, spdk_nvmf_request) reservations;
	/* current reservation key */
	uint64_t crkey;
	/* reservation type */
+39 −1
Original line number Diff line number Diff line
@@ -1778,6 +1778,7 @@ spdk_nvmf_subsystem_remove_ns(struct spdk_nvmf_subsystem *subsystem, uint32_t ns

	assert(ns->anagrpid - 1 < subsystem->max_nsid);
	assert(subsystem->ana_group[ns->anagrpid - 1] > 0);
	assert(STAILQ_EMPTY(&ns->reservations));

	subsystem->ana_group[ns->anagrpid - 1]--;

@@ -2251,6 +2252,7 @@ spdk_nvmf_subsystem_add_ns_ext(struct spdk_nvmf_subsystem *subsystem, const char
	ns->anagrpid = opts.anagrpid;
	subsystem->ana_group[ns->anagrpid - 1]++;
	TAILQ_INIT(&ns->registrants);
	STAILQ_INIT(&ns->reservations);
	if (ptpl_file) {
		ns->ptpl_file = strdup(ptpl_file);
		if (!ns->ptpl_file) {
@@ -3658,7 +3660,32 @@ _nvmf_ns_reservation_update_done(struct spdk_nvmf_subsystem *subsystem,
{
	struct spdk_nvmf_request *req = (struct spdk_nvmf_request *)cb_arg;
	struct spdk_nvmf_poll_group *group = req->qpair->group;
	struct spdk_nvme_cmd *cmd = &req->cmd->nvme_cmd;
	struct spdk_nvmf_ns *ns;

	assert(subsystem->thread == spdk_get_thread());

	/* Get namespace */
	ns = _nvmf_subsystem_get_ns(subsystem, cmd->nsid);
	assert(ns != NULL);

	/* sanity check: this req should be head of outstanding */
	assert(req->reservation_queued == true);
	assert(req == STAILQ_FIRST(&ns->reservations));

	/* req is complete, remove from queue and continue if there's others */
	STAILQ_REMOVE_HEAD(&ns->reservations, reservation_link);
	req->reservation_queued = false;
	if (!STAILQ_EMPTY(&ns->reservations)) {
		/* NOTE: we leave the next on the queue to prevent any in-flight
		 * requests moving from pg->thread to subsystem->thread from
		 * executing before the next one
		 */
		spdk_thread_send_msg(subsystem->thread, nvmf_ns_reservation_request,
				     STAILQ_FIRST(&ns->reservations));
	}

	/* Complete the request on the original pg */
	spdk_thread_send_msg(group->thread, nvmf_ns_reservation_complete, req);
}

@@ -3671,6 +3698,16 @@ nvmf_ns_reservation_update_state(struct spdk_nvmf_ns *ns,
	bool update_sgroup = false;
	int status = 0;

	/* All reservation state modifications must be queued to serialize them */
	if (!req->reservation_queued) {
		STAILQ_INSERT_TAIL(&ns->reservations, req, reservation_link);
		req->reservation_queued = true;
	}
	/* The head is in-progress, others must wait */
	if (req != STAILQ_FIRST(&ns->reservations)) {
		return;
	}

	switch (opc) {
	case SPDK_NVME_OPC_RESERVATION_REGISTER:
		update_sgroup = nvmf_ns_reservation_register(ns, ctrlr, req);
@@ -3717,7 +3754,8 @@ nvmf_ns_reservation_request(void *ctx)
	/* Report is a read-only command and can always be executed */
	if (cmd->opc == SPDK_NVME_OPC_RESERVATION_REPORT) {
		nvmf_ns_reservation_report(ns, req);
		_nvmf_ns_reservation_update_done(ctrlr->subsys, req, 0);
		/* Complete the request on the original pg */
		spdk_thread_send_msg(req->qpair->group->thread, nvmf_ns_reservation_complete, req);
	} else {
		/* Remaining commands modify reservation state and must be serialized.
		 * These complete asynchronously after state propagates to poll groups
+18 −0
Original line number Diff line number Diff line
@@ -771,6 +771,7 @@ ut_reservation_init(void)

	memset(&g_ns, 0, sizeof(g_ns));
	TAILQ_INIT(&g_ns.registrants);
	STAILQ_INIT(&g_ns.reservations);
	g_ns.nsid = 1;
	g_ns.subsystem = &g_subsystem;
	g_ns.ptpl_file = NULL;
@@ -811,7 +812,11 @@ ut_reservation_deinit(void)
	struct spdk_nvmf_registrant *reg, *tmp;
	struct spdk_nvmf_reservation_log *log, *log_tmp;
	struct spdk_nvmf_ctrlr *ctrlr, *ctrlr_tmp;
	struct spdk_nvmf_request *req, *req_tmp;

	STAILQ_FOREACH_SAFE(req, &g_ns.reservations, reservation_link, req_tmp) {
		STAILQ_REMOVE(&g_ns.reservations, req, spdk_nvmf_request, reservation_link);
	}
	TAILQ_FOREACH_SAFE(reg, &g_ns.registrants, link, tmp) {
		TAILQ_REMOVE(&g_ns.registrants, reg, link);
		free(reg);
@@ -1663,6 +1668,7 @@ test_reservation_request(void)
	/* This test requires a thread */
	thread = spdk_get_thread();
	SPDK_CU_ASSERT_FATAL(thread != NULL);
	g_subsystem.thread = thread;

	ut_reservation_init();
	g_subsystem.tgt = &tgt;
@@ -1696,8 +1702,11 @@ test_reservation_request(void)
	SPDK_CU_ASSERT_FATAL(rsp->status.sc == SPDK_NVME_SC_SUCCESS);
	SPDK_CU_ASSERT_FATAL(!TAILQ_EMPTY(&g_ns.registrants));
	SPDK_CU_ASSERT_FATAL(g_ns.gen == 1);
	/* reservation command is outstanding until pg updates */
	SPDK_CU_ASSERT_FATAL(!STAILQ_EMPTY(&g_ns.reservations));
	poll_threads(); /* drive poll group update */
	SPDK_CU_ASSERT_FATAL(g_ns_pg_update == 1);
	SPDK_CU_ASSERT_FATAL(STAILQ_EMPTY(&g_ns.reservations));

	/* Acquire */
	ut_reservation_build_acquire_request(req, SPDK_NVME_RESERVE_ACQUIRE, 0,
@@ -1709,8 +1718,11 @@ test_reservation_request(void)
					       &g_ctrlr1_A.hostid) == 0);
	SPDK_CU_ASSERT_FATAL(g_ns.rtype == SPDK_NVME_RESERVE_WRITE_EXCLUSIVE);
	SPDK_CU_ASSERT_FATAL(g_ns.crkey == rkey);
	/* reservation command is outstanding until pg updates */
	SPDK_CU_ASSERT_FATAL(!STAILQ_EMPTY(&g_ns.reservations));
	poll_threads(); /* drive poll group update */
	SPDK_CU_ASSERT_FATAL(g_ns_pg_update == 2);
	SPDK_CU_ASSERT_FATAL(STAILQ_EMPTY(&g_ns.reservations));

	/* Report */
	report_req = ut_reservation_build_req(
@@ -1737,6 +1749,8 @@ test_reservation_request(void)
	SPDK_CU_ASSERT_FATAL(ctrlr_data->rkey ==  rkey);
	SPDK_CU_ASSERT_FATAL(!spdk_uuid_compare(
				     (struct spdk_uuid *)ctrlr_data->hostid, &g_ctrlr1_A.hostid));
	/* Report is read-only, reservation should be complete */
	SPDK_CU_ASSERT_FATAL(STAILQ_EMPTY(&g_ns.reservations));
	/* Report is read-only, should be no pg updates */
	poll_threads();
	SPDK_CU_ASSERT_FATAL(g_ns_pg_update == 2);
@@ -1752,12 +1766,16 @@ test_reservation_request(void)
	SPDK_CU_ASSERT_FATAL(g_ns.rtype == 0);
	SPDK_CU_ASSERT_FATAL(g_ns.crkey == 0);
	SPDK_CU_ASSERT_FATAL(g_ns.gen == 1); /* registration is not removed */
	/* reservation command is outstanding until pg updates */
	SPDK_CU_ASSERT_FATAL(!STAILQ_EMPTY(&g_ns.reservations));
	poll_threads(); /* drive poll group update */
	SPDK_CU_ASSERT_FATAL(g_ns_pg_update == 3);
	SPDK_CU_ASSERT_FATAL(STAILQ_EMPTY(&g_ns.reservations));

	spdk_put_io_channel(ch);
	spdk_io_device_unregister(&tgt, NULL);
	g_subsystem.tgt = NULL;
	g_subsystem.thread = NULL;

	ut_reservation_free_req(req);
	ut_reservation_free_req(report_req);