Commit b8769cdb authored by JinYu's avatar JinYu Committed by Jim Harris
Browse files

nvmf: Add the Keep Alive feature



The controller shall treat a Keep Alive Timeout in the same manner
as connection loss. If the Keep Alive feature is in use and the
timer expires, then the controller shall:
1, stop processing commands and set the Controller Fatal Status
(CSTS,CFS) bit to '1';
2, terminate the NVMe Transport connection;
3, break the host to controller association;
A timer poller is added to each subsystem to monitor timeout event.

Change-Id: I001afab8a6764f30c39df37fa96384180d117486
Signed-off-by: default avatarJinYu <jin.yu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/439330


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent 058c4c89
Loading
Loading
Loading
Loading
+180 −8
Original line number Diff line number Diff line
@@ -47,7 +47,10 @@

#include "spdk_internal/log.h"

#define MIN_KEEP_ALIVE_TIMEOUT 10000
#define MIN_KEEP_ALIVE_TIMEOUT_IN_MS 10000
#define NVMF_DISC_KATO_IN_MS 120000
#define KAS_DEFAULT_VALUE 10
#define KAS_TIME_UNIT_IN_MS 100

#define MODEL_NUMBER "SPDK bdev Controller"

@@ -72,6 +75,110 @@ spdk_nvmf_invalid_connect_response(struct spdk_nvmf_fabric_connect_rsp *rsp,
#define SPDK_NVMF_INVALID_CONNECT_DATA(rsp, field)	\
	spdk_nvmf_invalid_connect_response(rsp, 1, offsetof(struct spdk_nvmf_fabric_connect_data, field))

static void
spdk_nvmf_ctrlr_stop_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
{
	if (!ctrlr) {
		SPDK_ERRLOG("Controller is NULL\n");
		return;
	}

	if (ctrlr->keep_alive_poller == NULL) {
		return;
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Stop keep alive poller\n");
	spdk_poller_unregister(&ctrlr->keep_alive_poller);
}

static void
spdk_nvmf_ctrlr_disconnect_qpairs_done(struct spdk_io_channel_iter *i, int status)
{
	if (status == 0) {
		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "ctrlr disconnect qpairs complete successfully\n");
	} else {
		SPDK_ERRLOG("Fail to disconnect ctrlr qpairs\n");
	}
}

static void
spdk_nvmf_ctrlr_disconnect_qpairs_on_pg(struct spdk_io_channel_iter *i)
{
	int rc = 0;
	struct spdk_nvmf_ctrlr *ctrlr;
	struct spdk_nvmf_qpair *qpair, *temp_qpair;
	struct spdk_io_channel *ch;
	struct spdk_nvmf_poll_group *group;

	ctrlr = spdk_io_channel_iter_get_ctx(i);
	ch = spdk_io_channel_iter_get_channel(i);
	group = spdk_io_channel_get_ctx(ch);

	TAILQ_FOREACH_SAFE(qpair, &group->qpairs, link, temp_qpair) {
		if (qpair->ctrlr == ctrlr) {
			rc = spdk_nvmf_qpair_disconnect(qpair, NULL, NULL);
			if (rc) {
				SPDK_ERRLOG("Qpair disconnect failed\n");
				goto next_channel;
			}
		}
	}

next_channel:
	spdk_for_each_channel_continue(i, rc);
}

static int
spdk_nvmf_ctrlr_keep_alive_poll(void *ctx)
{
	uint64_t keep_alive_timeout_tick;
	uint64_t now = spdk_get_ticks();
	struct spdk_nvmf_ctrlr *ctrlr = ctx;

	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Polling ctrlr keep alive timeout\n");

	/* If the Keep alive feature is in use and the timer expires */
	keep_alive_timeout_tick = ctrlr->last_keep_alive_tick +
				  ctrlr->feat.keep_alive_timer.bits.kato * spdk_get_ticks_hz() / UINT64_C(1000);
	if (now > keep_alive_timeout_tick) {
		/* set the Controller Fatal Status bit to '1' */
		if (ctrlr->vcprop.csts.bits.cfs == 0) {
			ctrlr->vcprop.csts.bits.cfs = 1;

			/*
			 * disconnect qpairs, terminate Transport connection
			 * destroy ctrlr, break the host to controller association
			 * disconnect qpairs with qpair->ctrlr == ctrlr
			 */
			spdk_for_each_channel(ctrlr->subsys->tgt,
					      spdk_nvmf_ctrlr_disconnect_qpairs_on_pg,
					      ctrlr,
					      spdk_nvmf_ctrlr_disconnect_qpairs_done);
		}
	}

	return 1;
}

static void
spdk_nvmf_ctrlr_start_keep_alive_timer(struct spdk_nvmf_ctrlr *ctrlr)
{
	if (!ctrlr) {
		SPDK_ERRLOG("Controller is NULL\n");
		return;
	}

	/* if cleared to 0 then the Keep Alive Timer is disabled */
	if (ctrlr->feat.keep_alive_timer.bits.kato != 0) {

		ctrlr->last_keep_alive_tick = spdk_get_ticks();

		SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Ctrlr add keep alive poller\n");
		ctrlr->keep_alive_poller = spdk_poller_register(spdk_nvmf_ctrlr_keep_alive_poll, ctrlr,
					   ctrlr->feat.keep_alive_timer.bits.kato * 1000);
	}
}

static void
ctrlr_add_qpair_and_update_rsp(struct spdk_nvmf_qpair *qpair,
			       struct spdk_nvmf_ctrlr *ctrlr,
@@ -121,6 +228,7 @@ _spdk_nvmf_ctrlr_add_admin_qpair(void *ctx)
	struct spdk_nvmf_ctrlr *ctrlr = qpair->ctrlr;

	ctrlr->admin_qpair = qpair;
	spdk_nvmf_ctrlr_start_keep_alive_timer(ctrlr);
	ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);
	spdk_nvmf_request_complete(req);
}
@@ -172,10 +280,43 @@ spdk_nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
		return NULL;
	}

	ctrlr->feat.keep_alive_timer.bits.kato = connect_cmd->kato;
	/*
	 * Because Identify Controller data KAS default is 10
	 * KAS: this field indicates the granularity of the Keep Alive Timer in 100ms units
	 * keep-alive timeout in milliseconds
	 */
	ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(connect_cmd->kato,
			KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
			KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
	ctrlr->feat.async_event_configuration.bits.ns_attr_notice = 1;
	ctrlr->feat.volatile_write_cache.bits.wce = 1;

	if (ctrlr->subsys->subtype == SPDK_NVMF_SUBTYPE_DISCOVERY) {
		/* Don't accept keep-alive timeout for discovery controllers */
		if (ctrlr->feat.keep_alive_timer.bits.kato != 0) {
			SPDK_ERRLOG("Discovery controller don't accept keep-alive timeout\n");
			spdk_bit_array_free(&ctrlr->qpair_mask);
			free(ctrlr);
			return NULL;
		}

		/*
		 * Discovery controllers use some arbitrary high value in order
		 * to cleanup stale discovery sessions
		 *
		 * From the 1.0a nvme-of spec:
		 * "The Keep Alive command is reserved for
		 * Discovery controllers. A transport may specify a
		 * fixed Discovery controller activity timeout value
		 * (e.g., 2 minutes).  If no commands are received
		 * by a Discovery controller within that time
		 * period, the controller may perform the
		 * actions for Keep Alive Timer expiration".
		 * kato is in millisecond.
		 */
		ctrlr->feat.keep_alive_timer.bits.kato = NVMF_DISC_KATO_IN_MS;
	}

	/* Subtract 1 for admin queue, 1 for 0's based */
	ctrlr->feat.number_of_queues.bits.ncqr = transport->opts.max_qpairs_per_ctrlr - 1 -
			1;
@@ -217,12 +358,21 @@ spdk_nvmf_ctrlr_create(struct spdk_nvmf_subsystem *subsystem,
	return ctrlr;
}

static void
_spdk_nvmf_ctrlr_destruct(void *ctx)
{
	struct spdk_nvmf_ctrlr *ctrlr = ctx;

	spdk_nvmf_ctrlr_stop_keep_alive_timer(ctrlr);
	free(ctrlr);
}

void
spdk_nvmf_ctrlr_destruct(struct spdk_nvmf_ctrlr *ctrlr)
{
	spdk_nvmf_subsystem_remove_ctrlr(ctrlr->subsys, ctrlr);

	free(ctrlr);
	spdk_thread_send_msg(ctrlr->thread, _spdk_nvmf_ctrlr_destruct, ctrlr);
}

static void
@@ -265,7 +415,6 @@ spdk_nvmf_ctrlr_add_io_qpair(void *ctx)
	}

	ctrlr_add_qpair_and_update_rsp(qpair, ctrlr, rsp);

end:
	spdk_thread_send_msg(qpair->group->thread, _spdk_nvmf_request_complete, req);
}
@@ -877,12 +1026,31 @@ spdk_nvmf_ctrlr_set_features_keep_alive_timer(struct spdk_nvmf_request *req)

	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer (%u ms)\n", cmd->cdw11);

	/*
	 * if attempts to disable keep alive by setting kato to 0h
	 * a status value of keep alive invalid shall be returned
	 */
	if (cmd->cdw11 == 0) {
		rsp->status.sc = SPDK_NVME_SC_KEEP_ALIVE_INVALID;
	} else if (cmd->cdw11 < MIN_KEEP_ALIVE_TIMEOUT) {
		ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT;
	} else if (cmd->cdw11 < MIN_KEEP_ALIVE_TIMEOUT_IN_MS) {
		ctrlr->feat.keep_alive_timer.bits.kato = MIN_KEEP_ALIVE_TIMEOUT_IN_MS;
	} else {
		ctrlr->feat.keep_alive_timer.bits.kato = cmd->cdw11;
		/* round up to milliseconds */
		ctrlr->feat.keep_alive_timer.bits.kato = spdk_divide_round_up(cmd->cdw11,
				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS) *
				KAS_DEFAULT_VALUE * KAS_TIME_UNIT_IN_MS;
	}

	/*
	 * if change the keep alive timeout value successfully
	 * update the keep alive poller.
	 */
	if (cmd->cdw11 != 0) {
		if (ctrlr->keep_alive_poller != NULL) {
			spdk_poller_unregister(&ctrlr->keep_alive_poller);
		}
		ctrlr->keep_alive_poller = spdk_poller_register(spdk_nvmf_ctrlr_keep_alive_poll, ctrlr,
					   ctrlr->feat.keep_alive_timer.bits.kato * 1000);
	}

	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Set Features - Keep Alive Timer set to %u ms\n",
@@ -1225,7 +1393,7 @@ spdk_nvmf_ctrlr_identify_ctrlr(struct spdk_nvmf_ctrlr *ctrlr, struct spdk_nvme_c
	if (subsystem->subtype == SPDK_NVMF_SUBTYPE_NVME) {
		spdk_strcpy_pad(cdata->mn, MODEL_NUMBER, sizeof(cdata->mn), ' ');
		spdk_strcpy_pad(cdata->sn, spdk_nvmf_subsystem_get_sn(subsystem), sizeof(cdata->sn), ' ');
		cdata->kas = 10;
		cdata->kas = KAS_DEFAULT_VALUE;

		cdata->rab = 6;
		cdata->cmic.multi_port = 1;
@@ -1592,6 +1760,8 @@ spdk_nvmf_ctrlr_set_features(struct spdk_nvmf_request *req)
static int
spdk_nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
{
	struct spdk_nvmf_ctrlr *ctrlr = req->qpair->ctrlr;

	SPDK_DEBUGLOG(SPDK_LOG_NVMF, "Keep Alive\n");
	/*
	 * To handle keep alive just clear or reset the
@@ -1601,6 +1771,8 @@ spdk_nvmf_ctrlr_keep_alive(struct spdk_nvmf_request *req)
	 * keep alive has exceeded the max duration and
	 * take appropriate action.
	 */
	ctrlr->last_keep_alive_tick = spdk_get_ticks();

	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

+4 −0
Original line number Diff line number Diff line
@@ -236,6 +236,10 @@ struct spdk_nvmf_ctrlr {
	uint16_t changed_ns_list_count;
	struct spdk_nvme_ns_list changed_ns_list;

	/* Time to trigger keep-alive--poller_time = now_tick + period */
	uint64_t last_keep_alive_tick;
	struct spdk_poller			*keep_alive_poller;

	TAILQ_ENTRY(spdk_nvmf_ctrlr)		link;
};

+7 −0
Original line number Diff line number Diff line
@@ -120,6 +120,12 @@ DEFINE_STUB(spdk_nvmf_transport_qpair_set_sqsize,
	    (struct spdk_nvmf_qpair *qpair),
	    0);

int
spdk_nvmf_qpair_disconnect(struct spdk_nvmf_qpair *qpair, nvmf_qpair_disconnect_cb cb_fn, void *ctx)
{
	return 0;
}

void
spdk_nvmf_bdev_ctrlr_identify_ns(struct spdk_nvmf_ns *ns, struct spdk_nvme_ns_data *nsdata)
{
@@ -315,6 +321,7 @@ test_connect(void)
	CU_ASSERT(rc == SPDK_NVMF_REQUEST_EXEC_STATUS_ASYNCHRONOUS);
	CU_ASSERT(nvme_status_success(&rsp.nvme_cpl.status));
	CU_ASSERT(qpair.ctrlr != NULL);
	spdk_nvmf_ctrlr_stop_keep_alive_timer(qpair.ctrlr);
	spdk_bit_array_free(&qpair.ctrlr->qpair_mask);
	free(qpair.ctrlr);
	qpair.ctrlr = NULL;