Commit 982c25fe authored by Changpeng Liu's avatar Changpeng Liu Committed by Tomasz Zawadzki
Browse files

nvmf: add spdk_nvmf_ctrlr_[save|restore]_migr_data() APIs



When doing live migration, there are some spdk_nvmf_ctrlr internal
data structures which need to be saved/restored. These data
structures were previously designed only for the vfio-user
transport; to extend them to support other vendor-specific
transports, we promote them to public APIs, so users can use
SAVE|RESTORE to reconstruct a new nvmf controller based on the
original one.

Also remove the registers from the vfio-user transport; these
registers are now stored in the common nvmf library.

Change-Id: I9f5847ef427f7064f8e16adcc963dc6b4a35f235
Signed-off-by: default avatarJacek Kalwas <jacek.kalwas@intel.com>
Signed-off-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11059


Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarThanos Makatos <thanos.makatos@nutanix.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
parent 04dd0282
Loading
Loading
Loading
Loading
+95 −0
Original line number Diff line number Diff line
@@ -22,6 +22,11 @@
/* The maximum number of buffers per request */
#define NVMF_REQ_MAX_BUFFERS	(SPDK_NVMF_MAX_SGL_ENTRIES * 2 + 1)

/* Maximum pending AERs that can be migrated */
#define SPDK_NVMF_MIGR_MAX_PENDING_AERS 256

#define SPDK_NVMF_MAX_ASYNC_EVENTS 4

/* AIO backend requires block size aligned data buffers,
 * extra 4KiB aligned data buffer should work for most devices.
 */
@@ -415,6 +420,7 @@ struct spdk_nvmf_registers {
	uint64_t			asq;
	uint64_t			acq;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_registers) == 40, "Incorrect size");

const struct spdk_nvmf_registers *spdk_nvmf_ctrlr_get_regs(struct spdk_nvmf_ctrlr *ctrlr);

@@ -467,6 +473,95 @@ spdk_nvmf_ctrlr_get_subsystem(struct spdk_nvmf_ctrlr *ctrlr);
 */
uint16_t spdk_nvmf_ctrlr_get_id(struct spdk_nvmf_ctrlr *ctrlr);

/* Snapshot of the controller's per-feature (Set Features) state.
 * Embedded in spdk_nvmf_ctrlr_migr_data so the feature state can be
 * carried across a live migration; one union per migratable feature.
 */
struct spdk_nvmf_ctrlr_feat {
	union spdk_nvme_feat_arbitration arbitration;
	union spdk_nvme_feat_power_management power_management;
	union spdk_nvme_feat_error_recovery error_recovery;
	union spdk_nvme_feat_volatile_write_cache volatile_write_cache;
	union spdk_nvme_feat_number_of_queues number_of_queues;
	union spdk_nvme_feat_interrupt_coalescing interrupt_coalescing;
	union spdk_nvme_feat_interrupt_vector_configuration interrupt_vector_configuration;
	union spdk_nvme_feat_write_atomicity write_atomicity;
	union spdk_nvme_feat_async_event_configuration async_event_configuration;
	union spdk_nvme_feat_keep_alive_timer keep_alive_timer;
};
/* The layout is part of the migration wire format; catch accidental growth. */
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ctrlr_feat) == 40, "Incorrect size");

/* Migration data structure used to save & restore a NVMe-oF controller. */
struct spdk_nvmf_ctrlr_migr_data {
	/* `data_size` is valid size of `spdk_nvmf_ctrlr_migr_data` without counting `unused`.
	 * We use this field to migrate `spdk_nvmf_ctrlr_migr_data` from source VM and restore
	 * it in destination VM.
	 */
	uint32_t data_size;
	/* `regs_size` is valid size of `spdk_nvmf_registers`. */
	uint32_t regs_size;
	/* `feat_size` is valid size of `spdk_nvmf_ctrlr_feat`. */
	uint32_t feat_size;
	/* Pads the size header to 16 bytes (enforced by the static asserts below). */
	uint32_t reserved;

	struct spdk_nvmf_registers regs;
	/* Keeps the regs area a fixed 256 bytes so `regs` can grow compatibly. */
	uint8_t regs_reserved[216];

	struct spdk_nvmf_ctrlr_feat feat;
	/* Keeps the feat area a fixed 256 bytes so `feat` can grow compatibly. */
	uint8_t feat_reserved[216];

	uint16_t cntlid;
	uint8_t acre;
	/* Number of valid entries in `aer_cids`. */
	uint8_t num_aer_cids;
	/* Number of valid entries in `async_events`. */
	uint32_t num_async_events;

	union spdk_nvme_async_event_completion async_events[SPDK_NVMF_MIGR_MAX_PENDING_AERS];
	uint16_t aer_cids[SPDK_NVMF_MAX_ASYNC_EVENTS];
	uint64_t notice_aen_mask;

	/* Pads the structure to 4096 bytes; new fields go before this, and
	 * `data_size` excludes it.
	 */
	uint8_t unused[2516];
};
/* Lock down the wire-format layout: 16-byte header, 256-byte regs area,
 * 256-byte feat area, 4 KiB total.
 */
SPDK_STATIC_ASSERT(offsetof(struct spdk_nvmf_ctrlr_migr_data,
			    regs) - offsetof(struct spdk_nvmf_ctrlr_migr_data, data_size) == 16, "Incorrect header size");
SPDK_STATIC_ASSERT(offsetof(struct spdk_nvmf_ctrlr_migr_data,
			    feat) - offsetof(struct spdk_nvmf_ctrlr_migr_data, regs) == 256, "Incorrect regs size");
SPDK_STATIC_ASSERT(offsetof(struct spdk_nvmf_ctrlr_migr_data,
			    cntlid) - offsetof(struct spdk_nvmf_ctrlr_migr_data, feat) == 256, "Incorrect feat size");
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ctrlr_migr_data) == 4096, "Incorrect size");

/**
 * Save the NVMe-oF controller state and configuration.
 *
 * It is allowed to save the data only when the nvmf subsystem is in paused
 * state i.e. there are no outstanding cmds in nvmf layer (other than aer),
 * pending async event completions are getting blocked.
 *
 * To preserve thread safety this function must be executed on the same thread
 * the NVMe-oF controller was created.
 *
 * \param ctrlr The NVMe-oF controller
 * \param data The NVMe-oF controller state and configuration to be saved
 *
 * \return 0 on success or a negated errno on failure.
 */
int spdk_nvmf_ctrlr_save_migr_data(struct spdk_nvmf_ctrlr *ctrlr,
				   struct spdk_nvmf_ctrlr_migr_data *data);

/**
 * Restore the NVMe-oF controller state and configuration.
 *
 * It is allowed to restore the data only when the nvmf subsystem is in paused
 * state.
 *
 * To preserve thread safety this function must be executed on the same thread
 * the NVMe-oF controller was created.
 *
 * AERs shall be restored using spdk_nvmf_request_exec after this function is executed.
 *
 * \param ctrlr The NVMe-oF controller
 * \param data The NVMe-oF controller state and configuration to be restored
 *
 * \return 0 on success or a negated errno on failure.
 */
int spdk_nvmf_ctrlr_restore_migr_data(struct spdk_nvmf_ctrlr *ctrlr,
				      const struct spdk_nvmf_ctrlr_migr_data *data);

static inline enum spdk_nvme_data_transfer
spdk_nvmf_req_get_xfer(struct spdk_nvmf_request *req) {
	enum spdk_nvme_data_transfer xfer;
+83 −33
Original line number Diff line number Diff line
@@ -282,7 +282,7 @@ static void
nvmf_ctrlr_cdata_init(struct spdk_nvmf_transport *transport, struct spdk_nvmf_subsystem *subsystem,
		      struct spdk_nvmf_ctrlr_data *cdata)
{
	cdata->aerl = NVMF_MAX_ASYNC_EVENTS - 1;
	cdata->aerl = SPDK_NVMF_MAX_ASYNC_EVENTS - 1;
	cdata->kas = KAS_DEFAULT_VALUE;
	cdata->vid = SPDK_PCI_VID_INTEL;
	cdata->ssvid = SPDK_PCI_VID_INTEL;
@@ -1889,66 +1889,116 @@ nvmf_ctrlr_set_features_number_of_queues(struct spdk_nvmf_request *req)
	return SPDK_NVMF_REQUEST_EXEC_STATUS_COMPLETE;
}

int
nvmf_ctrlr_save_aers(struct spdk_nvmf_ctrlr *ctrlr, uint16_t *aer_cids,
		     uint16_t max_aers)
SPDK_STATIC_ASSERT(sizeof(struct spdk_nvmf_ctrlr) == 4920,
		   "Please check migration fields that need to be added or not");

static void
nvmf_ctrlr_migr_data_copy(struct spdk_nvmf_ctrlr_migr_data *data,
			  const struct spdk_nvmf_ctrlr_migr_data *data_src, size_t data_size)
{
	struct spdk_nvmf_request *req;
	uint16_t i;
	assert(data);
	assert(data_src);
	assert(data_size);

	if (!aer_cids || max_aers < ctrlr->nr_aer_reqs) {
		return -EINVAL;
	}
	memcpy(&data->regs, &data_src->regs, spdk_min(data->regs_size, data_src->regs_size));
	memcpy(&data->feat, &data_src->feat, spdk_min(data->feat_size, data_src->feat_size));

	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
		req = ctrlr->aer_req[i];
		aer_cids[i] = req->cmd->nvme_cmd.cid;
	}
#define SET_FIELD(field) \
    if (offsetof(struct spdk_nvmf_ctrlr_migr_data, field) + sizeof(data->field) <= data_size) { \
        data->field = data_src->field; \
    } \

	SET_FIELD(cntlid);
	SET_FIELD(acre);
	SET_FIELD(num_aer_cids);
	SET_FIELD(num_async_events);
	SET_FIELD(notice_aen_mask);
#undef SET_FIELD

	return ctrlr->nr_aer_reqs;
#define SET_ARRAY(arr) \
    if (offsetof(struct spdk_nvmf_ctrlr_migr_data, arr) + sizeof(data->arr) <= data_size) { \
        memcpy(&data->arr, &data_src->arr, sizeof(data->arr)); \
    } \

	SET_ARRAY(async_events);
	SET_ARRAY(aer_cids);
#undef SET_ARRAY
}

int
nvmf_ctrlr_save_migr_data(struct spdk_nvmf_ctrlr *ctrlr, struct nvmf_ctrlr_migr_data *data)
spdk_nvmf_ctrlr_save_migr_data(struct spdk_nvmf_ctrlr *ctrlr,
			       struct spdk_nvmf_ctrlr_migr_data *data)
{
	uint32_t num_async_events = 0;
	struct spdk_nvmf_async_event_completion *event, *event_tmp;
	uint32_t i;
	struct spdk_nvmf_ctrlr_migr_data data_local = {
		.data_size = offsetof(struct spdk_nvmf_ctrlr_migr_data, unused),
		.regs_size = sizeof(struct spdk_nvmf_registers),
		.feat_size = sizeof(struct spdk_nvmf_ctrlr_feat)
	};

	assert(data->data_size <= sizeof(data_local));
	assert(spdk_get_thread() == ctrlr->thread);

	memcpy(&data_local.regs, &ctrlr->vcprop, sizeof(struct spdk_nvmf_registers));
	memcpy(&data_local.feat, &ctrlr->feat, sizeof(struct spdk_nvmf_ctrlr_feat));

	memcpy(&data->feat, &ctrlr->feat, sizeof(struct spdk_nvmf_ctrlr_feat));
	data->cntlid = ctrlr->cntlid;
	data->acre_enabled = ctrlr->acre_enabled;
	data->notice_aen_mask = ctrlr->notice_aen_mask;
	data_local.cntlid = ctrlr->cntlid;
	data_local.acre = ctrlr->acre_enabled;
	data_local.num_aer_cids = ctrlr->nr_aer_reqs;

	STAILQ_FOREACH_SAFE(event, &ctrlr->async_events, link, event_tmp) {
		data->async_events[num_async_events++].raw = event->event.raw;
		if (num_async_events == NVMF_MIGR_MAX_PENDING_AERS) {
			SPDK_ERRLOG("%p has too many pending AERs\n", ctrlr);
		data_local.async_events[data_local.num_async_events++].raw = event->event.raw;
		if (data_local.num_async_events > SPDK_NVMF_MIGR_MAX_PENDING_AERS) {
			SPDK_ERRLOG("ctrlr %p has too many pending AERs\n", ctrlr);
			break;
		}
	}
	data->num_async_events = num_async_events;

	for (i = 0; i < ctrlr->nr_aer_reqs; i++) {
		struct spdk_nvmf_request *req = ctrlr->aer_req[i];
		data_local.aer_cids[i] = req->cmd->nvme_cmd.cid;
	}
	data_local.notice_aen_mask = ctrlr->notice_aen_mask;

	nvmf_ctrlr_migr_data_copy(data, &data_local, spdk_min(data->data_size, data_local.data_size));
	return 0;
}

int
nvmf_ctrlr_restore_migr_data(struct spdk_nvmf_ctrlr *ctrlr, struct nvmf_ctrlr_migr_data *data)
spdk_nvmf_ctrlr_restore_migr_data(struct spdk_nvmf_ctrlr *ctrlr,
				  const struct spdk_nvmf_ctrlr_migr_data *data)
{
	struct spdk_nvmf_async_event_completion *event;
	uint32_t i;
	struct spdk_nvmf_ctrlr_migr_data data_local = {
		.data_size = offsetof(struct spdk_nvmf_ctrlr_migr_data, unused),
		.regs_size = sizeof(struct spdk_nvmf_registers),
		.feat_size = sizeof(struct spdk_nvmf_ctrlr_feat)
	};

	assert(data->data_size <= sizeof(data_local));
	assert(spdk_get_thread() == ctrlr->thread);

	/* local version of data should have defaults set before copy */
	nvmf_ctrlr_migr_data_copy(&data_local, data, spdk_min(data->data_size, data_local.data_size));
	memcpy(&ctrlr->vcprop, &data_local.regs, sizeof(struct spdk_nvmf_registers));
	memcpy(&ctrlr->feat, &data_local.feat, sizeof(struct spdk_nvmf_ctrlr_feat));

	memcpy(&ctrlr->feat, &data->feat, sizeof(struct spdk_nvmf_ctrlr_feat));
	ctrlr->acre_enabled = data->acre_enabled;
	ctrlr->notice_aen_mask = data->notice_aen_mask;
	ctrlr->cntlid = data_local.cntlid;
	ctrlr->acre_enabled = data_local.acre;

	for (i = 0; i < data->num_async_events; i++) {
		event = calloc(1, sizeof(struct spdk_nvmf_async_event_completion));
	for (i = 0; i < data_local.num_async_events; i++) {
		struct spdk_nvmf_async_event_completion *event;

		event = calloc(1, sizeof(*event));
		if (!event) {
			return -ENOMEM;
		}
		event->event.raw = data->async_events[i].raw;

		event->event.raw = data_local.async_events[i].raw;
		STAILQ_INSERT_TAIL(&ctrlr->async_events, event, link);
	}
	ctrlr->notice_aen_mask = data_local.notice_aen_mask;

	return 0;
}
@@ -1977,7 +2027,7 @@ nvmf_ctrlr_async_event_request(struct spdk_nvmf_request *req)
	SPDK_DEBUGLOG(nvmf, "Async Event Request\n");

	/* Four asynchronous events are supported for now */
	if (ctrlr->nr_aer_reqs >= NVMF_MAX_ASYNC_EVENTS) {
	if (ctrlr->nr_aer_reqs >= SPDK_NVMF_MAX_ASYNC_EVENTS) {
		SPDK_DEBUGLOG(nvmf, "AERL exceeded\n");
		rsp->status.sct = SPDK_NVME_SCT_COMMAND_SPECIFIC;
		rsp->status.sc = SPDK_NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED;
+1 −1
Original line number Diff line number Diff line
@@ -1214,7 +1214,7 @@ nvmf_fc_req_bdev_abort(void *arg1)
	 * Connect -> Special case (async. handling). Not sure how to
	 * handle at this point. Let it run to completion.
	 */
	for (i = 0; i < NVMF_MAX_ASYNC_EVENTS; i++) {
	for (i = 0; i < SPDK_NVMF_MAX_ASYNC_EVENTS; i++) {
		if (ctrlr->aer_req[i] == &fc_req->req) {
			SPDK_NOTICELOG("Abort AER request\n");
			nvmf_qpair_free_aer(fc_req->req.qpair);
+1 −44
Original line number Diff line number Diff line
@@ -20,8 +20,6 @@
#include "spdk/util.h"
#include "spdk/thread.h"

#define NVMF_MAX_ASYNC_EVENTS	(4)

/* The spec reserves cntlid values in the range FFF0h to FFFFh. */
#define NVMF_MIN_CNTLID 1
#define NVMF_MAX_CNTLID 0xFFEF
@@ -169,19 +167,6 @@ struct spdk_nvmf_ns {
	bool zcopy;
};

struct spdk_nvmf_ctrlr_feat {
	union spdk_nvme_feat_arbitration arbitration;
	union spdk_nvme_feat_power_management power_management;
	union spdk_nvme_feat_error_recovery error_recovery;
	union spdk_nvme_feat_volatile_write_cache volatile_write_cache;
	union spdk_nvme_feat_number_of_queues number_of_queues;
	union spdk_nvme_feat_interrupt_coalescing interrupt_coalescing;
	union spdk_nvme_feat_interrupt_vector_configuration interrupt_vector_configuration;
	union spdk_nvme_feat_write_atomicity write_atomicity;
	union spdk_nvme_feat_async_event_configuration async_event_configuration;
	union spdk_nvme_feat_keep_alive_timer keep_alive_timer;
};

/*
 * NVMf reservation notification log page.
 */
@@ -220,7 +205,7 @@ struct spdk_nvmf_ctrlr {

	const struct spdk_nvmf_subsystem_listener	*listener;

	struct spdk_nvmf_request *aer_req[NVMF_MAX_ASYNC_EVENTS];
	struct spdk_nvmf_request *aer_req[SPDK_NVMF_MAX_ASYNC_EVENTS];
	STAILQ_HEAD(, spdk_nvmf_async_event_completion) async_events;
	uint64_t notice_aen_mask;
	uint8_t nr_aer_reqs;
@@ -254,29 +239,6 @@ struct spdk_nvmf_ctrlr {
	TAILQ_ENTRY(spdk_nvmf_ctrlr)	link;
};

/* Maximum pending AERs that can be migrated */
#define NVMF_MIGR_MAX_PENDING_AERS 256

/* spdk_nvmf_ctrlr private migration data structure used to save/restore a controller */
struct nvmf_ctrlr_migr_data {
	uint32_t				opts_size;

	uint16_t				cntlid;
	uint8_t					reserved1[2];

	struct spdk_nvmf_ctrlr_feat		feat;
	uint32_t				reserved2[2];

	uint32_t				num_async_events;
	uint32_t				acre_enabled;
	uint64_t				notice_aen_mask;
	union spdk_nvme_async_event_completion	async_events[NVMF_MIGR_MAX_PENDING_AERS];

	/* New fields shouldn't go after reserved3 */
	uint8_t					reserved3[3000];
};
SPDK_STATIC_ASSERT(sizeof(struct nvmf_ctrlr_migr_data) == 0x1000, "Incorrect size");

#define NVMF_MAX_LISTENERS_PER_SUBSYSTEM	16

struct spdk_nvmf_subsystem {
@@ -442,11 +404,6 @@ void nvmf_ctrlr_reservation_notice_log(struct spdk_nvmf_ctrlr *ctrlr,
 * the host to send a subsequent AER.
 */
void nvmf_ctrlr_abort_aer(struct spdk_nvmf_ctrlr *ctrlr);
int nvmf_ctrlr_save_aers(struct spdk_nvmf_ctrlr *ctrlr, uint16_t *aer_cids,
			 uint16_t max_aers);

int nvmf_ctrlr_save_migr_data(struct spdk_nvmf_ctrlr *ctrlr, struct nvmf_ctrlr_migr_data *data);
int nvmf_ctrlr_restore_migr_data(struct spdk_nvmf_ctrlr *ctrlr, struct nvmf_ctrlr_migr_data *data);

/*
 * Abort zero-copy requests that already got the buffer (received zcopy_start cb), but haven't
+2 −0
Original line number Diff line number Diff line
@@ -114,6 +114,8 @@
	spdk_nvmf_request_zcopy_end;
	spdk_nvmf_ctrlr_get_subsystem;
	spdk_nvmf_ctrlr_get_id;
	spdk_nvmf_ctrlr_save_migr_data;
	spdk_nvmf_ctrlr_restore_migr_data;
	spdk_nvmf_req_get_xfer;
	spdk_nvmf_poll_group_remove;

Loading