Commit f6e62d2c authored by Ben Walker
Browse files

nvmf: Remove direct mode



There is now only virtual mode. Virtual mode has been
improved enough to reach feature parity with direct
mode and performance benchmarks show no degradation.
Simplify the code by always using virtual mode.

Change-Id: Id5cdb5d4d8c54e661b245ed7250c2f9d66ca2152
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/369496


Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
parent 7e9f5563
Loading
Loading
Loading
Loading
+42 −154
Original line number Diff line number Diff line
@@ -133,7 +133,6 @@ spdk_add_nvmf_discovery_subsystem(void)
	struct nvmf_tgt_subsystem *app_subsys;

	app_subsys = nvmf_tgt_create_subsystem(SPDK_NVMF_DISCOVERY_NQN, SPDK_NVMF_SUBTYPE_DISCOVERY,
					       NVMF_SUBSYSTEM_MODE_DIRECT,
					       g_spdk_nvmf_tgt_conf.acceptor_lcore);
	if (app_subsys == NULL) {
		SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n");
@@ -224,70 +223,6 @@ spdk_nvmf_parse_nvmf_tgt(void)
	return 0;
}

/*
 * Probe filter: decides whether a discovered controller should be attached.
 *
 * cb_ctx is the struct spdk_nvmf_probe_ctx of the subsystem being built and
 * trid identifies the discovered controller; opts is not used.
 *
 * A controller is accepted when the context is in "any" mode and nothing has
 * been accepted yet, or when its transport address equals the requested one.
 * Accepting a controller marks the context as found.
 */
static bool
probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvmf_probe_ctx *probe_ctx = cb_ctx;
	bool first_wildcard_hit = probe_ctx->any && !probe_ctx->found;

	/* The address is only compared when the wildcard rule did not already accept. */
	if (!first_wildcard_hit && strcmp(trid->traddr, probe_ctx->trid.traddr) != 0) {
		return false;
	}

	probe_ctx->found = true;
	return true;
}

/*
 * Attach callback passed to spdk_nvme_probe() for a subsystem's NVMe device.
 *
 * cb_ctx is the struct spdk_nvmf_probe_ctx of the subsystem being built, trid
 * identifies the controller that was attached and ctrlr is its handle; opts
 * is not used.
 *
 * When the context names a transport address, controllers at any other
 * address are ignored with a warning. Otherwise the controller is added to
 * the subsystem and the context is marked as found. Note that the context is
 * marked as found even when adding the controller fails; only an error is
 * logged in that case.
 */
static void
attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	  struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts)
{
	struct spdk_nvmf_probe_ctx *ctx = cb_ctx;
	int rc;
	int numa_node = -1;
	struct spdk_pci_addr pci_addr;
	struct spdk_pci_device *pci_dev;

	if (ctx->trid.traddr[0] != '\0' && strcmp(trid->traddr, ctx->trid.traddr)) {
		SPDK_WARNLOG("Attached device is not expected\n");
		return;
	}

	/* pci_addr has no initializer, so it must not be used unless parsing succeeded. */
	if (spdk_pci_addr_parse(&pci_addr, trid->traddr) < 0) {
		SPDK_ERRLOG("Invalid PCI address %s for attached NVMe device\n", trid->traddr);
		return;
	}

	SPDK_NOTICELOG("Attaching NVMe device %p at %s to subsystem %s\n",
		       ctrlr,
		       trid->traddr,
		       spdk_nvmf_subsystem_get_nqn(ctx->app_subsystem->subsystem));

	pci_dev = spdk_pci_get_device(&pci_addr);
	if (pci_dev) {
		numa_node = spdk_pci_device_get_socket_id(pci_dev);
	}
	if (numa_node >= 0) {
		/* Running subsystem and NVMe device is on the same socket or not */
		if (spdk_env_get_socket_id(ctx->app_subsystem->lcore) != (unsigned)numa_node) {
			SPDK_WARNLOG("Subsystem %s is configured to run on a CPU core %u belonging "
				     "to a different NUMA node than the associated NVMe device. "
				     "This may result in reduced performance.\n",
				     spdk_nvmf_subsystem_get_nqn(ctx->app_subsystem->subsystem),
				     ctx->app_subsystem->lcore);
			/* numa_node is a non-negative int here; cast so it matches %u. */
			SPDK_WARNLOG("The NVMe device is on socket %u\n", (unsigned)numa_node);
			SPDK_WARNLOG("The Subsystem is on socket %u\n",
				     spdk_env_get_socket_id(ctx->app_subsystem->lcore));
		}
	}

	rc = nvmf_subsystem_add_ctrlr(ctx->app_subsystem->subsystem, ctrlr, &pci_addr);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to add controller to subsystem\n");
	}
	ctx->found = true;
}

static int
spdk_nvmf_allocate_lcore(uint64_t mask, uint32_t lcore)
{
@@ -312,7 +247,7 @@ spdk_nvmf_allocate_lcore(uint64_t mask, uint32_t lcore)
static int
spdk_nvmf_parse_subsystem(struct spdk_conf_section *sp)
{
	const char *nqn, *mode_str;
	const char *nqn, *mode;
	int i, ret;
	int lcore;
	int num_listen_addrs;
@@ -320,15 +255,29 @@ spdk_nvmf_parse_subsystem(struct spdk_conf_section *sp)
	char *listen_addrs_str[MAX_LISTEN_ADDRESSES] = {};
	int num_hosts;
	char *hosts[MAX_HOSTS];
	const char *bdf;
	const char *sn;
	int num_devs;
	char *devs[MAX_VIRTUAL_NAMESPACE];

	nqn = spdk_conf_section_get_val(sp, "NQN");
	mode_str = spdk_conf_section_get_val(sp, "Mode");
	mode = spdk_conf_section_get_val(sp, "Mode");
	lcore = spdk_conf_section_get_intval(sp, "Core");

	/* Mode is no longer a valid parameter, but print out a nice
	 * message if it exists to inform users.
	 */
	if (mode) {
		SPDK_NOTICELOG("Mode present in the [Subsystem] section of the config file.\n"
			       "Mode was removed as a valid parameter.\n");
		/* strcasecmp() returns 0 on a match, so the result must be compared
		 * against 0: "Virtual" is tolerated, any other value is rejected.
		 */
		if (strcasecmp(mode, "Virtual") == 0) {
			SPDK_NOTICELOG("Your mode value is 'Virtual' which is now the only possible mode.\n"
				       "Your configuration file will work as expected.\n");
		} else {
			SPDK_NOTICELOG("Please remove Mode from your configuration file.\n");
			return -1;
		}
	}

	/* Parse Listen sections */
	num_listen_addrs = 0;
	for (i = 0; i < MAX_LISTEN_ADDRESSES; i++) {
@@ -366,7 +315,6 @@ spdk_nvmf_parse_subsystem(struct spdk_conf_section *sp)
	}
	num_hosts = i;

	bdf = spdk_conf_section_get_val(sp, "NVMe");
	sn = spdk_conf_section_get_val(sp, "SN");

	num_devs = 0;
@@ -379,10 +327,10 @@ spdk_nvmf_parse_subsystem(struct spdk_conf_section *sp)
		num_devs++;
	}

	ret = spdk_nvmf_construct_subsystem(nqn, mode_str, lcore,
	ret = spdk_nvmf_construct_subsystem(nqn, lcore,
					    num_listen_addrs, listen_addrs,
					    num_hosts, hosts,
					    bdf, sn,
					    sn,
					    num_devs, devs);

	for (i = 0; i < MAX_LISTEN_ADDRESSES; i++) {
@@ -432,18 +380,18 @@ spdk_nvmf_parse_conf(void)
}

int
spdk_nvmf_construct_subsystem(const char *name,
			      const char *mode_str, int32_t lcore,
spdk_nvmf_construct_subsystem(const char *name, int32_t lcore,
			      int num_listen_addresses, struct rpc_listen_address *addresses,
			      int num_hosts, char *hosts[], const char *bdf,
			      int num_hosts, char *hosts[],
			      const char *sn, int num_devs, char *dev_list[])
{
	struct spdk_nvmf_subsystem *subsystem;
	struct nvmf_tgt_subsystem *app_subsys;
	struct spdk_nvmf_listen_addr *listen_addr;
	enum spdk_nvmf_subsystem_mode mode;
	int i;
	uint64_t mask;
	struct spdk_bdev *bdev;
	const char *namespace;

	if (name == NULL) {
		SPDK_ERRLOG("No NQN specified for subsystem\n");
@@ -469,23 +417,7 @@ spdk_nvmf_construct_subsystem(const char *name,
	lcore = spdk_nvmf_allocate_lcore(mask, lcore);
	g_last_core = lcore;

	/* Determine the mode the subsysem will operate in */
	if (mode_str == NULL) {
		SPDK_ERRLOG("No Mode specified for Subsystem %s\n", name);
		return -1;
	}

	if (strcasecmp(mode_str, "Direct") == 0) {
		mode = NVMF_SUBSYSTEM_MODE_DIRECT;
	} else if (strcasecmp(mode_str, "Virtual") == 0) {
		mode = NVMF_SUBSYSTEM_MODE_VIRTUAL;
	} else {
		SPDK_ERRLOG("Invalid Subsystem mode: %s\n", mode_str);
		return -1;
	}

	app_subsys = nvmf_tgt_create_subsystem(name, SPDK_NVMF_SUBTYPE_NVME,
					       mode, lcore);
	app_subsys = nvmf_tgt_create_subsystem(name, SPDK_NVMF_SUBTYPE_NVME, lcore);
	if (app_subsys == NULL) {
		SPDK_ERRLOG("Subsystem creation failed\n");
		return -1;
@@ -544,49 +476,6 @@ spdk_nvmf_construct_subsystem(const char *name,
		spdk_nvmf_subsystem_add_host(subsystem, hosts[i]);
	}

	if (mode == NVMF_SUBSYSTEM_MODE_DIRECT) {
		struct spdk_nvmf_probe_ctx ctx = { 0 };
		struct spdk_nvme_transport_id trid = {};
		struct spdk_pci_addr pci_addr = {};

		if (bdf == NULL) {
			SPDK_ERRLOG("Subsystem %s: missing NVMe directive\n", name);
			goto error;
		}

		if (num_devs != 0) {
			SPDK_ERRLOG("Subsystem %s: Namespaces not allowed for Direct mode\n", name);
			goto error;
		}

		trid.trtype = SPDK_NVME_TRANSPORT_PCIE;
		ctx.app_subsystem = app_subsys;
		ctx.found = false;
		if (strcmp(bdf, "*") == 0) {
			ctx.any = true;
		} else {
			if (spdk_pci_addr_parse(&pci_addr, bdf) < 0) {
				SPDK_ERRLOG("Invalid format for NVMe BDF: %s\n", bdf);
				goto error;
			}
			ctx.any = false;
			spdk_pci_addr_fmt(trid.traddr, sizeof(trid.traddr), &pci_addr);
			ctx.trid = trid;
		}

		if (spdk_nvme_probe(&trid, &ctx, probe_cb, attach_cb, NULL)) {
			SPDK_ERRLOG("One or more controllers failed in spdk_nvme_probe()\n");
		}

		if (!ctx.found) {
			SPDK_ERRLOG("Could not find NVMe controller at PCI address %04x:%02x:%02x.%x\n",
				    pci_addr.domain, pci_addr.bus, pci_addr.dev, pci_addr.func);
			goto error;
		}
	} else {
		struct spdk_bdev *bdev;
		const char *namespace;

	if (sn == NULL) {
		SPDK_ERRLOG("Subsystem %s: missing serial number\n", name);
		goto error;
@@ -616,7 +505,6 @@ spdk_nvmf_construct_subsystem(const char *name,
			       spdk_bdev_get_name(bdev), spdk_nvmf_subsystem_get_nqn(subsystem));

	}
	}

	nvmf_tgt_start_subsystem(app_subsys);

+34 −38
Original line number Diff line number Diff line
@@ -58,14 +58,6 @@ dump_nvmf_subsystem(struct spdk_json_write_ctx *w, struct nvmf_tgt_subsystem *tg

	spdk_json_write_name(w, "nqn");
	spdk_json_write_string(w, spdk_nvmf_subsystem_get_nqn(subsystem));
	if (spdk_nvmf_subsystem_get_type(subsystem) == SPDK_NVMF_SUBTYPE_NVME) {
		spdk_json_write_name(w, "mode");
		if (spdk_nvmf_subsystem_get_mode(subsystem) == NVMF_SUBSYSTEM_MODE_DIRECT) {
			spdk_json_write_string(w, "direct");
		} else {
			spdk_json_write_string(w, "virtual");
		}
	}
	spdk_json_write_name(w, "subtype");
	if (spdk_nvmf_subsystem_get_type(subsystem) == SPDK_NVMF_SUBTYPE_NVME) {
		spdk_json_write_string(w, "NVMe");
@@ -112,22 +104,14 @@ dump_nvmf_subsystem(struct spdk_json_write_ctx *w, struct nvmf_tgt_subsystem *tg
	spdk_json_write_array_end(w);

	if (spdk_nvmf_subsystem_get_type(subsystem) == SPDK_NVMF_SUBTYPE_NVME) {
		if (spdk_nvmf_subsystem_get_mode(subsystem) == NVMF_SUBSYSTEM_MODE_DIRECT) {
			spdk_json_write_name(w, "pci_address");
			spdk_json_write_string_fmt(w, "%04x:%02x:%02x.%x",
						   subsystem->dev.direct.pci_addr.domain,
						   subsystem->dev.direct.pci_addr.bus,
						   subsystem->dev.direct.pci_addr.dev,
						   subsystem->dev.direct.pci_addr.func);
		} else {
		uint32_t i;

		spdk_json_write_name(w, "serial_number");
		spdk_json_write_string(w, spdk_nvmf_subsystem_get_sn(subsystem));
		spdk_json_write_name(w, "namespaces");
		spdk_json_write_array_begin(w);
			for (i = 0; i < subsystem->dev.virt.max_nsid; i++) {
				if (subsystem->dev.virt.ns_list[i] == NULL) {
		for (i = 0; i < subsystem->dev.max_nsid; i++) {
			if (subsystem->dev.ns_list[i] == NULL) {
				continue;
			}

@@ -135,12 +119,10 @@ dump_nvmf_subsystem(struct spdk_json_write_ctx *w, struct nvmf_tgt_subsystem *tg
			spdk_json_write_name(w, "nsid");
			spdk_json_write_int32(w, i + 1);
			spdk_json_write_name(w, "name");
				spdk_json_write_string(w, spdk_bdev_get_name(subsystem->dev.virt.ns_list[i]));
			spdk_json_write_string(w, spdk_bdev_get_name(subsystem->dev.ns_list[i]));
			spdk_json_write_object_end(w);
		}
		spdk_json_write_array_end(w);

		}
	}
	spdk_json_write_object_end(w);
}
@@ -299,11 +281,10 @@ free_rpc_subsystem(struct rpc_subsystem *req)

static const struct spdk_json_object_decoder rpc_subsystem_decoders[] = {
	{"core", offsetof(struct rpc_subsystem, core), spdk_json_decode_int32, true},
	{"mode", offsetof(struct rpc_subsystem, mode), spdk_json_decode_string},
	{"mode", offsetof(struct rpc_subsystem, mode), spdk_json_decode_string, true},
	{"nqn", offsetof(struct rpc_subsystem, nqn), spdk_json_decode_string},
	{"listen_addresses", offsetof(struct rpc_subsystem, listen_addresses), decode_rpc_listen_addresses},
	{"hosts", offsetof(struct rpc_subsystem, hosts), decode_rpc_hosts, true},
	{"pci_address", offsetof(struct rpc_subsystem, pci_address), spdk_json_decode_string, true},
	{"serial_number", offsetof(struct rpc_subsystem, serial_number), spdk_json_decode_string, true},
	{"namespaces", offsetof(struct rpc_subsystem, namespaces), decode_rpc_dev_names, true},
};
@@ -324,10 +305,25 @@ spdk_rpc_construct_nvmf_subsystem(struct spdk_jsonrpc_request *request,
		goto invalid;
	}

	ret = spdk_nvmf_construct_subsystem(req.nqn, req.mode, req.core,
	/* Mode is no longer a valid parameter, but print out a nice
	 * message if it exists to inform users.
	 */
	if (req.mode) {
		SPDK_NOTICELOG("Mode present in the construct NVMe-oF subsystem RPC.\n"
			       "Mode was removed as a valid parameter.\n");
		/* strcasecmp() returns 0 on a match, so the result must be compared
		 * against 0: "Virtual" is tolerated, any other value is rejected.
		 */
		if (strcasecmp(req.mode, "Virtual") == 0) {
			SPDK_NOTICELOG("Your mode value is 'Virtual' which is now the only possible mode.\n"
				       "Your RPC will work as expected.\n");
		} else {
			SPDK_NOTICELOG("Please remove 'mode' from the RPC.\n");
			goto invalid;
		}
	}

	ret = spdk_nvmf_construct_subsystem(req.nqn, req.core,
					    req.listen_addresses.num_listen_address,
					    req.listen_addresses.addresses,
					    req.hosts.num_hosts, req.hosts.hosts, req.pci_address,
					    req.hosts.num_hosts, req.hosts.hosts,
					    req.serial_number,
					    req.namespaces.num_names, req.namespaces.names);
	if (ret) {
+2 −2
Original line number Diff line number Diff line
@@ -184,7 +184,7 @@ nvmf_tgt_start_subsystem(struct nvmf_tgt_subsystem *app_subsys)

struct nvmf_tgt_subsystem *
nvmf_tgt_create_subsystem(const char *name, enum spdk_nvmf_subtype subtype,
			  enum spdk_nvmf_subsystem_mode mode, uint32_t lcore)
			  uint32_t lcore)
{
	struct spdk_nvmf_subsystem *subsystem;
	struct nvmf_tgt_subsystem *app_subsys;
@@ -200,7 +200,7 @@ nvmf_tgt_create_subsystem(const char *name, enum spdk_nvmf_subtype subtype,
		return NULL;
	}

	subsystem = spdk_nvmf_create_subsystem(name, subtype, mode, app_subsys, connect_cb,
	subsystem = spdk_nvmf_create_subsystem(name, subtype, app_subsys, connect_cb,
					       disconnect_cb);
	if (subsystem == NULL) {
		SPDK_ERRLOG("Subsystem creation failed\n");
+2 −3
Original line number Diff line number Diff line
@@ -75,14 +75,13 @@ void nvmf_tgt_start_subsystem(struct nvmf_tgt_subsystem *subsystem);

struct nvmf_tgt_subsystem *nvmf_tgt_create_subsystem(const char *name,
		enum spdk_nvmf_subtype subtype,
		enum spdk_nvmf_subsystem_mode mode,
		uint32_t lcore);

int
spdk_nvmf_construct_subsystem(const char *name,
			      const char *mode, int32_t lcore,
			      int32_t lcore,
			      int num_listen_addresses, struct rpc_listen_address *addresses,
			      int num_hosts, char *hosts[], const char *bdf,
			      int num_hosts, char *hosts[],
			      const char *sn, int num_devs, char *dev_list[]);

int
+10 −9
Original line number Diff line number Diff line
@@ -148,21 +148,24 @@ ReactorMask 0xF000000
the [Subsystem] section of the configuration file. For example,
to assign the Subsystems to lcores 25 and 26:
~~~{.sh}
[Nvme]
TransportID "trtype:PCIe traddr:0000:02:00.0" Nvme0
TransportID "trtype:PCIe traddr:0000:82:00.0" Nvme1

[Subsystem1]
NQN nqn.2016-06.io.spdk:cnode1
Core 25
Mode Direct
Listen RDMA 192.168.100.8:4420
Host nqn.2016-06.io.spdk:init
NVMe 0000:81:00.0
SN SPDK00000000000001
Namespace Nvme0n1

[Subsystem2]
NQN nqn.2016-06.io.spdk:cnode2
Core 26
Mode Direct
Listen RDMA 192.168.100.9:4420
Host nqn.2016-06.io.spdk:init
NVMe 0000:86:00.0
SN SPDK00000000000002
Namespace Nvme1n1
~~~
SPDK executes all code for an NVMe-oF subsystem on a single thread. Different subsystems may execute
on different threads. SPDK gives the user maximum control to determine how many CPU cores are used
@@ -178,15 +181,13 @@ file as follows:

**Create malloc LUNs:** See @ref bdev_getting_started for details on creating Malloc block devices.

**Create a virtual controller:** Virtual mode allows any SPDK block device to be presented as an
NVMe-oF namespace. These block devices don't need to be NVMe devices. For example, to create a
virtual controller for malloc LUNs named Malloc0 and Malloc1:
**Create a virtual controller:** Any bdev may be presented as a namespace. For example, to create a
virtual controller with two namespaces backed by the malloc LUNs named Malloc0 and Malloc1:
~~~{.sh}
# Virtual controller
[Subsystem2]
  NQN nqn.2016-06.io.spdk:cnode2
  Core 0
  Mode Virtual
  Listen RDMA 192.168.2.21:4420
  Host nqn.2016-06.io.spdk:init
  SN SPDK00000000000001
Loading