Commit 0829424e authored by Ben Walker
Browse files

conf: No longer allow wildcard claiming of NVMe devices



All devices must be specified by BDF. Add support for scripts
to use lspci to grab the available NVMe device BDFs for the
current machine.

Change-Id: I4a53b335e3d516629f050ae1b2ab7aff8dd7f568
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
parent 8fefa7e9
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -17,6 +17,22 @@ The `identify` and `perf` NVMe examples were modified to add a consistent format
specifying remote NVMe over Fabrics devices via the `-r` option.
This is implemented using the new `spdk_nvme_transport_id_parse()` function.

### iSCSI Target

The [Nvme] section of the configuration file was modified to remove the `BDF` directive
and replace it with a `TransportID` directive. Both local (PCIe) and remote (NVMe-oF)
devices can now be specified as the backing block device. A script to generate an
entire [Nvme] section based on the local NVMe devices attached was added at
`scripts/gen_nvme.sh`.

### NVMe-oF Target

The [Nvme] section of the configuration file was modified to remove the `BDF` directive
and replace it with a `TransportID` directive. Both local (PCIe) and remote (NVMe-oF)
devices can now be specified as the backing block device. A script to generate an
entire [Nvme] section based on the local NVMe devices attached was added at
`scripts/gen_nvme.sh`.

## v16.12: NVMe over Fabrics host, hotplug, and multi-process

### NVMe library
+6 −13
Original line number Diff line number Diff line
@@ -100,23 +100,16 @@ the kernel to avoid interrupts and context switching.
~~~
[Nvme]
  # NVMe Device Whitelist
  # Users may specify which NVMe devices to claim by their PCI
  # domain, bus, device, and function. The format is dddd:bb:dd.f, which is
  # the same format displayed by lspci or in /sys/bus/pci/devices. The second
  # argument is a "name" for the device that can be anything. The name
  # is referenced later in the Subsystem section.
  #
  # Alternatively, the user can specify ClaimAllDevices. All
  # NVMe devices will be claimed.
  BDF 0000:00:00.0
  BDF 0000:01:00.0
  # Users may specify which NVMe devices to claim by their transport id.
  # See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.
  # The devices will be assigned names in the format NvmeXnY, where X starts at 0 and
  # increases by 1 for each entry and Y is the namespace id, which starts at 1.
  TransportID "trtype:PCIe traddr:0000:00:00.0"
  TransportID "trtype:PCIe traddr:0000:01:00.0"

  # The number of attempts per I/O when an I/O fails. Do not include
  # this key to get the default behavior.
  NvmeRetryCount 4
  # The maximum number of NVMe controllers to claim. Do not include this key to
  # claim all of them.
  NumControllers 2

[TargetNodeX]
  # other TargetNode parameters go here (TargetName, Mapping, etc.)
+7 −14
Original line number Diff line number Diff line
@@ -88,23 +88,16 @@
# NVMe configuration options
[Nvme]
  # NVMe Device Whitelist
# Users may specify which NVMe devices to claim by their PCI
# domain, bus, device, and function. The format is dddd:bb:dd.f, which is
# the same format displayed by lspci or in /sys/bus/pci/devices. The second
# argument is a "name" for the device that can be anything. The name
# is referenced later in the Subsystem section.
#
# Alternatively, the user can specify ClaimAllDevices. All
# NVMe devices will be claimed and named Nvme0, Nvme1, etc.
  BDF 0000:00:00.0 Nvme0
  BDF 0000:01:00.0 Nvme1
  # Users may specify which NVMe devices to claim by their transport id.
  # See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.
  # The devices will be assigned names in the format NvmeXnY, where X starts at 0 and
  # increases by 1 for each entry and Y is the namespace id, which starts at 1.
  TransportID "trtype:PCIe traddr:0000:00:00.0"
  TransportID "trtype:PCIe traddr:0000:01:00.0"

  # The number of attempts per I/O when an I/O fails. Do not include
  # this key to get the default behavior.
  NvmeRetryCount 4
  # The maximum number of NVMe controllers to claim. Do not include this key to
  # claim all of them.
  NumControllers 2
  # Registers the application to receive timeout callback and to reset the controller.
  ResetControllerOnTimeout Yes
  # Timeout value.
+6 −11
Original line number Diff line number Diff line
@@ -67,17 +67,12 @@
# NVMe configuration options
[Nvme]
  # NVMe Device Whitelist
  # Users may specify which NVMe devices to claim by their PCI
  # domain, bus, device, and function. The format is dddd:bb:dd.f, which is
  # the same format displayed by lspci or in /sys/bus/pci/devices. The second
  # argument is a "name" for the device that can be anything. The name
  # is referenced later in the Subsystem section.
  #
  # Alternatively, the user can specify ClaimAllDevices. All
  # NVMe devices will be claimed and named Nvme0, Nvme1, etc.
  #BDF 0000:81:00.0 Nvme0
  #BDF 0000:01:00.0 Nvme1
  ClaimAllDevices
  # Users may specify which NVMe devices to claim by their transport id.
  # See spdk_nvme_transport_id_parse() in spdk/nvme.h for the correct format.
  # The devices will be assigned names in the format NvmeXnY, where X starts at 0 and
  # increases by 1 for each entry and Y is the namespace id, which starts at 1.
  TransportID "trtype:PCIe traddr:0000:00:00.0"
  TransportID "trtype:PCIe traddr:0000:01:00.0"

  # The number of attempts per I/O when an I/O fails. Do not include
  # this key to get the default behavior.
+75 −76
Original line number Diff line number Diff line
@@ -62,7 +62,7 @@ struct nvme_ctrlr {
	 *  target for CONTROLLER IDENTIFY command during initialization
	 */
	struct spdk_nvme_ctrlr		*ctrlr;
	struct spdk_pci_addr		pci_addr;
	struct spdk_nvme_transport_id	trid;

	struct spdk_poller		*adminq_timer_poller;

@@ -106,9 +106,8 @@ enum data_direction {
};

struct nvme_probe_ctx {
	int controllers_remaining;
	int num_whitelist_controllers;
	struct spdk_pci_addr whitelist[NVME_MAX_CONTROLLERS];
	size_t count;
	struct spdk_nvme_transport_id trids[NVME_MAX_CONTROLLERS];
};

static int nvme_controller_index = 0;
@@ -399,10 +398,39 @@ bdev_nvme_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w
	spdk_json_write_name(w, "nvme");
	spdk_json_write_object_begin(w);

	if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		spdk_json_write_name(w, "pci_address");
	spdk_json_write_string_fmt(w, "%04x:%02x:%02x.%x", nvme_ctrlr->pci_addr.domain,
				   nvme_ctrlr->pci_addr.bus, nvme_ctrlr->pci_addr.dev,
				   nvme_ctrlr->pci_addr.func);
		spdk_json_write_string(w, nvme_ctrlr->trid.traddr);
	}

	spdk_json_write_name(w, "trid");
	spdk_json_write_object_begin(w);

	spdk_json_write_name(w, "trtype");
	if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_PCIE) {
		spdk_json_write_string(w, "PCIe");
	} else if (nvme_ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_RDMA) {
		spdk_json_write_string(w, "RDMA");
	} else {
		spdk_json_write_string(w, "Unknown");
	}

	if (nvme_ctrlr->trid.traddr) {
		spdk_json_write_name(w, "traddr");
		spdk_json_write_string(w, nvme_ctrlr->trid.traddr);
	}

	if (nvme_ctrlr->trid.trsvcid) {
		spdk_json_write_name(w, "trsvcid");
		spdk_json_write_string(w, nvme_ctrlr->trid.trsvcid);
	}

	if (nvme_ctrlr->trid.subnqn) {
		spdk_json_write_name(w, "subnqn");
		spdk_json_write_string(w, nvme_ctrlr->trid.subnqn);
	}

	spdk_json_write_object_end(w);

	spdk_json_write_name(w, "ctrlr_data");
	spdk_json_write_object_begin(w);
@@ -502,42 +530,33 @@ probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	 struct spdk_nvme_ctrlr_opts *opts)
{
	struct nvme_probe_ctx *ctx = cb_ctx;
	int i;
	size_t i;
	bool claim_device = false;
	struct spdk_pci_addr pci_addr;

	if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) {
		return false;
	}

	SPDK_NOTICELOG("Probing device %s\n",
		       trid->traddr);

	if (ctx->controllers_remaining == 0) {
		return false;
	}
	SPDK_NOTICELOG("Probing device %s\n", trid->traddr);

	if (ctx->num_whitelist_controllers == 0) {
		claim_device = true;
	} else {
		for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
			if (spdk_pci_addr_compare(&pci_addr, &ctx->whitelist[i]) == 0) {
	for (i = 0; i < ctx->count; i++) {
		if (spdk_nvme_transport_id_compare(trid, &ctx->trids[i]) == 0) {
			claim_device = true;
			break;
		}
	}
	}

	if (!claim_device) {
		return false;
	}

	/* Claim the device in case conflict with other process */
	if (spdk_pci_device_claim(&pci_addr) != 0) {
	if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
		struct spdk_pci_addr pci_addr;

		if (spdk_pci_addr_parse(&pci_addr, trid->traddr)) {
			return false;
		}

	ctx->controllers_remaining--;
		if (spdk_pci_device_claim(&pci_addr) != 0) {
			return false;
		}
	}

	return true;
}
@@ -571,7 +590,7 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
	dev->adminq_timer_poller = NULL;
	dev->ctrlr = ctrlr;
	dev->ref = 0;
	spdk_pci_addr_parse(&dev->pci_addr, trid->traddr);
	dev->trid = *trid;
	dev->id = nvme_controller_index++;

	nvme_ctrlr_create_bdevs(dev, dev->id);
@@ -590,12 +609,12 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
}

static struct nvme_ctrlr *
nvme_ctrlr_get(struct spdk_pci_addr *addr)
nvme_ctrlr_get(const struct spdk_nvme_transport_id *trid)
{
	struct nvme_ctrlr	*nvme_ctrlr;

	TAILQ_FOREACH(nvme_ctrlr, &g_nvme_ctrlrs, tailq) {
		if (spdk_pci_addr_compare(&nvme_ctrlr->pci_addr, addr) == 0) {
		if (spdk_nvme_transport_id_compare(trid, &nvme_ctrlr->trid) == 0) {
			return nvme_ctrlr;
		}
	}
@@ -648,21 +667,18 @@ spdk_bdev_nvme_create(struct spdk_nvme_transport_id *trid,
	struct nvme_bdev	*nvme_bdev;
	size_t			j;

	if (spdk_pci_addr_parse(&probe_ctx.whitelist[0], trid->traddr) < 0) {
		return -1;
	}
	probe_ctx.num_whitelist_controllers = 1;
	probe_ctx.controllers_remaining = 1;

	if (nvme_ctrlr_get(&probe_ctx.whitelist[0]) != NULL) {
	if (nvme_ctrlr_get(trid) != NULL) {
		SPDK_ERRLOG("A controller with the provided trid (traddr: %s) already exists.\n", trid->traddr);
		return -1;
	}

	probe_ctx.count = 1;
	probe_ctx.trids[0] = *trid;
	if (spdk_nvme_probe(trid, &probe_ctx, probe_cb, attach_cb, NULL)) {
		return -1;
	}

	nvme_ctrlr = nvme_ctrlr_get(&probe_ctx.whitelist[0]);
	nvme_ctrlr = nvme_ctrlr_get(trid);
	if (!nvme_ctrlr) {
		return -1;
	}
@@ -693,50 +709,33 @@ bdev_nvme_library_init(void)
{
	struct spdk_conf_section *sp;
	const char *val;
	int i;
	struct nvme_probe_ctx probe_ctx;
	int i, rc;
	struct nvme_probe_ctx probe_ctx = {};

	sp = spdk_conf_find_section(NULL, "Nvme");
	if (sp == NULL) {
		/*
		 * If configuration file did not specify the Nvme section, do
		 *  not take the time to initialize the NVMe devices.
		 */
		return 0;
	}

	spdk_nvme_retry_count = spdk_conf_section_get_intval(sp, "NvmeRetryCount");
	if (spdk_nvme_retry_count < 0)
	if (spdk_nvme_retry_count < 0) {
		spdk_nvme_retry_count = SPDK_NVME_DEFAULT_RETRY_COUNT;
	}

	/*
	 * If NumControllers is not found, this will return -1, which we
	 * will later use to denote that we should initialize all
	 * controllers.
	 */
	num_controllers = spdk_conf_section_get_intval(sp, "NumControllers");

	/* Init the whitelist */
	probe_ctx.num_whitelist_controllers = 0;

	if (num_controllers > 0) {
		for (i = 0; ; i++) {
			val = spdk_conf_section_get_nmval(sp, "BDF", i, 0);
	for (i = 0; i < NVME_MAX_CONTROLLERS; i++) {
		val = spdk_conf_section_get_nmval(sp, "TransportID", i, 0);
		if (val == NULL) {
			break;
		}

			if (spdk_pci_addr_parse(&probe_ctx.whitelist[probe_ctx.num_whitelist_controllers], val) < 0) {
				SPDK_ERRLOG("Invalid format for BDF: %s\n", val);
		rc = spdk_nvme_transport_id_parse(&probe_ctx.trids[i], val);
		if (rc < 0) {
			SPDK_ERRLOG("Unable to parse TransportID: %s\n", val);
			return -1;
		}

			probe_ctx.num_whitelist_controllers++;
		}
		probe_ctx.count++;
	}

	probe_ctx.controllers_remaining = num_controllers;

	val = spdk_conf_section_get_val(sp, "ResetControllerOnTimeout");
	if (val != NULL) {
		if (!strcmp(val, "Yes")) {
Loading