Commit 7b397f62 authored by Ben Walker, committed by Jim Harris
Browse files

nvmf: No longer tie subsystems to CPU cores



The "Core" parameter in the configuration file has been removed. New
connections are handed out to available cores using round-robin.

Change-Id: I24527fa22a0b2738ebbf5fb030e3bb373ead5da2
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/388295


Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 80016bd9
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -39,6 +39,11 @@ vfio-pci driver should trigger one "remove" uevent at the very beginning of the
of pci driver; this makes sure that SPDK can detect the event and release the vfio-attached
device fd and related resources when the device is removed.

### NVMe-oF Target

Subsystems are no longer tied explicitly to CPU cores. Instead, connections are handed out to the available
cores round-robin. The "Core" option in the configuration file has been removed.

### Blobstore

A number of functions have been renamed:
+19 −110
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@
#define ACCEPT_TIMEOUT_US		10000 /* 10ms */

struct spdk_nvmf_probe_ctx {
	struct nvmf_tgt_subsystem	*app_subsystem;
	struct spdk_nvmf_subsystem	*subsystem;
	bool				any;
	bool				found;
	struct spdk_nvme_transport_id	trid;
@@ -61,71 +61,19 @@ struct spdk_nvmf_probe_ctx {
#define MAX_STRING_LEN 255

struct spdk_nvmf_tgt_conf g_spdk_nvmf_tgt_conf;
static int32_t g_last_core = -1;

/*
 * Read a NUMA node id from a text file.
 *
 * path: file whose first line is expected to hold a decimal integer. The
 *       caller in this file passes /sys/class/net/<ifname>/device/numa_node.
 *
 * Returns the parsed node id, or -1 when the file cannot be opened, is empty,
 * does not start with a number, or holds a negative or out-of-range value.
 */
static int
spdk_get_numa_node_value(const char *path)
{
	FILE *fp;
	/* A decimal integer plus newline fits comfortably in this buffer. */
	char line[32];
	char *end = NULL;
	long value;
	int numa_node = -1;

	fp = fopen(path, "r");
	if (fp == NULL) {
		return -1;
	}

	if (fgets(line, sizeof(line), fp) != NULL) {
		/*
		 * Use a signed conversion and verify that at least one digit was
		 * consumed; otherwise garbage would be reported as node 0.
		 */
		value = strtol(line, &end, 10);
		if (end != line && value >= 0 && value <= INT32_MAX) {
			numa_node = (int)value;
		}
	}
	fclose(fp);

	return numa_node;
}

/*
 * Find the NUMA node of the network interface that owns an IPv4 address.
 *
 * if_addr: IPv4 address in text form, as accepted by inet_addr().
 *
 * Walks the list returned by getifaddrs(), picks the first IPv4 entry whose
 * address equals if_addr, and reads
 * /sys/class/net/<ifname>/device/numa_node for that interface.
 *
 * Returns the value read by spdk_get_numa_node_value() for the matching
 * interface, or -1 when if_addr cannot be parsed, getifaddrs() fails, or no
 * interface carries the address.
 */
static int
spdk_get_ifaddr_numa_node(const char *if_addr)
{
	struct ifaddrs *ifaddrs, *ifa;
	const struct sockaddr_in *sin;
	in_addr_t wanted;
	char path[MAX_STRING_LEN];
	int numa_node = -1;

	if (if_addr == NULL) {
		return -1;
	}

	wanted = inet_addr(if_addr);
	if (wanted == INADDR_NONE) {
		/* Unparseable text; no interface address can match it. */
		return -1;
	}

	if (getifaddrs(&ifaddrs) < 0) {
		return -1;
	}

	for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) {
		/*
		 * Entries may carry no address at all, or an address of another
		 * family; only AF_INET entries may be read as sockaddr_in.
		 */
		if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET) {
			continue;
		}

		sin = (const struct sockaddr_in *)ifa->ifa_addr;
		if (sin->sin_addr.s_addr != wanted) {
			continue;
		}

		snprintf(path, sizeof(path), "/sys/class/net/%s/device/numa_node", ifa->ifa_name);
		numa_node = spdk_get_numa_node_value(path);
		break;
	}
	freeifaddrs(ifaddrs);

	return numa_node;
}

/*
 * Create the NVMe-oF discovery subsystem (SPDK_NVMF_DISCOVERY_NQN, subtype
 * SPDK_NVMF_SUBTYPE_DISCOVERY, zero namespaces) and allow any host to use it.
 *
 * Returns 0 on success, -1 when the subsystem could not be created.
 *
 * NOTE(review): this listing shows two variants of the body back to back —
 * one using app_subsys with a core argument, one using subsystem without it —
 * and the braces do not balance as shown. Confirm which variant is intended.
 */
static int
spdk_add_nvmf_discovery_subsystem(void)
{
	struct nvmf_tgt_subsystem *app_subsys;
	struct spdk_nvmf_subsystem *subsystem;

	app_subsys = nvmf_tgt_create_subsystem(SPDK_NVMF_DISCOVERY_NQN, SPDK_NVMF_SUBTYPE_DISCOVERY, 0,
					       spdk_env_get_current_core());
	if (app_subsys == NULL) {
	subsystem = nvmf_tgt_create_subsystem(SPDK_NVMF_DISCOVERY_NQN, SPDK_NVMF_SUBTYPE_DISCOVERY, 0);
	if (subsystem == NULL) {
		SPDK_ERRLOG("Failed creating discovery nvmf library subsystem\n");
		return -1;
	}

	spdk_nvmf_subsystem_set_allow_any_host(app_subsys->subsystem, true);
	spdk_nvmf_subsystem_set_allow_any_host(subsystem, true);

	return 0;
}
@@ -196,27 +144,6 @@ spdk_nvmf_parse_nvmf_tgt(void)
	return 0;
}

/*
 * Pick the first core enabled in a 64-bit core mask, starting the search at
 * a given core and wrapping around.
 *
 * mask:  bit N set means core N is available.
 * lcore: core index to start searching from. Values of 64 or more are
 *        wrapped into 0..63 first, because shifting a 64-bit value by 64 or
 *        more bits is undefined behavior.
 *
 * Returns the index of the first set bit at or after the start position
 * (wrapping past bit 63 to bit 0). When no bit is set, returns the index just
 * before the start position, or 0 when the start position is 0.
 */
static int
spdk_nvmf_allocate_lcore(uint64_t mask, uint32_t lcore)
{
	const uint32_t num_bits = 64;
	uint32_t start, offset, candidate;

	start = lcore % num_bits;

	for (offset = 0; offset < num_bits; offset++) {
		candidate = (start + offset) % num_bits;
		if (((mask >> candidate) & 1U) == 1U) {
			return (int)candidate;
		}
	}

	/* Empty mask: nothing to choose from, fall back to a fixed index. */
	return (start == 0) ? 0 : (int)(start - 1);
}

static int
spdk_nvmf_parse_subsystem(struct spdk_conf_section *sp)
{
@@ -253,6 +180,15 @@ spdk_nvmf_parse_subsystem(struct spdk_conf_section *sp)
		}
	}

	/* Core is no longer a valid parameter, but print out a nice
	 * message if it exists to inform users.
	 */
	if (lcore >= 0) {
		SPDK_NOTICELOG("Core present in the [Subsystem] section of the config file.\n"
			       "Core was removed as an option. Subsystems can now run on all available cores.\n");
		SPDK_NOTICELOG("Please remove Core from your configuration file. Ignoring it and continuing.\n");
	}

	/* Parse Listen sections */
	num_listen_addrs = 0;
	for (i = 0; i < MAX_LISTEN_ADDRESSES; i++) {
@@ -322,7 +258,7 @@ spdk_nvmf_parse_subsystem(struct spdk_conf_section *sp)
		num_ns++;
	}

	ret = spdk_nvmf_construct_subsystem(nqn, lcore,
	ret = spdk_nvmf_construct_subsystem(nqn,
					    num_listen_addrs, listen_addrs,
					    num_hosts, hosts, allow_any_host,
					    sn,
@@ -375,16 +311,14 @@ spdk_nvmf_parse_conf(void)
}

int
spdk_nvmf_construct_subsystem(const char *name, int32_t lcore,
spdk_nvmf_construct_subsystem(const char *name,
			      int num_listen_addresses, struct rpc_listen_address *addresses,
			      int num_hosts, char *hosts[], bool allow_any_host,
			      const char *sn, size_t num_ns, struct spdk_nvmf_ns_params *ns_list)
{
	struct spdk_nvmf_subsystem *subsystem;
	struct nvmf_tgt_subsystem *app_subsys;
	int i, rc;
	size_t j;
	uint64_t mask;
	struct spdk_bdev *bdev;

	if (name == NULL) {
@@ -402,40 +336,16 @@ spdk_nvmf_construct_subsystem(const char *name, int32_t lcore,
		return -1;
	}

	if (lcore < 0) {
		lcore = ++g_last_core;
	}

	/* Determine which core to assign to the subsystem */
	mask = spdk_app_get_core_mask();
	lcore = spdk_nvmf_allocate_lcore(mask, lcore);
	g_last_core = lcore;

	app_subsys = nvmf_tgt_create_subsystem(name, SPDK_NVMF_SUBTYPE_NVME, num_ns, lcore);
	if (app_subsys == NULL) {
	subsystem = nvmf_tgt_create_subsystem(name, SPDK_NVMF_SUBTYPE_NVME, num_ns);
	if (subsystem == NULL) {
		SPDK_ERRLOG("Subsystem creation failed\n");
		return -1;
	}
	subsystem = app_subsys->subsystem;

	/* Parse Listen sections */
	for (i = 0; i < num_listen_addresses; i++) {
		int nic_numa_node = spdk_get_ifaddr_numa_node(addresses[i].traddr);
		unsigned subsys_numa_node = spdk_env_get_socket_id(app_subsys->lcore);
		struct spdk_nvme_transport_id trid = {};

		if (nic_numa_node >= 0) {
			if (subsys_numa_node != (unsigned)nic_numa_node) {
				SPDK_WARNLOG("Subsystem %s is configured to run on a CPU core %d belonging "
					     "to a different NUMA node than the associated NIC. "
					     "This may result in reduced performance.\n",
					     name, lcore);
				SPDK_WARNLOG("The NIC is on socket %d\n", nic_numa_node);
				SPDK_WARNLOG("The Subsystem is on socket %u\n",
					     subsys_numa_node);
			}
		}

		if (spdk_nvme_transport_id_parse_trtype(&trid.trtype, addresses[i].transport)) {
			SPDK_ERRLOG("Missing listen address transport type\n");
			goto error;
@@ -502,7 +412,6 @@ spdk_nvmf_construct_subsystem(const char *name, int32_t lcore,
	return 0;

error:
	spdk_nvmf_delete_subsystem(app_subsys->subsystem);
	app_subsys->subsystem = NULL;
	spdk_nvmf_delete_subsystem(subsystem);
	return -1;
}
+16 −11
Original line number Diff line number Diff line
@@ -44,17 +44,13 @@
#include "nvmf_tgt.h"

static void
dump_nvmf_subsystem(struct spdk_json_write_ctx *w, struct nvmf_tgt_subsystem *tgt_subsystem)
dump_nvmf_subsystem(struct spdk_json_write_ctx *w, struct spdk_nvmf_subsystem *subsystem)
{
	struct spdk_nvmf_host		*host;
	struct spdk_nvmf_subsystem	*subsystem = tgt_subsystem->subsystem;
	struct spdk_nvmf_listener 	*listener;

	spdk_json_write_object_begin(w);

	spdk_json_write_name(w, "core");
	spdk_json_write_int32(w, tgt_subsystem->lcore);

	spdk_json_write_name(w, "nqn");
	spdk_json_write_string(w, spdk_nvmf_subsystem_get_nqn(subsystem));
	spdk_json_write_name(w, "subtype");
@@ -143,7 +139,7 @@ spdk_rpc_get_nvmf_subsystems(struct spdk_jsonrpc_request *request,
			     const struct spdk_json_val *params)
{
	struct spdk_json_write_ctx *w;
	struct nvmf_tgt_subsystem	*tgt_subsystem;
	struct spdk_nvmf_subsystem *subsystem;

	if (params != NULL) {
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
@@ -157,10 +153,10 @@ spdk_rpc_get_nvmf_subsystems(struct spdk_jsonrpc_request *request,
	}

	spdk_json_write_array_begin(w);
	tgt_subsystem = nvmf_tgt_subsystem_first();
	while (tgt_subsystem) {
		dump_nvmf_subsystem(w, tgt_subsystem);
		tgt_subsystem = nvmf_tgt_subsystem_next(tgt_subsystem);
	subsystem = spdk_nvmf_subsystem_get_first(g_tgt.tgt);
	while (subsystem) {
		dump_nvmf_subsystem(w, subsystem);
		subsystem = spdk_nvmf_subsystem_get_next(subsystem);
	}
	spdk_json_write_array_end(w);
	spdk_jsonrpc_end_result(request, w);
@@ -379,7 +375,16 @@ spdk_rpc_construct_nvmf_subsystem(struct spdk_jsonrpc_request *request,
		}
	}

	ret = spdk_nvmf_construct_subsystem(req.nqn, req.core,
	/* Core is no longer a valid parameter, but print out a nice
	 * message if it exists to inform users.
	 */
	if (req.core != -1) {
		SPDK_NOTICELOG("Core present in the construct NVMe-oF subsystem RPC.\n"
			       "Core was removed as an option. Subsystems can now run on all available cores.\n");
		SPDK_NOTICELOG("Ignoring it and continuing.\n");
	}

	ret = spdk_nvmf_construct_subsystem(req.nqn,
					    req.listen_addresses.num_listen_address,
					    req.listen_addresses.addresses,
					    req.hosts.num_hosts, req.hosts.hosts, req.allow_any_host,
+13 −60
Original line number Diff line number Diff line
@@ -54,20 +54,8 @@ static size_t g_active_poll_groups = 0;

static struct spdk_poller *g_acceptor_poller = NULL;

static TAILQ_HEAD(, nvmf_tgt_subsystem) g_subsystems = TAILQ_HEAD_INITIALIZER(g_subsystems);

static void nvmf_tgt_advance_state(void *arg1, void *arg2);

static void
nvmf_tgt_delete_subsystem(struct nvmf_tgt_subsystem *app_subsys)
{
	TAILQ_REMOVE(&g_subsystems, app_subsys, tailq);

	spdk_nvmf_delete_subsystem(app_subsys->subsystem);

	free(app_subsys);
}

static void
spdk_nvmf_shutdown_cb(void)
{
@@ -79,67 +67,41 @@ spdk_nvmf_shutdown_cb(void)
	nvmf_tgt_advance_state(NULL, NULL);
}

/*
 * Create a subsystem named `name` with the given subtype and namespace count
 * in the global target (g_tgt.tgt).
 *
 * Fails with NULL, after logging an error, when a subsystem with that name
 * already exists or when spdk_nvmf_create_subsystem() fails.
 *
 * NOTE(review): this listing shows two variants back to back — one returning
 * a calloc()'d struct nvmf_tgt_subsystem wrapper that records lcore and is
 * linked into g_subsystems, and one returning the struct spdk_nvmf_subsystem
 * directly with no lcore parameter. Confirm which variant is intended.
 */
struct nvmf_tgt_subsystem *
nvmf_tgt_create_subsystem(const char *name, enum spdk_nvmf_subtype subtype, uint32_t num_ns,
			  uint32_t lcore)
struct spdk_nvmf_subsystem *
nvmf_tgt_create_subsystem(const char *name, enum spdk_nvmf_subtype subtype, uint32_t num_ns)
{
	struct spdk_nvmf_subsystem *subsystem;
	struct nvmf_tgt_subsystem *app_subsys;

	/* Subsystem names must be unique within the target. */
	if (spdk_nvmf_tgt_find_subsystem(g_tgt.tgt, name)) {
		SPDK_ERRLOG("Subsystem already exist\n");
		return NULL;
	}

	app_subsys = calloc(1, sizeof(*app_subsys));
	if (app_subsys == NULL) {
		SPDK_ERRLOG("Subsystem allocation failed\n");
		return NULL;
	}

	subsystem = spdk_nvmf_create_subsystem(g_tgt.tgt, name, subtype, num_ns);
	if (subsystem == NULL) {
		SPDK_ERRLOG("Subsystem creation failed\n");
		free(app_subsys);
		return NULL;
	}

	app_subsys->subsystem = subsystem;
	app_subsys->lcore = lcore;

	SPDK_NOTICELOG("allocated subsystem %s on lcore %u on socket %u\n", name, lcore,
		       spdk_env_get_socket_id(lcore));
	SPDK_NOTICELOG("allocated subsystem %s\n", name);

	TAILQ_INSERT_TAIL(&g_subsystems, app_subsys, tailq);

	return app_subsys;
	return subsystem;
}

/*
 * NOTE(review): from here to the final closing brace this listing mixes
 * several definitions line by line and the braces do not balance as shown:
 *   - nvmf_tgt_subsystem_first() / nvmf_tgt_subsystem_next(), which return
 *     TAILQ_FIRST / TAILQ_NEXT over g_subsystems;
 *   - a variant of nvmf_tgt_shutdown_subsystem_by_nqn() that walks
 *     g_subsystems comparing NQNs with strcmp() and returns -1 when nothing
 *     matches;
 *   - a variant of nvmf_tgt_shutdown_subsystem_by_nqn() that looks the NQN up
 *     with spdk_nvmf_tgt_find_subsystem(), returns -EINVAL when it is not
 *     found, and otherwise deletes the subsystem and returns 0.
 * Confirm which definitions are intended before relying on this span.
 */
struct nvmf_tgt_subsystem *
nvmf_tgt_subsystem_first(void)
int
nvmf_tgt_shutdown_subsystem_by_nqn(const char *nqn)
{
	return TAILQ_FIRST(&g_subsystems);
}
	struct spdk_nvmf_subsystem *subsystem;

struct nvmf_tgt_subsystem *
nvmf_tgt_subsystem_next(struct nvmf_tgt_subsystem *subsystem)
{
	return TAILQ_NEXT(subsystem, tailq);
	subsystem = spdk_nvmf_tgt_find_subsystem(g_tgt.tgt, nqn);
	if (!subsystem) {
		return -EINVAL;
	}

int
nvmf_tgt_shutdown_subsystem_by_nqn(const char *nqn)
{
	struct nvmf_tgt_subsystem *tgt_subsystem, *subsys_tmp;
	spdk_nvmf_delete_subsystem(subsystem);

	TAILQ_FOREACH_SAFE(tgt_subsystem, &g_subsystems, tailq, subsys_tmp) {
		if (strcmp(spdk_nvmf_subsystem_get_nqn(tgt_subsystem->subsystem), nqn) == 0) {
			nvmf_tgt_delete_subsystem(tgt_subsystem);
	return 0;
}
	}
	return -1;
}

static void
nvmf_tgt_poll_group_add(void *arg1, void *arg2)
@@ -181,7 +143,7 @@ acceptor_poll(void *arg)
/*
 * Completion callback: move the target state machine to
 * NVMF_TGT_FINI_FREE_RESOURCES and run its next step.
 *
 * ctx is unused.
 */
static void
nvmf_tgt_destroy_poll_group_done(void *ctx)
{
	/*
	 * Set the state once. An earlier store of
	 * NVMF_TGT_FINI_SHUTDOWN_SUBSYSTEMS was overwritten before anything
	 * could observe it.
	 */
	g_tgt.state = NVMF_TGT_FINI_FREE_RESOURCES;
	nvmf_tgt_advance_state(NULL, NULL);
}

@@ -292,15 +254,6 @@ nvmf_tgt_advance_state(void *arg1, void *arg2)
					     NULL,
					     nvmf_tgt_destroy_poll_group_done);
			break;
		case NVMF_TGT_FINI_SHUTDOWN_SUBSYSTEMS: {
			struct nvmf_tgt_subsystem *app_subsys, *tmp;

			TAILQ_FOREACH_SAFE(app_subsys, &g_subsystems, tailq, tmp) {
				nvmf_tgt_delete_subsystem(app_subsys);
			}
			g_tgt.state = NVMF_TGT_FINI_FREE_RESOURCES;
			break;
		}
		case NVMF_TGT_FINI_FREE_RESOURCES:
			spdk_nvmf_tgt_destroy(g_tgt.tgt);
			g_tgt.state = NVMF_TGT_STOPPED;
+2 −19
Original line number Diff line number Diff line
@@ -51,14 +51,6 @@ struct spdk_nvmf_tgt_conf {
	uint32_t acceptor_poll_rate;
};

/* Application-level record pairing a library subsystem with a CPU core. */
struct nvmf_tgt_subsystem {
	/* Subsystem object created through spdk_nvmf_create_subsystem(). */
	struct spdk_nvmf_subsystem *subsystem;

	/* Linkage for the application's global list of these records. */
	TAILQ_ENTRY(nvmf_tgt_subsystem) tailq;

	/* Core index recorded for this subsystem when it was created. */
	uint32_t lcore;
};

enum nvmf_tgt_state {
	NVMF_TGT_INIT_NONE = 0,
	NVMF_TGT_INIT_PARSE_CONFIG,
@@ -67,7 +59,6 @@ enum nvmf_tgt_state {
	NVMF_TGT_RUNNING,
	NVMF_TGT_FINI_STOP_ACCEPTOR,
	NVMF_TGT_FINI_DESTROY_POLL_GROUPS,
	NVMF_TGT_FINI_SHUTDOWN_SUBSYSTEMS,
	NVMF_TGT_FINI_FREE_RESOURCES,
	NVMF_TGT_STOPPED,
	NVMF_TGT_ERROR,
@@ -85,17 +76,10 @@ extern struct spdk_nvmf_tgt_conf g_spdk_nvmf_tgt_conf;

extern struct nvmf_tgt g_tgt;

struct nvmf_tgt_subsystem *
nvmf_tgt_subsystem_first(void);

struct nvmf_tgt_subsystem *
nvmf_tgt_subsystem_next(struct nvmf_tgt_subsystem *subsystem);

int spdk_nvmf_parse_conf(void);

struct nvmf_tgt_subsystem *nvmf_tgt_create_subsystem(const char *name,
		enum spdk_nvmf_subtype subtype, uint32_t num_ns,
		uint32_t lcore);
struct spdk_nvmf_subsystem *nvmf_tgt_create_subsystem(const char *name,
		enum spdk_nvmf_subtype subtype, uint32_t num_ns);

struct spdk_nvmf_ns_params {
	char *bdev_name;
@@ -104,7 +88,6 @@ struct spdk_nvmf_ns_params {

int
spdk_nvmf_construct_subsystem(const char *name,
			      int32_t lcore,
			      int num_listen_addresses, struct rpc_listen_address *addresses,
			      int num_hosts, char *hosts[], bool allow_any_host,
			      const char *sn, size_t num_ns, struct spdk_nvmf_ns_params *ns_list);
Loading