nvmf: Add IP based load balancing (1f3f1605) · Commits · Public Repositories / spdk

lib/event/subsystems/nvmf/conf.c

+1 −0

Original line number	Diff line number	Diff line
		@@ -146,6 +146,7 @@ spdk_nvmf_parse_tgt_conf(void)
		}

		conf->acceptor_poll_rate = ACCEPT_TIMEOUT_US;
		conf->conn_sched = DEFAULT_CONN_SCHED;

		sp = spdk_conf_find_section(NULL, "Nvmf");
		if (sp != NULL) {

lib/event/subsystems/nvmf/event_nvmf.h

+7 −0

Original line number	Diff line number	Diff line
		@@ -43,9 +43,16 @@
		#include "spdk_internal/log.h"

		/* Default value for spdk_nvmf_tgt_conf.acceptor_poll_rate, in microseconds. */
		#define ACCEPT_TIMEOUT_US 10000 /* 10ms */
		/* Connection scheduling policy assigned before any configuration or RPC parameter is parsed. */
		#define DEFAULT_CONN_SCHED CONNECT_SCHED_ROUND_ROBIN

		/* Policy used to pick the core that will service a newly accepted connection. */
		enum spdk_nvmf_connect_sched {
		/* Hand out cores in round-robin order regardless of which host is connecting. */
		CONNECT_SCHED_ROUND_ROBIN = 0,
		/* Keep every connection from one host address on the same core; a new address gets the next round-robin core. */
		CONNECT_SCHED_HOST_IP,
		};

		/* Target-wide settings, populated from the "Nvmf" configuration section or from JSON-RPC parameters. */
		struct spdk_nvmf_tgt_conf {
		/* Polling interval of the acceptor for incoming connections, in microseconds. */
		uint32_t acceptor_poll_rate;
		/* How new connections are distributed across cores. */
		enum spdk_nvmf_connect_sched conn_sched;
		};

		extern struct spdk_nvmf_tgt_opts *g_spdk_nvmf_tgt_opts;

lib/event/subsystems/nvmf/nvmf_rpc.c

+18 −0

Original line number	Diff line number	Diff line
		@@ -1618,8 +1618,25 @@ nvmf_rpc_subsystem_set_tgt_opts(struct spdk_jsonrpc_request *request,
		}
		SPDK_RPC_REGISTER("set_nvmf_target_options", nvmf_rpc_subsystem_set_tgt_opts, SPDK_RPC_STARTUP)

		/*
		 * JSON decoder for the "conn_sched" RPC parameter.
		 *
		 * Maps the JSON string "roundrobin" or "hostip" onto the matching
		 * enum spdk_nvmf_connect_sched value and stores it through out.
		 *
		 * \param val JSON value holding the policy name.
		 * \param out Destination; must point at an enum spdk_nvmf_connect_sched.
		 *
		 * \return 0 on success. For any other value an error is logged, -EINVAL is
		 * returned, and the destination is left unmodified.
		 */
		static int
		decode_conn_sched(const struct spdk_json_val *val, void *out)
		{
			enum spdk_nvmf_connect_sched *sched = out;

			if (spdk_json_strequal(val, "roundrobin")) {
				*sched = CONNECT_SCHED_ROUND_ROBIN;
			} else if (spdk_json_strequal(val, "hostip")) {
				*sched = CONNECT_SCHED_HOST_IP;
			} else {
				SPDK_ERRLOG("Invalid connection scheduling parameter\n");
				return -EINVAL;
			}

			return 0;
		}

		/*
		 * Decoder table for the target-configuration RPC parameters. Each entry gives
		 * the JSON key, the offset of the destination field inside
		 * struct spdk_nvmf_tgt_conf, and the function that decodes the value.
		 * NOTE(review): the trailing `true` presumably marks the key as optional --
		 * confirm against the definition of struct spdk_json_object_decoder.
		 */
		static const struct spdk_json_object_decoder nvmf_rpc_subsystem_tgt_conf_decoder[] = {
		{"acceptor_poll_rate", offsetof(struct spdk_nvmf_tgt_conf, acceptor_poll_rate), spdk_json_decode_uint32, true},
		{"conn_sched", offsetof(struct spdk_nvmf_tgt_conf, conn_sched), decode_conn_sched, true},
		};

		static void
		@@ -1645,6 +1662,7 @@ nvmf_rpc_subsystem_set_tgt_conf(struct spdk_jsonrpc_request *request,
		}

		conf->acceptor_poll_rate = ACCEPT_TIMEOUT_US;
		conf->conn_sched = DEFAULT_CONN_SCHED;

		if (params != NULL) {
		if (spdk_json_decode_object(params, nvmf_rpc_subsystem_tgt_conf_decoder,

lib/event/subsystems/nvmf/nvmf_tgt.c

+96 −6

Original line number	Diff line number	Diff line
		@@ -59,11 +59,23 @@ struct nvmf_tgt_poll_group {
		struct spdk_nvmf_poll_group *group;
		};

		/* Remembers which core was chosen for a host address under CONNECT_SCHED_HOST_IP. */
		struct nvmf_tgt_host_trid {
		/* Peer transport ID; the code in this file fills in and compares only its traddr field. */
		struct spdk_nvme_transport_id host_trid;
		/* Core picked when the first connection from this address was seen. */
		uint32_t core;
		/*
		 * Incremented each time a later connection matches this entry. The entry is
		 * calloc'ed, so the first connection leaves it at 0.
		 * NOTE(review): no decrement or reader of this counter is visible here.
		 */
		uint32_t ref;
		/* Linkage in g_nvmf_tgt_host_trids. */
		TAILQ_ENTRY(nvmf_tgt_host_trid) link;
		};

		/* List of host trids that are connected to the target */
		static TAILQ_HEAD(, nvmf_tgt_host_trid) g_nvmf_tgt_host_trids =
		TAILQ_HEAD_INITIALIZER(g_nvmf_tgt_host_trids);

		struct spdk_nvmf_tgt *g_spdk_nvmf_tgt = NULL;

		static enum nvmf_tgt_state g_tgt_state;

		static uint32_t g_tgt_core; /* Round-robin tracking of cores for qpair assignment */
		/* Round-Robin/IP-based tracking of cores for qpair assignment */
		static uint32_t g_tgt_core;

		static struct nvmf_tgt_poll_group *g_poll_groups = NULL;
		static size_t g_num_poll_groups = 0;
		@@ -111,6 +123,68 @@ nvmf_tgt_poll_group_add(void arg1, void arg2)
		spdk_nvmf_poll_group_add(pg->group, qpair);
		}

		/*
		 * Round-robin core selection.
		 *
		 * Returns the core currently held in g_tgt_core and moves g_tgt_core on to
		 * the core after it, wrapping back to the first core once
		 * spdk_env_get_next_core() reports UINT32_MAX.
		 */
		static uint32_t
		spdk_nvmf_get_core_rr(void)
		{
			const uint32_t selected = g_tgt_core;
			const uint32_t following = spdk_env_get_next_core(selected);

			g_tgt_core = (following != UINT32_MAX) ? following : spdk_env_get_first_core();

			return selected;
		}

		static uint32_t
		nvmf_tgt_get_qpair_core(struct spdk_nvmf_qpair *qpair)
		{
		struct spdk_nvme_transport_id trid;
		struct nvmf_tgt_host_trid tmp_trid = NULL, new_trid = NULL;
		int ret;
		uint32_t core = 0;

		switch (g_spdk_nvmf_tgt_conf->conn_sched) {
		case CONNECT_SCHED_HOST_IP:
		ret = spdk_nvmf_qpair_get_peer_trid(qpair, &trid);
		if (ret) {
		SPDK_ERRLOG("Invalid host transport Id. Assigning to core %d\n", core);
		break;
		}

		TAILQ_FOREACH(tmp_trid, &g_nvmf_tgt_host_trids, link) {
		if (tmp_trid && !strncmp(tmp_trid->host_trid.traddr,
		trid.traddr, SPDK_NVMF_TRADDR_MAX_LEN + 1)) {
		tmp_trid->ref++;
		core = tmp_trid->core;
		break;
		}
		}
		if (!tmp_trid) {
		new_trid = calloc(1, sizeof(*new_trid));
		if (!new_trid) {
		SPDK_ERRLOG("Insufficient memory. Assigning to core %d\n", core);
		break;
		}
		/* Get the next available core for the new host */
		core = spdk_nvmf_get_core_rr();
		new_trid->core = core;
		memcpy(new_trid->host_trid.traddr, trid.traddr,
		SPDK_NVMF_TRADDR_MAX_LEN + 1);
		TAILQ_INSERT_TAIL(&g_nvmf_tgt_host_trids, new_trid, link);
		}
		break;
		case CONNECT_SCHED_ROUND_ROBIN:
		default:
		core = spdk_nvmf_get_core_rr();
		break;
		}

		return core;
		}

		static void
		new_qpair(struct spdk_nvmf_qpair *qpair)
		{
		@@ -123,11 +197,7 @@ new_qpair(struct spdk_nvmf_qpair *qpair)
		return;
		}

		core = g_tgt_core;
		g_tgt_core = spdk_env_get_next_core(core);
		if (g_tgt_core == UINT32_MAX) {
		g_tgt_core = spdk_env_get_first_core();
		}
		core = nvmf_tgt_get_qpair_core(qpair);

		pg = &g_poll_groups[core];
		assert(pg != NULL);
		@@ -222,7 +292,15 @@ nvmf_tgt_subsystem_stopped(struct spdk_nvmf_subsystem *subsystem,
		/*
		 * Final step of target teardown: marks the target as stopped, releases every
		 * entry of g_nvmf_tgt_host_trids, frees g_spdk_nvmf_tgt_conf and advances the
		 * target state machine.
		 *
		 * \param ctx Unused.
		 * \param status Unused.
		 *
		 * NOTE(review): presumably registered as the completion callback of the
		 * target destroy call -- the call site is outside this section.
		 */
		static void
		nvmf_tgt_destroy_done(void *ctx, int status)
		{
			struct nvmf_tgt_host_trid *trid, *tmp_trid;

			g_tgt_state = NVMF_TGT_STOPPED;

			/* The _SAFE variant is required because each entry is freed while iterating. */
			TAILQ_FOREACH_SAFE(trid, &g_nvmf_tgt_host_trids, link, tmp_trid) {
				TAILQ_REMOVE(&g_nvmf_tgt_host_trids, trid, link);
				free(trid);
			}

			free(g_spdk_nvmf_tgt_conf);
			nvmf_tgt_advance_state();
		}
		@@ -334,6 +412,16 @@ spdk_nvmf_subsystem_init(void)
		nvmf_tgt_advance_state();
		}

		static char *
		get_conn_sched_string(enum spdk_nvmf_connect_sched sched)
		{
		if (sched == CONNECT_SCHED_HOST_IP) {
		return "hostip";
		} else {
		return "roundrobin";
		}
		}

		static void
		spdk_nvmf_subsystem_write_config_json(struct spdk_json_write_ctx w, struct spdk_event done_ev)
		{
		@@ -344,6 +432,8 @@ spdk_nvmf_subsystem_write_config_json(struct spdk_json_write_ctx *w, struct spdk

		spdk_json_write_named_object_begin(w, "params");
		spdk_json_write_named_uint32(w, "acceptor_poll_rate", g_spdk_nvmf_tgt_conf->acceptor_poll_rate);
		spdk_json_write_named_string(w, "conn_sched",
		get_conn_sched_string(g_spdk_nvmf_tgt_conf->conn_sched));
		spdk_json_write_object_end(w);
		spdk_json_write_object_end(w);

scripts/rpc.py

+6 −1

Original line number	Diff line number	Diff line
		@@ -1238,10 +1238,15 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
		@call_cmd
		def set_nvmf_target_config(args):
		rpc.nvmf.set_nvmf_target_config(args.client,
		acceptor_poll_rate=args.acceptor_poll_rate)
		acceptor_poll_rate=args.acceptor_poll_rate,
		conn_sched=args.conn_sched)

		p = subparsers.add_parser('set_nvmf_target_config', help='Set NVMf target config')
		p.add_argument('-r', '--acceptor-poll-rate', help='Polling interval of the acceptor for incoming connections (usec)', type=int)
		p.add_argument('-s', '--conn-sched', help="""'roundrobin' - Schedule the incoming connections from any host
		on the cores in a round robin manner (Default). 'hostip' - Schedule all the incoming connections from a
		specific host IP on to the same core. Connections from different IP will be assigned to cores in a round
		robin manner""")
		p.set_defaults(func=set_nvmf_target_config)

		@call_cmd