Commit 201aa634 authored by Richael Zhuang, committed by Tomasz Zawadzki
Browse files

sock: introduce SO_INCOMING_CPU to get placement_id



Leverage SO_INCOMING_CPU to get the CPU affinity of connections
(sockets), and allocate the connections to specific poll groups
accordingly, with the aim of improving cache locality.

From our test:
With 6 P4600 NVMe SSDs on the target, the target using 8 cores with the NIC
IRQs bound to these 8 cores, and the initiator side using 24 and 32 cores,
we get an 11%~17% randwrite performance boost for posix, and 8%~12%
for uring.

Change-Id: I011e0a21502c85adcccd4a14fbe9838b43f54976
Signed-off-by: Richael Zhuang <richael.zhuang@arm.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5748


Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent 9713bfe9
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -41,6 +41,13 @@ independent SPDK processes are running on one node. The filter function can
then be implemented in these processes to decide which SSDs to probe based on
the new SSD's PCI address.

### sock

The type of enable_placement_id in struct spdk_sock_impl_opts has changed from
bool to an integer (uint32_t), and its value can be configured via RPC.
Setting enable_placement_id=2 uses SO_INCOMING_CPU to obtain the placement_id,
which aims to improve CPU cache locality.

## v21.01:

### idxd
+3 −3
Original line number Diff line number Diff line
@@ -7731,7 +7731,7 @@ Example response:
  "result": {
    "recv_buf_size": 2097152,
    "send_buf_size": 2097152,
    "enable_recv_pipe": true
    "enable_recv_pipe": true,
    "enable_zerocopy_send": true
  }
}
@@ -7751,7 +7751,7 @@ send_buf_size | Optional | number | Size of socket send buffer in
enable_recv_pipe        | Optional | boolean     | Enable or disable receive pipe
enable_zerocopy_send    | Optional | boolean     | Enable or disable zero copy on send
enable_quick_ack        | Optional | boolean     | Enable or disable quick ACK
enable_placement_id     | Optional | boolean     | Enable or disable placement_id
enable_placement_id     | Optional | number      | Placement_id mode. 0: disable, 1: incoming_napi, 2: incoming_cpu

### Response

@@ -7773,7 +7773,7 @@ Example request:
    "enable_recv_pipe": false,
    "enable_zerocopy_send": true,
    "enable_quick_ack": false,
    "enable_placement_id": false
    "enable_placement_id": 0
  }
}
~~~
+1 −1
Original line number Diff line number Diff line
@@ -116,7 +116,7 @@ struct spdk_sock_impl_opts {
	/**
	 * Enable or disable placement_id. Used by posix and uring socket modules.
	 */
	bool enable_placement_id;
	uint32_t enable_placement_id;

};

+28 −9
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include "spdk/sock.h"
#include "spdk_internal/sock.h"
#include "spdk/log.h"
#include "spdk/env.h"

#define SPDK_SOCK_DEFAULT_PRIORITY 0
#define SPDK_SOCK_DEFAULT_ZCOPY true
@@ -59,7 +60,7 @@ static pthread_mutex_t g_map_table_mutex = PTHREAD_MUTEX_INITIALIZER;
 * If the group is already in the map, take a reference.
 */
static int
sock_map_insert(int placement_id, struct spdk_sock_group *group)
sock_map_insert(int placement_id, struct spdk_sock_group *group, bool init)
{
	struct spdk_sock_placement_id_entry *entry;

@@ -84,7 +85,9 @@ sock_map_insert(int placement_id, struct spdk_sock_group *group)

	entry->placement_id = placement_id;
	entry->group = group;
	if (!init) {
		entry->ref++;
	}

	STAILQ_INSERT_TAIL(&g_placement_id_map, entry, link);
	pthread_mutex_unlock(&g_map_table_mutex);
@@ -154,11 +157,11 @@ static int
sock_get_placement_id(struct spdk_sock *sock)
{
	int rc;
	int placement_id;
	int placement_id = -1;

	if (!sock->placement_id) {
	if (sock->placement_id == -1) {
		rc = sock->net_impl->get_placement_id(sock, &placement_id);
		if (!rc && (placement_id != 0)) {
		if (!rc && (placement_id != -1)) {
			sock->placement_id = placement_id;
		}
	}
@@ -169,10 +172,10 @@ sock_get_placement_id(struct spdk_sock *sock)
int
spdk_sock_get_optimal_sock_group(struct spdk_sock *sock, struct spdk_sock_group **group)
{
	int placement_id;
	int placement_id = -1;

	placement_id = sock_get_placement_id(sock);
	if (placement_id != 0) {
	if (placement_id != -1) {
		sock_map_lookup(placement_id, group);
		return 0;
	} else {
@@ -336,6 +339,7 @@ spdk_sock_accept(struct spdk_sock *sock)
		new_sock->opts = sock->opts;
		memcpy(&new_sock->opts, &sock->opts, sizeof(new_sock->opts));
		new_sock->net_impl = sock->net_impl;
		new_sock->placement_id = -1;
		TAILQ_INIT(&new_sock->queued_reqs);
		TAILQ_INIT(&new_sock->pending_reqs);
	}
@@ -480,6 +484,9 @@ spdk_sock_group_create(void *ctx)
	struct spdk_net_impl *impl = NULL;
	struct spdk_sock_group *group;
	struct spdk_sock_group_impl *group_impl;
	struct spdk_sock_impl_opts sock_opts = {};
	size_t sock_len;
	bool enable_incoming_cpu = 0;

	group = calloc(1, sizeof(*group));
	if (group == NULL) {
@@ -494,10 +501,22 @@ spdk_sock_group_create(void *ctx)
			STAILQ_INSERT_TAIL(&group->group_impls, group_impl, link);
			TAILQ_INIT(&group_impl->socks);
			group_impl->net_impl = impl;

			sock_len = sizeof(sock_opts);
			spdk_sock_impl_get_opts(impl->name, &sock_opts, &sock_len);
			if (sock_opts.enable_placement_id == 2) {
				enable_incoming_cpu = 1;
			}
		}
	}

	group->ctx = ctx;

	/* if any net_impl is configured to use SO_INCOMING_CPU, initialize the sock map */
	if (enable_incoming_cpu) {
		sock_map_insert(spdk_env_get_current_core(), group, 1);
	}

	return group;
}

@@ -534,7 +553,7 @@ spdk_sock_group_add_sock(struct spdk_sock_group *group, struct spdk_sock *sock,

	placement_id = sock_get_placement_id(sock);
	if (placement_id != 0) {
		rc = sock_map_insert(placement_id, group);
		rc = sock_map_insert(placement_id, group, 0);
		if (rc < 0) {
			return -1;
		}
@@ -794,7 +813,7 @@ spdk_sock_write_config_json(struct spdk_json_write_ctx *w)
			spdk_json_write_named_bool(w, "enable_recv_pipe", opts.enable_recv_pipe);
			spdk_json_write_named_bool(w, "enable_zerocopy_send", opts.enable_zerocopy_send);
			spdk_json_write_named_bool(w, "enable_quickack", opts.enable_quickack);
			spdk_json_write_named_bool(w, "enable_placement_id", opts.enable_placement_id);
			spdk_json_write_named_uint32(w, "enable_placement_id", opts.enable_placement_id);
			spdk_json_write_object_end(w);
			spdk_json_write_object_end(w);
		} else {
+2 −2
Original line number Diff line number Diff line
@@ -76,7 +76,7 @@ rpc_sock_impl_get_options(struct spdk_jsonrpc_request *request,
	spdk_json_write_named_bool(w, "enable_recv_pipe", sock_opts.enable_recv_pipe);
	spdk_json_write_named_bool(w, "enable_zerocopy_send", sock_opts.enable_zerocopy_send);
	spdk_json_write_named_bool(w, "enable_quickack", sock_opts.enable_quickack);
	spdk_json_write_named_bool(w, "enable_placement_id", sock_opts.enable_placement_id);
	spdk_json_write_named_uint32(w, "enable_placement_id", sock_opts.enable_placement_id);
	spdk_json_write_object_end(w);
	spdk_jsonrpc_end_result(request, w);
	free(impl_name);
@@ -116,7 +116,7 @@ static const struct spdk_json_object_decoder rpc_sock_impl_set_opts_decoders[] =
	},
	{
		"enable_placement_id", offsetof(struct spdk_rpc_sock_impl_set_opts, sock_opts.enable_placement_id),
		spdk_json_decode_bool, true
		spdk_json_decode_uint32, true
	},

};
Loading