Commit 41ff6dce authored by Jim Harris's avatar Jim Harris Committed by Ben Walker
Browse files

env: add enforce_numa environment option



We have started to add some NUMA based memory allocations (specifically
NVMe SSD CQ memory). We will likely have more in the future.

Some configurations may not have memory allocated across all NUMA
nodes, for example, setup.sh used to require special flags
to make sure memory was allocated across NUMA nodes. setup.sh
has since been changed to allocate across NUMA nodes by default,
but other users may have their own scripts that have not yet
been adjusted.

So as a transition point, the env layer currently tries to allocate from
DPDK based on the user-specified numa_id and, if that fails, falls back
to SOCKET_ID_ANY instead. When the new enforce_numa flag is specified,
the env layer skips that fallback and just returns NULL to the user.

Signed-off-by: Jim Harris <jim.harris@samsung.com>
Change-Id: Ieb346ac194fdd46c97e6fa77c117f637feb23b02
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/23966


Reviewed-by: Ben Walker <ben@nvidia.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
parent 9ae05589
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -78,9 +78,12 @@ struct spdk_env_opts {

	size_t			opts_size;

	bool			enforce_numa;
	uint8_t			reserved2[7];

	/* All new fields must be added at the end of this structure. */
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_env_opts) == 120, "Incorrect size");
SPDK_STATIC_ASSERT(sizeof(struct spdk_env_opts) == 128, "Incorrect size");

/**
 * Allocate dma/sharable memory based on a given dma_flg. It is a memory buffer
+12 −5
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@
#include <rte_eal.h>

static __thread bool g_is_thread_unaffinitized;
static bool g_enforce_numa;

SPDK_STATIC_ASSERT(SOCKET_ID_ANY == SPDK_ENV_NUMA_ID_ANY, "SOCKET_ID_ANY mismatch");

@@ -35,7 +36,7 @@ spdk_malloc(size_t size, size_t align, uint64_t *unused, int numa_id, uint32_t f

	align = spdk_max(align, RTE_CACHE_LINE_SIZE);
	buf = rte_malloc_socket(NULL, size, align, numa_id);
	if (buf == NULL && numa_id != SOCKET_ID_ANY) {
	if (buf == NULL && !g_enforce_numa && numa_id != SOCKET_ID_ANY) {
		buf = rte_malloc_socket(NULL, size, align, SOCKET_ID_ANY);
	}
	return buf;
@@ -52,7 +53,7 @@ spdk_zmalloc(size_t size, size_t align, uint64_t *unused, int numa_id, uint32_t

	align = spdk_max(align, RTE_CACHE_LINE_SIZE);
	buf = rte_zmalloc_socket(NULL, size, align, numa_id);
	if (buf == NULL && numa_id != SOCKET_ID_ANY) {
	if (buf == NULL && !g_enforce_numa && numa_id != SOCKET_ID_ANY) {
		buf = rte_zmalloc_socket(NULL, size, align, SOCKET_ID_ANY);
	}
	return buf;
@@ -127,7 +128,7 @@ spdk_memzone_reserve_aligned(const char *name, size_t len, int numa_id,
	}

	mz = rte_memzone_reserve_aligned(name, len, numa_id, dpdk_flags, align);
	if (mz == NULL && numa_id != SOCKET_ID_ANY) {
	if (mz == NULL && !g_enforce_numa && numa_id != SOCKET_ID_ANY) {
		mz = rte_memzone_reserve_aligned(name, len, SOCKET_ID_ANY, dpdk_flags, align);
	}

@@ -201,7 +202,7 @@ spdk_mempool_create_ctor(const char *name, size_t count,
	mp = rte_mempool_create(name, count, ele_size, cache_size,
				0, NULL, NULL, (rte_mempool_obj_cb_t *)obj_init, obj_init_arg,
				numa_id, 0);
	if (mp == NULL && numa_id != SOCKET_ID_ANY) {
	if (mp == NULL && !g_enforce_numa && numa_id != SOCKET_ID_ANY) {
		mp = rte_mempool_create(name, count, ele_size, cache_size,
					0, NULL, NULL, (rte_mempool_obj_cb_t *)obj_init, obj_init_arg,
					SOCKET_ID_ANY, 0);
@@ -413,7 +414,7 @@ spdk_ring_create(enum spdk_ring_type type, size_t count, int numa_id)
		 __atomic_fetch_add(&ring_num, 1, __ATOMIC_RELAXED), getpid());

	ring = rte_ring_create(ring_name, count, numa_id, flags);
	if (ring == NULL && numa_id != SOCKET_ID_ANY) {
	if (ring == NULL && !g_enforce_numa && numa_id != SOCKET_ID_ANY) {
		ring = rte_ring_create(ring_name, count, SOCKET_ID_ANY, flags);
	}
	return (struct spdk_ring *)ring;
@@ -466,3 +467,9 @@ spdk_get_tid(void)
{
	return rte_sys_gettid();
}

/*
 * Switch on strict NUMA placement by setting the file-scope g_enforce_numa
 * flag. The allocation helpers in this file (spdk_malloc, spdk_zmalloc,
 * spdk_memzone_reserve_aligned, spdk_mempool_create_ctor, spdk_ring_create)
 * test this flag after a failed allocation on a specific numa_id: when it is
 * set they do not retry with SOCKET_ID_ANY and the failed (NULL) result is
 * what the caller gets.
 */
void
mem_enforce_numa(void)
{
	g_enforce_numa = true;
}
+5 −0
Original line number Diff line number Diff line
@@ -57,4 +57,9 @@ void vtophys_pci_device_removed(struct rte_pci_device *pci_device);
 */
void mem_disable_huge_pages(void);

/**
 * Enforce NUMA (socket ID) placement for env memory allocations.
 *
 * After this call, allocation routines that are given a specific numa_id no
 * longer retry with SOCKET_ID_ANY when the allocation on that NUMA node
 * fails; the failure is reported to the caller instead.
 */
void mem_enforce_numa(void);

#endif
+8 −0
Original line number Diff line number Diff line
@@ -112,6 +112,8 @@ spdk_env_opts_init(struct spdk_env_opts *opts)
		opts->field = value; \
	}

	SET_FIELD(enforce_numa, false);

#undef SET_FIELD
}

@@ -305,6 +307,10 @@ build_eal_cmdline(const struct spdk_env_opts *opts)
		mem_disable_huge_pages();
	}

	if (opts->enforce_numa) {
		mem_enforce_numa();
	}

	/* set the main core */
	if (opts->main_core > 0) {
		args = push_arg(args, &argcount, _sprintf_alloc("%s=%d",
@@ -610,6 +616,8 @@ env_copy_opts(struct spdk_env_opts *opts, const struct spdk_env_opts *opts_user,
		opts->field = opts_user->field; \
	}

	SET_FIELD(enforce_numa);

#undef SET_FIELD
}