Commit 462fd69e authored by SeungYeon Shin's avatar SeungYeon Shin Committed by Jim Harris
Browse files

lib/event: Add support for core isolation in scheduling



Added new 'scheduler_set_options' RPC.
1. isolated_core_mask: Users are allowed to select cores to isolate during scheduling
2. scheduling_core: Users are allowed to select main core of scheduling
This RPC can only be called before SPDK subsystems have been initialized.
These parameters can only be set once.

The following options are for general mechanisms.
For example, dynamic scheduler now allows specific cores to be isolated from scheduling
through the newly added isolated_core_mask. This means that no threads can enter
or leave the cores designated as isolated cores.

Core isolation can be useful in the following situation:
The user may want the main core to be dedicated to orchestration tasks (such as RPCs)
and may not want any I/O work to be assigned to it while running the dynamic scheduler.
These changes allow the user to dedicate the main thread to RPCs, assign nvmf poll groups to
non-main-thread cores, and then ensure that the dynamic (or any other) scheduler will
not place any other spdk_threads on the main core.

Change-Id: Idb5e564150d014cec7106d48d609d83490561a94
Signed-off-by: default avatarSeungYeon Shin <syeon.shin@samsung.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/23156


Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
parent 11ff66fe
Loading
Loading
Loading
Loading
+44 −1
Original line number Diff line number Diff line
@@ -790,6 +790,8 @@ Name | Description
scheduler_name          | Current scheduler name
scheduler_period        | Currently set scheduler period in microseconds
governor_name           | Governor name
scheduling_core         | Current scheduling core
isolated_core_mask      | Current isolated core mask of scheduler

#### Example

@@ -812,7 +814,9 @@ Example response:
  "result": {
    "scheduler_name": "static",
    "scheduler_period": 2800000000,
    "governor_name": "default"
    "governor_name": "default",
    "scheduling_core": 1,
    "isolated_core_mask": "0x4"
  }
}
~~~
@@ -868,6 +872,45 @@ Example response:
}
~~~

### scheduler_set_options

Set options for scheduler.

This RPC may only be called before SPDK subsystems have been initialized. This RPC can be called only once.

#### Parameters

Name                    | Optional | Type        | Description
----------------------- | -------- | ----------- | -----------
scheduling_core         | Optional | number      | Main core of the scheduler. Idle threads move to the scheduling core. Can be set only once.
isolated_core_mask      | Optional | string      | Select CPU cores to isolate from scheduling changes. Can be set only once.

#### Example

Example request:

~~~json
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "scheduler_set_options",
  "params": {
    "scheduling_core": 1,
    "isolated_core_mask": "0x4"
  }
}
~~~

Example response:

~~~json
{
  "jsonrpc": "2.0",
  "id": 1,
  "result": true
}
~~~

### framework_enable_cpumask_locks

Enable CPU core lock files to block multiple SPDK applications from running on the same cpumask.
+11 −0
Original line number Diff line number Diff line
@@ -192,6 +192,7 @@ struct spdk_scheduler_core_info {
	uint32_t threads_count;
	bool interrupt_mode;
	struct spdk_scheduler_thread_info *thread_infos;
	bool isolated;
};

/**
@@ -292,6 +293,16 @@ void spdk_scheduler_register(struct spdk_scheduler *scheduler);
 */
uint32_t spdk_scheduler_get_scheduling_lcore(void);

/**
 * Set scheduling reactor.
 *
 * All scheduler operations are performed from the scheduling reactor.
 *
 * \param lcore lcore of scheduling reactor
 *
 * \return true if a reactor exists for lcore and it was selected as the scheduling
 * reactor, false if no reactor exists for lcore (the scheduling reactor is left unchanged).
 */
bool spdk_scheduler_set_scheduling_lcore(uint32_t lcore);


/*
 * Macro used to register new scheduler.
 */
+61 −0
Original line number Diff line number Diff line
@@ -538,6 +538,7 @@ rpc_framework_get_scheduler(struct spdk_jsonrpc_request *request,
	struct spdk_scheduler *scheduler = spdk_scheduler_get();
	uint64_t scheduler_period = spdk_scheduler_get_period();
	struct spdk_governor *governor = spdk_governor_get();
	uint32_t scheduling_core = spdk_scheduler_get_scheduling_lcore();

	if (params) {
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
@@ -551,6 +552,8 @@ rpc_framework_get_scheduler(struct spdk_jsonrpc_request *request,
		spdk_json_write_named_string(w, "scheduler_name", scheduler->name);
	}
	spdk_json_write_named_uint64(w, "scheduler_period", scheduler_period);
	spdk_json_write_named_string(w, "isolated_core_mask", scheduler_get_isolated_core_mask());
	spdk_json_write_named_uint32(w, "scheduling_core", scheduling_core);
	if (governor != NULL) {
		spdk_json_write_named_string(w, "governor_name", governor->name);
	}
@@ -621,6 +624,64 @@ rpc_framework_get_governor(struct spdk_jsonrpc_request *request,
}
SPDK_RPC_REGISTER("framework_get_governor", rpc_framework_get_governor, SPDK_RPC_RUNTIME)

/* Decoded parameters of the "scheduler_set_options" RPC. */
struct rpc_set_scheduler_opts_ctx {
	/* Isolated core mask string taken from the request; NULL when the caller did not
	 * supply one. Released by free_rpc_scheduler_set_options(). */
	char *isolated_core_mask;
	/* Requested scheduling core. The RPC handler pre-fills this with the current
	 * scheduling core before decoding. */
	uint32_t scheduling_core;
};

/*
 * JSON decoders for the "scheduler_set_options" parameters. Each entry maps a
 * parameter name to the matching member of struct rpc_set_scheduler_opts_ctx;
 * both entries are flagged optional (trailing `true`), matching the RPC documentation.
 */
static const struct spdk_json_object_decoder rpc_set_scheduler_opts_decoders[] = {
	{"isolated_core_mask", offsetof(struct rpc_set_scheduler_opts_ctx, isolated_core_mask), spdk_json_decode_string, true},
	{"scheduling_core", offsetof(struct rpc_set_scheduler_opts_ctx, scheduling_core), spdk_json_decode_uint32, true},
};

/*
 * Release the resources owned by a decoded "scheduler_set_options" request.
 * Only the isolated core mask string is freed; the structure itself is not.
 */
static void
free_rpc_scheduler_set_options(struct rpc_set_scheduler_opts_ctx *r)
{
	char *mask_str = r->isolated_core_mask;

	free(mask_str);
}

/*
 * Handler for the "scheduler_set_options" RPC (registered for the startup state).
 *
 * Decodes the optional "isolated_core_mask" and "scheduling_core" parameters,
 * rejects a request whose scheduling core is part of the isolated mask, and
 * then applies the isolated mask followed by the scheduling core.
 *
 * Replies with `true` on success, or with an "invalid params" JSON-RPC error
 * when decoding or any of the validation steps fails.
 */
static void
rpc_scheduler_set_options(struct spdk_jsonrpc_request *request,
			  const struct spdk_json_val *params)
{
	struct rpc_set_scheduler_opts_ctx req = {NULL};
	struct spdk_cpuset core_mask;

	/* Start from the current scheduling core so that a request which omits
	 * "scheduling_core" re-applies the existing value. */
	req.scheduling_core = spdk_scheduler_get_scheduling_lcore();

	if (spdk_json_decode_object(params, rpc_set_scheduler_opts_decoders,
				    SPDK_COUNTOF(rpc_set_scheduler_opts_decoders), &req)) {
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
						 "Invalid parameters");
		goto end;
	}

	if (req.isolated_core_mask != NULL) {
		/* A mask string that fails to parse must be rejected here; otherwise the
		 * checks below would run against a cpuset that was never validly filled in. */
		if (spdk_cpuset_parse(&core_mask, req.isolated_core_mask) != 0) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
							 "Invalid isolated core mask\n");
			goto end;
		}
		if (spdk_cpuset_get_cpu(&core_mask, req.scheduling_core)) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
							 "Scheduling core cannot be included in isolated core mask.\n");
			goto end;
		}
		if (scheduler_set_isolated_core_mask(core_mask) == false) {
			spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
							 "Invalid isolated core mask\n");
			goto end;
		}
	}

	/* NOTE(review): the isolated mask has already been stored at this point, so a
	 * failure below leaves it applied even though the RPC reports an error. */
	if (spdk_scheduler_set_scheduling_lcore(req.scheduling_core) == false) {
		spdk_jsonrpc_send_error_response(request, SPDK_JSONRPC_ERROR_INVALID_PARAMS,
						 "Invalid scheduling core.\n");
		goto end;
	}

	spdk_jsonrpc_send_bool_response(request, true);
end:
	free_rpc_scheduler_set_options(&req);
}
SPDK_RPC_REGISTER("scheduler_set_options", rpc_scheduler_set_options, SPDK_RPC_STARTUP)

struct rpc_thread_set_cpumask_ctx {
	struct spdk_jsonrpc_request *request;
	struct spdk_cpuset cpumask;
+12 −1
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#define EVENT_INTERNAL_H

#include "spdk/stdinc.h"
#include "spdk/cpuset.h"

#ifdef __cplusplus
extern "C" {
@@ -27,4 +28,14 @@ int app_get_proc_stat(unsigned int core, uint64_t *usr, uint64_t *sys, uint64_t
}
#endif

#endif
/**
 * Get isolated CPU core mask.
 *
 * \return string representation of the isolated core mask currently stored by the scheduler.
 */
const char *scheduler_get_isolated_core_mask(void);

/**
 * Set isolated CPU core mask.
 *
 * \param isolated_core_mask Cores to isolate from scheduling. Every core in the mask
 * must also be part of the application core mask.
 *
 * \return true if the mask was stored, false if it contains a core that is not in the
 * application core mask (the stored mask is left unchanged).
 */
bool scheduler_set_isolated_core_mask(struct spdk_cpuset isolated_core_mask);

#endif /* EVENT_INTERNAL_H */
+51 −2
Original line number Diff line number Diff line
@@ -6,6 +6,8 @@
#include "spdk/stdinc.h"
#include "spdk/likely.h"

#include "event_internal.h"

#include "spdk_internal/event.h"
#include "spdk_internal/usdt.h"

@@ -46,6 +48,7 @@ bool g_scheduling_in_progress = false;
static uint64_t g_scheduler_period = 0;
static uint32_t g_scheduler_core_number;
static struct spdk_scheduler_core_info *g_core_infos = NULL;
static struct spdk_cpuset g_scheduler_isolated_core_mask;

TAILQ_HEAD(, spdk_governor) g_governor_list
	= TAILQ_HEAD_INITIALIZER(g_governor_list);
@@ -158,6 +161,46 @@ spdk_scheduler_get_scheduling_lcore(void)
	return g_scheduling_reactor->lcore;
}

/*
 * Select the reactor running on `core` as the scheduling reactor.
 *
 * Returns true when a reactor exists for `core` and has been selected.
 * Returns false, leaving the current scheduling reactor untouched, when
 * spdk_reactor_get() finds no reactor for that core.
 */
bool
spdk_scheduler_set_scheduling_lcore(uint32_t core)
{
	struct spdk_reactor *reactor = spdk_reactor_get(core);

	if (reactor == NULL) {
		/* core is unsigned, so print it with %u; terminate the line like other log messages. */
		SPDK_ERRLOG("Failed to set scheduling reactor. Reactor(lcore:%u) does not exist\n", core);
		return false;
	}

	g_scheduling_reactor = reactor;
	return true;
}

bool
scheduler_set_isolated_core_mask(struct spdk_cpuset isolated_core_mask)
{
	struct spdk_cpuset tmp_mask;

	spdk_cpuset_copy(&tmp_mask, spdk_app_get_core_mask());
	spdk_cpuset_or(&tmp_mask, &isolated_core_mask);
	if (spdk_cpuset_equal(&tmp_mask, spdk_app_get_core_mask()) == false) {
		SPDK_ERRLOG("Isolated core mask is not included in app core mask.\n");
		return false;
	}
	spdk_cpuset_copy(&g_scheduler_isolated_core_mask, &isolated_core_mask);
	return true;
}

/* Return the scheduler's isolated core mask formatted as a string by spdk_cpuset_fmt(). */
const char *
scheduler_get_isolated_core_mask(void)
{
	const char *mask_str;

	mask_str = spdk_cpuset_fmt(&g_scheduler_isolated_core_mask);
	return mask_str;
}

/* Report whether `core` is set in the scheduler's isolated core mask. */
static bool
scheduler_is_isolated_core(uint32_t core)
{
	bool isolated;

	isolated = spdk_cpuset_get_cpu(&g_scheduler_isolated_core_mask, core);
	return isolated;
}

static void
reactor_construct(struct spdk_reactor *reactor, uint32_t lcore)
{
@@ -703,6 +746,11 @@ _threads_reschedule(struct spdk_scheduler_core_info *cores_info)
		for (j = 0; j < core->threads_count; j++) {
			thread_info = &core->thread_infos[j];
			if (thread_info->lcore != i) {
				if (core->isolated || cores_info[thread_info->lcore].isolated) {
					SPDK_ERRLOG("A thread cannot be moved from an isolated core or \
								moved to an isolated core. Skip rescheduling thread\n");
					continue;
				}
				_threads_reschedule_thread(thread_info);
			}
		}
@@ -798,6 +846,7 @@ _reactors_scheduler_gather_metrics(void *arg1, void *arg2)
	core_info->total_busy_tsc = reactor->busy_tsc;
	core_info->interrupt_mode = reactor->in_interrupt;
	core_info->threads_count = 0;
	core_info->isolated = scheduler_is_isolated_core(reactor->lcore);

	SPDK_DEBUGLOG(reactor, "Gathering metrics on %u\n", reactor->lcore);

@@ -807,7 +856,7 @@ _reactors_scheduler_gather_metrics(void *arg1, void *arg2)
			SPDK_ERRLOG("Failed to allocate memory when gathering metrics on %u\n", reactor->lcore);

			/* Cancel this round of schedule work */
			_event_call(g_scheduling_reactor->lcore, _reactors_scheduler_cancel, NULL, NULL);
			_event_call(spdk_scheduler_get_scheduling_lcore(), _reactors_scheduler_cancel, NULL, NULL);
			return;
		}

@@ -832,7 +881,7 @@ _reactors_scheduler_gather_metrics(void *arg1, void *arg2)
	}

	/* If we've looped back around to the scheduler thread, move to the next phase */
	if (next_core == g_scheduling_reactor->lcore) {
	if (next_core == spdk_scheduler_get_scheduling_lcore()) {
		/* Phase 2 of scheduling is rebalancing - deciding which threads to move where */
		_event_call(next_core, _reactors_scheduler_balance, NULL, NULL);
		return;
Loading