Commit c2a4702d authored by Jaylyn Ren's avatar Jaylyn Ren Committed by Tomasz Zawadzki
Browse files

perf: Add option to display perf cores usage



Run perf tool with parameter "-m" to display real-time overall cpu usage on used cores.

Signed-off-by: default avatarJaylyn Ren <jaylyn.ren@arm.com>
Change-Id: I1f60315ac1e1ed1dfd238eb0a42a4b93c773e21e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6444


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Community-CI: Mellanox Build Bot
parent 83adcc14
Loading
Loading
Loading
Loading
+57 −11
Original line number Diff line number Diff line
@@ -136,6 +136,11 @@ struct ns_worker_stats {
	uint64_t		total_tsc;
	uint64_t		min_tsc;
	uint64_t		max_tsc;
	uint64_t		last_tsc;
	uint64_t		busy_tsc;
	uint64_t		idle_tsc;
	uint64_t		last_busy_tsc;
	uint64_t		last_idle_tsc;
};

struct ns_worker_ctx {
@@ -203,7 +208,7 @@ struct ns_fn_table {
	int	(*submit_io)(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
			     struct ns_entry *entry, uint64_t offset_in_ios);

	void	(*check_io)(struct ns_worker_ctx *ns_ctx);
	int64_t	(*check_io)(struct ns_worker_ctx *ns_ctx);

	void	(*verify_io)(struct perf_task *task, struct ns_entry *entry);

@@ -231,6 +236,8 @@ static pthread_barrier_t g_worker_sync_barrier;

static uint64_t g_tsc_rate;

static bool g_monitor_perf_cores = false;

static uint32_t g_io_align = 0x200;
static bool g_io_align_specified;
static uint32_t g_io_size_bytes;
@@ -447,10 +454,10 @@ uring_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
	return 0;
}

static void
static int64_t
uring_check_io(struct ns_worker_ctx *ns_ctx)
{
	int i, count, to_complete, to_submit, ret = 0;
	int i, to_complete, to_submit, count = 0, ret = 0;
	struct perf_task *task;

	to_submit = ns_ctx->u.uring.io_pending;
@@ -460,7 +467,7 @@ uring_check_io(struct ns_worker_ctx *ns_ctx)
		 * It will automatically call spdk_io_uring_enter appropriately. */
		ret = io_uring_submit(&ns_ctx->u.uring.ring);
		if (ret < 0) {
			return;
			return -1;
		}
		ns_ctx->u.uring.io_pending = 0;
		ns_ctx->u.uring.io_inflight += to_submit;
@@ -481,6 +488,7 @@ uring_check_io(struct ns_worker_ctx *ns_ctx)
			task_complete(task);
		}
	}
	return count;
}

static void
@@ -580,7 +588,7 @@ aio_submit_io(struct perf_task *task, struct ns_worker_ctx *ns_ctx,
	}
}

static void
static int64_t
aio_check_io(struct ns_worker_ctx *ns_ctx)
{
	int count, i;
@@ -598,6 +606,7 @@ aio_check_io(struct ns_worker_ctx *ns_ctx)
	for (i = 0; i < count; i++) {
		task_complete(ns_ctx->u.aio.events[i].data);
	}
	return count;
}

static void
@@ -884,7 +893,7 @@ perf_disconnect_cb(struct spdk_nvme_qpair *qpair, void *ctx)

}

static void
static int64_t
nvme_check_io(struct ns_worker_ctx *ns_ctx)
{
	int64_t rc;
@@ -894,6 +903,7 @@ nvme_check_io(struct ns_worker_ctx *ns_ctx)
		fprintf(stderr, "NVMe io qpair process completion error\n");
		exit(1);
	}
	return rc;
}

static void
@@ -1404,6 +1414,11 @@ print_periodic_performance(bool warmup)
	double mb_this_second;
	struct worker_thread *worker;
	struct ns_worker_ctx *ns_ctx;
	uint64_t busy_tsc;
	uint64_t idle_tsc;
	uint64_t core_busy_tsc = 0;
	uint64_t core_idle_tsc = 0;
	double core_busy_perc = 0;

	if (!isatty(STDOUT_FILENO)) {
		/* Don't print periodic stats if output is not going
@@ -1411,17 +1426,34 @@ print_periodic_performance(bool warmup)
		 */
		return;
	}

	io_this_second = 0;
	TAILQ_FOREACH(worker, &g_workers, link) {
		busy_tsc = 0;
		idle_tsc = 0;
		TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
			io_this_second += ns_ctx->stats.io_completed - ns_ctx->stats.last_io_completed;
			ns_ctx->stats.last_io_completed = ns_ctx->stats.io_completed;

			if (g_monitor_perf_cores) {
				busy_tsc += ns_ctx->stats.busy_tsc - ns_ctx->stats.last_busy_tsc;
				idle_tsc += ns_ctx->stats.idle_tsc - ns_ctx->stats.last_idle_tsc;
				ns_ctx->stats.last_busy_tsc = ns_ctx->stats.busy_tsc;
				ns_ctx->stats.last_idle_tsc = ns_ctx->stats.idle_tsc;
			}
		}
		if (g_monitor_perf_cores) {
			core_busy_tsc += busy_tsc;
			core_idle_tsc += idle_tsc;
			core_busy_perc += (double)core_busy_tsc / (core_idle_tsc + core_busy_tsc) * 100;
		}
	}

	mb_this_second = (double)io_this_second * g_io_size_bytes / (1024 * 1024);
	printf("%s%9ju IOPS, %8.2f MiB/s\r", warmup ? "[warmup] " : "", io_this_second, mb_this_second);

	printf("%s%9ju IOPS, %8.2f MiB/s", warmup ? "[warmup] " : "", io_this_second, mb_this_second);
	if (g_monitor_perf_cores) {
		printf("%3d Core(s): %6.2f%% Busy", g_num_workers, core_busy_perc);
	}
	printf("\r");
	fflush(stdout);
}

@@ -1434,6 +1466,8 @@ work_fn(void *arg)
	uint32_t unfinished_ns_ctx;
	bool warmup = false;
	int rc;
	int64_t check_rc;
	uint64_t check_now;

	/* Allocate queue pairs for each namespace. */
	TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
@@ -1473,7 +1507,15 @@ work_fn(void *arg)
		 * to replace each I/O that is completed.
		 */
		TAILQ_FOREACH(ns_ctx, &worker->ns_ctx, link) {
			ns_ctx->entry->fn_table->check_io(ns_ctx);
			check_now = spdk_get_ticks();
			check_rc = ns_ctx->entry->fn_table->check_io(ns_ctx);

			if (check_rc > 0) {
				ns_ctx->stats.busy_tsc += check_now - ns_ctx->stats.last_tsc;
			} else {
				ns_ctx->stats.idle_tsc += check_now - ns_ctx->stats.last_tsc;
			}
			ns_ctx->stats.last_tsc = check_now;
		}

		tsc_current = spdk_get_ticks();
@@ -1590,6 +1632,7 @@ static void usage(char *program_name)
	printf("\t[-A IO buffer alignment. Must be power of 2 and not less than cache line (%u)]\n",
	       SPDK_CACHE_LINE_SIZE);
	printf("\t[-S set the default sock impl, e.g. \"posix\"]\n");
	printf("\t[-m display real-time overall cpu usage on used cores]\n");
#ifdef SPDK_CONFIG_URING
	printf("\t[-R enable using liburing to drive kernel devices (Default: libaio)]\n");
#endif
@@ -2022,7 +2065,7 @@ parse_args(int argc, char **argv)
	int rc;

	while ((op = getopt(argc, argv,
			    "a:b:c:e:gi:lo:q:r:k:s:t:w:z:A:C:DGHILM:NO:P:Q:RS:T:U:VZ:")) != -1) {
			    "a:b:c:e:gi:lmo:q:r:k:s:t:w:z:A:C:DGHILM:NO:P:Q:RS:T:U:VZ:")) != -1) {
		switch (op) {
		case 'a':
		case 'A':
@@ -2117,6 +2160,9 @@ parse_args(int argc, char **argv)
		case 'l':
			g_latency_ssd_tracking_enable = true;
			break;
		case 'm':
			g_monitor_perf_cores = true;
			break;
		case 'r':
			if (add_trid(optarg)) {
				usage(argv[0]);