Commit 723dd06e authored by Richael Zhuang's avatar Richael Zhuang Committed by Tomasz Zawadzki
Browse files

bdevperf: support latency info in bdevperf output



Get latency info from bdev histogram and print latency info in
bdevperf output.
print min/max/average latency info by default;
print summarized histogram info if run bdevperf with '-l';
print detailed histogram info if run bdevperf with '-ll'.

Change-Id: I3261574a12c038acde81a58727e9942c0f8c57b2
Signed-off-by: default avatarRichael Zhuang <richael.zhuang@arm.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14640


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
parent a1fb441c
Loading
Loading
Loading
Loading
+270 −22
Original line number Diff line number Diff line
@@ -19,6 +19,7 @@
#include "spdk/bit_array.h"
#include "spdk/conf.h"
#include "spdk/zipf.h"
#include "spdk/histogram_data.h"

#define BDEVPERF_CONFIG_MAX_FILENAME 1024
#define BDEVPERF_CONFIG_UNDEFINED -1
@@ -75,6 +76,34 @@ static struct spdk_poller *g_perf_timer = NULL;
static void bdevperf_submit_single(struct bdevperf_job *job, struct bdevperf_task *task);
static void rpc_perform_tests_cb(void);

static uint32_t g_bdev_count = 0;
static uint32_t g_latency_display_level;

static const double g_latency_cutoffs[] = {
	0.01,
	0.10,
	0.25,
	0.50,
	0.75,
	0.90,
	0.95,
	0.98,
	0.99,
	0.995,
	0.999,
	0.9999,
	0.99999,
	0.999999,
	0.9999999,
	-1,
};

struct latency_info {
	uint64_t	min;
	uint64_t	max;
	uint64_t	total;
};

struct bdevperf_job {
	char				*name;
	struct spdk_bdev		*bdev;
@@ -116,6 +145,9 @@ struct bdevperf_job {
	struct spdk_zipf		*zipf;
	TAILQ_HEAD(, bdevperf_task)	task_list;
	uint64_t			run_time_in_usec;

	/* keep channel's histogram data before being destroyed */
	struct spdk_histogram_data	*histogram;
};

struct spdk_bdevperf {
@@ -169,9 +201,13 @@ struct bdevperf_aggregate_stats {
	double				total_mb_per_second;
	double				total_failed_per_second;
	double				total_timeout_per_second;
	double				min_latency;
	double				max_latency;
	uint64_t			total_io_completed;
	uint64_t			total_tsc;
};

static struct bdevperf_aggregate_stats g_stats = {};
static struct bdevperf_aggregate_stats g_stats = {.min_latency = (double)UINT64_MAX};

/*
 * Cumulative Moving Average (CMA): average of all data up to current
@@ -201,11 +237,36 @@ get_ema_io_per_second(struct bdevperf_job *job, uint64_t ema_period)
	return job->ema_io_per_second;
}

static void
get_avg_latency(void *ctx, uint64_t start, uint64_t end, uint64_t count,
		uint64_t total, uint64_t so_far)
{
	struct latency_info *latency_info = ctx;

	if (count == 0) {
		return;
	}

	latency_info->total += (start + end) / 2 * count;

	if (so_far == count) {
		latency_info->min = start;
	}

	if (so_far == total) {
		latency_info->max = end;
	}
}

static void
performance_dump_job(struct bdevperf_aggregate_stats *stats, struct bdevperf_job *job)
{
	double io_per_second, mb_per_second, failed_per_second, timeout_per_second;
	double average_latency = 0.0, min_latency, max_latency;
	uint64_t time_in_usec;
	uint64_t tsc_rate;
	uint64_t total_io;
	struct latency_info latency_info = {};

	printf("\r Job: %s (Core Mask 0x%s)\n", spdk_thread_get_name(job->thread),
	       spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)));
@@ -235,20 +296,41 @@ performance_dump_job(struct bdevperf_aggregate_stats *stats, struct bdevperf_job
	} else {
		io_per_second = get_ema_io_per_second(job, stats->ema_period);
	}

	tsc_rate = spdk_get_ticks_hz();
	mb_per_second = io_per_second * job->io_size / (1024 * 1024);

	spdk_histogram_data_iterate(job->histogram, get_avg_latency, &latency_info);

	total_io = job->io_completed + job->io_failed;
	if (total_io != 0) {
		average_latency = (double)latency_info.total / total_io * 1000 * 1000 / tsc_rate;
	}
	min_latency = (double)latency_info.min * 1000 * 1000 / tsc_rate;
	max_latency = (double)latency_info.max * 1000 * 1000 / tsc_rate;

	failed_per_second = (double)job->io_failed * 1000000 / time_in_usec;
	timeout_per_second = (double)job->io_timeout * 1000000 / time_in_usec;

	printf("\t %-20s: %10.2f %10.2f %10.2f",
	       job->name, (float)time_in_usec / 1000000, io_per_second, mb_per_second);
	printf(" %10.2f %8.2f\n",
	printf(" %10.2f %8.2f",
	       failed_per_second, timeout_per_second);
	printf(" %10.2f %10.2f %10.2f\n",
	       average_latency, min_latency, max_latency);

	stats->total_io_per_second += io_per_second;
	stats->total_mb_per_second += mb_per_second;
	stats->total_failed_per_second += failed_per_second;
	stats->total_timeout_per_second += timeout_per_second;
	stats->total_io_completed += job->io_completed + job->io_failed;
	stats->total_tsc += latency_info.total;
	if (min_latency < stats->min_latency) {
		stats->min_latency = min_latency;
	}
	if (max_latency > stats->max_latency) {
		stats->max_latency = max_latency;
	}
}

static void
@@ -364,6 +446,7 @@ free_job_config(void)
static void
bdevperf_job_free(struct bdevperf_job *job)
{
	spdk_histogram_data_free(job->histogram);
	spdk_bit_array_free(&job->outstanding);
	spdk_zipf_free(&job->zipf);
	free(job->name);
@@ -376,13 +459,53 @@ job_thread_exit(void *ctx)
	spdk_thread_exit(spdk_get_thread());
}

static void
check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count,
	     uint64_t total, uint64_t so_far)
{
	double so_far_pct;
	double **cutoff = ctx;
	uint64_t tsc_rate;

	if (count == 0) {
		return;
	}

	tsc_rate = spdk_get_ticks_hz();
	so_far_pct = (double)so_far / total;
	while (so_far_pct >= **cutoff && **cutoff > 0) {
		printf("%9.5f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / tsc_rate);
		(*cutoff)++;
	}
}

static void
print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count,
	     uint64_t total, uint64_t so_far)
{
	double so_far_pct;
	uint64_t tsc_rate;

	if (count == 0) {
		return;
	}

	tsc_rate = spdk_get_ticks_hz();
	so_far_pct = (double)so_far * 100 / total;
	printf("%9.3f - %9.3f: %9.4f%%  (%9ju)\n",
	       (double)start * 1000 * 1000 / tsc_rate,
	       (double)end * 1000 * 1000 / tsc_rate,
	       so_far_pct, count);
}

static void
bdevperf_test_done(void *ctx)
{
	struct bdevperf_job *job, *jtmp;
	struct bdevperf_task *task, *ttmp;
	int rc;
	double average_latency = 0.0;
	uint64_t time_in_usec;
	int rc;

	if (g_time_in_usec) {
		g_stats.io_time_in_usec = g_time_in_usec;
@@ -405,13 +528,63 @@ bdevperf_test_done(void *ctx)
		       (double)g_time_in_usec / 1000000);
	}

	printf("\n\r %-*s: %10s %10s %10s %10s %8s\n",
	       28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s");
	printf("\n%*s\n", 107, "Latency(us)");
	printf("\r %-*s: %10s %10s %10s %10s %8s %10s %10s %10s\n",
	       28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s", "Average", "min", "max");

	TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
		TAILQ_REMOVE(&g_bdevperf.jobs, job, link);

		performance_dump_job(&g_stats, job);
	}

	printf("\r =================================================================================="
	       "=================================\n");
	printf("\r %-28s: %10s %10.2f %10.2f",
	       "Total", "", g_stats.total_io_per_second, g_stats.total_mb_per_second);
	printf(" %10.2f %8.2f",
	       g_stats.total_failed_per_second, g_stats.total_timeout_per_second);

	if (g_stats.total_io_completed != 0) {
		average_latency = ((double)g_stats.total_tsc / g_stats.total_io_completed) * 1000 * 1000 /
				  spdk_get_ticks_hz();
	}
	printf(" %10.2f %10.2f %10.2f\n", average_latency, g_stats.min_latency, g_stats.max_latency);

	fflush(stdout);

	if (g_latency_display_level == 0 || g_stats.total_io_completed == 0) {
		goto clean;
	}

	printf("\n Latency summary\n");
	TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
		printf("\r =============================================\n");
		printf("\r Job: %s (Core Mask 0x%s)\n", spdk_thread_get_name(job->thread),
		       spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)));

		const double *cutoff = g_latency_cutoffs;

		spdk_histogram_data_iterate(job->histogram, check_cutoff, &cutoff);

		printf("\n");
	}

	if (g_latency_display_level == 1) {
		goto clean;
	}

	printf("\r Latency histogram\n");
	TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
		printf("\r =============================================\n");
		printf("\r Job: %s (Core Mask 0x%s)\n", spdk_thread_get_name(job->thread),
		       spdk_cpuset_fmt(spdk_thread_get_cpumask(job->thread)));

		spdk_histogram_data_iterate(job->histogram, print_bucket, NULL);
		printf("\n");
	}

clean:
	TAILQ_FOREACH_SAFE(job, &g_bdevperf.jobs, link, jtmp) {
		TAILQ_REMOVE(&g_bdevperf.jobs, job, link);

		spdk_thread_send_msg(job->thread, job_thread_exit, NULL);

@@ -425,13 +598,6 @@ bdevperf_test_done(void *ctx)
		bdevperf_job_free(job);
	}

	printf("\r ==================================================================================\n");
	printf("\r %-28s: %10s %10.2f %10.2f",
	       "Total", "", g_stats.total_io_per_second, g_stats.total_mb_per_second);
	printf(" %10.2f %8.2f\n",
	       g_stats.total_failed_per_second, g_stats.total_timeout_per_second);
	fflush(stdout);

	rc = g_run_rc;
	if (g_request && !g_shutdown) {
		rpc_perform_tests_cb();
@@ -453,6 +619,13 @@ bdevperf_job_end(void *ctx)
	}
}

static void
bdevperf_channel_get_histogram_cb(void *cb_arg, int status, struct spdk_histogram_data *histogram)
{
	struct spdk_histogram_data *job_hist = cb_arg;
	spdk_histogram_data_merge(job_hist, histogram);
}

static void
bdevperf_end_task(struct bdevperf_task *task)
{
@@ -464,6 +637,11 @@ bdevperf_end_task(struct bdevperf_task *task)
		if (job->current_queue_depth == 0) {
			end_tsc = spdk_get_ticks() - g_start_tsc;
			job->run_time_in_usec = end_tsc * 1000000 / spdk_get_ticks_hz();

			/* keep histogram info before channel is destroyed */
			spdk_bdev_channel_get_histogram(job->bdev, job->ch, bdevperf_channel_get_histogram_cb,
							job->histogram);

			spdk_put_io_channel(job->ch);
			spdk_bdev_close(job->bdev_desc);
			spdk_thread_send_msg(g_main_thread, bdevperf_job_end, NULL);
@@ -1061,12 +1239,20 @@ static void
_performance_dump_done(void *ctx)
{
	struct bdevperf_aggregate_stats *stats = ctx;
	double average_latency;

	printf("\r ==================================================================================\n");
	printf("\r =================================================================================="
	       "=================================\n");
	printf("\r %-28s: %10s %10.2f %10.2f",
	       "Total", "", stats->total_io_per_second, stats->total_mb_per_second);
	printf(" %10.2f %8.2f\n",
	printf(" %10.2f %8.2f",
	       stats->total_failed_per_second, stats->total_timeout_per_second);

	average_latency = ((double)stats->total_tsc / stats->total_io_completed) * 1000 * 1000 /
			  spdk_get_ticks_hz();
	printf(" %10.2f %10.2f %10.2f\n", average_latency, stats->min_latency, stats->max_latency);
	printf("\n");

	fflush(stdout);

	g_performance_dump_active = false;
@@ -1107,6 +1293,8 @@ performance_statistics_thread(void *arg)
		return -1;
	}

	stats->min_latency = (double)UINT64_MAX;

	g_show_performance_period_num++;

	stats->io_time_in_usec = g_show_performance_period_num * g_show_performance_period_in_usec;
@@ -1137,8 +1325,9 @@ bdevperf_test(void)
	/* Start a timer to dump performance numbers */
	g_start_tsc = spdk_get_ticks();
	if (g_show_performance_real_time && !g_perf_timer) {
		printf("\r %-*s: %10s %10s %10s %10s %8s\n",
		       28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s");
		printf("%*s\n", 107, "Latency(us)");
		printf("\r %-*s: %10s %10s %10s %10s %8s %10s %10s %10s\n",
		       28, "Device Information", "runtime(s)", "IOPS", "MiB/s", "Fail/s", "TO/s", "Average", "min", "max");

		g_perf_timer = SPDK_POLLER_REGISTER(performance_statistics_thread, NULL,
						    g_show_performance_period_in_usec);
@@ -1161,21 +1350,70 @@ bdevperf_bdev_removed(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, vo
	}
}

static void
bdevperf_histogram_status_cb(void *cb_arg, int status)
{
	if (status != 0) {
		g_run_rc = status;
		if (g_continue_on_failure == false) {
			g_error_to_exit = true;
		}
	}

	if (--g_bdev_count == 0) {
		if (g_run_rc == 0) {
			/* Ready to run the test */
			bdevperf_test();
		} else {
			bdevperf_test_done(NULL);
		}
	}
}

static uint32_t g_construct_job_count = 0;

static void
_bdevperf_enable_histogram(bool enable)
{
	struct spdk_bdev *bdev;
	/* increment initial g_bdev_count so that it will never reach 0 in the middle of iteration */
	g_bdev_count = 1;

	if (g_job_bdev_name != NULL) {
		bdev = spdk_bdev_get_by_name(g_job_bdev_name);
		if (bdev) {
			g_bdev_count++;

			spdk_bdev_histogram_enable(bdev, bdevperf_histogram_status_cb, NULL, enable);
		} else {
			fprintf(stderr, "Unable to find bdev '%s'\n", g_job_bdev_name);
		}
	} else {
		bdev = spdk_bdev_first_leaf();

		while (bdev != NULL) {
			g_bdev_count++;

			spdk_bdev_histogram_enable(bdev, bdevperf_histogram_status_cb, NULL, enable);
			bdev = spdk_bdev_next_leaf(bdev);
		}
	}

	bdevperf_histogram_status_cb(NULL, 0);
}

static void
_bdevperf_construct_job_done(void *ctx)
{
	if (--g_construct_job_count == 0) {

		if (g_run_rc != 0) {
			/* Something failed. */
			bdevperf_test_done(NULL);
			return;
		}

		/* Ready to run the test */
		bdevperf_test();
		/* always enable histogram. */
		_bdevperf_enable_histogram(true);
	} else if (g_run_rc != 0) {
		/* Reset error as some jobs constructed right */
		g_run_rc = 0;
@@ -1397,6 +1635,13 @@ bdevperf_construct_job(struct spdk_bdev *bdev, struct job_config *config,
		}
	}

	job->histogram = spdk_histogram_data_alloc();
	if (job->histogram == NULL) {
		fprintf(stderr, "Failed to allocate histogram\n");
		bdevperf_job_free(job);
		return -ENOMEM;
	}

	TAILQ_INIT(&job->task_list);

	task_num = job->queue_depth;
@@ -2041,6 +2286,8 @@ bdevperf_parse_arg(int ch, char *arg)
			fprintf(stderr, "Illegal zipf theta value %s\n", optarg);
			return -EINVAL;
		}
	} else if (ch == 'l') {
		g_latency_display_level++;
	} else {
		tmp = spdk_strtoll(optarg, 10);
		if (tmp < 0) {
@@ -2104,6 +2351,7 @@ bdevperf_usage(void)
	printf(" -X                        abort timed out I/O\n");
	printf(" -C                        enable every core to send I/Os to each bdev\n");
	printf(" -j <filename>             use job config file\n");
	printf(" -l                        display latency histogram, default: disable. -l display summary, -ll display details\n");
}

static int
@@ -2216,7 +2464,7 @@ main(int argc, char **argv)
	opts.rpc_addr = NULL;
	opts.shutdown_cb = spdk_bdevperf_shutdown_cb;

	if ((rc = spdk_app_parse_args(argc, argv, &opts, "Zzfq:o:t:w:k:CF:M:P:S:T:Xj:", NULL,
	if ((rc = spdk_app_parse_args(argc, argv, &opts, "Zzfq:o:t:w:k:CF:M:P:S:T:Xlj:", NULL,
				      bdevperf_parse_arg, bdevperf_usage)) !=
	    SPDK_APP_PARSE_ARGS_SUCCESS) {
		return rc;