Commit 5044e4f6 authored by Jim Harris's avatar Jim Harris
Browse files

histogram: move nvme/perf histogram code to a common header



Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: Ia0083365b2da63cb38aebb9f7bbc02f4dfd1ae94

Reviewed-on: https://review.gerrithub.io/365263


Tested-by: default avatarSPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarDaniel Verkamp <daniel.verkamp@intel.com>
parent ad20a6dd
Loading
Loading
Loading
Loading
+42 −127
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/nvme_intel.h"
#include "spdk/histogram_data.h"

#if HAVE_LIBAIO
#include <libaio.h>
@@ -81,41 +82,6 @@ struct ns_entry {
	char			name[1024];
};

/*
 * Latency tracking is done with ranges of bucket arrays.  The bucket
 * for any given I/O is determined solely by the TSC delta - any
 * translation to microseconds is only done after the test is finished
 * and statistics are printed.
 *
 * Each range has a number of buckets determined by NUM_BUCKETS_PER_RANGE
 * which is 128.  The buckets in ranges 0 and 1 each map to one specific
 * TSC delta.  The buckets in subsequent ranges each map to twice as many
 * TSC deltas as buckets in the range before it:
 *
 * Range 0:  1 TSC each - 128 buckets cover 0 to 127 (2^7-1)
 * Range 1:  1 TSC each - 128 buckets cover 128 to 255 (2^8-1)
 * Range 2:  2 TSC each - 128 buckets cover 256 to 511 (2^9-1)
 * Range 3:  4 TSC each - 128 buckets cover 512 to 1023 (2^10-1)
 * Range 4:  8 TSC each - 128 buckets cover 1024 to 2047 (2^11-1)
 * Range 5: 16 TSC each - 128 buckets cover 2048 to 4095 (2^12-1)
 * ...
 * Range 55: 2^54 TSC each - 128 buckets cover 2^61 to 2^62-1
 * Range 56: 2^55 TSC each - 128 buckets cover 2^62 to 2^63-1
 * Range 57: 2^56 TSC each - 128 buckets cover 2^63 to 2^64-1
 *
 * On a 2.3GHz processor, this strategy results in 50ns buckets in the
 * 7-14us range (sweet spot for Intel Optane SSD latency testing).
 *
 * Buckets can be made more granular by increasing BUCKET_SHIFT.  This
 * comes at the cost of additional storage per namespace context to
 * store the bucket data.
 */
#define BUCKET_SHIFT 7
#define BUCKET_LSB (64 - BUCKET_SHIFT)
#define NUM_BUCKETS_PER_RANGE (1ULL << BUCKET_SHIFT)
#define BUCKET_MASK (NUM_BUCKETS_PER_RANGE - 1)
#define NUM_BUCKET_RANGES (BUCKET_LSB + 1)

static const double g_latency_cutoffs[] = {
	0.01,
	0.10,
@@ -157,7 +123,7 @@ struct ns_worker_ctx {

	struct ns_worker_ctx	*next;

	uint64_t		bucket[NUM_BUCKET_RANGES][NUM_BUCKETS_PER_RANGE];
	struct spdk_histogram_data	histogram;
};

struct perf_task {
@@ -215,63 +181,6 @@ static int g_aio_optind; /* Index of first AIO filename in argv */
static void
task_complete(struct perf_task *task);

static uint32_t
get_bucket_range(uint64_t tsc)
{
	uint32_t clz, range;

	assert(tsc != 0);

	clz = __builtin_clzll(tsc);

	if (clz <= BUCKET_LSB) {
		range = BUCKET_LSB - clz;
	} else {
		range = 0;
	}

	return range;
}

static uint32_t
get_bucket_index(uint64_t tsc, uint32_t range)
{
	uint32_t shift;

	if (range == 0) {
		shift = 0;
	} else {
		shift = range - 1;
	}

	return (tsc >> shift) & BUCKET_MASK;
}

static double
get_us_from_bucket(uint32_t range, uint32_t index)
{
	uint64_t tsc;

	index += 1;
	if (range > 0) {
		tsc = 1ULL << (range + BUCKET_SHIFT - 1);
		tsc += (uint64_t)index << (range - 1);
	} else {
		tsc = index;
	}

	return (double)tsc * 1000 * 1000 / g_tsc_rate;
}

static void
track_latency(struct ns_worker_ctx *ns_ctx, uint64_t tsc)
{
	uint32_t range = get_bucket_range(tsc);
	uint32_t index = get_bucket_index(tsc, range);

	ns_ctx->bucket[range][index]++;
}

static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
@@ -607,7 +516,7 @@ task_complete(struct perf_task *task)
		ns_ctx->max_tsc = tsc_diff;
	}
	if (g_latency_sw_tracking_level > 0) {
		track_latency(ns_ctx, tsc_diff);
		spdk_histogram_data_tally(&ns_ctx->histogram, tsc_diff);
	}
	rte_mempool_put(task_pool, task);

@@ -792,6 +701,41 @@ static void usage(char *program_name)
	printf("\t[-i shared memory group ID]\n");
}

/*
 * spdk_histogram_data_iterate() callback: prints each latency percentile
 * cutoff as soon as the cumulative I/O count crosses it.  ctx is a pointer
 * to the caller's cursor into the cutoff array, advanced as cutoffs are
 * consumed.
 */
static void
check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count,
	     uint64_t total, uint64_t so_far)
{
	double **cutoff = ctx;
	double cumulative_frac;

	/* Empty buckets cannot cross a cutoff; skip them. */
	if (count == 0) {
		return;
	}

	cumulative_frac = (double)so_far / total;

	/* Emit every cutoff this bucket's cumulative fraction reaches. */
	for (; **cutoff > 0 && cumulative_frac >= **cutoff; (*cutoff)++) {
		printf("%8.4f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / g_tsc_rate);
	}
}

/*
 * spdk_histogram_data_iterate() callback: prints one non-empty histogram
 * bucket as a microsecond range, cumulative percentage, and I/O count.
 * ctx is unused.
 */
static void
print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count,
	     uint64_t total, uint64_t so_far)
{
	double start_us, end_us, cumulative_pct;

	/* Only buckets that captured at least one I/O are printed. */
	if (count == 0) {
		return;
	}

	/* Convert TSC bucket bounds to microseconds for display. */
	start_us = (double)start * 1000 * 1000 / g_tsc_rate;
	end_us = (double)end * 1000 * 1000 / g_tsc_rate;
	cumulative_pct = (double)so_far * 100 / total;

	printf("%9.3f - %9.3f: %9.4f%%  (%9ju)\n",
	       start_us, end_us, cumulative_pct, count);
}

static void
print_performance(void)
{
@@ -858,27 +802,13 @@ print_performance(void)
	while (worker) {
		ns_ctx = worker->ns_ctx;
		while (ns_ctx) {
			uint64_t i, j, so_far = 0;
			double so_far_pct = 0, bucket = 0;
			const double *cutoff = g_latency_cutoffs;

			printf("Summary latency data for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore);
			printf("=================================================================================\n");

			for (i = 0; i < NUM_BUCKET_RANGES; i++) {
				for (j = 0; j < NUM_BUCKETS_PER_RANGE; j++) {
					so_far += ns_ctx->bucket[i][j];
					so_far_pct = (double)so_far / total_io_completed;
					bucket = get_us_from_bucket(i, j);
					if (ns_ctx->bucket[i][j] == 0) {
						continue;
					}
					while (so_far_pct >= *cutoff && *cutoff > 0) {
						printf("%8.4f%% : %9.3fus\n", *cutoff * 100, bucket);
						cutoff++;
					}
				}
			}
			spdk_histogram_data_iterate(&ns_ctx->histogram, check_cutoff, &cutoff);

			printf("\n");
			ns_ctx = ns_ctx->next;
		}
@@ -893,27 +823,11 @@ print_performance(void)
	while (worker) {
		ns_ctx = worker->ns_ctx;
		while (ns_ctx) {
			uint64_t i, j, so_far = 0;
			float so_far_pct = 0;
			double last_bucket, bucket = 0;

			printf("Latency histogram for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore);
			printf("==============================================================================\n");
			printf("       Range in us     Cumulative    IO count\n");

			for (i = 0; i < NUM_BUCKET_RANGES; i++) {
				for (j = 0; j < NUM_BUCKETS_PER_RANGE; j++) {
					so_far += ns_ctx->bucket[i][j];
					so_far_pct = (float)so_far * 100 / total_io_completed;
					last_bucket = bucket;
					bucket = get_us_from_bucket(i, j);
					if (ns_ctx->bucket[i][j] == 0) {
						continue;
					}
					printf("%9.3f - %9.3f: %9.4f%%  (%9ju)\n",
					       last_bucket, bucket, so_far_pct, ns_ctx->bucket[i][j]);
				}
			}
			spdk_histogram_data_iterate(&ns_ctx->histogram, print_bucket, NULL);
			printf("\n");
			ns_ctx = ns_ctx->next;
		}
@@ -1376,6 +1290,7 @@ associate_workers_with_ns(void)
		ns_ctx->min_tsc = UINT64_MAX;
		ns_ctx->entry = entry;
		ns_ctx->next = worker->ns_ctx;
		spdk_histogram_data_reset(&ns_ctx->histogram);
		worker->ns_ctx = ns_ctx;

		worker = worker->next;
+189 −0
Original line number Diff line number Diff line
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 * Generic histogram library
 */

#ifndef _SPDK_HISTOGRAM_DATA_H_
#define _SPDK_HISTOGRAM_DATA_H_

#include "spdk/stdinc.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Histogram bucket geometry: each range holds 2^SPDK_HISTOGRAM_BUCKET_SHIFT
 * buckets.  See the detailed range/bucket layout description below.
 */
#define SPDK_HISTOGRAM_BUCKET_SHIFT		7
#define SPDK_HISTOGRAM_BUCKET_LSB		(64 - SPDK_HISTOGRAM_BUCKET_SHIFT)
#define SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE	(1ULL << SPDK_HISTOGRAM_BUCKET_SHIFT)
#define SPDK_HISTOGRAM_BUCKET_MASK		(SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE - 1)
#define SPDK_HISTOGRAM_NUM_BUCKET_RANGES	(SPDK_HISTOGRAM_BUCKET_LSB + 1)

/*
 * SPDK histograms are implemented using ranges of bucket arrays.  The most common usage
 * model is using TSC datapoints to capture an I/O latency histogram.  For this usage model,
 * the histogram tracks only TSC deltas - any translation to microseconds is done by the
 * histogram user calling spdk_histogram_data_iterate() to iterate over the buckets to perform
 * the translations.
 *
 * Each range has a number of buckets determined by SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE
 * which is 128.  The buckets in ranges 0 and 1 each map to one specific datapoint value.
 * The buckets in subsequent ranges each map to twice as many datapoint values as buckets
 * in the range before it:
 *
 * Range 0:  1 value each  - 128 buckets cover 0 to 127 (2^7-1)
 * Range 1:  1 value each  - 128 buckets cover 128 to 255 (2^8-1)
 * Range 2:  2 values each - 128 buckets cover 256 to 511 (2^9-1)
 * Range 3:  4 values each - 128 buckets cover 512 to 1023 (2^10-1)
 * Range 4:  8 values each - 128 buckets cover 1024 to 2047 (2^11-1)
 * Range 5: 16 values each - 128 buckets cover 2048 to 4095 (2^12-1)
 * ...
 * Range 55: 2^54 values each - 128 buckets cover 2^61 to 2^62-1
 * Range 56: 2^55 values each - 128 buckets cover 2^62 to 2^63-1
 * Range 57: 2^56 values each - 128 buckets cover 2^63 to 2^64-1
 *
 * On a 2.3GHz processor, this strategy results in 50ns buckets in the 7-14us range (sweet
 * spot for Intel Optane SSD latency testing).
 *
 * Buckets can be made more granular by increasing SPDK_HISTOGRAM_BUCKET_SHIFT.  This
 * comes at the cost of additional storage per namespace context to store the bucket data.
 */

/*
 * Bucketed counters for one histogram instance.  bucket[range][index] holds
 * the number of datapoints tallied into that bucket; see the layout
 * description above for how (range, index) maps to datapoint values.
 */
struct spdk_histogram_data {

	uint64_t	bucket[SPDK_HISTOGRAM_NUM_BUCKET_RANGES][SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE];

};

static inline void
spdk_histogram_data_reset(struct spdk_histogram_data *histogram)
{
	memset(histogram, 0, sizeof(*histogram));
}

/*
 * Map a non-zero datapoint to its bucket range.  The range is derived from
 * the position of the datapoint's most significant set bit; all values that
 * fit in the low SPDK_HISTOGRAM_BUCKET_SHIFT+1 bits fall into range 0.
 */
static inline uint32_t
__spdk_histogram_data_get_bucket_range(uint64_t datapoint)
{
	uint32_t clz;

	/* __builtin_clzll(0) is undefined, so zero datapoints are not allowed. */
	assert(datapoint != 0);

	clz = __builtin_clzll(datapoint);

	return (clz <= SPDK_HISTOGRAM_BUCKET_LSB) ? SPDK_HISTOGRAM_BUCKET_LSB - clz : 0;
}

/*
 * Map a datapoint to its bucket index within the given range.  Ranges 0 and
 * 1 both map one value per bucket (shift 0); each subsequent range doubles
 * the number of values per bucket.
 */
static inline uint32_t
__spdk_histogram_data_get_bucket_index(uint64_t datapoint, uint32_t range)
{
	uint32_t shift = (range > 0) ? range - 1 : 0;

	return (datapoint >> shift) & SPDK_HISTOGRAM_BUCKET_MASK;
}

/* Record one (non-zero) datapoint by incrementing its bucket's counter. */
static inline void
spdk_histogram_data_tally(struct spdk_histogram_data *histogram, uint64_t datapoint)
{
	uint32_t bucket_range, bucket_index;

	bucket_range = __spdk_histogram_data_get_bucket_range(datapoint);
	bucket_index = __spdk_histogram_data_get_bucket_index(datapoint, bucket_range);

	histogram->bucket[bucket_range][bucket_index]++;
}

/*
 * Return the first datapoint value belonging to the bucket *after*
 * (range, index) - i.e. the exclusive upper bound of the given bucket.
 * spdk_histogram_data_iterate() uses consecutive results as the [start, end)
 * boundaries it passes to the callback.
 */
static inline uint64_t
__spdk_histogram_data_get_bucket_start(uint32_t range, uint32_t index)
{
	uint64_t bound;

	index += 1;
	if (range == 0) {
		/* Range 0 buckets each cover exactly one value. */
		return index;
	}

	/* Base value of the range, plus index buckets of 2^(range-1) values each. */
	bound = 1ULL << (range + SPDK_HISTOGRAM_BUCKET_SHIFT - 1);
	return bound + ((uint64_t)index << (range - 1));
}

/*
 * Iteration callback: invoked once per bucket (empty buckets included) with
 * the bucket's [start, end) datapoint bounds, its count, the histogram-wide
 * total, and the cumulative count up to and including this bucket.
 */
typedef void (*spdk_histogram_data_fn)(void *ctx, uint64_t start, uint64_t end, uint64_t count,
				       uint64_t total, uint64_t so_far);

/*
 * Walk every bucket in ascending datapoint order, invoking fn for each one
 * (including empty buckets).  fn receives the bucket's [start, end) datapoint
 * bounds, its count, the total datapoint count of the whole histogram, and
 * the running cumulative count through this bucket.
 */
static inline void
spdk_histogram_data_iterate(const struct spdk_histogram_data *histogram,
			    spdk_histogram_data_fn fn, void *ctx)
{
	uint64_t range, idx;
	uint64_t count, cumulative, total;
	uint64_t lower, upper;

	/* First pass: total datapoint count, needed by every callback. */
	total = 0;
	for (range = 0; range < SPDK_HISTOGRAM_NUM_BUCKET_RANGES; range++) {
		for (idx = 0; idx < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE; idx++) {
			total += histogram->bucket[range][idx];
		}
	}

	/*
	 * Second pass: report each bucket.  A bucket's lower bound is the
	 * previous bucket's upper bound; the very first bucket starts at 0.
	 */
	cumulative = 0;
	upper = 0;
	for (range = 0; range < SPDK_HISTOGRAM_NUM_BUCKET_RANGES; range++) {
		for (idx = 0; idx < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE; idx++) {
			count = histogram->bucket[range][idx];
			cumulative += count;
			lower = upper;
			upper = __spdk_histogram_data_get_bucket_start(range, idx);
			fn(ctx, lower, upper, count, total, cumulative);
		}
	}
}

#ifdef __cplusplus
}
#endif

#endif