Commit 5044e4f6 authored by Jim Harris's avatar Jim Harris
Browse files

histogram: move nvme/perf histogram code to a common header



Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: Ia0083365b2da63cb38aebb9f7bbc02f4dfd1ae94

Reviewed-on: https://review.gerrithub.io/365263


Tested-by: default avatarSPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarDaniel Verkamp <daniel.verkamp@intel.com>
parent ad20a6dd
Loading
Loading
Loading
Loading
+42 −127
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@
#include "spdk/queue.h"
#include "spdk/string.h"
#include "spdk/nvme_intel.h"
#include "spdk/histogram_data.h"

#if HAVE_LIBAIO
#include <libaio.h>
@@ -81,41 +82,6 @@ struct ns_entry {
	char			name[1024];
};

/*
 * Latency tracking is done with ranges of bucket arrays.  The bucket
 * for any given I/O is determined solely by the TSC delta - any
 * translation to microseconds is only done after the test is finished
 * and statistics are printed.
 *
 * Each range has a number of buckets determined by NUM_BUCKETS_PER_RANGE
 * which is 128.  The buckets in ranges 0 and 1 each map to one specific
 * TSC delta.  The buckets in subsequent ranges each map to twice as many
 * TSC deltas as buckets in the range before it:
 *
 * Range 0:  1 TSC each - 128 buckets cover 0 to 127 (2^7-1)
 * Range 1:  1 TSC each - 128 buckets cover 128 to 255 (2^8-1)
 * Range 2:  2 TSC each - 128 buckets cover 256 to 511 (2^9-1)
 * Range 3:  4 TSC each - 128 buckets cover 512 to 1023 (2^10-1)
 * Range 4:  8 TSC each - 128 buckets cover 1024 to 2047 (2^11-1)
 * Range 5: 16 TSC each - 128 buckets cover 2048 to 4095 (2^12-1)
 * ...
 * Range 55: 2^54 TSC each - 128 buckets cover 2^61 to 2^62-1
 * Range 56: 2^55 TSC each - 128 buckets cover 2^62 to 2^63-1
 * Range 57: 2^56 TSC each - 128 buckets cover 2^63 to 2^64-1
 *
 * On a 2.3GHz processor, this strategy results in 50ns buckets in the
 * 7-14us range (sweet spot for Intel Optane SSD latency testing).
 *
 * Buckets can be made more granular by increasing BUCKET_SHIFT.  This
 * comes at the cost of additional storage per namespace context to
 * store the bucket data.
 */
#define BUCKET_SHIFT 7
#define BUCKET_LSB (64 - BUCKET_SHIFT)
#define NUM_BUCKETS_PER_RANGE (1ULL << BUCKET_SHIFT)
#define BUCKET_MASK (NUM_BUCKETS_PER_RANGE - 1)
#define NUM_BUCKET_RANGES (BUCKET_LSB + 1)

static const double g_latency_cutoffs[] = {
	0.01,
	0.10,
@@ -157,7 +123,7 @@ struct ns_worker_ctx {

	struct ns_worker_ctx	*next;

	uint64_t		bucket[NUM_BUCKET_RANGES][NUM_BUCKETS_PER_RANGE];
	struct spdk_histogram_data	histogram;
};

struct perf_task {
@@ -215,63 +181,6 @@ static int g_aio_optind; /* Index of first AIO filename in argv */
static void
task_complete(struct perf_task *task);

static uint32_t
get_bucket_range(uint64_t tsc)
{
	uint32_t clz, range;

	assert(tsc != 0);

	clz = __builtin_clzll(tsc);

	if (clz <= BUCKET_LSB) {
		range = BUCKET_LSB - clz;
	} else {
		range = 0;
	}

	return range;
}

static uint32_t
get_bucket_index(uint64_t tsc, uint32_t range)
{
	uint32_t shift;

	if (range == 0) {
		shift = 0;
	} else {
		shift = range - 1;
	}

	return (tsc >> shift) & BUCKET_MASK;
}

static double
get_us_from_bucket(uint32_t range, uint32_t index)
{
	uint64_t tsc;

	index += 1;
	if (range > 0) {
		tsc = 1ULL << (range + BUCKET_SHIFT - 1);
		tsc += (uint64_t)index << (range - 1);
	} else {
		tsc = index;
	}

	return (double)tsc * 1000 * 1000 / g_tsc_rate;
}

static void
track_latency(struct ns_worker_ctx *ns_ctx, uint64_t tsc)
{
	uint32_t range = get_bucket_range(tsc);
	uint32_t index = get_bucket_index(tsc, range);

	ns_ctx->bucket[range][index]++;
}

static void
register_ns(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns)
{
@@ -607,7 +516,7 @@ task_complete(struct perf_task *task)
		ns_ctx->max_tsc = tsc_diff;
	}
	if (g_latency_sw_tracking_level > 0) {
		track_latency(ns_ctx, tsc_diff);
		spdk_histogram_data_tally(&ns_ctx->histogram, tsc_diff);
	}
	rte_mempool_put(task_pool, task);

@@ -792,6 +701,41 @@ static void usage(char *program_name)
	printf("\t[-i shared memory group ID]\n");
}

/*
 * spdk_histogram_data_iterate() callback: prints each latency percentile
 * cutoff as soon as the cumulative I/O count crosses it.  ctx is a pointer
 * to the caller's cursor into the cutoff array, advanced as cutoffs are
 * consumed.
 */
static void
check_cutoff(void *ctx, uint64_t start, uint64_t end, uint64_t count,
	     uint64_t total, uint64_t so_far)
{
	double **cutoff = ctx;
	double cumulative_frac;

	/* Empty buckets cannot cross a cutoff; skip them. */
	if (count == 0) {
		return;
	}

	cumulative_frac = (double)so_far / total;

	/* Emit every cutoff this bucket's cumulative fraction reaches. */
	for (; **cutoff > 0 && cumulative_frac >= **cutoff; (*cutoff)++) {
		printf("%8.4f%% : %9.3fus\n", **cutoff * 100, (double)end * 1000 * 1000 / g_tsc_rate);
	}
}

/*
 * spdk_histogram_data_iterate() callback: prints one non-empty histogram
 * bucket as a microsecond range, cumulative percentage, and I/O count.
 * ctx is unused.
 */
static void
print_bucket(void *ctx, uint64_t start, uint64_t end, uint64_t count,
	     uint64_t total, uint64_t so_far)
{
	double start_us, end_us, cumulative_pct;

	/* Only buckets that captured at least one I/O are printed. */
	if (count == 0) {
		return;
	}

	/* Convert TSC bucket bounds to microseconds for display. */
	start_us = (double)start * 1000 * 1000 / g_tsc_rate;
	end_us = (double)end * 1000 * 1000 / g_tsc_rate;
	cumulative_pct = (double)so_far * 100 / total;

	printf("%9.3f - %9.3f: %9.4f%%  (%9ju)\n",
	       start_us, end_us, cumulative_pct, count);
}

static void
print_performance(void)
{
@@ -858,27 +802,13 @@ print_performance(void)
	while (worker) {
		ns_ctx = worker->ns_ctx;
		while (ns_ctx) {
			uint64_t i, j, so_far = 0;
			double so_far_pct = 0, bucket = 0;
			const double *cutoff = g_latency_cutoffs;

			printf("Summary latency data for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore);
			printf("=================================================================================\n");

			for (i = 0; i < NUM_BUCKET_RANGES; i++) {
				for (j = 0; j < NUM_BUCKETS_PER_RANGE; j++) {
					so_far += ns_ctx->bucket[i][j];
					so_far_pct = (double)so_far / total_io_completed;
					bucket = get_us_from_bucket(i, j);
					if (ns_ctx->bucket[i][j] == 0) {
						continue;
					}
					while (so_far_pct >= *cutoff && *cutoff > 0) {
						printf("%8.4f%% : %9.3fus\n", *cutoff * 100, bucket);
						cutoff++;
					}
				}
			}
			spdk_histogram_data_iterate(&ns_ctx->histogram, check_cutoff, &cutoff);

			printf("\n");
			ns_ctx = ns_ctx->next;
		}
@@ -893,27 +823,11 @@ print_performance(void)
	while (worker) {
		ns_ctx = worker->ns_ctx;
		while (ns_ctx) {
			uint64_t i, j, so_far = 0;
			float so_far_pct = 0;
			double last_bucket, bucket = 0;

			printf("Latency histogram for %-43.43s from core %u:\n", ns_ctx->entry->name, worker->lcore);
			printf("==============================================================================\n");
			printf("       Range in us     Cumulative    IO count\n");

			for (i = 0; i < NUM_BUCKET_RANGES; i++) {
				for (j = 0; j < NUM_BUCKETS_PER_RANGE; j++) {
					so_far += ns_ctx->bucket[i][j];
					so_far_pct = (float)so_far * 100 / total_io_completed;
					last_bucket = bucket;
					bucket = get_us_from_bucket(i, j);
					if (ns_ctx->bucket[i][j] == 0) {
						continue;
					}
					printf("%9.3f - %9.3f: %9.4f%%  (%9ju)\n",
					       last_bucket, bucket, so_far_pct, ns_ctx->bucket[i][j]);
				}
			}
			spdk_histogram_data_iterate(&ns_ctx->histogram, print_bucket, NULL);
			printf("\n");
			ns_ctx = ns_ctx->next;
		}
@@ -1376,6 +1290,7 @@ associate_workers_with_ns(void)
		ns_ctx->min_tsc = UINT64_MAX;
		ns_ctx->entry = entry;
		ns_ctx->next = worker->ns_ctx;
		spdk_histogram_data_reset(&ns_ctx->histogram);
		worker->ns_ctx = ns_ctx;

		worker = worker->next;
+189 −0
Original line number Diff line number Diff line
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 * Generic histogram library
 */

#ifndef _SPDK_HISTOGRAM_DATA_H_
#define _SPDK_HISTOGRAM_DATA_H_

#include "spdk/stdinc.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Histogram bucket geometry: each range holds 2^SPDK_HISTOGRAM_BUCKET_SHIFT
 * buckets.  See the detailed range/bucket layout description below.
 */
#define SPDK_HISTOGRAM_BUCKET_SHIFT		7
#define SPDK_HISTOGRAM_BUCKET_LSB		(64 - SPDK_HISTOGRAM_BUCKET_SHIFT)
#define SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE	(1ULL << SPDK_HISTOGRAM_BUCKET_SHIFT)
#define SPDK_HISTOGRAM_BUCKET_MASK		(SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE - 1)
#define SPDK_HISTOGRAM_NUM_BUCKET_RANGES	(SPDK_HISTOGRAM_BUCKET_LSB + 1)

/*
 * SPDK histograms are implemented using ranges of bucket arrays.  The most common usage
 * model is using TSC datapoints to capture an I/O latency histogram.  For this usage model,
 * the histogram tracks only TSC deltas - any translation to microseconds is done by the
 * histogram user calling spdk_histogram_data_iterate() to iterate over the buckets to perform
 * the translations.
 *
 * Each range has a number of buckets determined by SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE
 * which is 128.  The buckets in ranges 0 and 1 each map to one specific datapoint value.
 * The buckets in subsequent ranges each map to twice as many datapoint values as buckets
 * in the range before it:
 *
 * Range 0:  1 value each  - 128 buckets cover 0 to 127 (2^7-1)
 * Range 1:  1 value each  - 128 buckets cover 128 to 255 (2^8-1)
 * Range 2:  2 values each - 128 buckets cover 256 to 511 (2^9-1)
 * Range 3:  4 values each - 128 buckets cover 512 to 1023 (2^10-1)
 * Range 4:  8 values each - 128 buckets cover 1024 to 2047 (2^11-1)
 * Range 5: 16 values each - 128 buckets cover 2048 to 4095 (2^12-1)
 * ...
 * Range 55: 2^54 values each - 128 buckets cover 2^61 to 2^62-1
 * Range 56: 2^55 values each - 128 buckets cover 2^62 to 2^63-1
 * Range 57: 2^56 values each - 128 buckets cover 2^63 to 2^64-1
 *
 * On a 2.3GHz processor, this strategy results in 50ns buckets in the 7-14us range (sweet
 * spot for Intel Optane SSD latency testing).
 *
 * Buckets can be made more granular by increasing SPDK_HISTOGRAM_BUCKET_SHIFT.  This
 * comes at the cost of additional storage per namespace context to store the bucket data.
 */

/*
 * Bucketed counters for one histogram instance.  bucket[range][index] holds
 * the number of datapoints tallied into that bucket; see the layout
 * description above for how (range, index) maps to datapoint values.
 */
struct spdk_histogram_data {

	uint64_t	bucket[SPDK_HISTOGRAM_NUM_BUCKET_RANGES][SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE];

};

static inline void
spdk_histogram_data_reset(struct spdk_histogram_data *histogram)
{
	memset(histogram, 0, sizeof(*histogram));
}

/*
 * Map a non-zero datapoint to its bucket range.  The range is derived from
 * the position of the datapoint's most significant set bit; all values that
 * fit in the low SPDK_HISTOGRAM_BUCKET_SHIFT+1 bits fall into range 0.
 */
static inline uint32_t
__spdk_histogram_data_get_bucket_range(uint64_t datapoint)
{
	uint32_t clz;

	/* __builtin_clzll(0) is undefined, so zero datapoints are not allowed. */
	assert(datapoint != 0);

	clz = __builtin_clzll(datapoint);

	return (clz <= SPDK_HISTOGRAM_BUCKET_LSB) ? SPDK_HISTOGRAM_BUCKET_LSB - clz : 0;
}

/*
 * Map a datapoint to its bucket index within the given range.  Ranges 0 and
 * 1 both map one value per bucket (shift 0); each subsequent range doubles
 * the number of values per bucket.
 */
static inline uint32_t
__spdk_histogram_data_get_bucket_index(uint64_t datapoint, uint32_t range)
{
	uint32_t shift = (range > 0) ? range - 1 : 0;

	return (datapoint >> shift) & SPDK_HISTOGRAM_BUCKET_MASK;
}

/* Record one (non-zero) datapoint by incrementing its bucket's counter. */
static inline void
spdk_histogram_data_tally(struct spdk_histogram_data *histogram, uint64_t datapoint)
{
	uint32_t bucket_range, bucket_index;

	bucket_range = __spdk_histogram_data_get_bucket_range(datapoint);
	bucket_index = __spdk_histogram_data_get_bucket_index(datapoint, bucket_range);

	histogram->bucket[bucket_range][bucket_index]++;
}

/*
 * Return the first datapoint value belonging to the bucket *after*
 * (range, index) - i.e. the exclusive upper bound of the given bucket.
 * spdk_histogram_data_iterate() uses consecutive results as the [start, end)
 * boundaries it passes to the callback.
 */
static inline uint64_t
__spdk_histogram_data_get_bucket_start(uint32_t range, uint32_t index)
{
	uint64_t bound;

	index += 1;
	if (range == 0) {
		/* Range 0 buckets each cover exactly one value. */
		return index;
	}

	/* Base value of the range, plus index buckets of 2^(range-1) values each. */
	bound = 1ULL << (range + SPDK_HISTOGRAM_BUCKET_SHIFT - 1);
	return bound + ((uint64_t)index << (range - 1));
}

/*
 * Iteration callback: invoked once per bucket (empty buckets included) with
 * the bucket's [start, end) datapoint bounds, its count, the histogram-wide
 * total, and the cumulative count up to and including this bucket.
 */
typedef void (*spdk_histogram_data_fn)(void *ctx, uint64_t start, uint64_t end, uint64_t count,
				       uint64_t total, uint64_t so_far);

/*
 * Walk every bucket in ascending datapoint order, invoking fn for each one
 * (including empty buckets).  fn receives the bucket's [start, end) datapoint
 * bounds, its count, the total datapoint count of the whole histogram, and
 * the running cumulative count through this bucket.
 */
static inline void
spdk_histogram_data_iterate(const struct spdk_histogram_data *histogram,
			    spdk_histogram_data_fn fn, void *ctx)
{
	uint64_t range, idx;
	uint64_t count, cumulative, total;
	uint64_t lower, upper;

	/* First pass: total datapoint count, needed by every callback. */
	total = 0;
	for (range = 0; range < SPDK_HISTOGRAM_NUM_BUCKET_RANGES; range++) {
		for (idx = 0; idx < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE; idx++) {
			total += histogram->bucket[range][idx];
		}
	}

	/*
	 * Second pass: report each bucket.  A bucket's lower bound is the
	 * previous bucket's upper bound; the very first bucket starts at 0.
	 */
	cumulative = 0;
	upper = 0;
	for (range = 0; range < SPDK_HISTOGRAM_NUM_BUCKET_RANGES; range++) {
		for (idx = 0; idx < SPDK_HISTOGRAM_NUM_BUCKETS_PER_RANGE; idx++) {
			count = histogram->bucket[range][idx];
			cumulative += count;
			lower = upper;
			upper = __spdk_histogram_data_get_bucket_start(range, idx);
			fn(ctx, lower, upper, count, total, cumulative);
		}
	}
}

#ifdef __cplusplus
}
#endif

#endif