Commit 199fa601 authored by Jim Harris's avatar Jim Harris Committed by Tomasz Zawadzki
Browse files

util: add zipf random number generator



Zipf is a power-law probability distribution. When
applied to performance testing of block devices, it
will select blocks over the full range of LBAs, but
will select lower-numbered LBAs more frequently.
The theta parameter governs the distribution: higher
values of theta will concentrate the distribution on
a smaller number of LBAs.

Note that fio supports zipf, so adding it to SPDK
will enable our perf tools (bdevperf, nvme-perf) to
provide similar functionality.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I7df129c9d61996a2070188c6cd9f1fde631ac208
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7779


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent da976633
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -50,6 +50,7 @@ extern "C" {
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>

include/spdk/zipf.h

0 → 100644
+83 −0
Original line number Diff line number Diff line
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * Zipf random number distribution
 */

#ifndef SPDK_ZIPF_H
#define SPDK_ZIPF_H

#include "spdk/stdinc.h"

#ifdef __cplusplus
extern "C" {
#endif

struct spdk_zipf;

/**
 * Create a zipf random number generator.
 *
 * Numbers from [0, range) will be returned by the generator when
 * calling \ref spdk_zipf_generate.
 *
 * \param range Range of values for the zipf distribution.
 * \param theta Theta distribution parameter. Higher values concentrate the
 * distribution on a smaller number of low values.
 * \param seed Seed value for the random number generator.
 *
 * \return a pointer to the new zipf generator, or NULL if memory for the
 * generator could not be allocated. Release it with \ref spdk_zipf_free.
 */
struct spdk_zipf *spdk_zipf_create(uint64_t range, double theta, uint32_t seed);

/**
 * Free a zipf generator and set the pointer to NULL.
 *
 * \param zipfp Address of the pointer to the zipf generator to free. The
 * address itself must not be NULL (this is checked with an assert); the
 * generator pointer stored there is passed to free() and then cleared.
 */
void spdk_zipf_free(struct spdk_zipf **zipfp);

/**
 * Generate a value from the zipf generator.
 *
 * Every call updates the random number state kept inside the generator,
 * so the generator is modified by this function.
 *
 * \param zipf Zipf generator to generate the value from.
 *
 * \return value in the range [0, range)
 */
uint64_t spdk_zipf_generate(struct spdk_zipf *zipf);

#ifdef __cplusplus
}
#endif

#endif
+2 −2
Original line number Diff line number Diff line
@@ -35,11 +35,11 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

SO_VER := 3
SO_MINOR := 0
SO_MINOR := 1

C_SRCS = base64.c bit_array.c cpuset.c crc16.c crc32.c crc32c.c crc32_ieee.c \
	 dif.c fd.c file.c iov.c math.c pipe.c strerror_tls.c string.c uuid.c \
	 fd_group.c
	 fd_group.c zipf.c
LIBNAME = util
LOCAL_SYS_LIBS = -luuid

+5 −0
Original line number Diff line number Diff line
@@ -144,5 +144,10 @@
	spdk_fd_group_event_modify;
	spdk_fd_group_get_fd;

	# public functions in zipf.h
	spdk_zipf_create;
	spdk_zipf_free;
	spdk_zipf_generate;

	local: *;
};

lib/util/zipf.c

0 → 100644
+139 −0
Original line number Diff line number Diff line
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"
#include "spdk/util.h"
#include "spdk/zipf.h"

/*
 * State of a single zipf generator.
 *
 * Every field except seed is filled in once by spdk_zipf_create(); seed is
 * handed to rand_r() by spdk_zipf_generate() and therefore changes on every
 * generated value.
 */
struct spdk_zipf {
	/* Size of the value range; generated values are reduced modulo range. */
	uint64_t	range;
	/* 1 / (1 - theta); exponent passed to pow() when generating values above 1. */
	double		alpha;
	/* Scale factor applied to (randu - 1) when generating values above 1. */
	double		eta;
	/* Distribution parameter as passed to spdk_zipf_create(). */
	double		theta;
	/* zeta(range, theta); scales the uniform sample before the 0 / 1 shortcuts. */
	double		zetan;
	/* 1 + 0.5^theta; scaled samples below this limit (but >= 1.0) yield 1. */
	double		val1_limit;
	/* rand_r() state, initialized from the seed given at creation. */
	uint32_t	seed;
};

static double
zeta_increment(uint64_t n, double theta)
{
	return pow((double) 1.0 / (n + 1), theta);
}

/*
 * Approximate the sum of zeta_increment(i, theta) for i in [0, range).
 *
 * The leading terms (at most 10 million of them) are added one at a time.
 * The rest of the range is estimated in chunks of up to 1 million terms:
 * each chunk contributes the mean of the increments at its two ends
 * multiplied by the chunk length.
 */
static double
zeta(uint64_t range, double theta)
{
	const uint32_t exact_limit = 10 * 1000 * 1000;
	const uint32_t chunk_limit = 1 * 1000 * 1000;
	uint64_t exact_end, idx, chunk;
	double first, last;
	double sum = 0;

	/* Exact part: every term is evaluated individually. */
	exact_end = spdk_min(exact_limit, range);
	for (idx = 0; idx < exact_end; idx++) {
		sum += zeta_increment(idx, theta);
	}

	/* Estimated part: the last chunk is shortened so it ends exactly at
	 * the end of the range.
	 */
	for (; idx < range; idx += chunk) {
		chunk = spdk_min(chunk_limit, range - idx);
		first = zeta_increment(idx, theta);
		last = zeta_increment(idx + chunk, theta);
		sum += (first + last) * chunk / 2;
	}

	return sum;
}

/*
 * Allocate a generator and precompute the constants that
 * spdk_zipf_generate() needs for the given range and theta.
 *
 * Returns NULL if the allocation fails.
 */
struct spdk_zipf *
spdk_zipf_create(uint64_t range, double theta, uint32_t seed)
{
	struct spdk_zipf *gen = calloc(1, sizeof(*gen));
	double eta_num, eta_den;

	if (!gen) {
		return NULL;
	}

	gen->seed = seed;
	gen->range = range;
	gen->theta = theta;

	gen->zetan = zeta(range, theta);
	gen->alpha = 1.0 / (1.0 - theta);
	gen->val1_limit = 1.0 + pow(0.5, theta);

	/* eta is a ratio; build numerator and denominator separately. */
	eta_num = 1.0 - pow(2.0 / range, 1.0 - theta);
	eta_den = 1.0 - zeta(2, theta) / gen->zetan;
	gen->eta = eta_num / eta_den;

	return gen;
}

/*
 * Release the generator referenced by *zipfp and reset *zipfp to NULL.
 * zipfp itself must not be NULL.
 */
void
spdk_zipf_free(struct spdk_zipf **zipfp)
{
	struct spdk_zipf *victim;

	assert(zipfp != NULL);

	/* Detach the pointer from the caller first, then release the memory. */
	victim = *zipfp;
	*zipfp = NULL;
	free(victim);
}

/*
 * Draw the next value from the generator.
 *
 * A uniform sample is taken with rand_r() (which updates zipf->seed) and
 * scaled by zetan.  Scaled samples below 1.0 yield 0, samples below
 * val1_limit yield 1, and everything else is mapped through the
 * eta / alpha power formula and reduced modulo the range.
 *
 * \param zipf Generator created by spdk_zipf_create().
 *
 * \return the generated value.
 */
uint64_t
spdk_zipf_generate(struct spdk_zipf *zipf)
{
	double randu, randz, scaled;
	uint64_t val;

	/* rand_r() returns [0, RAND_MAX], so randu is in [0, 1] inclusive. */
	randu = (double)rand_r(&zipf->seed) / RAND_MAX;
	randz = randu * zipf->zetan;

	if (randz < 1.0) {
		return 0;
	} else if (randz < zipf->val1_limit) {
		/* With a range of 1, zetan is 1.0 and randu == 1.0 ends up here;
		 * returning 1 would then fall outside [0, range).
		 */
		return zipf->range > 1 ? 1 : 0;
	}

	scaled = zipf->range * pow(zipf->eta * (randu - 1.0) + 1.0, zipf->alpha);

	/* Converting NaN, a negative value or a value of 2^64 and above to
	 * uint64_t is undefined behavior.  eta can be NaN (e.g. 0 / 0 for a
	 * range of 2), so reject such results before the conversion.
	 * (double)UINT64_MAX rounds to exactly 2^64.
	 */
	if (!(scaled >= 0.0 && scaled < (double)UINT64_MAX)) {
		return 0;
	}

	val = (uint64_t)scaled;
	return val % zipf->range;
}
Loading