Commit 88754353 authored by Ziye Yang, committed by Tomasz Zawadzki
Browse files

lib/accel: Add the real chained crc32 support with the example.



This patch adds the chained crc32 support API for both batched
and non-batched mode usage. It also updates the accel_perf
program to use the revised accelerated crc32 function.

For example, you can use the following command:

./build/examples/accel_perf -C 4 -q 128 -o 4096 -t 5 -w crc32c -y

In this command, "-C 4" sets the io vector size to 4, so the chained
crc32 is calculated over an iov array with 4 elements.
(This works even if you do not have the accelerated DSA hardware.)

Signed-off-by: Ziye Yang <ziye.yang@intel.com>
Change-Id: Ifede26f9040980b5791da8e5afef41177eede9f6
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6457


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
parent 1f49ee23
Loading
Loading
Loading
Loading
+82 −19
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@ static int g_ops_per_batch = 0;
static int g_threads_per_core = 1;
static int g_time_in_sec = 5;
static uint32_t g_crc32c_seed = 0;
static uint32_t g_crc32c_chained_count = 1;
static int g_fail_percent_goal = 0;
static uint8_t g_fill_pattern = 255;
static bool g_verify = false;
@@ -74,7 +75,8 @@ struct display_info {

struct ap_task {
	void			*src;
	struct iovec		iov;
	struct iovec		*iovs;
	uint32_t		iov_cnt;
	void			*dst;
	void			*dst2;
	struct worker_thread	*worker;
@@ -121,6 +123,7 @@ dump_user_config(struct spdk_app_opts *opts)
	printf("Workload Type:  %s\n", g_workload_type);
	if (g_workload_selection == ACCEL_CRC32C) {
		printf("CRC-32C seed:   %u\n", g_crc32c_seed);
		printf("vector size:    %u\n", g_crc32c_chained_count);
	} else if (g_workload_selection == ACCEL_FILL) {
		printf("Fill pattern:   0x%x\n", g_fill_pattern);
	} else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) {
@@ -144,7 +147,9 @@ usage(void)
	printf("accel_perf options:\n");
	printf("\t[-h help message]\n");
	printf("\t[-q queue depth per core]\n");
	printf("\t[-C for crc32c workload, use this value to configre the io vector size to test (default 1)\n");
	printf("\t[-T number of threads per core\n");
	printf("\t[-n number of channels]\n");
	printf("\t[-o transfer size in bytes]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast\n");
@@ -162,6 +167,9 @@ parse_args(int argc, char *argv)
	case 'b':
		g_ops_per_batch = spdk_strtol(optarg, 10);
		break;
	case 'C':
		g_crc32c_chained_count = spdk_strtol(optarg, 10);
		break;
	case 'f':
		g_fill_pattern = (uint8_t)spdk_strtol(optarg, 10);
		break;
@@ -204,6 +212,7 @@ parse_args(int argc, char *argv)
		usage();
		return 1;
	}

	return 0;
}

@@ -230,6 +239,7 @@ static int
_get_task_data_bufs(struct ap_task *task)
{
	uint32_t align = 0;
	uint32_t i = 0;

	/* For dualcast, the DSA HW requires 4K alignment on destination addresses but
	 * we do this for all engines to keep it simple.
@@ -238,14 +248,38 @@ _get_task_data_bufs(struct ap_task *task)
		align = ALIGN_4K;
	}

	if (g_workload_selection == ACCEL_CRC32C) {
		assert(g_crc32c_chained_count > 0);
		task->iov_cnt = g_crc32c_chained_count;
		task->iovs = calloc(task->iov_cnt, sizeof(struct iovec));
		if (!task->iovs) {
			fprintf(stderr, "cannot allocated task->iovs fot task=%p\n", task);
			return -ENOMEM;
		}

		for (i = 0; i < task->iov_cnt; i++) {
			task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
			if (task->iovs[i].iov_base == NULL) {
				return -ENOMEM;
			}
			memset(task->iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes);
			task->iovs[i].iov_len = g_xfer_size_bytes;
		}

	} else {
		task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
		if (task->src == NULL) {
			fprintf(stderr, "Unable to alloc src buffer\n");
			return -ENOMEM;
		}

		/* For fill, set the entire src buffer so we can check if verify is enabled. */
		if (g_workload_selection == ACCEL_FILL) {
			memset(task->src, g_fill_pattern, g_xfer_size_bytes);
		} else {
			memset(task->src, DATA_PATTERN, g_xfer_size_bytes);
	task->iov.iov_base = task->src;
	task->iov.iov_len = g_xfer_size_bytes;
		}
	}

	task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
	if (task->dst == NULL) {
@@ -260,11 +294,6 @@ _get_task_data_bufs(struct ap_task *task)
		memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes);
	}

	/* For fill, set the entire src buffer so we can check if verify is enabled. */
	if (g_workload_selection == ACCEL_FILL) {
		memset(task->src, g_fill_pattern, g_xfer_size_bytes);
	}

	if (g_workload_selection == ACCEL_DUALCAST) {
		task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
		if (task->dst2 == NULL) {
@@ -316,7 +345,7 @@ _submit_single(struct worker_thread *worker, struct ap_task *task)
		break;
	case ACCEL_CRC32C:
		rc = spdk_accel_submit_crc32cv(worker->ch, (uint32_t *)task->dst,
					       &task->iov, 1, g_crc32c_seed,
					       task->iovs, task->iov_cnt, g_crc32c_seed,
					       accel_done, task);
		break;
	case ACCEL_COMPARE:
@@ -376,7 +405,7 @@ _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
		break;
	case ACCEL_CRC32C:
		rc = spdk_accel_batch_prep_crc32cv(worker->ch, batch, (uint32_t *)task->dst,
						   &task->iov, 1, g_crc32c_seed, accel_done, task);
						   task->iovs, task->iov_cnt, g_crc32c_seed, accel_done, task);
		break;
	default:
		assert(false);
@@ -389,7 +418,21 @@ _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
static void
_free_task_buffers(struct ap_task *task)
{
	uint32_t i;

	if (g_workload_selection == ACCEL_CRC32C) {
		if (task->iovs) {
			for (i = 0; i < task->iov_cnt; i++) {
				if (task->iovs[i].iov_base) {
					spdk_dma_free(task->iovs[i].iov_base);
				}
			}
			free(task->iovs);
		}
	} else {
		spdk_dma_free(task->src);
	}

	spdk_dma_free(task->dst);
	if (g_workload_selection == ACCEL_DUALCAST) {
		spdk_dma_free(task->dst2);
@@ -517,6 +560,20 @@ batch_done(void *cb_arg, int status)
	spdk_thread_send_msg(worker_batch->worker->thread, _batch_done, worker_batch);
}

/*
 * Fold every element of an iovec array into a running CRC-32C value.
 *
 * iov    - array of buffers to checksum, visited in index order
 * iovcnt - number of entries in iov; no entry is visited when it is <= 0
 * crc32c - starting value handed to spdk_crc32c_update() for the first entry
 *
 * Returns the value produced by the last spdk_crc32c_update() call, or the
 * starting value unchanged when no entry was visited.
 */
static uint32_t
_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c)
{
	const struct iovec *cur = iov;
	int remaining = iovcnt;

	while (remaining > 0) {
		/* Each entry must describe a real, non-empty buffer. */
		assert(cur->iov_base != NULL);
		assert(cur->iov_len != 0);

		/* The output of one buffer becomes the input CRC of the next. */
		crc32c = spdk_crc32c_update(cur->iov_base, cur->iov_len, crc32c);

		cur++;
		remaining--;
	}

	return crc32c;
}

static void
_accel_done(void *arg1)
{
@@ -530,8 +587,7 @@ _accel_done(void *arg1)
	if (g_verify && task->status == 0) {
		switch (g_workload_selection) {
		case ACCEL_CRC32C:
			/* calculate sw CRC-32C and compare to sw accel result. */
			sw_crc32c = spdk_crc32c_update(task->src, g_xfer_size_bytes, ~g_crc32c_seed);
			sw_crc32c = _update_crc32c_iov(task->iovs, task->iov_cnt, ~g_crc32c_seed);
			if (*(uint32_t *)task->dst != sw_crc32c) {
				SPDK_NOTICELOG("CRC-32C miscompare\n");
				worker->xfer_failed++;
@@ -912,7 +968,7 @@ main(int argc, char **argv)
	pthread_mutex_init(&g_workers_lock, NULL);
	spdk_app_opts_init(&opts, sizeof(opts));
	opts.reactor_mask = "0x1";
	if (spdk_app_parse_args(argc, argv, &opts, "o:q:t:yw:P:f:b:T:", NULL, parse_args,
	if (spdk_app_parse_args(argc, argv, &opts, "C:o:q:t:yw:P:f:b:T:", NULL, parse_args,
				usage) != SPDK_APP_PARSE_ARGS_SUCCESS) {
		g_rc = -1;
		goto cleanup;
@@ -935,6 +991,13 @@ main(int argc, char **argv)
		goto cleanup;
	}

	if (g_workload_selection == ACCEL_CRC32C &&
	    g_crc32c_chained_count == 0) {
		usage();
		g_rc = -1;
		goto cleanup;
	}

	dump_user_config(&opts);
	g_rc = spdk_app_start(&opts, accel_perf_start, NULL);
	if (g_rc) {
+16 −4
Original line number Diff line number Diff line
@@ -80,14 +80,26 @@ struct spdk_accel_task {
	struct spdk_accel_batch		*batch;
	spdk_accel_completion_cb	cb_fn;
	void				*cb_arg;
	union {
		struct {
			struct iovec			*iovs; /* iovs passed by the caller */
			uint32_t			iovcnt; /* iovcnt passed by the caller */
		} v;
		void				*src;
	};
	union {
		void			*dst;
		void			*src2;
	};
	union {
		struct {
			spdk_accel_completion_cb	cb_fn;
			void				*cb_arg;
		} chained;
		void				*dst2;
		uint32_t			seed;
		uint64_t			fill_pattern;
	};
	enum accel_opcode		op_code;
	uint64_t			nbytes;
	TAILQ_ENTRY(spdk_accel_task)	link;
+157 −13
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include "spdk_internal/accel_engine.h"

#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/thread.h"
#include "spdk/json.h"
@@ -72,6 +73,7 @@ static void _sw_accel_copy(void *dst, void *src, uint64_t nbytes);
static int _sw_accel_compare(void *src1, void *src2, uint64_t nbytes);
static void _sw_accel_fill(void *dst, uint8_t fill, uint64_t nbytes);
static void _sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes);
static void _sw_accel_crc32cv(uint32_t *dst, struct iovec *iov, uint32_t iovcnt, uint32_t seed);

/* Registration of hw modules (currently supports only 1 at a time) */
void
@@ -111,13 +113,19 @@ void
spdk_accel_task_complete(struct spdk_accel_task *accel_task, int status)
{
	struct accel_io_channel *accel_ch = accel_task->accel_ch;
	struct spdk_accel_batch *batch;
	struct spdk_accel_batch *batch = accel_task->batch;
	spdk_accel_completion_cb	cb_fn = accel_task->cb_fn;
	void				*cb_arg = accel_task->cb_arg;

	accel_task->cb_fn(accel_task->cb_arg, status);
	/* Return the accel_task to the task pool before invoking the user's
	 * callback (cb_fn), so that the pool is not exhausted when the callback
	 * recursively allocates another accel_task.
	 */
	TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);

	cb_fn(cb_arg, status);
	/* If this task is part of a batch, check for completion of the batch. */
	if (accel_task->batch) {
		batch = accel_task->batch;
	if (batch) {
		assert(batch->count > 0);
		batch->count--;
		if (batch->count == 0) {
@@ -129,8 +137,6 @@ spdk_accel_task_complete(struct spdk_accel_task *accel_task, int status)
			TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link);
		}
	}

	TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);
}

/* Accel framework public API for discovering current engine capabilities. */
@@ -308,6 +314,7 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, u

	accel_task->dst = (void *)dst;
	accel_task->src = src;
	accel_task->v.iovcnt = 0;
	accel_task->seed = seed;
	accel_task->nbytes = nbytes;
	accel_task->op_code = ACCEL_OPCODE_CRC32C;
@@ -321,21 +328,79 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, u
	}
}

static void
crc32cv_done(void *cb_arg, int status)
{
	struct spdk_accel_task *accel_task = cb_arg;
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(accel_task->accel_ch);

	assert(accel_task->chained.cb_fn != NULL);
	assert(accel_task->chained.cb_arg != NULL);

	if (spdk_likely(!status)) {
		status = spdk_accel_submit_crc32cv(ch, accel_task->dst, ++accel_task->v.iovs,
						   accel_task->v.iovcnt - 1, ~(*((uint32_t *)accel_task->dst)),
						   accel_task->chained.cb_fn, accel_task->chained.cb_arg);
		if (spdk_likely(!status)) {
			return;
		}
	}

	accel_task->chained.cb_fn(accel_task->chained.cb_arg, status);
}

/* Accel framework public API for chained CRC-32C function.
 *
 * Computes a CRC-32C over the iov_cnt buffers described by iov and stores the
 * result at dst.
 *
 * ch      - accel I/O channel to submit on
 * dst     - where the CRC-32C result is written
 * iov     - array of buffers to checksum; must not be NULL
 * iov_cnt - number of entries in iov; must not be zero
 * seed    - CRC seed value
 * cb_fn   - completion callback
 * cb_arg  - argument passed to cb_fn
 *
 * A single-element array is forwarded to spdk_accel_submit_crc32c(). For
 * longer arrays, when the engine supports CRC-32C the first element is
 * submitted and crc32cv_done() chains the rest; otherwise the whole array is
 * computed in software and the task is completed immediately.
 *
 * Returns -EINVAL for a NULL iov or a zero iov_cnt, -ENOMEM when no task is
 * available, 0 on the software path, and otherwise whatever the forwarded
 * single-buffer call or the engine's submit_tasks() returns.
 */
int
spdk_accel_submit_crc32cv(struct spdk_io_channel *ch, uint32_t *dst, struct iovec *iov,
			  uint32_t iov_cnt, uint32_t seed, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
	struct accel_io_channel *accel_ch;
	struct spdk_accel_task *accel_task;

	if (iov == NULL) {
		SPDK_ERRLOG("iov should not be NULL\n");
		return -EINVAL;
	}

	if (!iov_cnt) {
		SPDK_ERRLOG("iovcnt should not be zero value\n");
		return -EINVAL;
	}

	/* A single buffer needs no chaining; use the plain CRC-32C entry point. */
	if (iov_cnt == 1) {
		return spdk_accel_submit_crc32c(ch, dst, iov[0].iov_base, seed, iov[0].iov_len, cb_fn, cb_arg);
	}

	accel_ch = spdk_io_channel_get_ctx(ch);
	accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg);
	if (accel_task == NULL) {
		SPDK_ERRLOG("no memory\n");
		assert(0);
		return -ENOMEM;
	}

	accel_task->v.iovs = iov;
	accel_task->v.iovcnt = iov_cnt;
	accel_task->dst = (void *)dst;
	accel_task->op_code = ACCEL_OPCODE_CRC32C;

	if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) {
		/* Route the task's completion through crc32cv_done() and stash the
		 * caller's callback so it can be invoked once the chain finishes.
		 */
		accel_task->cb_fn = crc32cv_done;
		accel_task->cb_arg = accel_task;
		accel_task->chained.cb_fn = cb_fn;
		accel_task->chained.cb_arg = cb_arg;

		/* NOTE(review): in struct spdk_accel_task, src appears to share a
		 * union with v.iovs, and seed with chained. If so, the assignment to
		 * src below overwrites the v.iovs pointer stored above (which
		 * crc32cv_done() advances to find the next element), and seed cannot
		 * be stored alongside chained -- confirm the layout and which fields
		 * the engine reads.
		 */
		accel_task->src = iov[0].iov_base;
		accel_task->nbytes = iov[0].iov_len;

		return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
	} else {
		/* No engine support: compute the whole chain in software right now. */
		_sw_accel_crc32cv(dst, iov, iov_cnt, seed);
		spdk_accel_task_complete(accel_task, 0);
		return 0;
	}
}

/* Accel framework public API for getting max operations for a batch. */
uint32_t
spdk_accel_batch_get_max(struct spdk_io_channel *ch)
@@ -474,6 +539,7 @@ spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch

	accel_task->dst = dst;
	accel_task->src = src;
	accel_task->v.iovcnt = 0;
	accel_task->seed = seed;
	accel_task->nbytes = nbytes;
	accel_task->op_code = ACCEL_OPCODE_CRC32C;
@@ -487,21 +553,81 @@ spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch
	return 0;
}

static void
batched_crc32cv_done(void *cb_arg, int status)
{
	struct spdk_accel_task *accel_task = cb_arg;
	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(accel_task->accel_ch);
	struct spdk_accel_batch *batch;

	batch = accel_task->batch;
	assert(batch != NULL);
	assert(accel_task->chained.cb_fn != NULL);
	assert(accel_task->chained.cb_arg != NULL);

	if (spdk_likely(!status)) {
		status = spdk_accel_batch_prep_crc32cv(ch, batch, accel_task->dst,
						       ++accel_task->v.iovs, accel_task->v.iovcnt - 1,  ~(*((uint32_t *)accel_task->dst)),
						       accel_task->chained.cb_fn, accel_task->chained.cb_arg);
		if (spdk_likely(!status)) {
			return;
		}
	}

	accel_task->chained.cb_fn(accel_task->chained.cb_arg, status);
}

/* Accel framework public API for preparing a chained CRC-32C command in a batch.
 *
 * ch      - accel I/O channel the batch belongs to
 * batch   - batch to add the command to
 * dst     - where the CRC-32C result is written
 * iovs    - array of buffers to checksum; must not be NULL
 * iov_cnt - number of entries in iovs; must not be zero
 * seed    - CRC seed value
 * cb_fn   - completion callback
 * cb_arg  - argument passed to cb_fn
 *
 * A single-element array is forwarded to spdk_accel_batch_prep_crc32c(). For
 * longer arrays, the task is queued on the batch's hardware list when the
 * engine supports CRC-32C (with batched_crc32cv_done() chaining the remaining
 * elements), and on the batch's software list otherwise.
 *
 * Returns -EINVAL for a NULL iovs or a zero iov_cnt, -ENOMEM when no task is
 * available, 0 once the task is queued, and otherwise whatever the forwarded
 * single-buffer call returns.
 */
int
spdk_accel_batch_prep_crc32cv(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
			      uint32_t *dst, struct iovec *iovs, uint32_t iov_cnt, uint32_t seed,
			      spdk_accel_completion_cb cb_fn, void *cb_arg)
{
	struct accel_io_channel *accel_ch;
	struct spdk_accel_task *accel_task;

	if (iovs == NULL) {
		SPDK_ERRLOG("iovs should not be NULL\n");
		return -EINVAL;
	}

	if (iov_cnt == 0) {
		SPDK_ERRLOG("iovcnt should not be zero value\n");
		return -EINVAL;
	}

	/* A single buffer needs no chaining; use the plain batched CRC-32C prep. */
	if (iov_cnt == 1) {
		return spdk_accel_batch_prep_crc32c(ch, batch, dst, iovs[0].iov_base, seed, iovs[0].iov_len, cb_fn,
						    cb_arg);
	}

	accel_ch = spdk_io_channel_get_ctx(ch);
	accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg);
	if (accel_task == NULL) {
		return -ENOMEM;
	}

	accel_task->v.iovs = iovs;
	accel_task->v.iovcnt = iov_cnt;
	accel_task->dst = dst;
	accel_task->seed = seed;
	accel_task->op_code = ACCEL_OPCODE_CRC32C;

	if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) {
		/* Route the task's completion through batched_crc32cv_done() and
		 * stash the caller's callback so it can be invoked once the chain
		 * finishes.
		 */
		accel_task->cb_fn = batched_crc32cv_done;
		accel_task->cb_arg = accel_task;
		accel_task->chained.cb_fn = cb_fn;
		accel_task->chained.cb_arg = cb_arg;

		/* NOTE(review): in struct spdk_accel_task, src appears to share a
		 * union with v.iovs, and seed with chained. If so, the seed stored
		 * above is overwritten by the chained fields on this path, and the
		 * assignment to src below overwrites the v.iovs pointer that
		 * batched_crc32cv_done() advances -- confirm the layout and which
		 * fields the engine reads.
		 */
		accel_task->src = iovs[0].iov_base;
		accel_task->nbytes = iovs[0].iov_len;

		TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link);
	} else {
		/* Software tasks keep v.iovs, v.iovcnt and seed intact for later use. */
		TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link);
	}

	return 0;
}

/* Accel framework public API for batch_create function. */
@@ -583,8 +709,12 @@ spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *bat
			batch->status |= rc;
			break;
		case ACCEL_OPCODE_CRC32C:
			if (accel_task->v.iovcnt == 0) {
				_sw_accel_crc32c(accel_task->dst, accel_task->src, accel_task->seed,
						 accel_task->nbytes);
			} else {
				_sw_accel_crc32cv(accel_task->dst, accel_task->v.iovs, accel_task->v.iovcnt, accel_task->seed);
			}
			spdk_accel_task_complete(accel_task, 0);
			break;
		case ACCEL_OPCODE_DUALCAST:
@@ -836,6 +966,20 @@ _sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes)
	*dst = spdk_crc32c_update(src, nbytes, ~seed);
}

/*
 * Software CRC-32C over an iovec array.
 *
 * dst    - receives the final CRC value
 * iov    - array of buffers to checksum, visited in index order
 * iovcnt - number of entries in iov
 * seed   - seed value; its bitwise complement is the starting CRC
 *
 * *dst is written exactly once, after every buffer has been folded in.
 */
static void
_sw_accel_crc32cv(uint32_t *dst, struct iovec *iov, uint32_t iovcnt, uint32_t seed)
{
	uint32_t running = ~seed;
	uint32_t idx = 0;

	while (idx < iovcnt) {
		const struct iovec *entry = &iov[idx];

		/* Each entry must describe a real, non-empty buffer. */
		assert(entry->iov_base != NULL);
		assert(entry->iov_len != 0);

		running = spdk_crc32c_update(entry->iov_base, entry->iov_len, running);
		idx++;
	}

	*dst = running;
}

static struct spdk_io_channel *sw_accel_get_io_channel(void);

static uint32_t