Commit 221eb3f4 authored by paul luse's avatar paul luse Committed by Jim Harris
Browse files

examples/accel_perf: add support for copy_crc commands



For COPY+CRC operations, we allocate a destination buffer for each
task that's big enough to hold the copied input data for all stages
of the chain. For example, if the operation is 4 stages of 4K input
buffers, the destination will be 16K in size.

Signed-off-by: default avatarpaul luse <paul.e.luse@intel.com>
Change-Id: Id32a98c1dbada2881a8423c050e892d5c37bf6cc
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8204


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
parent 71f0600c
Loading
Loading
Loading
Loading
+74 −17
Original line number Diff line number Diff line
@@ -82,6 +82,7 @@ struct ap_task {
	uint32_t		iov_cnt;
	void			*dst;
	void			*dst2;
	uint32_t		crc_dst;
	struct worker_thread	*worker;
	int			status;
	int			expected_status; /* used for the compare operation */
@@ -124,15 +125,20 @@ dump_user_config(struct spdk_app_opts *opts)
	printf("Core mask:      %s\n\n", opts->reactor_mask);
	printf("Accel Perf Configuration:\n");
	printf("Workload Type:  %s\n", g_workload_type);
	if (g_workload_selection == ACCEL_CRC32C) {
	if (g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) {
		printf("CRC-32C seed:   %u\n", g_crc32c_seed);
		printf("vector size:    %u\n", g_crc32c_chained_count);
		printf("vector count    %u\n", g_crc32c_chained_count);
	} else if (g_workload_selection == ACCEL_FILL) {
		printf("Fill pattern:   0x%x\n", g_fill_pattern);
	} else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) {
		printf("Failure inject: %u percent\n", g_fail_percent_goal);
	}
	if (g_workload_selection == ACCEL_COPY_CRC32C) {
		printf("Vector size:    %u bytes\n", g_xfer_size_bytes);
		printf("Transfer size:  %u bytes\n", g_xfer_size_bytes * g_crc32c_chained_count);
	} else {
		printf("Transfer size:  %u bytes\n", g_xfer_size_bytes);
	}
	printf("Queue depth:    %u\n", g_queue_depth);
	printf("Allocate depth: %u\n", g_allocate_depth);
	printf("# threads/core: %u\n", g_threads_per_core);
@@ -151,12 +157,12 @@ usage(void)
	printf("accel_perf options:\n");
	printf("\t[-h help message]\n");
	printf("\t[-q queue depth per core]\n");
	printf("\t[-C for crc32c workload, use this value to configre the io vector size to test (default 1)\n");
	printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)\n");
	printf("\t[-T number of threads per core\n");
	printf("\t[-n number of channels]\n");
	printf("\t[-o transfer size in bytes]\n");
	printf("\t[-t time in seconds]\n");
	printf("\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast\n");
	printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, dualcast\n");
	printf("\t[-s for crc32c workload, use this seed value (default 0)\n");
	printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n");
	printf("\t[-f for fill workload, use this BYTE value (default 255)\n");
@@ -235,6 +241,8 @@ parse_args(int argc, char *argv)
			g_workload_selection = ACCEL_FILL;
		} else if (!strcmp(g_workload_type, "crc32c")) {
			g_workload_selection = ACCEL_CRC32C;
		} else if (!strcmp(g_workload_type, "copy_crc32c")) {
			g_workload_selection = ACCEL_COPY_CRC32C;
		} else if (!strcmp(g_workload_type, "compare")) {
			g_workload_selection = ACCEL_COMPARE;
		} else if (!strcmp(g_workload_type, "dualcast")) {
@@ -273,6 +281,7 @@ _get_task_data_bufs(struct ap_task *task)
{
	uint32_t align = 0;
	uint32_t i = 0;
	int dst_buff_len = g_xfer_size_bytes;

	/* For dualcast, the DSA HW requires 4K alignment on destination addresses but
	 * we do this for all engines to keep it simple.
@@ -281,7 +290,7 @@ _get_task_data_bufs(struct ap_task *task)
		align = ALIGN_4K;
	}

	if (g_workload_selection == ACCEL_CRC32C) {
	if (g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) {
		assert(g_crc32c_chained_count > 0);
		task->iov_cnt = g_crc32c_chained_count;
		task->iovs = calloc(task->iov_cnt, sizeof(struct iovec));
@@ -290,6 +299,10 @@ _get_task_data_bufs(struct ap_task *task)
			return -ENOMEM;
		}

		if (g_workload_selection == ACCEL_COPY_CRC32C) {
			dst_buff_len = g_xfer_size_bytes * g_crc32c_chained_count;
		}

		for (i = 0; i < task->iov_cnt; i++) {
			task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
			if (task->iovs[i].iov_base == NULL) {
@@ -314,7 +327,8 @@ _get_task_data_bufs(struct ap_task *task)
		}
	}

	task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
	if (g_workload_selection != ACCEL_COPY_CRC32C) {
		task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL);
		if (task->dst == NULL) {
			fprintf(stderr, "Unable to alloc dst buffer\n");
			return -ENOMEM;
@@ -322,9 +336,10 @@ _get_task_data_bufs(struct ap_task *task)

		/* For compare we want the buffers to match, otherwise not. */
		if (g_workload_selection == ACCEL_COMPARE) {
		memset(task->dst, DATA_PATTERN, g_xfer_size_bytes);
			memset(task->dst, DATA_PATTERN, dst_buff_len);
		} else {
		memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes);
			memset(task->dst, ~DATA_PATTERN, dst_buff_len);
		}
	}

	if (g_workload_selection == ACCEL_DUALCAST) {
@@ -379,6 +394,10 @@ _submit_single(struct worker_thread *worker, struct ap_task *task)
					       task->iovs, task->iov_cnt, g_crc32c_seed,
					       accel_done, task);
		break;
	case ACCEL_COPY_CRC32C:
		rc = spdk_accel_submit_copy_crc32cv(worker->ch, task->dst, task->iovs, task->iov_cnt,
						    &task->crc_dst, g_crc32c_seed, accel_done, task);
		break;
	case ACCEL_COMPARE:
		random_num = rand() % 100;
		if (random_num < g_fail_percent_goal) {
@@ -434,6 +453,10 @@ _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
						*(uint8_t *)task->src,
						g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_COPY_CRC32C:
		rc = spdk_accel_batch_prep_copy_crc32c(worker->ch, batch, task->dst, task->src, &task->crc_dst,
						       g_crc32c_seed, g_xfer_size_bytes, accel_done, task);
		break;
	case ACCEL_CRC32C:
		rc = spdk_accel_batch_prep_crc32cv(worker->ch, batch, (uint32_t *)task->dst,
						   task->iovs, task->iov_cnt, g_crc32c_seed, accel_done, task);
@@ -591,6 +614,28 @@ batch_done(void *cb_arg, int status)
	spdk_thread_send_msg(worker_batch->worker->thread, _batch_done, worker_batch);
}

static int
_vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt)
{
	uint32_t i;
	uint32_t ttl_len = 0;
	uint8_t *dst = (uint8_t *)_dst;

	for (i = 0; i < iovcnt; i++) {
		if (memcmp(dst, src_iovs[i].iov_base, src_iovs[i].iov_len)) {
			return -1;
		}
		dst += src_iovs[i].iov_len;
		ttl_len += src_iovs[i].iov_len;
	}

	if (ttl_len != iovcnt * g_xfer_size_bytes) {
		return -1;
	}

	return 0;
}

static void
_accel_done(void *arg1)
{
@@ -603,6 +648,17 @@ _accel_done(void *arg1)

	if (g_verify && task->status == 0) {
		switch (g_workload_selection) {
		case ACCEL_COPY_CRC32C:
			sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed);
			if (task->crc_dst != sw_crc32c) {
				SPDK_NOTICELOG("CRC-32C miscompare\n");
				worker->xfer_failed++;
			}
			if (_vector_memcmp(task->dst, task->iovs, task->iov_cnt)) {
				SPDK_NOTICELOG("Data miscompare\n");
				worker->xfer_failed++;
			}
			break;
		case ACCEL_CRC32C:
			sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed);
			if (*(uint32_t *)task->dst != sw_crc32c) {
@@ -1002,6 +1058,7 @@ main(int argc, char **argv)
	if ((g_workload_selection != ACCEL_COPY) &&
	    (g_workload_selection != ACCEL_FILL) &&
	    (g_workload_selection != ACCEL_CRC32C) &&
	    (g_workload_selection != ACCEL_COPY_CRC32C) &&
	    (g_workload_selection != ACCEL_COMPARE) &&
	    (g_workload_selection != ACCEL_DUALCAST)) {
		usage();
@@ -1027,7 +1084,7 @@ main(int argc, char **argv)
		g_allocate_depth = g_queue_depth;
	}

	if (g_workload_selection == ACCEL_CRC32C &&
	if ((g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) &&
	    g_crc32c_chained_count == 0) {
		usage();
		g_rc = -1;