Commit 1219d86a authored by Artur Paszkiewicz, committed by Konrad Sztyber
Browse files

raid5f: convert to use accel framework for xor



Change-Id: Id8fb521549342564bcf4288d74337fb4dd41fa03
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16396


Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Community-CI: Mellanox Build Bot
parent b618f056
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -145,7 +145,7 @@ DEPDIRS-bdev_null := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_nvme = $(BDEV_DEPS_THREAD) accel nvme trace
DEPDIRS-bdev_ocf := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_passthru := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_raid := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_raid := $(BDEV_DEPS_THREAD) accel
DEPDIRS-bdev_rbd := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_uring := $(BDEV_DEPS_THREAD)
DEPDIRS-bdev_virtio := $(BDEV_DEPS_THREAD) virtio
+204 −143
Original line number Diff line number Diff line
@@ -11,7 +11,7 @@
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/xor.h"
#include "spdk/accel.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32
@@ -54,6 +54,28 @@ struct stripe_request {
	/* Buffer for stripe io metadata parity */
	void *parity_md_buf;

	/* Array of iovec iterators for each data chunk */
	struct iov_iter {
		struct iovec *iovs;
		int iovcnt;
		int index;
		size_t offset;
	} *chunk_iov_iters;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Array of source buffer pointers for parity calculation of io metadata */
	void **chunk_xor_md_buffers;

	struct {
		void *dest;
		size_t len;
		size_t remaining;
		size_t remaining_md;
		int status;
	} xor;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs */
@@ -78,22 +100,11 @@ struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	TAILQ_HEAD(, stripe_request) free_stripe_requests;

	/* Array of iovec iterators for each data chunk */
	struct iov_iter {
		struct iovec *iovs;
		int iovcnt;
		int index;
		size_t offset;
	} *chunk_iov_iters;
	/* accel_fw channel */
	struct spdk_io_channel *accel_ch;

	/* Array of source buffer pointers for parity calculation */
	void **chunk_xor_buffers;

	/* Array of source buffer pointers for parity calculation of io metadata */
	void **chunk_xor_md_buffers;

	/* Bounce buffers for parity calculation in case of unaligned source buffers */
	struct iovec *chunk_xor_bounce_buffers;
	/* For retrying xor if accel_ch runs out of resources */
	TAILQ_HEAD(, stripe_request) xor_retry_queue;
};

#define __CHUNK_IN_RANGE(req, c) \
@@ -142,102 +153,183 @@ raid5f_stripe_request_release(struct stripe_request *stripe_req)
	TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link);
}

static int
raid5f_xor_stripe(struct stripe_request *stripe_req)
static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);
static void raid5f_xor_stripe_retry(struct stripe_request *stripe_req);

static void
raid5f_xor_stripe_done(struct stripe_request *stripe_req)
{
	if (stripe_req->xor.status != 0) {
		SPDK_ERRLOG("stripe xor failed: %s\n", spdk_strerror(-stripe_req->xor.status));
		raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED);
	} else {
		raid5f_stripe_request_submit_chunks(stripe_req);
	}

	if (!TAILQ_EMPTY(&stripe_req->r5ch->xor_retry_queue)) {
		stripe_req = TAILQ_FIRST(&stripe_req->r5ch->xor_retry_queue);
		TAILQ_REMOVE(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
		raid5f_xor_stripe_retry(stripe_req);
	}
}

static void raid5f_xor_stripe_continue(struct stripe_request *stripe_req);

/*
 * Shared tail of the data and metadata xor completion callbacks.
 *
 * A non-zero status is stored in stripe_req->xor.status. Once neither data
 * nor metadata bytes remain to be xor-ed, the xor phase is finished via
 * raid5f_xor_stripe_done().
 */
static void
_raid5f_xor_stripe_cb(struct stripe_request *stripe_req, int status)
{
	size_t outstanding;

	if (status != 0) {
		stripe_req->xor.status = status;
	}

	outstanding = stripe_req->xor.remaining + stripe_req->xor.remaining_md;
	if (outstanding != 0) {
		/* Data and/or metadata parity is still pending. */
		return;
	}

	raid5f_xor_stripe_done(stripe_req);
}

static void
raid5f_xor_stripe_cb(void *_stripe_req, int status)
{
	struct stripe_request *stripe_req = _stripe_req;
	size_t len = stripe_req->xor.len;

	stripe_req->xor.remaining -= len;

	if (stripe_req->xor.remaining > 0) {
		struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
		struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	size_t remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
		uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	void *dest = stripe_req->parity_buf;
	size_t alignment_mask = spdk_xor_get_optimal_alignment() - 1;
	void *raid_md = spdk_bdev_io_get_md_buf(bdev_io);
	uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	int ret;
	uint8_t c;
		uint8_t i;

	c = 0;
	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[c];
		bool aligned = true;
		int i;
		for (i = 0; i < n_src; i++) {
			struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[i];
			struct iovec *iov = &iov_iter->iovs[iov_iter->index];

		for (i = 0; i < chunk->iovcnt; i++) {
			if (((uintptr_t)chunk->iovs[i].iov_base & alignment_mask) ||
			    (chunk->iovs[i].iov_len & alignment_mask)) {
				aligned = false;
				break;
			iov_iter->offset += len;
			if (iov_iter->offset == iov->iov_len) {
				iov_iter->offset = 0;
				iov_iter->index++;
			}
		}

		if (aligned) {
			iov_iter->iovs = chunk->iovs;
			iov_iter->iovcnt = chunk->iovcnt;
		} else {
			iov_iter->iovs = &r5ch->chunk_xor_bounce_buffers[c];
			iov_iter->iovcnt = 1;
			spdk_iovcpy(chunk->iovs, chunk->iovcnt, iov_iter->iovs, iov_iter->iovcnt);
		stripe_req->xor.dest += len;

		raid5f_xor_stripe_continue(stripe_req);
	}

		iov_iter->index = 0;
		iov_iter->offset = 0;
	_raid5f_xor_stripe_cb(stripe_req, status);
}

		c++;
/*
 * Completion callback for the metadata parity xor.
 *
 * Marks the metadata part as finished by zeroing xor.remaining_md and then
 * runs the common completion handling.
 */
static void
raid5f_xor_stripe_md_cb(void *_stripe_req, int status)
{
	struct stripe_request *req = _stripe_req;

	req->xor.remaining_md = 0;
	_raid5f_xor_stripe_cb(req, status);
}

	while (remaining > 0) {
		size_t len = remaining;
static void
raid5f_xor_stripe_continue(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
	size_t len = stripe_req->xor.remaining;
	uint8_t i;
	int ret;

	assert(stripe_req->xor.remaining > 0);

	for (i = 0; i < n_src; i++) {
			struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
		struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[i];
		struct iovec *iov = &iov_iter->iovs[iov_iter->index];

		len = spdk_min(len, iov->iov_len - iov_iter->offset);
			r5ch->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset;
		stripe_req->chunk_xor_buffers[i] = iov->iov_base + iov_iter->offset;
	}

	assert(len > 0);
	stripe_req->xor.len = len;

		ret = spdk_xor_gen(dest, r5ch->chunk_xor_buffers, n_src, len);
	ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, stripe_req->xor.dest,
				    stripe_req->chunk_xor_buffers, n_src, len,
				    raid5f_xor_stripe_cb, stripe_req);
	if (spdk_unlikely(ret)) {
			SPDK_ERRLOG("stripe xor failed\n");
			return ret;
		if (ret == -ENOMEM) {
			TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
		} else {
			stripe_req->xor.status = ret;
			raid5f_xor_stripe_done(stripe_req);
		}
		return;
	}
}

		for (i = 0; i < n_src; i++) {
			struct iov_iter *iov_iter = &r5ch->chunk_iov_iters[i];
			struct iovec *iov = &iov_iter->iovs[iov_iter->index];
static void
raid5f_xor_stripe(struct stripe_request *stripe_req)
{
	struct raid_bdev_io *raid_io = stripe_req->raid_io;
	struct raid_bdev *raid_bdev = raid_io->raid_bdev;
	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
	void *raid_md = spdk_bdev_io_get_md_buf(bdev_io);
	uint32_t raid_md_size = spdk_bdev_get_md_size(&raid_bdev->bdev);
	struct chunk *chunk;
	uint8_t c;

			iov_iter->offset += len;
			if (iov_iter->offset == iov->iov_len) {
	c = 0;
	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		struct iov_iter *iov_iter = &stripe_req->chunk_iov_iters[c++];

		iov_iter->iovs = chunk->iovs;
		iov_iter->iovcnt = chunk->iovcnt;
		iov_iter->index = 0;
		iov_iter->offset = 0;
				iov_iter->index++;
	}
		}
		dest += len;

		remaining -= len;
	}
	stripe_req->xor.dest = stripe_req->parity_buf;
	stripe_req->xor.remaining = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	stripe_req->xor.status = 0;

	if (raid_md != NULL) {
		uint8_t n_src = raid5f_stripe_data_chunks_num(raid_bdev);
		uint64_t len = raid_bdev->strip_size * raid_md_size;
		int ret;

		stripe_req->xor.remaining_md = len;

		c = 0;
		FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
			r5ch->chunk_xor_md_buffers[c] = chunk->md_buf;
			stripe_req->chunk_xor_md_buffers[c] = chunk->md_buf;
			c++;
		}
		ret = spdk_xor_gen(stripe_req->parity_md_buf, r5ch->chunk_xor_md_buffers, n_src, len);

		ret = spdk_accel_submit_xor(stripe_req->r5ch->accel_ch, stripe_req->parity_md_buf,
					    stripe_req->chunk_xor_md_buffers, n_src, len,
					    raid5f_xor_stripe_md_cb, stripe_req);
		if (spdk_unlikely(ret)) {
			SPDK_ERRLOG("stripe io metadata xor failed\n");
			return ret;
			if (ret == -ENOMEM) {
				TAILQ_INSERT_HEAD(&stripe_req->r5ch->xor_retry_queue, stripe_req, link);
			} else {
				stripe_req->xor.status = ret;
				raid5f_xor_stripe_done(stripe_req);
			}
			return;
		}
	}

	return 0;
	raid5f_xor_stripe_continue(stripe_req);
}

/*
 * Resume xor processing for a request taken off the xor retry queue.
 *
 * While metadata parity is still outstanding (xor.remaining_md != 0) the xor
 * sequence is restarted with raid5f_xor_stripe(); otherwise only the data xor
 * is continued.
 */
static void
raid5f_xor_stripe_retry(struct stripe_request *stripe_req)
{
	if (stripe_req->xor.remaining_md == 0) {
		raid5f_xor_stripe_continue(stripe_req);
		return;
	}

	raid5f_xor_stripe(stripe_req);
}

static void
@@ -261,8 +353,6 @@ raid5f_chunk_write_complete_bdev_io(struct spdk_bdev_io *bdev_io, bool success,
				    SPDK_BDEV_IO_STATUS_FAILED);
}

static void raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req);

static void
raid5f_chunk_write_retry(void *_raid_io)
{
@@ -416,17 +506,6 @@ raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
	}
}

/*
 * Run raid5f_xor_stripe() for the request and, if it returns 0, submit the
 * chunk writes. On a non-zero return the raid I/O is completed with
 * SPDK_BDEV_IO_STATUS_FAILED and no chunks are submitted.
 */
static void
raid5f_submit_stripe_request(struct stripe_request *stripe_req)
{
	if (spdk_unlikely(raid5f_xor_stripe(stripe_req) != 0)) {
		raid_bdev_io_complete(stripe_req->raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	raid5f_stripe_request_submit_chunks(stripe_req);
}

static int
raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
{
@@ -455,7 +534,7 @@ raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	raid5f_submit_stripe_request(stripe_req);
	raid5f_xor_stripe(stripe_req);

	return 0;
}
@@ -556,6 +635,10 @@ raid5f_stripe_request_free(struct stripe_request *stripe_req)
	spdk_dma_free(stripe_req->parity_buf);
	spdk_dma_free(stripe_req->parity_md_buf);

	free(stripe_req->chunk_xor_buffers);
	free(stripe_req->chunk_xor_md_buffers);
	free(stripe_req->chunk_iov_iters);

	free(stripe_req);
}

@@ -599,6 +682,24 @@ raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch)
		}
	}

	stripe_req->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					     sizeof(stripe_req->chunk_iov_iters[0]));
	if (!stripe_req->chunk_iov_iters) {
		goto err;
	}

	stripe_req->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					       sizeof(stripe_req->chunk_xor_buffers[0]));
	if (!stripe_req->chunk_xor_buffers) {
		goto err;
	}

	stripe_req->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					   sizeof(stripe_req->chunk_xor_md_buffers[0]));
	if (!stripe_req->chunk_xor_md_buffers) {
		goto err;
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
@@ -609,26 +710,18 @@ static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	struct stripe_request *stripe_req;
	int i;

	assert(TAILQ_EMPTY(&r5ch->xor_retry_queue));

	while ((stripe_req = TAILQ_FIRST(&r5ch->free_stripe_requests))) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);
		raid5f_stripe_request_free(stripe_req);
	}

	if (r5ch->chunk_xor_bounce_buffers) {
		for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
			free(r5ch->chunk_xor_bounce_buffers[i].iov_base);
		}
		free(r5ch->chunk_xor_bounce_buffers);
	if (r5ch->accel_ch) {
		spdk_put_io_channel(r5ch->accel_ch);
	}

	free(r5ch->chunk_xor_buffers);
	free(r5ch->chunk_xor_md_buffers);
	free(r5ch->chunk_iov_iters);
}

static int
@@ -636,8 +729,6 @@ raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	size_t chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	int status = 0;
	int i;

@@ -655,42 +746,13 @@ raid5f_ioch_create(void *io_device, void *ctx_buf)
		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link);
	}

	r5ch->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
				       sizeof(r5ch->chunk_iov_iters[0]));
	if (!r5ch->chunk_iov_iters) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					 sizeof(r5ch->chunk_xor_buffers[0]));
	if (!r5ch->chunk_xor_buffers) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_md_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					    sizeof(r5ch->chunk_xor_md_buffers[0]));
	if (!r5ch->chunk_xor_md_buffers) {
		status = -ENOMEM;
	r5ch->accel_ch = spdk_accel_get_io_channel();
	if (!r5ch->accel_ch) {
		SPDK_ERRLOG("Failed to get accel framework's IO channel\n");
		goto out;
	}

	r5ch->chunk_xor_bounce_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
						sizeof(r5ch->chunk_xor_bounce_buffers[0]));
	if (!r5ch->chunk_xor_bounce_buffers) {
		status = -ENOMEM;
		goto out;
	}

	for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
		status = posix_memalign(&r5ch->chunk_xor_bounce_buffers[i].iov_base,
					spdk_xor_get_optimal_alignment(), chunk_len);
		if (status) {
			goto out;
		}
		r5ch->chunk_xor_bounce_buffers[i].iov_len = chunk_len;
	}
	TAILQ_INIT(&r5ch->xor_retry_queue);
out:
	if (status) {
		SPDK_ERRLOG("Failed to initialize io channel\n");
@@ -705,7 +767,7 @@ raid5f_start(struct raid_bdev *raid_bdev)
	uint64_t min_blockcnt = UINT64_MAX;
	struct raid_base_bdev_info *base_info;
	struct raid5f_info *r5f_info;
	size_t alignment;
	size_t alignment = 0;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
@@ -714,7 +776,6 @@ raid5f_start(struct raid_bdev *raid_bdev)
	}
	r5f_info->raid_bdev = raid_bdev;

	alignment = spdk_xor_get_optimal_alignment();
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt);
		alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
+61 −0
Original line number Diff line number Diff line
@@ -6,15 +6,26 @@
#include "spdk/stdinc.h"
#include "spdk_cunit.h"
#include "spdk/env.h"
#include "spdk/xor.h"

#include "common/lib/ut_multithread.c"

#include "bdev/raid/raid5f.c"
#include "../common.c"

static void *g_accel_p = (void *)0xdeadbeaf;

DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module));
DEFINE_STUB(spdk_bdev_get_buf_align, size_t, (const struct spdk_bdev *bdev), 0);
DEFINE_STUB_V(raid_bdev_module_stop_done, (struct raid_bdev *raid_bdev));
DEFINE_STUB(accel_channel_create, int, (void *io_device, void *ctx_buf), 0);
DEFINE_STUB_V(accel_channel_destroy, (void *io_device, void *ctx_buf));

/*
 * Unit-test replacement for the accel framework entry point: hands out an
 * I/O channel for the fake accel io_device identified by g_accel_p.
 */
struct spdk_io_channel *
spdk_accel_get_io_channel(void)
{
	struct spdk_io_channel *accel_ch = spdk_get_io_channel(g_accel_p);

	return accel_ch;
}

void *
spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io)
@@ -28,6 +39,38 @@ spdk_bdev_get_md_size(const struct spdk_bdev *bdev)
	return bdev->md_len;
}

/* Deferred-completion context for the spdk_accel_submit_xor() test stub. */
struct xor_ctx {
	/* Completion callback passed to spdk_accel_submit_xor() */
	spdk_accel_completion_cb cb_fn;
	/* Opaque argument handed back to cb_fn */
	void *cb_arg;
};

static void
finish_xor(void *_ctx)
{
	struct xor_ctx *ctx = _ctx;

	ctx->cb_fn(ctx->cb_arg, 0);

	free(ctx);
}

/*
 * Unit-test stub for spdk_accel_submit_xor().
 *
 * The xor is computed synchronously with spdk_xor_gen(), but the completion
 * callback is deferred: it is delivered through a message sent to the current
 * SPDK thread, so callers only see it after the thread is polled.
 *
 * Any allocation, xor or message-send failure aborts the test through
 * SPDK_CU_ASSERT_FATAL; otherwise 0 is returned.
 */
int
spdk_accel_submit_xor(struct spdk_io_channel *ch, void *dst, void **sources, uint32_t nsrcs,
		      uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
	struct xor_ctx *ctx;
	int rc;

	ctx = malloc(sizeof(*ctx));
	SPDK_CU_ASSERT_FATAL(ctx != NULL);
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	SPDK_CU_ASSERT_FATAL(spdk_xor_gen(dst, sources, nsrcs, nbytes) == 0);

	/*
	 * Do not ignore a failed send: the completion would never fire and ctx
	 * would leak, leaving the test waiting on a callback that cannot arrive.
	 */
	rc = spdk_thread_send_msg(spdk_get_thread(), finish_xor, ctx);
	SPDK_CU_ASSERT_FATAL(rc == 0);

	return 0;
}

void
raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
{
@@ -57,6 +100,19 @@ raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
	}
}

/*
 * Register the fake accel io_device (keyed by g_accel_p) with stubbed channel
 * create/destroy callbacks so spdk_accel_get_io_channel() can hand out channels.
 */
static void
init_accel(void)
{
	spdk_io_device_register(g_accel_p, accel_channel_create, accel_channel_destroy,
				sizeof(int), "accel_p");
}

/* Unregister the fake accel io_device registered by init_accel(); no unregister callback is used. */
static void
fini_accel(void)
{
	spdk_io_device_unregister(g_accel_p, NULL);
}

static int
test_setup(void)
{
@@ -105,12 +161,15 @@ test_setup(void)
		}
	}

	init_accel();

	return 0;
}

static int
test_cleanup(void)
{
	fini_accel();
	raid_test_params_free();
	return 0;
}
@@ -516,6 +575,8 @@ test_raid5f_write_request(struct raid_io_info *io_info)

	raid5f_submit_rw_request(raid_io);

	poll_threads();

	process_io_completions(io_info);

	if (io_info->status == SPDK_BDEV_IO_STATUS_SUCCESS) {