Commit c2eea87a authored by Artur Paszkiewicz, committed by Tomasz Zawadzki
Browse files

raid5f: calculate and write parity



Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: Ia1b82d555c966b9b291eeb2426c42846b93e7fec
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/7703


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent 69c448a3
Loading
Loading
Loading
Loading
+182 −17
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include "spdk/util.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/xor.h"

/* Maximum concurrent full stripe writes per io channel */
#define RAID5F_MAX_STRIPES 32
@@ -41,6 +42,9 @@ struct stripe_request {
	/* The stripe's parity chunk */
	struct chunk *parity_chunk;

	/* Buffer for stripe parity */
	void *parity_buf;

	TAILQ_ENTRY(stripe_request) link;

	/* Array of chunks corresponding to base_bdevs */
@@ -56,11 +60,28 @@ struct raid5f_info {

	/* Number of stripes on this array */
	uint64_t total_stripes;

	/* Alignment for buffer allocation */
	size_t buf_alignment;
};

/*
 * Per io channel context of the raid5f module. Besides the pool of free stripe
 * requests it holds the scratch arrays used by raid5f_xor_stripe() while
 * computing parity. Each of the three arrays has one entry per data chunk of a
 * stripe and is allocated when the channel is created.
 */
struct raid5f_io_channel {
	/* All available stripe requests on this channel */
	TAILQ_HEAD(, stripe_request) free_stripe_requests;

	/* Array of iovec iterators for each data chunk */
	struct iov_iter {
		/* iovecs being walked: either the chunk's own iovecs or a single bounce buffer */
		struct iovec *iovs;
		/* Number of entries in iovs */
		int iovcnt;
		/* Index of the iovec currently being consumed */
		int index;
		/* Byte offset of the next unprocessed byte within iovs[index] */
		size_t offset;
	} *chunk_iov_iters;

	/*
	 * Array of source buffer pointers for parity calculation. Refilled before
	 * every spdk_xor_gen() call with the current position of each iterator.
	 */
	void **chunk_xor_buffers;

	/*
	 * Bounce buffers for parity calculation in case of unaligned source buffers.
	 * Each one is a full chunk long and aligned to spdk_xor_get_optimal_alignment().
	 */
	struct iovec *chunk_xor_bounce_buffers;
};

#define __CHUNK_IN_RANGE(req, c) \
@@ -109,6 +130,87 @@ raid5f_stripe_request_release(struct stripe_request *stripe_req)
	TAILQ_INSERT_HEAD(&stripe_req->r5ch->free_stripe_requests, stripe_req, link);
}

/*
 * Calculate the parity of a full stripe write.
 *
 * All data chunks of stripe_req are XOR-ed together into stripe_req->parity_buf.
 * A chunk whose iovecs all have a base address and length that are multiples of
 * the optimal XOR alignment is read in place; any other chunk is first copied
 * into this channel's bounce buffer for that chunk. The alignment value is used
 * as a bit mask, so it is presumably a power of two - TODO confirm.
 *
 * The XOR itself is done piecewise: every pass processes the largest length
 * that does not cross an iovec boundary in any of the sources.
 *
 * Returns 0 on success, or the non-zero value returned by spdk_xor_gen().
 */
static int
raid5f_xor_stripe(struct stripe_request *stripe_req)
{
	struct raid5f_io_channel *r5ch = stripe_req->r5ch;
	struct raid_bdev *raid_bdev = stripe_req->raid_io->raid_bdev;
	size_t bytes_left = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	uint8_t src_cnt = raid5f_stripe_data_chunks_num(raid_bdev);
	size_t align_mask = spdk_xor_get_optimal_alignment() - 1;
	void *parity = stripe_req->parity_buf;
	struct chunk *chunk;
	uint8_t src_idx = 0;
	uint8_t s;
	int rc;

	/* Point one iterator at each data chunk, going through a bounce buffer if needed */
	FOR_EACH_DATA_CHUNK(stripe_req, chunk) {
		struct iov_iter *it = &r5ch->chunk_iov_iters[src_idx];
		int k = 0;

		/* Advance k past every leading iovec that is suitably aligned */
		while (k < chunk->iovcnt &&
		       ((uintptr_t)chunk->iovs[k].iov_base & align_mask) == 0 &&
		       (chunk->iovs[k].iov_len & align_mask) == 0) {
			k++;
		}

		if (k >= chunk->iovcnt) {
			/* No misaligned iovec found - use the chunk's buffers directly */
			it->iovs = chunk->iovs;
			it->iovcnt = chunk->iovcnt;
		} else {
			it->iovs = &r5ch->chunk_xor_bounce_buffers[src_idx];
			it->iovcnt = 1;
			spdk_iovcpy(chunk->iovs, chunk->iovcnt, it->iovs, it->iovcnt);
		}

		it->index = 0;
		it->offset = 0;

		src_idx++;
	}

	while (bytes_left > 0) {
		size_t step = bytes_left;

		/* Collect the current source pointers and the longest common contiguous run */
		for (s = 0; s < src_cnt; s++) {
			struct iov_iter *it = &r5ch->chunk_iov_iters[s];
			struct iovec *cur = &it->iovs[it->index];
			size_t avail = cur->iov_len - it->offset;

			if (avail < step) {
				step = avail;
			}
			r5ch->chunk_xor_buffers[s] = cur->iov_base + it->offset;
		}

		assert(step > 0);

		rc = spdk_xor_gen(parity, r5ch->chunk_xor_buffers, src_cnt, step);
		if (spdk_unlikely(rc)) {
			SPDK_ERRLOG("stripe xor failed\n");
			return rc;
		}

		/* Move every iterator forward, stepping to the next iovec once one is used up */
		for (s = 0; s < src_cnt; s++) {
			struct iov_iter *it = &r5ch->chunk_iov_iters[s];

			it->offset += step;
			if (it->offset == it->iovs[it->index].iov_len) {
				it->offset = 0;
				it->index++;
			}
		}

		parity += step;
		bytes_left -= step;
	}

	return 0;
}

static void
raid5f_chunk_write_complete(struct chunk *chunk, enum spdk_bdev_io_status status)
{
@@ -165,7 +267,7 @@ raid5f_chunk_write(struct chunk *chunk)
			 * these means there are no more to complete for the stripe request, we can
			 * release the stripe request as well.
			 */
			uint64_t base_bdev_io_not_submitted = raid5f_stripe_data_chunks_num(raid_bdev) -
			uint64_t base_bdev_io_not_submitted = raid_bdev->num_base_bdevs -
							      raid_io->base_bdev_io_submitted;

			if (raid_bdev_io_complete_part(stripe_req->raid_io, base_bdev_io_not_submitted,
@@ -238,6 +340,11 @@ raid5f_stripe_request_map_iovecs(struct stripe_request *stripe_req,
		}
	}

	stripe_req->parity_chunk->iovs[0].iov_base = stripe_req->parity_buf;
	stripe_req->parity_chunk->iovs[0].iov_len = raid_bdev->strip_size <<
			raid_bdev->blocklen_shift;
	stripe_req->parity_chunk->iovcnt = 1;

	return 0;
}

@@ -248,15 +355,7 @@ raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
	struct chunk *start = &stripe_req->chunks[raid_io->base_bdev_io_submitted];
	struct chunk *chunk;

	if (start >= stripe_req->parity_chunk) {
		start++;
	}

	FOR_EACH_CHUNK_FROM(stripe_req, chunk, start) {
		if (chunk == stripe_req->parity_chunk) {
			continue;
		}

		if (spdk_unlikely(raid5f_chunk_write(chunk) != 0)) {
			break;
		}
@@ -267,7 +366,10 @@ raid5f_stripe_request_submit_chunks(struct stripe_request *stripe_req)
/*
 * Calculate parity for a stripe request and submit the writes for its chunks.
 *
 * If the parity calculation fails, no chunk write is submitted: the stripe
 * request is handed back to its channel's free list and the raid I/O is
 * completed with SPDK_BDEV_IO_STATUS_FAILED.
 */
static void
raid5f_submit_stripe_request(struct stripe_request *stripe_req)
{
	if (spdk_unlikely(raid5f_xor_stripe(stripe_req) != 0)) {
		struct raid_bdev_io *raid_io = stripe_req->raid_io;

		/*
		 * The request was taken off the free list before we were called and
		 * nothing else will complete it, so release it here. Otherwise it is
		 * lost from the pool and never freed when the channel is destroyed.
		 */
		raid5f_stripe_request_release(stripe_req);
		raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	raid5f_stripe_request_submit_chunks(stripe_req);
}
@@ -300,7 +402,7 @@ raid5f_submit_write_request(struct raid_bdev_io *raid_io, uint64_t stripe_index)
	TAILQ_REMOVE(&r5ch->free_stripe_requests, stripe_req, link);

	raid_io->module_private = stripe_req;
	raid_io->base_bdev_io_remaining = raid5f_stripe_data_chunks_num(raid_bdev);
	raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;

	raid5f_submit_stripe_request(stripe_req);

@@ -397,6 +499,8 @@ raid5f_stripe_request_free(struct stripe_request *stripe_req)
		free(chunk->iovs);
	}

	spdk_dma_free(stripe_req->parity_buf);

	free(stripe_req);
}

@@ -421,24 +525,45 @@ raid5f_stripe_request_alloc(struct raid5f_io_channel *r5ch)
		chunk->iovcnt_max = 4;
		chunk->iovs = calloc(chunk->iovcnt_max, sizeof(chunk->iovs[0]));
		if (!chunk->iovs) {
			raid5f_stripe_request_free(stripe_req);
			return NULL;
			goto err;
		}
	}

	stripe_req->parity_buf = spdk_dma_malloc(raid_bdev->strip_size << raid_bdev->blocklen_shift,
				 r5f_info->buf_alignment, NULL);
	if (!stripe_req->parity_buf) {
		goto err;
	}

	return stripe_req;
err:
	raid5f_stripe_request_free(stripe_req);
	return NULL;
}

/*
 * Release everything owned by a raid5f io channel: the stripe requests on its
 * free list, the XOR bounce buffers and the per-chunk scratch arrays.
 *
 * io_device is the struct raid5f_info the channel belongs to and ctx_buf is
 * the struct raid5f_io_channel. Also used by raid5f_ioch_create() to clean up
 * a partially initialized channel, hence the NULL check on the bounce buffers.
 */
static void
raid5f_ioch_destroy(void *io_device, void *ctx_buf)
{
	struct raid5f_info *r5f_info = io_device;
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	struct stripe_request *req;
	int idx;

	/* Drain the free list */
	for (req = TAILQ_FIRST(&r5ch->free_stripe_requests); req != NULL;
	     req = TAILQ_FIRST(&r5ch->free_stripe_requests)) {
		TAILQ_REMOVE(&r5ch->free_stripe_requests, req, link);
		raid5f_stripe_request_free(req);
	}

	if (r5ch->chunk_xor_bounce_buffers != NULL) {
		struct iovec *bounce = r5ch->chunk_xor_bounce_buffers;

		idx = 0;
		while (idx < raid5f_stripe_data_chunks_num(raid_bdev)) {
			free(bounce[idx].iov_base);
			idx++;
		}
		free(bounce);
	}

	free(r5ch->chunk_xor_buffers);
	free(r5ch->chunk_iov_iters);
}

static int
@@ -446,6 +571,9 @@ raid5f_ioch_create(void *io_device, void *ctx_buf)
{
	struct raid5f_io_channel *r5ch = ctx_buf;
	struct raid5f_info *r5f_info = io_device;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	size_t chunk_len = raid_bdev->strip_size << raid_bdev->blocklen_shift;
	int status = 0;
	int i;

	TAILQ_INIT(&r5ch->free_stripe_requests);
@@ -455,15 +583,48 @@ raid5f_ioch_create(void *io_device, void *ctx_buf)

		stripe_req = raid5f_stripe_request_alloc(r5ch);
		if (!stripe_req) {
			SPDK_ERRLOG("Failed to initialize io channel\n");
			raid5f_ioch_destroy(r5f_info, r5ch);
			return -ENOMEM;
			status = -ENOMEM;
			goto out;
		}

		TAILQ_INSERT_HEAD(&r5ch->free_stripe_requests, stripe_req, link);
	}

	return 0;
	r5ch->chunk_iov_iters = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
				       sizeof(r5ch->chunk_iov_iters[0]));
	if (!r5ch->chunk_iov_iters) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
					 sizeof(r5ch->chunk_xor_buffers[0]));
	if (!r5ch->chunk_xor_buffers) {
		status = -ENOMEM;
		goto out;
	}

	r5ch->chunk_xor_bounce_buffers = calloc(raid5f_stripe_data_chunks_num(raid_bdev),
						sizeof(r5ch->chunk_xor_bounce_buffers[0]));
	if (!r5ch->chunk_xor_bounce_buffers) {
		status = -ENOMEM;
		goto out;
	}

	for (i = 0; i < raid5f_stripe_data_chunks_num(raid_bdev); i++) {
		status = posix_memalign(&r5ch->chunk_xor_bounce_buffers[i].iov_base,
					spdk_xor_get_optimal_alignment(), chunk_len);
		if (status) {
			goto out;
		}
		r5ch->chunk_xor_bounce_buffers[i].iov_len = chunk_len;
	}
out:
	if (status) {
		SPDK_ERRLOG("Failed to initialize io channel\n");
		raid5f_ioch_destroy(r5f_info, r5ch);
	}
	return status;
}

static int
@@ -472,6 +633,7 @@ raid5f_start(struct raid_bdev *raid_bdev)
	uint64_t min_blockcnt = UINT64_MAX;
	struct raid_base_bdev_info *base_info;
	struct raid5f_info *r5f_info;
	size_t alignment;

	r5f_info = calloc(1, sizeof(*r5f_info));
	if (!r5f_info) {
@@ -480,12 +642,15 @@ raid5f_start(struct raid_bdev *raid_bdev)
	}
	r5f_info->raid_bdev = raid_bdev;

	alignment = spdk_xor_get_optimal_alignment();
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		min_blockcnt = spdk_min(min_blockcnt, base_info->bdev->blockcnt);
		alignment = spdk_max(alignment, spdk_bdev_get_buf_align(base_info->bdev));
	}

	r5f_info->total_stripes = min_blockcnt / raid_bdev->strip_size;
	r5f_info->stripe_blocks = raid_bdev->strip_size * raid5f_stripe_data_chunks_num(raid_bdev);
	r5f_info->buf_alignment = alignment;

	raid_bdev->bdev.blockcnt = r5f_info->stripe_blocks * r5f_info->total_stripes;
	raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
+63 −8
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
/* Stubs for raid_bdev and bdev functions that the code under test references */
DEFINE_STUB_V(raid_bdev_module_list_add, (struct raid_bdev_module *raid_module));
DEFINE_STUB_V(raid_bdev_queue_io_wait, (struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
					struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn));
/*
 * raid5f_start() takes the maximum of the XOR alignment and each base bdev's
 * buffer alignment; returning 0 here leaves the XOR alignment as the result.
 */
DEFINE_STUB(spdk_bdev_get_buf_align, size_t, (const struct spdk_bdev *bdev), 0);

void
raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
@@ -209,6 +210,9 @@ struct raid_io_info {
	void *src_buf;
	void *dest_buf;
	size_t buf_size;
	void *parity_buf;
	void *reference_parity;
	size_t parity_buf_size;
	enum spdk_bdev_io_status status;
	bool failed;
	int remaining;
@@ -341,21 +345,27 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
	stripe_req = raid5f_chunk_stripe_req(chunk);
	test_raid_bdev_io = (struct test_raid_bdev_io *)spdk_bdev_io_from_ctx(stripe_req->raid_io);
	io_info = test_raid_bdev_io->io_info;
	raid_bdev = io_info->r5f_info->raid_bdev;

	SPDK_CU_ASSERT_FATAL(chunk != stripe_req->parity_chunk);
	raid_bdev = io_info->r5f_info->raid_bdev;

	stripe_idx_off = offset_blocks / raid_bdev->strip_size -
			 io_info->offset_blocks / io_info->r5f_info->stripe_blocks;

	if (chunk == stripe_req->parity_chunk) {
		if (io_info->parity_buf == NULL) {
			goto submit;
		}
		dest_buf = io_info->parity_buf + stripe_idx_off * raid_bdev->strip_size_kb * 1024;
	} else {
		data_chunk_idx = chunk < stripe_req->parity_chunk ? chunk->index : chunk->index - 1;
		dest_buf = test_raid_bdev_io->buf +
			   (stripe_idx_off * io_info->r5f_info->stripe_blocks +
			    data_chunk_idx * raid_bdev->strip_size) *
			   raid_bdev->bdev.blocklen;
	}

	memcpy(dest_buf, iov->iov_base, iov->iov_len);

submit:
	submit_io(test_raid_bdev_io->io_info, desc, cb, cb_arg);

	return 0;
@@ -382,6 +392,14 @@ spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
	return 0;
}

/*
 * XOR size bytes of b into a, in place: a[i] ^= b[i] for every i below size.
 * Bytes are processed from the highest index down to 0; b is only read.
 */
static void
xor_block(uint8_t *a, uint8_t *b, size_t size)
{
	size_t pos;

	for (pos = size; pos > 0; pos--) {
		a[pos - 1] ^= b[pos - 1];
	}
}

static void
test_raid5f_write_request(struct raid_io_info *io_info)
{
@@ -394,6 +412,11 @@ test_raid5f_write_request(struct raid_io_info *io_info)
	raid5f_submit_rw_request(raid_io);

	process_io_completions(io_info);

	if (io_info->status == SPDK_BDEV_IO_STATUS_SUCCESS && io_info->parity_buf) {
		CU_ASSERT(memcmp(io_info->parity_buf, io_info->reference_parity,
				 io_info->parity_buf_size) == 0);
	}
}

static void
@@ -424,6 +447,8 @@ deinit_io_info(struct raid_io_info *io_info)
{
	free(io_info->src_buf);
	free(io_info->dest_buf);
	free(io_info->parity_buf);
	free(io_info->reference_parity);
}

static void
@@ -463,6 +488,35 @@ init_io_info(struct raid_io_info *io_info, struct raid5f_info *r5f_info,
	TAILQ_INIT(&io_info->bdev_io_queue);
}

/*
 * Prepare parity verification for a write test.
 *
 * Allocates two zeroed buffers holding one strip per stripe covered by the I/O:
 * parity_buf, into which the spdk_bdev_writev_blocks() mock copies the parity
 * chunks it receives, and reference_parity, which is filled here by XOR-ing the
 * data strips of each stripe taken from src_buf.
 */
static void
io_info_setup_parity(struct raid_io_info *io_info)
{
	struct raid5f_info *r5f_info = io_info->r5f_info;
	struct raid_bdev *raid_bdev = r5f_info->raid_bdev;
	uint32_t blocklen = raid_bdev->bdev.blocklen;
	uint64_t num_stripes = io_info->num_blocks / r5f_info->stripe_blocks;
	size_t strip_len = raid_bdev->strip_size * blocklen;
	uint8_t *data = io_info->src_buf;
	uint8_t *parity;
	unsigned stripe, strip;

	io_info->parity_buf_size = num_stripes * strip_len;

	io_info->parity_buf = calloc(1, io_info->parity_buf_size);
	SPDK_CU_ASSERT_FATAL(io_info->parity_buf != NULL);

	io_info->reference_parity = calloc(1, io_info->parity_buf_size);
	SPDK_CU_ASSERT_FATAL(io_info->reference_parity != NULL);

	/* The reference starts zeroed, so XOR-ing every data strip in yields the parity */
	parity = io_info->reference_parity;
	for (stripe = 0; stripe < num_stripes; stripe++, parity += strip_len) {
		for (strip = 0; strip < raid5f_stripe_data_chunks_num(raid_bdev); strip++) {
			xor_block(parity, data, strip_len);
			data += strip_len;
		}
	}
}

static void
test_raid5f_submit_rw_request(struct raid5f_info *r5f_info, struct raid_bdev_io_channel *raid_ch,
			      enum spdk_bdev_io_type io_type, uint64_t stripe_index, uint64_t stripe_offset_blocks,
@@ -478,6 +532,7 @@ test_raid5f_submit_rw_request(struct raid5f_info *r5f_info, struct raid_bdev_io_
		test_raid5f_read_request(&io_info);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		io_info_setup_parity(&io_info);
		test_raid5f_write_request(&io_info);
		break;
	default: