Commit 5425ac4b authored by Artur Paszkiewicz's avatar Artur Paszkiewicz Committed by Tomasz Zawadzki
Browse files

raid: superblock interleaved md support



Correctly write and read superblock from bdevs with interleaved
metadata. For writes, use a second buffer and copy the sb contents there
before writing, leaving "gaps" for md. For reads, rewrite the buffer,
removing the md, before parsing the superblock.

Signed-off-by: default avatarArtur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: Id133bb34abecdc82d89604abfa3e3ba7b777961f
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/21680


Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
parent 0a5e4b49
Loading
Loading
Loading
Loading
+47 −7
Original line number Diff line number Diff line
@@ -36,6 +36,8 @@ raid_bdev_alloc_superblock(struct raid_bdev *raid_bdev, uint32_t block_size)
{
	struct raid_bdev_superblock *sb;

	assert(raid_bdev->sb == NULL);

	sb = spdk_dma_zmalloc(SPDK_ALIGN_CEIL(RAID_BDEV_SB_MAX_LENGTH, block_size), 0x1000, NULL);
	if (!sb) {
		SPDK_ERRLOG("Failed to allocate raid bdev sb buffer\n");
@@ -50,6 +52,11 @@ raid_bdev_alloc_superblock(struct raid_bdev *raid_bdev, uint32_t block_size)
void
raid_bdev_free_superblock(struct raid_bdev *raid_bdev)
{
	if (raid_bdev->sb_io_buf != NULL && raid_bdev->sb_io_buf != raid_bdev->sb) {
		assert(spdk_bdev_is_md_interleaved(&raid_bdev->bdev));
		spdk_dma_free(raid_bdev->sb_io_buf);
		raid_bdev->sb_io_buf = NULL;
	}
	spdk_dma_free(raid_bdev->sb);
	raid_bdev->sb = NULL;
}
@@ -67,7 +74,7 @@ raid_bdev_init_superblock(struct raid_bdev *raid_bdev)
	spdk_uuid_copy(&sb->uuid, &raid_bdev->bdev.uuid);
	snprintf(sb->name, RAID_BDEV_SB_NAME_SIZE, "%s", raid_bdev->bdev.name);
	sb->raid_size = raid_bdev->bdev.blockcnt;
	sb->block_size = raid_bdev->bdev.blocklen;
	sb->block_size = spdk_bdev_get_data_block_size(&raid_bdev->bdev);
	sb->level = raid_bdev->level;
	sb->strip_size = raid_bdev->strip_size;
	/* TODO: sb->state */
@@ -90,8 +97,18 @@ raid_bdev_alloc_sb_io_buf(struct raid_bdev *raid_bdev)
{
	struct raid_bdev_superblock *sb = raid_bdev->sb;

	if (spdk_bdev_is_md_interleaved(&raid_bdev->bdev)) {
		raid_bdev->sb_io_buf_size = spdk_divide_round_up(sb->length,
					    sb->block_size) * raid_bdev->bdev.blocklen;
		raid_bdev->sb_io_buf = spdk_dma_zmalloc(raid_bdev->sb_io_buf_size, 0x1000, NULL);
		if (!raid_bdev->sb_io_buf) {
			SPDK_ERRLOG("Failed to allocate raid bdev sb io buffer\n");
			return -ENOMEM;
		}
	} else {
		raid_bdev->sb_io_buf_size = SPDK_ALIGN_CEIL(sb->length, raid_bdev->bdev.blocklen);
		raid_bdev->sb_io_buf = raid_bdev->sb;
	}

	return 0;
}
@@ -126,7 +143,8 @@ raid_bdev_parse_superblock(struct raid_bdev_read_sb_ctx *ctx)
		return -EINVAL;
	}

	if (sb->length > ctx->buf_size) {
	if (spdk_divide_round_up(sb->length, spdk_bdev_get_data_block_size(bdev)) >
	    spdk_divide_round_up(ctx->buf_size, bdev->blocklen)) {
		if (sb->length > RAID_BDEV_SB_MAX_LENGTH) {
			SPDK_WARNLOG("Incorrect superblock length on bdev %s\n",
				     spdk_bdev_get_name(bdev));
@@ -175,8 +193,8 @@ raid_bdev_read_sb_remainder(struct raid_bdev_read_sb_ctx *ctx)
	int rc;

	buf_size_prev = ctx->buf_size;
	ctx->buf_size = spdk_divide_round_up(sb->length,
					     spdk_bdev_get_block_size(bdev)) * spdk_bdev_get_block_size(bdev);
	ctx->buf_size = spdk_divide_round_up(spdk_min(sb->length, RAID_BDEV_SB_MAX_LENGTH),
					     spdk_bdev_get_data_block_size(bdev)) * bdev->blocklen;
	buf = spdk_dma_realloc(ctx->buf, ctx->buf_size, spdk_bdev_get_buf_align(bdev), NULL);
	if (buf == NULL) {
		SPDK_ERRLOG("Failed to reallocate buffer\n");
@@ -198,10 +216,22 @@ raid_bdev_read_sb_remainder(struct raid_bdev_read_sb_ctx *ctx)
static void
raid_bdev_read_sb_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct raid_bdev_read_sb_ctx *ctx = cb_arg;
	struct raid_bdev_superblock *sb = NULL;
	int status;

	if (spdk_bdev_is_md_interleaved(bdev_io->bdev) && ctx->buf_size > bdev->blocklen) {
		const uint32_t data_block_size = spdk_bdev_get_data_block_size(bdev);
		uint32_t i;

		for (i = 1; i < ctx->buf_size / bdev->blocklen; i++) {
			memmove(ctx->buf + (i * data_block_size),
				ctx->buf + (i * bdev->blocklen),
				data_block_size);
		}
	}

	spdk_bdev_free_io(bdev_io);

	if (!success) {
@@ -247,7 +277,7 @@ raid_bdev_load_base_bdev_superblock(struct spdk_bdev_desc *desc, struct spdk_io_
	ctx->cb = cb;
	ctx->cb_ctx = cb_ctx;
	ctx->buf_size = spdk_divide_round_up(sizeof(struct raid_bdev_superblock),
					     spdk_bdev_get_block_size(bdev)) * spdk_bdev_get_block_size(bdev);
					     spdk_bdev_get_data_block_size(bdev)) * bdev->blocklen;
	ctx->buf = spdk_dma_malloc(ctx->buf_size, spdk_bdev_get_buf_align(bdev), NULL);
	if (!ctx->buf) {
		rc = -ENOMEM;
@@ -370,6 +400,16 @@ raid_bdev_write_superblock(struct raid_bdev *raid_bdev, raid_bdev_write_sb_cb cb
	sb->seq_number++;
	raid_bdev_sb_update_crc(sb);

	if (spdk_bdev_is_md_interleaved(&raid_bdev->bdev)) {
		void *sb_buf = sb;
		uint32_t i;

		for (i = 0; i < raid_bdev->sb_io_buf_size / raid_bdev->bdev.blocklen; i++) {
			memcpy(raid_bdev->sb_io_buf + (i * raid_bdev->bdev.blocklen),
			       sb_buf + (i * sb->block_size), sb->block_size);
		}
	}

	_raid_bdev_write_superblock(ctx);
	return;
err:
+131 −27
Original line number Diff line number Diff line
@@ -12,10 +12,7 @@
#include "bdev/raid/bdev_raid_sb.c"

#define TEST_BUF_ALIGN	64
#define TEST_BLOCK_SIZE	512

DEFINE_STUB(spdk_bdev_desc_get_bdev, struct spdk_bdev *, (struct spdk_bdev_desc *desc), NULL);
DEFINE_STUB(spdk_bdev_get_block_size, uint32_t, (const struct spdk_bdev *bdev), TEST_BLOCK_SIZE);
DEFINE_STUB(spdk_bdev_queue_io_wait, int, (struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		struct spdk_bdev_io_wait_entry *entry), 0);
DEFINE_STUB(spdk_bdev_get_name, const char *, (const struct spdk_bdev *bdev), "test_bdev");
@@ -24,14 +21,20 @@ DEFINE_STUB(spdk_bdev_get_buf_align, size_t, (const struct spdk_bdev *bdev), TES
void *g_buf;
TAILQ_HEAD(, spdk_bdev_io) g_bdev_io_queue = TAILQ_HEAD_INITIALIZER(g_bdev_io_queue);
int g_read_counter;
int g_write_counter;
struct spdk_bdev g_bdev;
struct spdk_bdev_io g_bdev_io = {
	.bdev = &g_bdev,
};

static int
test_setup(void)
_test_setup(uint32_t blocklen, uint32_t md_len)
{
	g_bdev.blocklen = TEST_BLOCK_SIZE;
	g_bdev.blocklen = blocklen;
	g_bdev.md_len = md_len;

	g_buf = spdk_dma_zmalloc(RAID_BDEV_SB_MAX_LENGTH, TEST_BUF_ALIGN, NULL);
	g_buf = spdk_dma_zmalloc(SPDK_ALIGN_CEIL(RAID_BDEV_SB_MAX_LENGTH,
				 spdk_bdev_get_data_block_size(&g_bdev)), TEST_BUF_ALIGN, NULL);
	if (!g_buf) {
		return -ENOMEM;
	}
@@ -39,6 +42,24 @@ test_setup(void)
	return 0;
}

static int
test_setup(void)
{
	return _test_setup(512, 0);
}

static int
test_setup_md(void)
{
	return _test_setup(512, 8);
}

static int
test_setup_md_interleaved(void)
{
	return _test_setup(512 + 8, 8);
}

static int
test_cleanup(void)
{
@@ -47,6 +68,24 @@ test_cleanup(void)
	return 0;
}

bool
spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev)
{
	return spdk_u32_is_pow2(bdev->blocklen) == false;
}

uint32_t
spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev)
{
	return spdk_bdev_is_md_interleaved(bdev) ? bdev->blocklen - bdev->md_len : bdev->blocklen;
}

struct spdk_bdev *
spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
{
	return &g_bdev;
}

const struct spdk_uuid *
spdk_bdev_get_uuid(const struct spdk_bdev *bdev)
{
@@ -56,17 +95,34 @@ spdk_bdev_get_uuid(const struct spdk_bdev *bdev)
void
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
	if (bdev_io != &g_bdev_io) {
		free(bdev_io);
	}
}

int
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
	       void *buf, uint64_t offset, uint64_t nbytes,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct spdk_bdev_io *bdev_io = &g_bdev_io;
	uint64_t offset_blocks = offset / bdev->blocklen;
	uint32_t data_block_size = spdk_bdev_get_data_block_size(bdev);
	void *src = g_buf + offset_blocks * data_block_size;

	g_read_counter++;
	memcpy(buf, g_buf + offset, nbytes);
	cb(NULL, true, cb_arg);

	memset(buf, 0xab, nbytes);

	while (nbytes > 0) {
		memcpy(buf, src, data_block_size);
		src += data_block_size;
		buf += bdev->blocklen;
		nbytes -= bdev->blocklen;
	}

	cb(bdev_io, true, cb_arg);
	return 0;
}

@@ -75,16 +131,28 @@ spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
	struct raid_bdev_superblock *sb = buf;
	struct spdk_bdev_io *bdev_io;
	void *dest = g_buf;
	uint32_t data_block_size = spdk_bdev_get_data_block_size(bdev);

	g_write_counter++;
	CU_ASSERT(offset == 0);
	CU_ASSERT(nbytes / TEST_BLOCK_SIZE == spdk_divide_round_up(sb->length, TEST_BLOCK_SIZE));
	CU_ASSERT(nbytes == spdk_divide_round_up(sb->length, data_block_size) * bdev->blocklen);

	while (nbytes > 0) {
		memcpy(dest, buf, data_block_size);
		dest += data_block_size;
		buf += bdev->blocklen;
		nbytes -= bdev->blocklen;
	}

	bdev_io = calloc(1, sizeof(*bdev_io));
	SPDK_CU_ASSERT_FATAL(bdev_io != NULL);
	bdev_io->internal.cb = cb;
	bdev_io->internal.caller_ctx = cb_arg;
	bdev_io->bdev = bdev;

	TAILQ_INSERT_TAIL(&g_bdev_io_queue, bdev_io, internal.link);

@@ -128,7 +196,6 @@ test_raid_bdev_write_superblock(void)
{
	struct raid_base_bdev_info base_info[3] = {{0}};
	struct raid_bdev raid_bdev = {
		.sb = g_buf,
		.num_base_bdevs = SPDK_COUNTOF(base_info),
		.base_bdev_info = base_info,
		.bdev = g_bdev,
@@ -136,17 +203,46 @@ test_raid_bdev_write_superblock(void)
	int status;
	uint8_t i;

	for (i = 1; i < SPDK_COUNTOF(base_info); i++) {
	for (i = 0; i < SPDK_COUNTOF(base_info); i++) {
		base_info[i].raid_bdev = &raid_bdev;
		if (i > 0) {
			base_info[i].desc = (void *)0x1;
		}
	}

	status = raid_bdev_alloc_superblock(&raid_bdev, spdk_bdev_get_data_block_size(&raid_bdev.bdev));
	CU_ASSERT(status == 0);

	/* test initial sb write */
	raid_bdev_init_superblock(&raid_bdev);

	prepare_sb(raid_bdev.sb);
	status = INT_MAX;
	g_write_counter = 0;
	raid_bdev_write_superblock(&raid_bdev, write_sb_cb, &status);
	CU_ASSERT(g_write_counter == raid_bdev.num_base_bdevs - 1);
	CU_ASSERT(TAILQ_EMPTY(&g_bdev_io_queue) == false);
	process_io_completions();
	CU_ASSERT(status == 0);
	CU_ASSERT(memcmp(raid_bdev.sb, g_buf, raid_bdev.sb->length) == 0);

	/* test max size sb write */
	raid_bdev.sb->length = RAID_BDEV_SB_MAX_LENGTH;
	if (spdk_bdev_is_md_interleaved(&raid_bdev.bdev)) {
		SPDK_CU_ASSERT_FATAL(raid_bdev.sb_io_buf != raid_bdev.sb);
		spdk_dma_free(raid_bdev.sb_io_buf);
	}
	raid_bdev.sb_io_buf = NULL;

	status = INT_MAX;
	g_write_counter = 0;
	raid_bdev_write_superblock(&raid_bdev, write_sb_cb, &status);
	CU_ASSERT(g_write_counter == raid_bdev.num_base_bdevs - 1);
	CU_ASSERT(TAILQ_EMPTY(&g_bdev_io_queue) == false);
	process_io_completions();
	CU_ASSERT(status == 0);
	CU_ASSERT(memcmp(raid_bdev.sb, g_buf, raid_bdev.sb->length) == 0);

	raid_bdev_free_superblock(&raid_bdev);
}

static void
@@ -164,6 +260,7 @@ load_sb_cb(const struct raid_bdev_superblock *sb, int status, void *ctx)
static void
test_raid_bdev_load_base_bdev_superblock(void)
{
	const uint32_t data_block_size = spdk_bdev_get_data_block_size(&g_bdev);
	struct raid_bdev_superblock *sb = g_buf;
	int rc;
	int status;
@@ -192,7 +289,8 @@ test_raid_bdev_load_base_bdev_superblock(void)

	/* make the sb longer than 1 bdev block - expect 2 reads */
	prepare_sb(sb);
	sb->length = TEST_BLOCK_SIZE * 3;
	sb->length = data_block_size * 3;
	memset(sb->base_bdevs, 0xef, sb->length - offsetof(struct raid_bdev_superblock, base_bdevs));
	raid_bdev_sb_update_crc(sb);

	g_read_counter = 0;
@@ -204,7 +302,7 @@ test_raid_bdev_load_base_bdev_superblock(void)

	/* corrupted sb contents, length > 1 bdev block - expect 2 reads */
	prepare_sb(sb);
	sb->length = TEST_BLOCK_SIZE * 3;
	sb->length = data_block_size * 3;
	raid_bdev_sb_update_crc(sb);
	sb->reserved[0] = 0xff;

@@ -218,7 +316,7 @@ test_raid_bdev_load_base_bdev_superblock(void)
	/* invalid signature, length > 1 bdev block - expect 1 read */
	prepare_sb(sb);
	sb->signature[3] = 'Z';
	sb->length = TEST_BLOCK_SIZE * 3;
	sb->length = data_block_size * 3;
	raid_bdev_sb_update_crc(sb);

	g_read_counter = 0;
@@ -235,7 +333,7 @@ test_raid_bdev_parse_superblock(void)
	struct raid_bdev_superblock *sb = g_buf;
	struct raid_bdev_read_sb_ctx ctx = {
		.buf = g_buf,
		.buf_size = TEST_BLOCK_SIZE,
		.buf_size = g_bdev.blocklen,
	};

	/* valid superblock */
@@ -266,26 +364,32 @@ test_raid_bdev_parse_superblock(void)

	/* sb longer than 1 bdev block */
	prepare_sb(sb);
	sb->length = TEST_BLOCK_SIZE * 3;
	sb->length = spdk_bdev_get_data_block_size(&g_bdev) * 3;
	raid_bdev_sb_update_crc(sb);
	CU_ASSERT(raid_bdev_parse_superblock(&ctx) == -EAGAIN);
	ctx.buf_size = sb->length;
	ctx.buf_size = g_bdev.blocklen * 3;
	CU_ASSERT(raid_bdev_parse_superblock(&ctx) == 0);
}

int
main(int argc, char **argv)
{
	CU_pSuite suite = NULL;
	unsigned int num_failures;
	CU_TestInfo tests[] = {
		{ "test_raid_bdev_write_superblock", test_raid_bdev_write_superblock },
		{ "test_raid_bdev_load_base_bdev_superblock", test_raid_bdev_load_base_bdev_superblock },
		{ "test_raid_bdev_parse_superblock", test_raid_bdev_parse_superblock },
		CU_TEST_INFO_NULL,
	};
	CU_SuiteInfo suites[] = {
		{ "raid_sb", test_setup, test_cleanup, NULL, NULL, tests },
		{ "raid_sb_md", test_setup_md, test_cleanup, NULL, NULL, tests },
		{ "raid_sb_md_interleaved", test_setup_md_interleaved, test_cleanup, NULL, NULL, tests },
		CU_SUITE_INFO_NULL,
	};

	CU_initialize_registry();

	suite = CU_add_suite("raid_sb", test_setup, test_cleanup);
	CU_ADD_TEST(suite, test_raid_bdev_write_superblock);
	CU_ADD_TEST(suite, test_raid_bdev_load_base_bdev_superblock);
	CU_ADD_TEST(suite, test_raid_bdev_parse_superblock);

	CU_register_suites(suites);
	num_failures = spdk_ut_run_tests(argc, argv, NULL);
	CU_cleanup_registry();
	return num_failures;