Commit deed7d2f authored by Artur Paszkiewicz, committed by Tomasz Zawadzki
Browse files

module/raid: data offset and data size implementation



When a raid bdev is created with the superblock parameter, all data on
its base bdevs is shifted by an offset. The space this leaves at the
beginning of each base bdev will be used to store on-disk raid metadata.

Change-Id: I2545a2b00a651ef5332ca1757da0110a63914a43
Signed-off-by: Krzysztof Smolinski <krzysztof.smolinski@intel.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15246


Reviewed-by: Jim Harris <jim.harris@samsung.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent 614ca6d2
Loading
Loading
Loading
Loading
+32 −2
Original line number Diff line number Diff line
@@ -606,6 +606,7 @@ raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ct
	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
	spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state));
	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
	spdk_json_write_named_bool(w, "superblock", raid_bdev->superblock_enabled);
	spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
	spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
	spdk_json_write_name(w, "base_bdevs_list");
@@ -663,6 +664,11 @@ raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *

	assert(spdk_get_thread() == spdk_thread_get_app_thread());

	if (raid_bdev->superblock_enabled) {
		/* raid bdev configuration is stored in the superblock */
		return;
	}

	spdk_json_write_object_begin(w);

	spdk_json_write_named_string(w, "method", "bdev_raid_create");
@@ -673,6 +679,7 @@ raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *
	spdk_json_write_named_string(w, "uuid", uuid_str);
	spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
	spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
	spdk_json_write_named_bool(w, "superblock", raid_bdev->superblock_enabled);

	spdk_json_write_named_array_begin(w, "base_bdevs");
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
@@ -937,15 +944,17 @@ raid_bdev_init(void)
 * strip_size - strip size in KB
 * num_base_bdevs - number of base bdevs
 * level - raid level
 * raid_bdev_out - the created raid bdev
 * superblock_enabled - true if raid should have superblock
 * uuid - uuid to set for the bdev
 * raid_bdev_out - the created raid bdev
 * returns:
 * 0 - success
 * non zero - failure
 */
int
raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
		 enum raid_level level, struct raid_bdev **raid_bdev_out, const struct spdk_uuid *uuid)
		 enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid,
		 struct raid_bdev **raid_bdev_out)
{
	struct raid_bdev *raid_bdev;
	struct spdk_bdev *raid_bdev_gen;
@@ -1039,6 +1048,7 @@ raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
	raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
	raid_bdev->level = level;
	raid_bdev->min_base_bdevs_operational = min_operational;
	raid_bdev->superblock_enabled = superblock_enabled;

	raid_bdev_gen = &raid_bdev->bdev;

@@ -1547,9 +1557,29 @@ raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info)

	base_info->desc = desc;
	base_info->blockcnt = bdev->blockcnt;
	base_info->data_offset = 0;
	base_info->data_size = bdev->blockcnt;
	raid_bdev->num_base_bdevs_discovered++;
	assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);

	if (raid_bdev->superblock_enabled) {
		assert((RAID_BDEV_MIN_DATA_OFFSET_SIZE % bdev->blocklen) == 0);
		base_info->data_offset = RAID_BDEV_MIN_DATA_OFFSET_SIZE / bdev->blocklen;

		if (bdev->optimal_io_boundary) {
			base_info->data_offset = spdk_divide_round_up(base_info->data_offset,
						 bdev->optimal_io_boundary) * bdev->optimal_io_boundary;
		}

		if (base_info->data_offset >= bdev->blockcnt) {
			SPDK_ERRLOG("Data offset %lu exceeds base bdev capacity %lu on bdev '%s'\n",
				    base_info->data_offset, bdev->blockcnt, base_info->name);
			return -EINVAL;
		}

		base_info->data_size = bdev->blockcnt - base_info->data_offset;
	}

	if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs) {
		rc = raid_bdev_configure(raid_bdev);
		if (rc != 0) {
+21 −7
Original line number Diff line number Diff line
@@ -9,6 +9,8 @@
#include "spdk/bdev_module.h"
#include "spdk/uuid.h"

/*
 * Minimum size, in bytes, of the region reserved at the start of each base bdev
 * when the raid bdev has its superblock enabled. The per-base-bdev data offset is
 * this value converted to blocks and, if the base bdev reports an optimal I/O
 * boundary, rounded up to a multiple of that boundary, so the actual reserved
 * region may be larger. The value is expected to be a multiple of the base bdev's
 * block length.
 */
#define RAID_BDEV_MIN_DATA_OFFSET_SIZE	(1024*1024) /* 1 MiB */

enum raid_level {
	INVALID_RAID_LEVEL	= -1,
	RAID0			= 0,
@@ -58,6 +60,12 @@ struct raid_base_bdev_info {
	/* pointer to base bdev descriptor opened by raid bdev */
	struct spdk_bdev_desc	*desc;

	/* offset in blocks from the start of the base bdev to the start of the data region */
	uint64_t		data_offset;

	/* size in blocks of the base bdev's data region */
	uint64_t		data_size;

	/*
	 * When underlying base device calls the hot plug function on drive removal,
	 * this flag will be set and later after doing some processing, base device
@@ -146,6 +154,9 @@ struct raid_bdev {
	/* Set to true if destroy of this raid bdev is started. */
	bool				destroy_started;

	/* Set to true if superblock metadata is enabled on this raid bdev */
	bool				superblock_enabled;

	/* Module for RAID-level specific operations */
	struct raid_bdev_module		*module;

@@ -179,7 +190,8 @@ extern struct raid_all_tailq g_raid_bdev_list;
typedef void (*raid_bdev_destruct_cb)(void *cb_ctx, int rc);

int raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
		     enum raid_level level, struct raid_bdev **raid_bdev_out, const struct spdk_uuid *uuid);
		     enum raid_level level, bool superblock, const struct spdk_uuid *uuid,
		     struct raid_bdev **raid_bdev_out);
void raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_ctx);
int raid_bdev_add_base_device(struct raid_bdev *raid_bdev, const char *name, uint8_t slot);
struct raid_bdev *raid_bdev_find_by_name(const char *name);
@@ -285,8 +297,8 @@ raid_bdev_readv_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_io
			   uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
			   struct spdk_bdev_ext_io_opts *opts)
{
	return spdk_bdev_readv_blocks_ext(base_info->desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb,
					  cb_arg, opts);
	return spdk_bdev_readv_blocks_ext(base_info->desc, ch, iov, iovcnt,
					  base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts);
}

/**
@@ -298,8 +310,8 @@ raid_bdev_writev_blocks_ext(struct raid_base_bdev_info *base_info, struct spdk_i
			    uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg,
			    struct spdk_bdev_ext_io_opts *opts)
{
	return spdk_bdev_writev_blocks_ext(base_info->desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb,
					   cb_arg, opts);
	return spdk_bdev_writev_blocks_ext(base_info->desc, ch, iov, iovcnt,
					   base_info->data_offset + offset_blocks, num_blocks, cb, cb_arg, opts);
}

/**
@@ -310,7 +322,8 @@ raid_bdev_unmap_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_cha
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	return spdk_bdev_unmap_blocks(base_info->desc, ch, offset_blocks, num_blocks, cb, cb_arg);
	return spdk_bdev_unmap_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks,
				      num_blocks, cb, cb_arg);
}

/**
@@ -321,7 +334,8 @@ raid_bdev_flush_blocks(struct raid_base_bdev_info *base_info, struct spdk_io_cha
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	return spdk_bdev_flush_blocks(base_info->desc, ch, offset_blocks, num_blocks, cb, cb_arg);
	return spdk_bdev_flush_blocks(base_info->desc, ch, base_info->data_offset + offset_blocks,
				      num_blocks, cb, cb_arg);
}

#endif /* SPDK_BDEV_RAID_INTERNAL_H */
+1 −1
Original line number Diff line number Diff line
@@ -229,7 +229,7 @@ rpc_bdev_raid_create(struct spdk_jsonrpc_request *request,
	}

	rc = raid_bdev_create(req.name, req.strip_size_kb, req.base_bdevs.num_base_bdevs,
			      req.level, &raid_bdev, &req.uuid);
			      req.level, req.superblock_enabled, &req.uuid, &raid_bdev);
	if (rc != 0) {
		spdk_jsonrpc_send_error_response_fmt(request, rc,
						     "Failed to create RAID bdev %s: %s",
+3 −2
Original line number Diff line number Diff line
@@ -287,10 +287,11 @@ concat_start(struct raid_bdev *raid_bdev)

	int idx = 0;
	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		uint64_t strip_cnt = spdk_bdev_desc_get_bdev(base_info->desc)->blockcnt >>
				     raid_bdev->strip_size_shift;
		uint64_t strip_cnt = base_info->data_size >> raid_bdev->strip_size_shift;
		uint64_t pd_block_cnt = strip_cnt << raid_bdev->strip_size_shift;

		base_info->data_size = pd_block_cnt;

		block_range[idx].start = total_blockcnt;
		block_range[idx].length = pd_block_cnt;
		total_blockcnt += pd_block_cnt;
+27 −12
Original line number Diff line number Diff line
@@ -335,15 +335,22 @@ raid0_submit_null_payload_request(struct raid_bdev_io *raid_io)
	}
}

static uint64_t
raid0_calculate_blockcnt(struct raid_bdev *raid_bdev)
static int
raid0_start(struct raid_bdev *raid_bdev)
{
	uint64_t min_blockcnt = UINT64_MAX;
	uint64_t base_bdev_data_size;
	struct raid_base_bdev_info *base_info;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		/* Calculate minimum block count from all base bdevs */
		min_blockcnt = spdk_min(min_blockcnt, spdk_bdev_desc_get_bdev(base_info->desc)->blockcnt);
		min_blockcnt = spdk_min(min_blockcnt, base_info->data_size);
	}

	base_bdev_data_size = (min_blockcnt >> raid_bdev->strip_size_shift) << raid_bdev->strip_size_shift;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->data_size = base_bdev_data_size;
	}

	/*
@@ -354,14 +361,7 @@ raid0_calculate_blockcnt(struct raid_bdev *raid_bdev)
	SPDK_DEBUGLOG(bdev_raid0, "min blockcount %" PRIu64 ",  numbasedev %u, strip size shift %u\n",
		      min_blockcnt, raid_bdev->num_base_bdevs, raid_bdev->strip_size_shift);

	return ((min_blockcnt >> raid_bdev->strip_size_shift) <<
		raid_bdev->strip_size_shift)  * raid_bdev->num_base_bdevs;
}

static int
raid0_start(struct raid_bdev *raid_bdev)
{
	raid_bdev->bdev.blockcnt = raid0_calculate_blockcnt(raid_bdev);
	raid_bdev->bdev.blockcnt = base_bdev_data_size * raid_bdev->num_base_bdevs;

	if (raid_bdev->num_base_bdevs > 1) {
		raid_bdev->bdev.optimal_io_boundary = raid_bdev->strip_size;
@@ -380,8 +380,18 @@ raid0_resize(struct raid_bdev *raid_bdev)
{
	uint64_t blockcnt;
	int rc;
	uint64_t min_blockcnt = UINT64_MAX;
	struct raid_base_bdev_info *base_info;
	uint64_t base_bdev_data_size;

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		struct spdk_bdev *base_bdev = spdk_bdev_desc_get_bdev(base_info->desc);

		min_blockcnt = spdk_min(min_blockcnt, base_bdev->blockcnt - base_info->data_offset);
	}

	blockcnt = raid0_calculate_blockcnt(raid_bdev);
	base_bdev_data_size = (min_blockcnt >> raid_bdev->strip_size_shift) << raid_bdev->strip_size_shift;
	blockcnt = base_bdev_data_size * raid_bdev->num_base_bdevs;

	if (blockcnt == raid_bdev->bdev.blockcnt) {
		return;
@@ -395,6 +405,11 @@ raid0_resize(struct raid_bdev *raid_bdev)
	rc = spdk_bdev_notify_blockcnt_change(&raid_bdev->bdev, blockcnt);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to notify blockcount change\n");
		return;
	}

	RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
		base_info->data_size = base_bdev_data_size;
	}
}

Loading