Commit 1666fcfc authored by Damiano Cipriani's avatar Damiano Cipriani Committed by Tomasz Zawadzki
Browse files

blob: add shallow copy over a given device



A shallow copy will copy to the destination device only the
clusters allocated to the blob, discarding those belonging
to the blob's parent snapshot, if any.

Change-Id: I763ba9d952b74bce2d5827abe1fc3f41b8ebd209
Signed-off-by: Damiano Cipriani <damiano.cipriani@suse.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/19247


Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <jim.harris@samsung.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
parent ed25c7b2
Loading
Loading
Loading
Loading
+22 −0
Original line number Diff line number Diff line
@@ -777,6 +777,28 @@ void spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *ch
void spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
				  spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg);

/**
 * Perform a shallow copy of a blob to a blobstore device.
 *
 * This makes a shallow copy from a blob to a blobstore device.
 * Only clusters allocated to the blob will be written on the device.
 * Blob must be read only and blob size must be less than or equal to device size.
 * Blobstore block size must be a multiple of device block size.
 *
 * \param bs Blobstore
 * \param channel IO channel used to copy the blob.
 * \param blobid The id of the blob.
 * \param ext_dev The device to copy on
 * \param cb_fn Called when the operation is complete.
 * \param cb_arg Argument passed to function cb_fn.
 *
 * \return 0 if operation starts correctly, negative errno on failure.
 */
int spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
			      spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
			      spdk_blob_op_complete cb_fn, void *cb_arg);


struct spdk_blob_open_opts {
	enum blob_clear_method  clear_method;

+214 −0
Original line number Diff line number Diff line
@@ -42,6 +42,8 @@ static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool inte
static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
				   struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);

static void bs_shallow_copy_cluster_find_next(void *cb_arg);

/*
 * External snapshots require a channel per thread per esnap bdev.  The tree
 * is populated lazily as blob IOs are handled by the back_bs_dev. When this
@@ -7097,6 +7099,218 @@ spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel
}
/* END spdk_bs_inflate_blob */

/* START spdk_bs_blob_shallow_copy */

/* Context carried through one spdk_bs_blob_shallow_copy() operation. */
struct shallow_copy_ctx {
	/* Caller's completion; invoked from bs_shallow_copy_cleanup_finish() */
	struct spdk_bs_cpl cpl;
	/* Status reported to the caller at the end of the operation */
	int bserrno;

	/* Blob source for copy */
	struct spdk_blob_store *bs;
	spdk_blob_id blobid;
	struct spdk_blob *blob;
	struct spdk_io_channel *blob_channel;

	/* Destination device for copy */
	struct spdk_bs_dev *ext_dev;
	struct spdk_io_channel *ext_channel;

	/* Current cluster for copy operation */
	uint64_t cluster;

	/* Buffer for blob reading; holds one cluster of DMA-able memory */
	uint8_t *read_buff;

	/* Struct for external device writing */
	struct spdk_bs_dev_cb_args ext_args;
};

/*
 * Last step of the shallow copy: release the destination channel and the read
 * buffer, deliver the recorded status to the caller and free the context.
 * Also used directly as the spdk_blob_close() callback on both the success
 * and the error paths, so bserrno here is the close (cleanup) status.
 */
static void
bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
{
	struct shallow_copy_ctx *ctx = cb_arg;
	struct spdk_bs_cpl *cpl = &ctx->cpl;

	if (bserrno != 0) {
		/* A cleanup failure replaces any previously recorded copy error */
		SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blob->id, bserrno);
		ctx->bserrno = bserrno;
	}

	ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
	spdk_free(ctx->read_buff);

	/* Report the overall operation status to the user callback */
	cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);

	free(ctx);
}

/*
 * Completion of the write of one cluster to the destination device.
 * On success, advance to the next allocated cluster; on error, record the
 * status, drop the operation lock and close the blob (cleanup finishes in
 * bs_shallow_copy_cleanup_finish()).
 */
static void
bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
{
	struct shallow_copy_ctx *ctx = cb_arg;
	struct spdk_blob *_blob = ctx->blob;

	if (bserrno != 0) {
		SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
		ctx->bserrno = bserrno;
		_blob->locked_operation_in_progress = false;
		spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
		return;
	}

	ctx->cluster++;

	bs_shallow_copy_cluster_find_next(ctx);
}

/*
 * Completion of the read of one cluster from the blob into ctx->read_buff.
 * On success, write the buffer to the destination device at the cluster's
 * offset; on error, record the status, release the lock and close the blob.
 */
static void
bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
{
	struct shallow_copy_ctx *ctx = cb_arg;
	struct spdk_bs_dev *ext_dev = ctx->ext_dev;
	struct spdk_blob *_blob = ctx->blob;

	if (bserrno != 0) {
		SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
		ctx->bserrno = bserrno;
		_blob->locked_operation_in_progress = false;
		spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
		return;
	}

	ctx->ext_args.channel = ctx->ext_channel;
	ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
	ctx->ext_args.cb_arg = ctx;

	/* NOTE(review): LBA and length are computed in blobstore-device block units
	 * (bs_cluster_to_lba/bs_dev_byte_to_lba on _blob->bs->dev), while the open
	 * check only requires ext_dev->blocklen to divide the blobstore block size —
	 * confirm this is correct when the two block sizes differ. */
	ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
		       bs_cluster_to_lba(_blob->bs, ctx->cluster),
		       bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
		       &ctx->ext_args);
}

/*
 * Advance ctx->cluster to the next cluster actually allocated to the blob
 * (unallocated clusters belong to the parent snapshot and are skipped) and
 * start reading it.  When no allocated cluster remains, the copy is complete:
 * release the operation lock and close the blob.
 */
static void
bs_shallow_copy_cluster_find_next(void *cb_arg)
{
	struct shallow_copy_ctx *ctx = cb_arg;
	struct spdk_blob *_blob = ctx->blob;

	while (ctx->cluster < _blob->active.num_clusters) {
		if (_blob->active.clusters[ctx->cluster] != 0) {
			break;
		}

		ctx->cluster++;
	}

	if (ctx->cluster < _blob->active.num_clusters) {
		/* Read one whole cluster from the blob; continues in bs_shallow_copy_blob_read_cpl() */
		blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
					      bs_cluster_to_lba(_blob->bs, ctx->cluster),
					      bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
					      bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
	} else {
		_blob->locked_operation_in_progress = false;
		spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
	}
}

/*
 * Blob open completion: validate the preconditions (blob is read only, the
 * destination device is large enough, block sizes are compatible, no other
 * locked operation is in progress), then take the operation lock and start
 * copying from cluster 0.  Any failure records the error and funnels into
 * bs_shallow_copy_cleanup_finish() (via spdk_blob_close() once the blob is open).
 */
static void
bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
{
	struct shallow_copy_ctx *ctx = cb_arg;
	struct spdk_bs_dev *ext_dev = ctx->ext_dev;
	uint32_t blob_block_size;
	uint64_t blob_total_size;

	if (bserrno != 0) {
		SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
		ctx->bserrno = bserrno;
		/* No blob to close; pass 0 so the recorded open error is not overwritten */
		bs_shallow_copy_cleanup_finish(ctx, 0);
		return;
	}

	if (!spdk_blob_is_read_only(_blob)) {
		SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
		ctx->bserrno = -EPERM;
		spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
		return;
	}

	blob_block_size = _blob->bs->dev->blocklen;
	blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);

	/* The destination must hold the blob's full logical size */
	if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
		SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must have at least blob size\n",
			    _blob->id);
		ctx->bserrno = -EINVAL;
		spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
		return;
	}

	/* Blobstore block size must be a multiple of the destination block size */
	if (blob_block_size % ext_dev->blocklen != 0) {
		SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible with \
blobstore block size\n", _blob->id);
		ctx->bserrno = -EINVAL;
		spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
		return;
	}

	ctx->blob = _blob;

	if (_blob->locked_operation_in_progress) {
		SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
		ctx->bserrno = -EBUSY;
		spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
		return;
	}

	_blob->locked_operation_in_progress = true;

	ctx->cluster = 0;
	bs_shallow_copy_cluster_find_next(ctx);
}

/*
 * Start a shallow copy of the blob identified by blobid onto ext_dev.
 *
 * Allocates the operation context and a one-cluster DMA-able read buffer,
 * creates an IO channel on the destination device and opens the blob; the
 * operation then continues asynchronously in bs_shallow_copy_blob_open_cpl().
 *
 * Returns 0 if the operation was started (cb_fn will be called with the final
 * status), or a negative errno if it could not be started (cb_fn not called).
 */
int
spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
			  spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
			  spdk_blob_op_complete cb_fn, void *cb_arg)
{
	struct shallow_copy_ctx *ctx;
	struct spdk_io_channel *ext_channel;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		return -ENOMEM;
	}

	ctx->bs = bs;
	ctx->blobid = blobid;
	ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
	/* cb_fn is a spdk_blob_op_complete and bs_shallow_copy_cleanup_finish()
	 * reads the completion back through u.blob_basic, so store it in the
	 * union member matching SPDK_BS_CPL_TYPE_BLOB_BASIC (was u.bs_basic). */
	ctx->cpl.u.blob_basic.cb_fn = cb_fn;
	ctx->cpl.u.blob_basic.cb_arg = cb_arg;
	ctx->bserrno = 0;
	ctx->blob_channel = channel;
	/* One cluster of DMA-able memory, aligned to the blobstore device block size */
	ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
				     SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (!ctx->read_buff) {
		free(ctx);
		return -ENOMEM;
	}

	ext_channel = ext_dev->create_channel(ext_dev);
	if (!ext_channel) {
		spdk_free(ctx->read_buff);
		free(ctx);
		return -ENOMEM;
	}
	ctx->ext_dev = ext_dev;
	ctx->ext_channel = ext_channel;

	spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);

	return 0;
}
/* END spdk_bs_blob_shallow_copy */

/* START spdk_blob_resize */
struct spdk_bs_resize_ctx {
	spdk_blob_op_complete cb_fn;
+1 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@
	spdk_bs_delete_blob;
	spdk_bs_inflate_blob;
	spdk_bs_blob_decouple_parent;
	spdk_bs_blob_shallow_copy;
	spdk_blob_open_opts_init;
	spdk_bs_open_blob;
	spdk_bs_open_blob_ext;
+1 −0
Original line number Diff line number Diff line
@@ -30,6 +30,7 @@ test/common/lib/ut_multithread
test/common/lib/test_rdma
test/unit/lib/bdev/raid/common
test/unit/lib/blob/blob.c/esnap_dev
test/unit/lib/blob/blob.c/ext_dev
test/unit/lib/blob/bs_dev_common
test/unit/lib/blob/bs_scheduler
test/unit/lib/ftl/common/utils
+164 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@
#include "common/lib/ut_multithread.c"
#include "../bs_dev_common.c"
#include "thread/thread.c"
#include "ext_dev.c"
#include "blob/blobstore.c"
#include "blob/request.c"
#include "blob/zeroes.c"
@@ -9164,6 +9165,168 @@ blob_esnap_clone_resize(void)
	memset(g_dev_buffer, 0, DEV_BUFFER_SIZE);
}

/* spdk_bs_dev IO completion callback for the test: latch the status into g_bserrno. */
static void
bs_dev_io_complete_cb(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
{
	g_bserrno = bserrno;
}

/*
 * Unit test for spdk_bs_blob_shallow_copy(): create a thin-provisioned blob,
 * allocate clusters 2 and 4, snapshot it, then allocate clusters 1 and 3 so
 * that only those two belong to the blob itself.  Verify the error paths
 * (writable blob, too-small device, incompatible block size) and then that a
 * successful copy writes exactly the blob-owned clusters to the destination.
 */
static void
blob_shallow_copy(void)
{
	struct spdk_blob_store *bs = g_bs;
	struct spdk_blob_opts blob_opts;
	struct spdk_blob *blob;
	spdk_blob_id blobid;
	uint64_t num_clusters = 4;
	struct spdk_bs_dev *ext_dev;
	struct spdk_bs_dev_cb_args ext_args;
	struct spdk_io_channel *bdev_ch, *blob_ch;
	uint8_t buf1[DEV_BUFFER_BLOCKLEN];
	uint8_t buf2[DEV_BUFFER_BLOCKLEN];
	uint64_t io_units_per_cluster;
	uint64_t offset;
	int rc;

	blob_ch = spdk_bs_alloc_io_channel(bs);
	SPDK_CU_ASSERT_FATAL(blob_ch != NULL);

	/* Set blob dimension and as thin provisioned */
	ut_spdk_blob_opts_init(&blob_opts);
	blob_opts.thin_provision = true;
	blob_opts.num_clusters = num_clusters;

	/* Create a blob */
	blob = ut_blob_create_and_open(bs, &blob_opts);
	SPDK_CU_ASSERT_FATAL(blob != NULL);
	blobid = spdk_blob_get_id(blob);
	io_units_per_cluster = bs_io_units_per_cluster(blob);

	/* Write on cluster 2 and 4 of blob */
	/* Each io unit is filled with its own offset value, giving distinct per-unit patterns */
	for (offset = io_units_per_cluster; offset < 2 * io_units_per_cluster; offset++) {
		memset(buf1, offset, DEV_BUFFER_BLOCKLEN);
		spdk_blob_io_write(blob, blob_ch, buf1, offset, 1, blob_op_complete, NULL);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
	}
	for (offset = 3 * io_units_per_cluster; offset < 4 * io_units_per_cluster; offset++) {
		memset(buf1, offset, DEV_BUFFER_BLOCKLEN);
		spdk_blob_io_write(blob, blob_ch, buf1, offset, 1, blob_op_complete, NULL);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
	}

	/* Make a snapshot over blob */
	/* After this, clusters 2 and 4 belong to the snapshot, not to the blob */
	spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);

	/* Write on cluster 1 and 3 of blob */
	for (offset = 0; offset < io_units_per_cluster; offset++) {
		memset(buf1, offset, DEV_BUFFER_BLOCKLEN);
		spdk_blob_io_write(blob, blob_ch, buf1, offset, 1, blob_op_complete, NULL);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
	}
	for (offset = 2 * io_units_per_cluster; offset < 3 * io_units_per_cluster; offset++) {
		memset(buf1, offset, DEV_BUFFER_BLOCKLEN);
		spdk_blob_io_write(blob, blob_ch, buf1, offset, 1, blob_op_complete, NULL);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
	}

	/* Shallow copy with a not read only blob */
	/* Must fail with -EPERM since the blob is still writable */
	ext_dev = init_ext_dev(num_clusters * 1024 * 1024, DEV_BUFFER_BLOCKLEN);
	rc = spdk_bs_blob_shallow_copy(bs, blob_ch, blobid, ext_dev,
				       blob_op_complete, NULL);
	CU_ASSERT(rc == 0);
	poll_threads();
	CU_ASSERT(g_bserrno == -EPERM);
	ext_dev->destroy(ext_dev);

	/* Set blob read only */
	spdk_blob_set_read_only(blob);
	spdk_blob_sync_md(blob, blob_op_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);

	/* Shallow copy over a spdk_bs_dev with incorrect size */
	ext_dev = init_ext_dev(1, DEV_BUFFER_BLOCKLEN);
	rc = spdk_bs_blob_shallow_copy(bs, blob_ch, blobid, ext_dev,
				       blob_op_complete, NULL);
	CU_ASSERT(rc == 0);
	poll_threads();
	CU_ASSERT(g_bserrno == -EINVAL);
	ext_dev->destroy(ext_dev);

	/* Shallow copy over a spdk_bs_dev with incorrect block len */
	ext_dev = init_ext_dev(num_clusters * 1024 * 1024, DEV_BUFFER_BLOCKLEN * 2);
	rc = spdk_bs_blob_shallow_copy(bs, blob_ch, blobid, ext_dev,
				       blob_op_complete, NULL);
	CU_ASSERT(rc == 0);
	poll_threads();
	CU_ASSERT(g_bserrno == -EINVAL);
	ext_dev->destroy(ext_dev);

	/* Initialize ext_dev for the successful shallow copy */
	/* Pre-fill the whole device with 0xff so untouched io units are detectable */
	ext_dev = init_ext_dev(num_clusters * 1024 * 1024, DEV_BUFFER_BLOCKLEN);
	bdev_ch = ext_dev->create_channel(ext_dev);
	SPDK_CU_ASSERT_FATAL(bdev_ch != NULL);
	ext_args.cb_fn = bs_dev_io_complete_cb;
	for (offset = 0; offset < 4 * io_units_per_cluster; offset++) {
		memset(buf2, 0xff, DEV_BUFFER_BLOCKLEN);
		ext_dev->write(ext_dev, bdev_ch, buf2, offset, 1, &ext_args);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
	}

	/* Correct shallow copy of blob over bdev */
	rc = spdk_bs_blob_shallow_copy(bs, blob_ch, blobid, ext_dev,
				       blob_op_complete, NULL);
	CU_ASSERT(rc == 0);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);

	/* Read from bdev */
	/* Only cluster 1 and 3 must be filled */
	/* Clusters 2 and 4 should not have been touched */
	for (offset = 0; offset < io_units_per_cluster; offset++) {
		memset(buf1, offset, DEV_BUFFER_BLOCKLEN);
		ext_dev->read(ext_dev, bdev_ch, buf2, offset, 1, &ext_args);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
		CU_ASSERT(memcmp(buf1, buf2, DEV_BUFFER_BLOCKLEN) == 0);
	}
	for (offset = io_units_per_cluster; offset < 2 * io_units_per_cluster; offset++) {
		memset(buf1, 0xff, DEV_BUFFER_BLOCKLEN);
		ext_dev->read(ext_dev, bdev_ch, buf2, offset, 1, &ext_args);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
		CU_ASSERT(memcmp(buf1, buf2, DEV_BUFFER_BLOCKLEN) == 0);
	}
	for (offset = 2 * io_units_per_cluster; offset < 3 * io_units_per_cluster; offset++) {
		memset(buf1, offset, DEV_BUFFER_BLOCKLEN);
		ext_dev->read(ext_dev, bdev_ch, buf2, offset, 1, &ext_args);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
		CU_ASSERT(memcmp(buf1, buf2, DEV_BUFFER_BLOCKLEN) == 0);
	}
	for (offset = 3 * io_units_per_cluster; offset < 4 * io_units_per_cluster; offset++) {
		memset(buf1, 0xff, DEV_BUFFER_BLOCKLEN);
		ext_dev->read(ext_dev, bdev_ch, buf2, offset, 1, &ext_args);
		poll_threads();
		CU_ASSERT(g_bserrno == 0);
		CU_ASSERT(memcmp(buf1, buf2, DEV_BUFFER_BLOCKLEN) == 0);
	}

	/* Clean up */
	ext_dev->destroy_channel(ext_dev, bdev_ch);
	ext_dev->destroy(ext_dev);
	spdk_bs_free_io_channel(blob_ch);
	ut_blob_close_and_delete(bs, blob);
	poll_threads();
}

static void
suite_bs_setup(void)
{
@@ -9439,6 +9602,7 @@ main(int argc, char **argv)
		CU_ADD_TEST(suite_blob, blob_is_degraded);
		CU_ADD_TEST(suite_bs, blob_clone_resize);
		CU_ADD_TEST(suite, blob_esnap_clone_resize);
		CU_ADD_TEST(suite_bs, blob_shallow_copy);
	}

	allocate_threads(2);
Loading