Commit 94c43313 authored by Mike Gerdts's avatar Mike Gerdts Committed by Jim Harris
Browse files

blob: snapshots of esnap clones



An esnap clone needs special handling as snapshots are created and
removed. In particular: the following must exist on the blob that
directly references the external snapshot and must be removed from
others:

- Ensure SPDK_BLOB_EXTERNAL_SNAPSHOT invalid flag exists only on the
  esnap clone.
- Ensure BLOB_EXTERNAL_SNAPSHOT_ID internal xattr exists only on the
  esnap clone.
- Clean up any esnap IO channels on a blob that is no longer an esnap
  clone due to snapshot creation or removal.

See the diagrams and description in blob_esnap_clone_snapshot() in
blob_ut.c for details.

Signed-off-by: default avatarMike Gerdts <mgerdts@nvidia.com>
Change-Id: Ie4125d64d5bac9cfa7d6c7cc9a543d72a169f6ee
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11573


Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
parent b47cee6c
Loading
Loading
Loading
Loading
+125 −9
Original line number Diff line number Diff line
@@ -6187,6 +6187,30 @@ bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
	blob2->active.extent_pages = extent_page_temp;
}

/* Copies an internal xattr */
static int
bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
{
	const void	*val = NULL;
	size_t		len;
	int		bserrno;

	bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
	if (bserrno != 0) {
		SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s xattr"
			    BLOB_EXTERNAL_SNAPSHOT_ID " XATTR\n", fromblob->id, name);
		return bserrno;
	}

	bserrno = blob_set_xattr(toblob, name, val, len, true);
	if (bserrno != 0) {
		SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
			    name, toblob->id);
		return bserrno;
	}
	return 0;
}

static void
bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
{
@@ -6196,6 +6220,10 @@ bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)

	if (bserrno != 0) {
		bs_snapshot_swap_cluster_maps(newblob, origblob);
		if (blob_is_esnap_clone(newblob)) {
			bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
			origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
		}
		bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
		return;
	}
@@ -6259,6 +6287,25 @@ bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
		return;
	}

	/* Remove the xattr that references an external snapshot */
	if (blob_is_esnap_clone(origblob)) {
		origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
		bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
		if (bserrno != 0) {
			if (bserrno == -ENOENT) {
				SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
					    " xattr to remove\n", origblob->id);
				assert(false);
			} else {
				/* return cluster map back to original */
				bs_snapshot_swap_cluster_maps(newblob, origblob);
				blob_set_thin_provision(newblob);
				bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
				return;
			}
		}
	}

	bs_blob_list_remove(origblob);
	origblob->parent_id = newblob->id;
	/* set clone blob as thin provisioned */
@@ -6285,6 +6332,12 @@ bs_snapshot_freeze_cpl(void *cb_arg, int rc)

	ctx->frozen = true;

	if (blob_is_esnap_clone(origblob)) {
		/* Clean up any channels associated with the original blob id because future IO will
		 * perform IO using the snapshot blob_id.
		 */
		blob_esnap_destroy_bs_dev_channels(origblob, NULL, NULL);
	}
	if (newblob->back_bs_dev) {
		blob_back_bs_destroy(newblob);
	}
@@ -6295,7 +6348,17 @@ bs_snapshot_freeze_cpl(void *cb_arg, int rc)

	/* inherit parent from original blob if set */
	newblob->parent_id = origblob->parent_id;
	if (origblob->parent_id != SPDK_BLOBID_INVALID) {
	switch (origblob->parent_id) {
	case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
		bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
		if (bserrno != 0) {
			bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
			return;
		}
		break;
	case SPDK_BLOBID_INVALID:
		break;
	default:
		/* Set internal xattr for snapshot id */
		bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
					 &origblob->parent_id, sizeof(spdk_blob_id), true);
@@ -7016,7 +7079,13 @@ delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
	snapshot_entry->clone_count--;
	assert(TAILQ_EMPTY(&snapshot_entry->clones));

	if (ctx->snapshot->parent_id != SPDK_BLOBID_INVALID) {
	switch (ctx->snapshot->parent_id) {
	case SPDK_BLOBID_INVALID:
	case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
		/* No parent snapshot - just remove clone entry */
		free(clone_entry);
		break;
	default:
		/* This snapshot is at the same time a clone of another snapshot - we need to
		 * update parent snapshot (remove current clone, add new one inherited from
		 * the snapshot that is being removed) */
@@ -7030,9 +7099,6 @@ delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
		TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
		TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
		free(snapshot_clone_entry);
	} else {
		/* No parent snapshot - just remove clone entry */
		free(clone_entry);
	}

	/* Restore md_ro flags */
@@ -7091,11 +7157,34 @@ delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
static void
delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
{
	int bserrno;

	/* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
	blob_back_bs_destroy(ctx->clone);

	/* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
	if (ctx->parent_snapshot_entry != NULL) {
	if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
		bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
						 BLOB_EXTERNAL_SNAPSHOT_ID);
		if (bserrno != 0) {
			ctx->bserrno = bserrno;

			/* Restore snapshot to previous state */
			bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
			if (bserrno != 0) {
				delete_snapshot_cleanup_clone(ctx, bserrno);
				return;
			}

			spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
			return;
		}
		ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
		ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
		/* Do not delete the external snapshot along with this snapshot */
		ctx->snapshot->back_bs_dev = NULL;
		ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
	} else if (ctx->parent_snapshot_entry != NULL) {
		/* ...to parent snapshot */
		ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
		ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
@@ -7172,6 +7261,20 @@ delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
	delete_snapshot_update_extent_pages(ctx, 0);
}

static void
delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
{
	struct delete_snapshot_ctx *ctx = cb_arg;

	if (bserrno != 0) {
		SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
			    blob->id, bserrno);
		/* That error should not stop us from syncing metadata. */
	}

	spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
}

static void
delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
{
@@ -7196,6 +7299,13 @@ delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
		return;
	}

	if (blob_is_esnap_clone(ctx->snapshot)) {
		blob_esnap_destroy_bs_dev_channels(ctx->snapshot,
						   delete_snapshot_esnap_channels_destroyed_cb,
						   ctx);
		return;
	}

	spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
}

@@ -8713,7 +8823,9 @@ blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
		      blob->id);

	if (ctx->cb_fn != NULL) {
		ctx->cb_fn(ctx->cb_arg, blob, status);
	}
	free(ctx);
}

@@ -8754,13 +8866,17 @@ blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, spdk_blob_op_with_han
	struct blob_esnap_destroy_ctx	*ctx;

	if (!blob_is_esnap_clone(blob)) {
		if (cb_fn != NULL) {
			cb_fn(cb_arg, blob, 0);
		}
		return;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		if (cb_fn != NULL) {
			cb_fn(cb_arg, blob, -ENOMEM);
		}
		return;
	}
	ctx->cb_fn = cb_fn;
+195 −0
Original line number Diff line number Diff line
@@ -63,6 +63,42 @@ DEFINE_STUB(spdk_memory_domain_memzero, int, (struct spdk_memory_domain *src_dom
		void *src_domain_ctx, struct iovec *iov, uint32_t iovcnt, void (*cpl_cb)(void *, int),
		void *cpl_cb_arg), 0);

static bool
is_esnap_clone(struct spdk_blob *_blob, const void *id, size_t id_len)
{
	const void *val = NULL;
	size_t len = 0;
	bool c0, c1, c2, c3;

	CU_ASSERT(blob_get_xattr_value(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, &val, &len,
				       true) == 0);
	CU_ASSERT((c0 = (len == id_len)));
	CU_ASSERT((c1 = (val != NULL && memcmp(val, id, len) == 0)));
	CU_ASSERT((c2 = !!(_blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT)));
	CU_ASSERT((c3 = (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT)));

	return c0 && c1 && c2 && c3;
}

static bool
is_not_esnap_clone(struct spdk_blob *_blob)
{
	const void *val = NULL;
	size_t len = 0;
	bool c1, c2, c3, c4;

	CU_ASSERT((c1 = (blob_get_xattr_value(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, &val, &len,
					      true) == -ENOENT)));
	CU_ASSERT((c2 = (val == NULL)));
	CU_ASSERT((c3 = ((_blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT) == 0)));
	CU_ASSERT((c4 = (_blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT)));

	return c1 && c2 && c3 && c4;
}

#define UT_ASSERT_IS_ESNAP_CLONE(_blob, _id, _len) CU_ASSERT(is_esnap_clone(_blob, _id, _len))
#define UT_ASSERT_IS_NOT_ESNAP_CLONE(_blob) CU_ASSERT(is_not_esnap_clone(_blob))

static void
_get_xattr_value(void *arg, const char *name,
		 const void **value, size_t *value_len)
@@ -8014,6 +8050,164 @@ blob_ext_md_pages(void)
	g_bs = NULL;
}

static void
blob_esnap_clone_snapshot(void)
{
	/*
	 * When a snapshot is created, the blob that is being snapped becomes
	 * the leaf node (a clone of the snapshot) and the newly created
	 * snapshot sits between the snapped blob and the external snapshot.
	 *
	 * Before creating snap1
	 *
	 *   ,--------.     ,----------.
	 *   |  blob  |     |  vbdev   |
	 *   | blob1  |<----| nvme1n42 |
	 *   |  (rw)  |     |   (ro)   |
	 *   `--------'     `----------'
	 *       Figure 1
	 *
	 * After creating snap1
	 *
	 *   ,--------.     ,--------.     ,----------.
	 *   |  blob  |     |  blob  |     |  vbdev   |
	 *   | blob1  |<----| snap1  |<----| nvme1n42 |
	 *   |  (rw)  |     |  (ro)  |     |   (ro)   |
	 *   `--------'     `--------'     `----------'
	 *       Figure 2
	 *
	 * Starting from Figure 2, if snap1 is removed, the chain reverts to
	 * what it looks like in Figure 1.
	 *
	 * Starting from Figure 2, if blob1 is removed, the chain becomes:
	 *
	 *   ,--------.     ,----------.
	 *   |  blob  |     |  vbdev   |
	 *   | snap1  |<----| nvme1n42 |
	 *   |  (ro)  |     |   (ro)   |
	 *   `--------'     `----------'
	 *       Figure 3
	 *
	 * In each case, the blob pointed to by the nvme vbdev is considered
	 * the "esnap clone".  The esnap clone must have:
	 *
	 *   - XATTR_INTERNAL for BLOB_EXTERNAL_SNAPSHOT_ID (e.g. name or UUID)
	 *   - blob->invalid_flags must contain SPDK_BLOB_EXTERNAL_SNAPSHOT
	 *   - blob->parent_id must be SPDK_BLOBID_EXTERNAL_SNAPSHOT.
	 *
	 * No other blob that descends from the esnap clone may have any of
	 * those set.
	 */
	struct spdk_blob_store	*bs = g_bs;
	const uint32_t		blocklen = bs->io_unit_size;
	struct spdk_blob_opts	opts;
	struct ut_esnap_opts	esnap_opts;
	struct spdk_blob	*blob, *snap_blob;
	spdk_blob_id		blobid, snap_blobid;
	bool			destroyed = false;

	/* Create the esnap clone */
	ut_esnap_opts_init(blocklen, 2048, __func__, &destroyed, &esnap_opts);
	ut_spdk_blob_opts_init(&opts);
	opts.esnap_id = &esnap_opts;
	opts.esnap_id_len = sizeof(esnap_opts);
	opts.num_clusters = 10;
	spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	blobid = g_blobid;

	/* Open the blob. */
	spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blob != NULL);
	blob = g_blob;
	UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts));

	/*
	 * Create a snapshot of the blob. The snapshot becomes the esnap clone.
	 */
	spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	snap_blobid = g_blobid;

	spdk_bs_open_blob(bs, snap_blobid, blob_op_with_handle_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	snap_blob = g_blob;

	UT_ASSERT_IS_NOT_ESNAP_CLONE(blob);
	UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts));

	/*
	 * Delete the snapshot.  The original blob becomes the esnap clone.
	 */
	ut_blob_close_and_delete(bs, snap_blob);
	snap_blob = NULL;
	snap_blobid = SPDK_BLOBID_INVALID;
	UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts));

	/*
	 * Create the snapshot again, then delete the original blob.  The
	 * snapshot should survive as the esnap clone.
	 */
	spdk_bs_create_snapshot(bs, blobid, NULL, blob_op_with_id_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	snap_blobid = g_blobid;

	spdk_bs_open_blob(bs, snap_blobid, blob_op_with_handle_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	snap_blob = g_blob;

	UT_ASSERT_IS_NOT_ESNAP_CLONE(blob);
	UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts));

	ut_blob_close_and_delete(bs, blob);
	blob = NULL;
	blobid = SPDK_BLOBID_INVALID;
	UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts));

	/*
	 * Clone the snapshot.  The snapshot continues to be the esnap clone.
	 */
	spdk_bs_create_clone(bs, snap_blobid, NULL, blob_op_with_id_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	blobid = g_blobid;

	spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
	poll_threads();
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;

	UT_ASSERT_IS_NOT_ESNAP_CLONE(blob);
	UT_ASSERT_IS_ESNAP_CLONE(snap_blob, &esnap_opts, sizeof(esnap_opts));

	/*
	 * Delete the snapshot. The clone becomes the esnap clone.
	 */
	ut_blob_close_and_delete(bs, snap_blob);
	snap_blob = NULL;
	snap_blobid = SPDK_BLOBID_INVALID;
	UT_ASSERT_IS_ESNAP_CLONE(blob, &esnap_opts, sizeof(esnap_opts));

	/*
	 * Clean up
	 */
	ut_blob_close_and_delete(bs, blob);
}

static void
suite_bs_setup(void)
{
@@ -8220,6 +8414,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, blob_esnap_io_4096_512);
	CU_ADD_TEST(suite, blob_esnap_io_512_4096);
	CU_ADD_TEST(suite_esnap_bs, blob_esnap_thread_add_remove);
	CU_ADD_TEST(suite_esnap_bs, blob_esnap_clone_snapshot);

	allocate_threads(2);
	set_thread(0);