Commit d453aaa3 authored by Mike Gerdts's avatar Mike Gerdts Committed by Jim Harris
Browse files

vbdev_lvol: degraded open of esnap clones



If an esnap clone is missing its snapshot the lvol should still open in
degraded mode. A degraded lvol will not have a bdev registered and as
such cannot perform any IO.

Change-Id: I736194650dfcf1eb78214c8896c31acc7a946b54
Signed-off-by: default avatarMike Gerdts <mgerdts@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16425


Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent a045d8d2
Loading
Loading
Loading
Loading
+123 −6
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@
#include "spdk/log.h"
#include "spdk/string.h"
#include "spdk/uuid.h"
#include "spdk/blob.h"

#include "vbdev_lvol.h"

@@ -439,6 +440,10 @@ _vbdev_lvs_remove(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void
		return;
	}
	TAILQ_FOREACH_SAFE(lvol, &lvs->lvols, link, tmp) {
		if (lvol->bdev == NULL) {
			spdk_lvol_close(lvol, _vbdev_lvs_remove_bdev_unregistered_cb, lvs_bdev);
			continue;
		}
		spdk_bdev_unregister(lvol->bdev, _vbdev_lvs_remove_bdev_unregistered_cb, lvs_bdev);
	}
}
@@ -1003,6 +1008,12 @@ _create_lvol_disk(struct spdk_lvol *lvol, bool destroy)
	unsigned char *alias;
	int rc;

	if (spdk_blob_is_degraded(lvol->blob)) {
		SPDK_NOTICELOG("lvol %s: blob is degraded: deferring bdev creation\n",
			       lvol->unique_id);
		return 0;
	}

	lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store);
	if (lvs_bdev == NULL) {
		SPDK_ERRLOG("No spdk lvs-bdev pair found for lvol %s\n", lvol->unique_id);
@@ -1686,6 +1697,90 @@ vbdev_lvs_grow(struct spdk_lvol_store *lvs,
	}
}

/* Begin degraded blobstore device */

/*
 * When an external snapshot is missing, an instance of bs_dev_degraded is used as the blob's
 * back_bs_dev. No bdev is registered, so there should be no IO nor requests for channels. The main
 * purposes of this device are to prevent blobstore from hitting fatal runtime errors and to
 * indicate that the blob is degraded via the is_degraded() callback.
 */

static void
bs_dev_degraded_read(struct spdk_bs_dev *dev, struct spdk_io_channel *channel, void *payload,
		     uint64_t lba, uint32_t lba_count, struct spdk_bs_dev_cb_args *cb_args)
{
	assert(false);
	cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO);
}

static void
bs_dev_degraded_readv(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		      struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count,
		      struct spdk_bs_dev_cb_args *cb_args)
{
	assert(false);
	cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO);
}

static void
bs_dev_degraded_readv_ext(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
			  struct iovec *iov, int iovcnt, uint64_t lba, uint32_t lba_count,
			  struct spdk_bs_dev_cb_args *cb_args,
			  struct spdk_blob_ext_io_opts *io_opts)
{
	assert(false);
	cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EIO);
}

static bool
bs_dev_degraded_is_zeroes(struct spdk_bs_dev *dev, uint64_t lba, uint64_t lba_count)
{
	assert(false);
	return false;
}

static struct spdk_io_channel *
bs_dev_degraded_create_channel(struct spdk_bs_dev *bs_dev)
{
	assert(false);
	return NULL;
}

static void
bs_dev_degraded_destroy_channel(struct spdk_bs_dev *bs_dev, struct spdk_io_channel *channel)
{
	assert(false);
}

static void
bs_dev_degraded_destroy(struct spdk_bs_dev *bs_dev)
{
}

static bool
bs_dev_degraded_is_degraded(struct spdk_bs_dev *bs_dev)
{
	return true;
}

static struct spdk_bs_dev bs_dev_degraded = {
	.create_channel = bs_dev_degraded_create_channel,
	.destroy_channel = bs_dev_degraded_destroy_channel,
	.destroy = bs_dev_degraded_destroy,
	.read = bs_dev_degraded_read,
	.readv = bs_dev_degraded_readv,
	.readv_ext = bs_dev_degraded_readv_ext,
	.is_zeroes = bs_dev_degraded_is_zeroes,
	.is_degraded = bs_dev_degraded_is_degraded,
	/* Make the device as large as possible without risk of uint64 overflow. */
	.blockcnt = UINT64_MAX / 512,
	/* Prevent divide by zero errors calculating LBAs that will never be read. */
	.blocklen = 512,
};

/* End degraded blobstore device */

/* Begin external snapshot support */

static void
@@ -1701,6 +1796,7 @@ vbdev_lvol_esnap_dev_create(void *bs_ctx, void *blob_ctx, struct spdk_blob *blob
			    const void *esnap_id, uint32_t id_len,
			    struct spdk_bs_dev **_bs_dev)
{
	struct spdk_lvol_store	*lvs = bs_ctx;
	struct spdk_lvol	*lvol = blob_ctx;
	struct spdk_bs_dev	*bs_dev = NULL;
	struct spdk_uuid	uuid;
@@ -1735,16 +1831,37 @@ vbdev_lvol_esnap_dev_create(void *bs_ctx, void *blob_ctx, struct spdk_blob *blob
	rc = spdk_bdev_create_bs_dev(uuid_str, false, NULL, 0,
				     vbdev_lvol_esnap_bdev_event_cb, NULL, &bs_dev);
	if (rc != 0) {
		SPDK_ERRLOG("lvol %s: failed to create bs_dev from bdev '%s': %d\n",
			    lvol->unique_id, uuid_str, rc);
		return rc;
		goto fail;
	}

	rc = spdk_bs_bdev_claim(bs_dev, &g_lvol_if);
	if (rc != 0) {
		SPDK_ERRLOG("lvol %s: unable to claim esnap bdev '%s': %d\n",
			    lvol->unique_id, uuid_str, rc);
		SPDK_ERRLOG("lvol %s: unable to claim esnap bdev '%s': %d\n", lvol->unique_id,
			    uuid_str, rc);
		bs_dev->destroy(bs_dev);
		return rc;
		goto fail;
	}

	*_bs_dev = bs_dev;
	return 0;

fail:
	/* Unable to open or claim the bdev. This lvol is degraded. */
	bs_dev = &bs_dev_degraded;
	SPDK_NOTICELOG("lvol %s: bdev %s not available: lvol is degraded\n", lvol->unique_id,
		       uuid_str);

	/*
	 * Be sure not to call spdk_lvs_missing_add() on an lvol that is already degraded. This can
	 * lead to a cycle in the degraded_lvols tailq.
	 */
	if (lvol->degraded_set == NULL) {
		rc = spdk_lvs_esnap_missing_add(lvs, lvol, uuid_str, sizeof(uuid_str));
		if (rc != 0) {
			SPDK_NOTICELOG("lvol %s: unable to register missing esnap device %s: "
				       "it will not be hotplugged if added later\n",
				       lvol->unique_id, uuid_str);
		}
	}

	*_bs_dev = bs_dev;
+125 −0
Original line number Diff line number Diff line
@@ -75,6 +75,130 @@ function test_esnap_reload() {
	rpc_cmd bdev_malloc_delete "$esnap_dev"
}

function test_esnap_reload_missing() {
	local bs_dev esnap_dev
	local block_size=512
	local esnap_size_mb=1
	local lvs_cluster_size=$((16 * 1024))
	local lvs_uuid esnap_uuid eclone_uuid snap_uuid clone_uuid uuid
	local aio_bdev=test_esnap_reload_aio0
	local lvols

	# Create the lvstore on an aio device. Can't use malloc because we need to remove
	# the device and re-add it to trigger an lvstore unload and then load.
	rm -f $testdir/aio_bdev_0
	truncate -s "${AIO_SIZE_MB}M" $testdir/aio_bdev_0
	bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
	lvs_uuid=$(rpc_cmd bdev_lvol_create_lvstore -c "$lvs_cluster_size" "$bs_dev" lvs_test)

	# Create a bdev that will be the external snapshot
	# State:
	#   esnap(present) <-- eclone(not degraded)
	esnap_uuid=e4b40d8b-f623-416d-8234-baf5a4c83cbd
	esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size")
	eclone_uuid=$(rpc_cmd bdev_lvol_clone_bdev "$esnap_uuid" lvs_test "eclone")
	lvols=$(rpc_cmd bdev_lvol_get_lvols)
	[[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]]

	# Unload the lvstore and delete the external snapshot
	rpc_cmd bdev_aio_delete "$aio_bdev"
	NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
	rpc_cmd bdev_malloc_delete "$esnap_uuid"

	# Load the lvstore, eclone bdev should not exist but the lvol should exist.
	# State:
	#   esnap(missing) <-- eclone(degraded)
	bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
	NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone
	NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
	lvols=$(rpc_cmd bdev_lvol_get_lvols)
	[[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]]

	# Reload the lvstore with esnap present during load. This should make the lvol not degraded.
	# State:
	#   esnap(present) <-- eclone(not degraded)
	rpc_cmd bdev_aio_delete "$aio_bdev"
	NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
	esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size")
	bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
	rpc_cmd bdev_lvol_get_lvstores -l lvs_test
	rpc_cmd bdev_get_bdevs -b lvs_test/eclone
	rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
	lvols=$(rpc_cmd bdev_lvol_get_lvols)
	[[ "$(jq -r '. | length' <<< "$lvols")" == "1" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]]

	# Create a clone of eclone, then reload without the esnap present.
	# State:
	#   esnap(missing) <-- eclone(degraded) <-- clone(degraded)
	rpc_cmd bdev_lvol_set_read_only "$eclone_uuid"
	clone_uuid=$(rpc_cmd bdev_lvol_clone "$eclone_uuid" clone)
	rpc_cmd bdev_get_bdevs -b lvs_test/clone
	rpc_cmd bdev_get_bdevs -b "$clone_uuid"
	rpc_cmd bdev_aio_delete "$aio_bdev"
	NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
	rpc_cmd bdev_malloc_delete "$esnap_uuid"
	bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
	lvols=$(rpc_cmd bdev_lvol_get_lvols)
	[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]]
	NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone
	NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
	NOT rpc_cmd bdev_get_bdevs -b lvs_test/clone
	NOT rpc_cmd bdev_get_bdevs -b "$clone_uuid"

	# Reload the lvstore with esnap present during load. This should make the lvols not
	# degraded.
	# State:
	#   esnap(present) <-- eclone(not degraded) <-- clone(not degraded)
	rpc_cmd bdev_aio_delete "$aio_bdev"
	NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
	esnap_dev=$(rpc_cmd bdev_malloc_create -u "$esnap_uuid" "$esnap_size_mb" "$block_size")
	bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
	lvols=$(rpc_cmd bdev_lvol_get_lvols)
	[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "false" ]]
	[[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "clone").is_degraded' <<< "$lvols")" == "false" ]]
	rpc_cmd bdev_get_bdevs -b lvs_test/eclone
	rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
	rpc_cmd bdev_get_bdevs -b lvs_test/clone
	rpc_cmd bdev_get_bdevs -b "$clone_uuid"

	# Create a snapshot of clone, then reload without the esnap present.
	# State:
	#   esnap(missing) <-- eclone(degraded) <-- snap(degraded) <-- clone(degraded)
	snap_uuid=$(rpc_cmd bdev_lvol_snapshot "$clone_uuid" snap)
	rpc_cmd bdev_get_bdevs -b lvs_test/snap
	rpc_cmd bdev_get_bdevs -b "$snap_uuid"
	rpc_cmd bdev_aio_delete "$aio_bdev"
	NOT rpc_cmd bdev_lvol_get_lvstores -l lvs_test
	rpc_cmd bdev_malloc_delete "$esnap_uuid"
	bs_dev=$(rpc_cmd bdev_aio_create "$testdir/aio_bdev_0" "$aio_bdev" "$block_size")
	lvols=$(rpc_cmd bdev_lvol_get_lvols)
	[[ "$(jq -r '.[] | select(.name == "eclone").is_esnap_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "eclone").is_degraded' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "clone").is_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "clone").is_degraded' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "snap").is_clone' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "snap").is_snapshot' <<< "$lvols")" == "true" ]]
	[[ "$(jq -r '.[] | select(.name == "snap").is_degraded' <<< "$lvols")" == "true" ]]
	NOT rpc_cmd bdev_get_bdevs -b lvs_test/eclone
	NOT rpc_cmd bdev_get_bdevs -b "$eclone_uuid"
	NOT rpc_cmd bdev_get_bdevs -b lvs_test/clone
	NOT rpc_cmd bdev_get_bdevs -b "$clone_uuid"
	NOT rpc_cmd bdev_get_bdevs -b lvs_test/snap
	NOT rpc_cmd bdev_get_bdevs -b "$snap_uuid"

	rpc_cmd bdev_aio_delete "$aio_bdev"
}

function log_jq_out() {
	local key

@@ -225,6 +349,7 @@ waitforlisten $spdk_pid
modprobe nbd

run_test "test_esnap_reload" test_esnap_reload
run_test "test_esnap_reload" test_esnap_reload_missing
run_test "test_esnap_clones" test_esnap_clones

trap - SIGINT SIGTERM SIGPIPE EXIT
+25 −4
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@
#include "spdk_cunit.h"
#include "spdk/string.h"

#include "common/lib/ut_multithread.c"
#include "bdev/lvol/vbdev_lvol.c"

#include "unit/lib/json_mock.c"
@@ -44,6 +45,10 @@ DEFINE_STUB(spdk_blob_get_esnap_id, int,
DEFINE_STUB(spdk_blob_is_esnap_clone, bool, (const struct spdk_blob *blob), false);
DEFINE_STUB(spdk_lvol_iter_immediate_clones, int,
	    (struct spdk_lvol *lvol, spdk_lvol_iter_cb cb_fn, void *cb_arg), -ENOTSUP);
DEFINE_STUB(spdk_lvs_esnap_missing_add, int,
	    (struct spdk_lvol_store *lvs, struct spdk_lvol *lvol, const void *esnap_id,
	     uint32_t id_len), -ENOTSUP);
DEFINE_STUB(spdk_blob_is_degraded, bool, (const struct spdk_blob *blob), false);

struct spdk_blob {
	uint64_t	id;
@@ -1797,27 +1802,37 @@ ut_esnap_dev_create(void)

	/* Bdev not found */
	g_base_bdev = NULL;
	MOCK_SET(spdk_blob_is_degraded, true);
	rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev);
	CU_ASSERT(rc == -ENODEV);
	CU_ASSERT(bs_dev == NULL);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
	CU_ASSERT(bs_dev->destroy == bs_dev_degraded_destroy);
	bs_dev->destroy(bs_dev);

	/* Cannot get a claim */
	/* TODO: This suggests we need a way to wait for a claim to be available. */
	g_base_bdev = &bdev;
	lvol_already_opened = true;
	MOCK_SET(spdk_blob_is_degraded, true);
	rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev);
	CU_ASSERT(rc == -EPERM);
	CU_ASSERT(bs_dev == NULL);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
	CU_ASSERT(bs_dev->destroy == bs_dev_degraded_destroy);
	bs_dev->destroy(bs_dev);

	/* Happy path */
	lvol_already_opened = false;
	MOCK_SET(spdk_blob_is_degraded, false);
	rc = vbdev_lvol_esnap_dev_create(&lvs, &lvol, &blob, uuid_str, sizeof(uuid_str), &bs_dev);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
	CU_ASSERT(bs_dev->destroy == ut_bs_dev_destroy);
	bs_dev->destroy(bs_dev);

	g_base_bdev = NULL;
	lvol_already_opened = false;
	free(unterminated);
	MOCK_CLEAR(spdk_blob_is_degraded);
}

static void
@@ -1910,9 +1925,15 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, ut_esnap_dev_create);
	CU_ADD_TEST(suite, ut_lvol_esnap_clone_bad_args);

	allocate_threads(1);
	set_thread(0);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();
	num_failures = CU_get_number_of_failures();
	CU_cleanup_registry();

	free_threads();

	return num_failures;
}