Commit 421fb110 authored by Mike Gerdts's avatar Mike Gerdts Committed by Jim Harris
Browse files

blob_bdev: defer free until all channels destroyed



To avoid races that lead to use-after-free errors during esnap device
hot add/remove, we need a way to ensure that the destroy callback does
not free a bs_dev until all consumers are done.

This adds reference counting to the create_channel() and
destroy_channel() callbacks. The reference couunt is initialized to 1
and is decremented by destroy(). The destroy() and destroy_channel()
callbacks are updated to free the bs_dev only when the reference count
drops to 0.

Signed-off-by: default avatarMike Gerdts <mgerdts@nvidia.com>
Change-Id: Ie0b873717e431b33ce6548f878643dbc66d4f956
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/16422


Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent ba91ffba
Loading
Loading
Loading
Loading
+74 −3
Original line number Diff line number Diff line
@@ -19,6 +19,8 @@ struct blob_bdev {
	struct spdk_bdev	*bdev;
	struct spdk_bdev_desc	*desc;
	bool			write;
	int32_t			refs;
	struct spdk_spinlock	lock;
};

struct blob_resubmit {
@@ -346,23 +348,88 @@ static struct spdk_io_channel *
bdev_blob_create_channel(struct spdk_bs_dev *dev)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)dev;
	struct spdk_io_channel *ch;

	return spdk_bdev_get_io_channel(blob_bdev->desc);
	ch = spdk_bdev_get_io_channel(blob_bdev->desc);
	if (ch != NULL) {
		spdk_spin_lock(&blob_bdev->lock);
		blob_bdev->refs++;
		spdk_spin_unlock(&blob_bdev->lock);
	}

	return ch;
}

static void
bdev_blob_free(struct blob_bdev *blob_bdev)
{
	assert(blob_bdev->refs == 0);

	spdk_spin_destroy(&blob_bdev->lock);
	free(blob_bdev);
}

static void
bdev_blob_destroy_channel(struct spdk_bs_dev *dev, struct spdk_io_channel *channel)
{
	struct blob_bdev *blob_bdev = (struct blob_bdev *)dev;
	int32_t refs;

	spdk_spin_lock(&blob_bdev->lock);

	assert(blob_bdev->refs > 0);
	blob_bdev->refs--;
	refs = blob_bdev->refs;

	spdk_spin_unlock(&blob_bdev->lock);

	spdk_put_io_channel(channel);

	/*
	 * If the value of blob_bdev->refs taken while holding blob_bdev->refs is zero, the blob and
	 * this channel have been destroyed. This means that dev->destroy() has been called and it
	 * would be an error (akin to use after free) if dev is dereferenced after destroying it.
	 * Thus, there should be no race with bdev_blob_create_channel().
	 *
	 * Because the value of blob_bdev->refs was taken while holding the lock here and the same
	 * is done in bdev_blob_destroy(), there is no race with bdev_blob_destroy().
	 */
	if (refs == 0) {
		bdev_blob_free(blob_bdev);
	}
}

static void
bdev_blob_destroy(struct spdk_bs_dev *bs_dev)
{
	struct spdk_bdev_desc *desc = __get_desc(bs_dev);
	struct blob_bdev *blob_bdev = (struct blob_bdev *)bs_dev;
	struct spdk_bdev_desc *desc;
	int32_t refs;

	spdk_spin_lock(&blob_bdev->lock);

	desc = blob_bdev->desc;
	blob_bdev->desc = NULL;
	blob_bdev->refs--;
	refs = blob_bdev->refs;

	spdk_spin_unlock(&blob_bdev->lock);

	spdk_bdev_close(desc);
	free(bs_dev);

	/*
	 * If the value of blob_bdev->refs taken while holding blob_bdev->refs is zero,
	 * bs_dev->destroy() has been called and all the channels have been destroyed. It would be
	 * an error (akin to use after free) if bs_dev is dereferenced after destroying it. Thus,
	 * there should be no race with bdev_blob_create_channel().
	 *
	 * Because the value of blob_bdev->refs was taken while holding the lock here and the same
	 * is done in bdev_blob_destroy_channel(), there is no race with
	 * bdev_blob_destroy_channel().
	 */
	if (refs == 0) {
		bdev_blob_free(blob_bdev);
	}
}

static struct spdk_bdev *
@@ -425,6 +492,8 @@ spdk_bdev_create_bs_dev(const char *bdev_name, bool write,
	struct spdk_bdev_desc *desc;
	int rc;

	assert(spdk_get_thread() != NULL);

	if (opts != NULL && opts_size != sizeof(*opts)) {
		SPDK_ERRLOG("bdev name '%s': unsupported options\n", bdev_name);
		return -EINVAL;
@@ -447,6 +516,8 @@ spdk_bdev_create_bs_dev(const char *bdev_name, bool write,

	*bs_dev = &b->bs_dev;
	b->write = write;
	b->refs = 1;
	spdk_spin_init(&b->lock);

	return 0;
}
+184 −3
Original line number Diff line number Diff line
@@ -5,7 +5,11 @@
#include "spdk/stdinc.h"

#include "spdk_cunit.h"
#include "common/lib/test_env.c"
#include "common/lib/ut_multithread.c"

static void ut_put_io_channel(struct spdk_io_channel *ch);

#define spdk_put_io_channel(ch) ut_put_io_channel(ch);
#include "blob/bdev/blob_bdev.c"

DEFINE_STUB(spdk_bdev_io_type_supported, bool, (struct spdk_bdev *bdev,
@@ -48,8 +52,6 @@ DEFINE_STUB(spdk_bdev_copy_blocks, int,
	    (struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, uint64_t dst_offset_blocks,
	     uint64_t src_offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb,
	     void *cb_arg), 0);
DEFINE_STUB(spdk_bdev_get_io_channel, struct spdk_io_channel *,
	    (struct spdk_bdev_desc *desc), NULL);

struct spdk_bdev {
	char name[16];
@@ -65,6 +67,7 @@ struct spdk_bdev_desc {
	struct spdk_bdev *bdev;
	bool write;
	enum spdk_bdev_claim_type claim_type;
	struct spdk_thread *thread;
};

struct spdk_bdev *g_bdev;
@@ -73,6 +76,20 @@ static struct spdk_bdev_module g_bdev_mod = {
	.name = "blob_bdev_ut"
};

struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
{
	if (desc != NULL) {
		return (struct spdk_io_channel *)0x1;
	}
	return NULL;
}

static void
ut_put_io_channel(struct spdk_io_channel *ch)
{
}

static struct spdk_bdev *
get_bdev(const char *bdev_name)
{
@@ -105,6 +122,7 @@ spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event
	desc = calloc(1, sizeof(*desc));
	desc->bdev = g_bdev;
	desc->write = write;
	desc->thread = spdk_get_thread();
	*_desc = desc;
	bdev->open_cnt++;

@@ -116,6 +134,8 @@ spdk_bdev_close(struct spdk_bdev_desc *desc)
{
	struct spdk_bdev *bdev = desc->bdev;

	CU_ASSERT(desc->thread == spdk_get_thread());

	bdev->open_cnt--;
	if (bdev->claim_desc == desc) {
		bdev->claim_desc = NULL;
@@ -362,6 +382,159 @@ claim_bs_dev_ro(void)
	g_bdev = NULL;
}

/*
 * Verify that create_channel() and destroy_channel() increment and decrement the blob_bdev->refs.
 */
static void
deferred_destroy_refs(void)
{
	struct spdk_bdev bdev;
	struct spdk_io_channel *ch1, *ch2;
	struct spdk_bs_dev *bs_dev = NULL;
	struct blob_bdev *blob_bdev;
	int rc;

	set_thread(0);
	init_bdev(&bdev, "bdev0", 16);
	g_bdev = &bdev;

	/* Open a blob_bdev, verify reference count is 1. */
	rc = spdk_bdev_create_bs_dev("bdev0", false, NULL, 0, NULL, NULL, &bs_dev);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
	blob_bdev = (struct blob_bdev *)bs_dev;
	CU_ASSERT(blob_bdev->refs == 1);
	CU_ASSERT(blob_bdev->desc != NULL);

	/* Verify reference count increases with channels on the same thread. */
	ch1 = bs_dev->create_channel(bs_dev);
	SPDK_CU_ASSERT_FATAL(ch1 != NULL);
	CU_ASSERT(blob_bdev->refs == 2);
	ch2 = bs_dev->create_channel(bs_dev);
	SPDK_CU_ASSERT_FATAL(ch2 != NULL);
	CU_ASSERT(blob_bdev->refs == 3);
	bs_dev->destroy_channel(bs_dev, ch1);
	CU_ASSERT(blob_bdev->refs == 2);
	bs_dev->destroy_channel(bs_dev, ch2);
	CU_ASSERT(blob_bdev->refs == 1);
	CU_ASSERT(blob_bdev->desc != NULL);

	/* Verify reference count increases with channels on different threads. */
	ch1 = bs_dev->create_channel(bs_dev);
	SPDK_CU_ASSERT_FATAL(ch1 != NULL);
	CU_ASSERT(blob_bdev->refs == 2);
	set_thread(1);
	ch2 = bs_dev->create_channel(bs_dev);
	SPDK_CU_ASSERT_FATAL(ch2 != NULL);
	CU_ASSERT(blob_bdev->refs == 3);
	bs_dev->destroy_channel(bs_dev, ch1);
	CU_ASSERT(blob_bdev->refs == 2);
	bs_dev->destroy_channel(bs_dev, ch2);
	CU_ASSERT(blob_bdev->refs == 1);
	CU_ASSERT(blob_bdev->desc != NULL);

	set_thread(0);
	bs_dev->destroy(bs_dev);
	g_bdev = NULL;
}

/*
 * When a channel is open bs_dev->destroy() should not free bs_dev until after the last channel is
 * closed. Further, destroy() prevents the creation of new channels.
 */
static void
deferred_destroy_channels(void)
{
	struct spdk_bdev bdev;
	struct spdk_io_channel *ch1, *ch2;
	struct spdk_bs_dev *bs_dev = NULL;
	struct blob_bdev *blob_bdev;
	int rc;

	set_thread(0);
	init_bdev(&bdev, "bdev0", 16);

	/* Open bs_dev and sanity check */
	g_bdev = &bdev;
	rc = spdk_bdev_create_bs_dev("bdev0", false, NULL, 0, NULL, NULL, &bs_dev);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
	CU_ASSERT(bdev.open_cnt == 1);
	blob_bdev = (struct blob_bdev *)bs_dev;
	CU_ASSERT(blob_bdev->refs == 1);
	CU_ASSERT(blob_bdev->desc != NULL);

	/* Create a channel, destroy the bs_dev. It should not be freed yet. */
	ch1 = bs_dev->create_channel(bs_dev);
	SPDK_CU_ASSERT_FATAL(ch1 != NULL);
	CU_ASSERT(blob_bdev->refs == 2);
	bs_dev->destroy(bs_dev);

	/* Destroy closes the bdev and prevents desc from being used for creating more channels. */
	CU_ASSERT(blob_bdev->desc == NULL);
	CU_ASSERT(bdev.open_cnt == 0);
	CU_ASSERT(blob_bdev->refs == 1);
	ch2 = bs_dev->create_channel(bs_dev);
	CU_ASSERT(ch2 == NULL)
	CU_ASSERT(blob_bdev->refs == 1);
	bs_dev->destroy_channel(bs_dev, ch1);
	g_bdev = NULL;

	/* Now bs_dev should have been freed. Builds with asan will verify. */
}

/*
 * Verify that deferred destroy copes well with the last channel destruction being on a thread other
 * than the thread used to obtain the bdev descriptor.
 */
static void
deferred_destroy_threads(void)
{
	struct spdk_bdev bdev;
	struct spdk_io_channel *ch1, *ch2;
	struct spdk_bs_dev *bs_dev = NULL;
	struct blob_bdev *blob_bdev;
	int rc;

	set_thread(0);
	init_bdev(&bdev, "bdev0", 16);
	g_bdev = &bdev;

	/* Open bs_dev and sanity check */
	rc = spdk_bdev_create_bs_dev("bdev0", false, NULL, 0, NULL, NULL, &bs_dev);
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(bs_dev != NULL);
	CU_ASSERT(bdev.open_cnt == 1);
	blob_bdev = (struct blob_bdev *)bs_dev;
	CU_ASSERT(blob_bdev->refs == 1);
	CU_ASSERT(blob_bdev->desc != NULL);

	/* Create two channels, each on their own thread. */
	ch1 = bs_dev->create_channel(bs_dev);
	SPDK_CU_ASSERT_FATAL(ch1 != NULL);
	CU_ASSERT(blob_bdev->refs == 2);
	CU_ASSERT(spdk_get_thread() == blob_bdev->desc->thread);
	set_thread(1);
	ch2 = bs_dev->create_channel(bs_dev);
	SPDK_CU_ASSERT_FATAL(ch2 != NULL);
	CU_ASSERT(blob_bdev->refs == 3);

	/* Destroy the bs_dev on thread 0, the channel on thread 0, then the channel on thread 1. */
	set_thread(0);
	bs_dev->destroy(bs_dev);
	CU_ASSERT(blob_bdev->desc == NULL);
	CU_ASSERT(bdev.open_cnt == 0);
	CU_ASSERT(blob_bdev->refs == 2);
	bs_dev->destroy_channel(bs_dev, ch1);
	CU_ASSERT(blob_bdev->refs == 1);
	set_thread(1);
	bs_dev->destroy_channel(bs_dev, ch2);
	set_thread(0);
	g_bdev = NULL;

	/* Now bs_dev should have been freed. Builds with asan will verify. */
}

int
main(int argc, char **argv)
{
@@ -378,11 +551,19 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, create_bs_dev_rw);
	CU_ADD_TEST(suite, claim_bs_dev);
	CU_ADD_TEST(suite, claim_bs_dev_ro);
	CU_ADD_TEST(suite, deferred_destroy_refs);
	CU_ADD_TEST(suite, deferred_destroy_channels);
	CU_ADD_TEST(suite, deferred_destroy_threads);

	allocate_threads(2);
	set_thread(0);

	CU_basic_set_mode(CU_BRM_VERBOSE);
	CU_basic_run_tests();
	num_failures = CU_get_number_of_failures();
	CU_cleanup_registry();

	free_threads();

	return num_failures;
}