Commit 08650f86 authored by Mike Gerdts's avatar Mike Gerdts Committed by Ben Walker
Browse files

lvol: lvol destruction race leads to null deref



As an lvolstore is being destroyed, _vbdev_lvs_remove() starts an
interation through the lvols to delete each one, ultimately leading to
the destruction of the lvolstore with a call to lvs_free(). The callback
passed to vbdev_lvs_destruct() is always called asynchronously via
spdk_io_device_unregister() in bs_free().

When the lvolstore resides on bdevs that perform async IO (i.e. most
bdevs other than malloc), this gives a small window when the lvol bdev
is not registered but a lookup with spdk_lvol_get_by_uuid() or
spdk_lvol_get_by_names() will succeed. If rpc_bdev_lvol_delete() runs
during this window, it can get a reference to an lvol that has just been
unregistered and lvol->blob may be NULL. This lvol is then passed to
vbdev_lvol_destroy().

Before this fix, vbdev_lvol_destroy() would call:

   spdk_blob_is_degraded(lvol->blob);

Which would then lead to a NULL pointer dereference, as
spdk_blob_is_degraded() assumes a valid blob is passed. While a NULL
check would avoid this particular problem, a NULL blob is not
necessarily caused by the condition described above. It would better to
flag the lvstore's destruction before returning from
vbdev_lvs_destruct() and use that flag to prevent operations on the
lvolstore that is being deleted. Such a flag already exists in the form
of 'lvs_bdev->req != NULL', but that is set too late to close this race.

This fix introduces lvs_bdev->removal_in_progress which is set prior to
returning from vbdev_lvs_unload() and vbdev_lvs_destruct(). It is
checked by vbdev_lvol_destroy() before trying to destroy the lvol.  Now,
any lvol destruction initiated by something other than
vbdev_lvs_destruct() while an lvolstore unload or destroy is in progress
will fail with -ENODEV.

Fixes issue: #2998

Signed-off-by: default avatarMike Gerdts <mgerdts@nvidia.com>
Change-Id: I4d861879097703b0d8e3180e6de7ad6898f340fd
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17891


Community-CI: Mellanox Build Bot
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent aee609e1
Loading
Loading
Loading
Loading
+42 −7
Original line number Diff line number Diff line
@@ -41,6 +41,8 @@ struct spdk_bdev_module g_lvol_if = {

SPDK_BDEV_MODULE_REGISTER(lvol, &g_lvol_if)

static void _vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg);

struct lvol_store_bdev *
vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs_orig)
{
@@ -50,8 +52,11 @@ vbdev_get_lvs_bdev_by_lvs(struct spdk_lvol_store *lvs_orig)
	while (lvs_bdev != NULL) {
		lvs = lvs_bdev->lvs;
		if (lvs == lvs_orig) {
			if (lvs_bdev->req != NULL) {
				/* We do not allow access to lvs that are being destroyed */
			if (lvs_bdev->removal_in_progress) {
				/* We do not allow access to lvs that are being unloaded or
				 * destroyed */
				SPDK_DEBUGLOG(vbdev_lvol, "lvs %s: removal in progress\n",
					      lvs_orig->name);
				return NULL;
			} else {
				return lvs_bdev;
@@ -120,8 +125,11 @@ vbdev_get_lvs_bdev_by_bdev(struct spdk_bdev *bdev_orig)

	while (lvs_bdev != NULL) {
		if (lvs_bdev->bdev == bdev_orig) {
			if (lvs_bdev->req != NULL) {
				/* We do not allow access to lvs that are being destroyed */
			if (lvs_bdev->removal_in_progress) {
				/* We do not allow access to lvs that are being unloaded or
				 * destroyed */
				SPDK_DEBUGLOG(vbdev_lvol, "lvs %s: removal in progress\n",
					      lvs_bdev->lvs->name);
				return NULL;
			} else {
				return lvs_bdev;
@@ -359,7 +367,7 @@ _vbdev_lvs_remove_lvol_cb(void *cb_arg, int lvolerrno)
	lvol = TAILQ_FIRST(&lvs->lvols);
	while (lvol != NULL) {
		if (spdk_lvol_deletable(lvol)) {
			vbdev_lvol_destroy(lvol, _vbdev_lvs_remove_lvol_cb, lvs_bdev);
			_vbdev_lvol_destroy(lvol, _vbdev_lvs_remove_lvol_cb, lvs_bdev);
			return;
		}
		lvol = TAILQ_NEXT(lvol, link);
@@ -425,6 +433,8 @@ _vbdev_lvs_remove(struct spdk_lvol_store *lvs, spdk_lvs_op_complete cb_fn, void
		return;
	}

	lvs_bdev->removal_in_progress = true;

	req->cb_fn = cb_fn;
	req->cb_arg = cb_arg;
	lvs_bdev->req = req;
@@ -611,8 +621,8 @@ _vbdev_lvol_destroy_cb(void *cb_arg, int bdeverrno)
	free(ctx);
}

void
vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg)
static void
_vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg)
{
	struct vbdev_lvol_destroy_ctx *ctx;
	size_t count;
@@ -620,6 +630,10 @@ vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb
	assert(lvol != NULL);
	assert(cb_fn != NULL);

	/* Callers other than _vbdev_lvs_remove() must ensure the lvstore is not being removed. */
	assert(cb_fn == _vbdev_lvs_remove_lvol_cb ||
	       vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store) != NULL);

	/* Check if it is possible to delete lvol */
	spdk_blob_get_clones(lvol->lvol_store->blobstore, lvol->blob_id, NULL, &count);
	if (count > 1) {
@@ -647,6 +661,26 @@ vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb
	spdk_bdev_unregister(lvol->bdev, _vbdev_lvol_destroy_cb, ctx);
}

void
vbdev_lvol_destroy(struct spdk_lvol *lvol, spdk_lvol_op_complete cb_fn, void *cb_arg)
{
	struct lvol_store_bdev *lvs_bdev;

	/*
	 * During destruction of an lvolstore, _vbdev_lvs_unload() iterates through lvols until they
	 * are all deleted. There may be some IO required
	 */
	lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store);
	if (lvs_bdev == NULL) {
		SPDK_DEBUGLOG(vbdev_lvol, "lvol %s: lvolstore is being removed\n",
			      lvol->unique_id);
		cb_fn(cb_arg, -ENODEV);
		return;
	}

	_vbdev_lvol_destroy(lvol, cb_fn, cb_arg);
}

static char *
vbdev_lvol_find_name(struct spdk_lvol *lvol, spdk_blob_id blob_id)
{
@@ -1086,6 +1120,7 @@ _create_lvol_disk(struct spdk_lvol *lvol, bool destroy)
	lvs_bdev = vbdev_get_lvs_bdev_by_lvs(lvol->lvol_store);
	if (lvs_bdev == NULL) {
		SPDK_ERRLOG("No spdk lvs-bdev pair found for lvol %s\n", lvol->unique_id);
		assert(false);
		return -ENODEV;
	}

+1 −0
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ struct lvol_store_bdev {
	struct spdk_lvol_store	*lvs;
	struct spdk_bdev	*bdev;
	struct spdk_lvs_req	*req;
	bool			removal_in_progress;

	TAILQ_ENTRY(lvol_store_bdev)	lvol_stores;
};
+86 −0
Original line number Diff line number Diff line
@@ -672,6 +672,91 @@ esnap_remove_degraded(void)
	CU_ASSERT(rc == 0);
}

static void
late_delete(void)
{
	char aiopath[PATH_MAX];
	struct spdk_lvol_store *lvs = NULL;
	const uint32_t bs_size_bytes = 10 * 1024 * 1024;
	const uint32_t bs_block_size = 4096;
	const uint32_t cluster_size = 32 * 1024;
	struct op_with_handle_data owh_data;
	struct lvol_bdev *lvol_bdev;
	struct spdk_lvol *lvol;
	int rc, rc2;
	int count;

	/* Create device for lvstore. This cannot be a malloc bdev because we need to sneak a
	 * deletion in while blob_persist() is in progress.
	 */
	rc = make_test_file(bs_size_bytes, aiopath, sizeof(aiopath), "late_delete.aio");
	SPDK_CU_ASSERT_FATAL(rc == 0);
	rc = create_aio_bdev("aio1", aiopath, bs_block_size, false);
	SPDK_CU_ASSERT_FATAL(rc == 0);
	poll_threads();

	/* Create lvstore */
	rc = vbdev_lvs_create("aio1", "lvs1", cluster_size, 0, 0,
			      lvs_op_with_handle_cb, clear_owh(&owh_data));
	SPDK_CU_ASSERT_FATAL(rc == 0);
	poll_error_updated(&owh_data.lvserrno);
	SPDK_CU_ASSERT_FATAL(owh_data.lvserrno == 0);
	SPDK_CU_ASSERT_FATAL(owh_data.u.lvs != NULL);
	lvs = owh_data.u.lvs;

	/* Create an lvol */
	vbdev_lvol_create(lvs, "lvol", 1, true, LVOL_CLEAR_WITH_DEFAULT,
			  lvol_op_with_handle_cb, clear_owh(&owh_data));
	poll_error_updated(&owh_data.lvserrno);
	CU_ASSERT(owh_data.lvserrno == 0);
	CU_ASSERT(owh_data.u.lvol != NULL);

	/* Verify the lvol can be found both ways */
	CU_ASSERT(spdk_bdev_get_by_name("lvs1/lvol") != NULL);
	CU_ASSERT(spdk_lvol_get_by_names("lvs1", "lvol") != NULL);

	/*
	 * Once the lvolstore deletion starts, it will briefly be possible to look up lvol bdevs and
	 * degraded lvols in that lvolstore but any attempt to delete them will fail with -ENODEV.
	 */
	rc = 0xbad;
	count = 0;
	vbdev_lvs_destruct(lvs, lvol_op_complete_cb, &rc);
	do {
		lvol_bdev = (struct lvol_bdev *)spdk_bdev_get_by_name("lvs1/lvol");
		if (lvol_bdev != NULL) {
			rc2 = 0xbad;
			vbdev_lvol_destroy(lvol_bdev->lvol, lvol_op_complete_cb, &rc2);
			CU_ASSERT(rc2 == -ENODEV);
		}
		lvol = spdk_lvol_get_by_names("lvs1", "lvol");
		if (lvol != NULL) {
			/* If we are here, we are likely reproducing #2998 */
			rc2 = 0xbad;
			vbdev_lvol_destroy(lvol, lvol_op_complete_cb, &rc2);
			CU_ASSERT(rc2 == -ENODEV);
			count++;
		}

		spdk_thread_poll(g_ut_threads[0].thread, 0, 0);
	} while (rc == 0xbad);

	/* Ensure that the test exercised the race */
	CU_ASSERT(count != 0);

	/* The lvol is now gone */
	CU_ASSERT(spdk_bdev_get_by_name("lvs1/lvol") == NULL);
	CU_ASSERT(spdk_lvol_get_by_names("lvs1", "lvol") == NULL);

	/* Clean up */
	rc = 0xbad;
	bdev_aio_delete("aio1", unregister_cb, &rc);
	poll_error_updated(&rc);
	CU_ASSERT(rc == 0);
	rc = unlink(aiopath);
	CU_ASSERT(rc == 0);
}

static void
bdev_init_cb(void *arg, int rc)
{
@@ -706,6 +791,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, esnap_clone_io);
	CU_ADD_TEST(suite, esnap_hotplug);
	CU_ADD_TEST(suite, esnap_remove_degraded);
	CU_ADD_TEST(suite, late_delete);

	allocate_threads(2);
	set_thread(0);