Commit 4473942f authored by Pawel Wodkowski's avatar Pawel Wodkowski Committed by Jim Harris
Browse files

bdev: replace tailq by arrays in base and vbdev linking



SPKD base bdev might be part of multiple vbdevs. The same is true in
reverse direction. So consider folowing scenario:

  bdev3  bdev4  bdev5
     |     |     |
   +-+--+  +  +--+--+
  /      \ | /       \
bdev0    bdev1      bdev2

In current implementation bdev0/1/2 will apear as base base for
bdev3/4/5 which is obviously wrong.
This patch try to address this issue.

Change-Id: Ic99c13c8656ceb597aba7e41ccb2fa8090b4f13b
Signed-off-by: default avatarPawel Wodkowski <pawelx.wodkowski@intel.com>
Reviewed-on: https://review.gerrithub.io/405104


Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: default avatarDariusz Stojaczyk <dariuszx.stojaczyk@intel.com>
Tested-by: default avatarSPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: default avatarDaniel Verkamp <daniel.verkamp@intel.com>
parent 3d220b92
Loading
Loading
Loading
Loading
+6 −7
Original line number Diff line number Diff line
@@ -282,15 +282,14 @@ struct spdk_bdev {
	/** The bdev status */
	enum spdk_bdev_status status;

	/** The list of block devices that this block device is built on top of (if any). */
	TAILQ_HEAD(, spdk_bdev) base_bdevs;
	/** The array of block devices that this block device is built on top of (if any). */
	struct spdk_bdev **base_bdevs;
	size_t base_bdevs_cnt;

	TAILQ_ENTRY(spdk_bdev) base_bdev_link;

	/** The list of virtual block devices built on top of this block device. */
	TAILQ_HEAD(, spdk_bdev) vbdevs;

	TAILQ_ENTRY(spdk_bdev) vbdev_link;
	/** The array of virtual block devices built on top of this block device. */
	struct spdk_bdev **vbdevs;
	size_t vbdevs_cnt;

	/**
	 * Pointer to the module that has claimed this bdev for purposes of creating virtual
+127 −25
Original line number Diff line number Diff line
@@ -2350,10 +2350,8 @@ spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io)
}

static int
_spdk_bdev_register(struct spdk_bdev *bdev)
spdk_bdev_init(struct spdk_bdev *bdev)
{
	struct spdk_bdev_module *module;

	assert(bdev->module != NULL);

	if (!bdev->name) {
@@ -2370,9 +2368,6 @@ _spdk_bdev_register(struct spdk_bdev *bdev)

	TAILQ_INIT(&bdev->open_descs);

	TAILQ_INIT(&bdev->vbdevs);
	TAILQ_INIT(&bdev->base_bdevs);

	TAILQ_INIT(&bdev->aliases);

	bdev->reset_in_progress = NULL;
@@ -2382,6 +2377,22 @@ _spdk_bdev_register(struct spdk_bdev *bdev)
				sizeof(struct spdk_bdev_channel));

	pthread_mutex_init(&bdev->mutex, NULL);
	return 0;
}

static void
spdk_bdev_fini(struct spdk_bdev *bdev)
{
	pthread_mutex_destroy(&bdev->mutex);

	spdk_io_device_unregister(__bdev_to_io_dev(bdev), NULL);
}

static void
spdk_bdev_start(struct spdk_bdev *bdev)
{
	struct spdk_bdev_module *module;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Inserting bdev %s into list\n", bdev->name);
	TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link);

@@ -2391,34 +2402,132 @@ _spdk_bdev_register(struct spdk_bdev *bdev)
			module->examine(bdev);
		}
	}

	return 0;
}

int
spdk_bdev_register(struct spdk_bdev *bdev)
{
	return _spdk_bdev_register(bdev);
	int rc = spdk_bdev_init(bdev);

	if (rc == 0) {
		spdk_bdev_start(bdev);
	}

	return rc;
}

static void
spdk_vbdev_remove_base_bdevs(struct spdk_bdev *vbdev)
{
	struct spdk_bdev **bdevs;
	struct spdk_bdev *base;
	size_t i, j, k;
	bool found;

	/* Iterate over base bdevs to remove vbdev from them. */
	for (i = 0; i < vbdev->base_bdevs_cnt; i++) {
		found = false;
		base = vbdev->base_bdevs[i];

		for (j = 0; j < base->vbdevs_cnt; j++) {
			if (base->vbdevs[j] != vbdev) {
				continue;
			}

			for (k = j; k + 1 < base->vbdevs_cnt; k++) {
				base->vbdevs[k] = base->vbdevs[k + 1];
			}

			base->vbdevs_cnt--;
			if (base->vbdevs_cnt > 0) {
				bdevs = realloc(base->vbdevs, base->vbdevs_cnt * sizeof(bdevs[0]));
				/* It would be odd if shrinking memory block fail. */
				assert(bdevs);
				base->vbdevs = bdevs;
			} else {
				free(base->vbdevs);
				base->vbdevs = NULL;
			}

			found = true;
			break;
		}

		if (!found) {
			SPDK_WARNLOG("Bdev '%s' is not base bdev of '%s'.\n", base->name, vbdev->name);
		}
	}

	free(vbdev->base_bdevs);
	vbdev->base_bdevs = NULL;
	vbdev->base_bdevs_cnt = 0;
}

static int
spdk_vbdev_set_base_bdevs(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, size_t cnt)
{
	struct spdk_bdev **vbdevs;
	struct spdk_bdev *base;
	size_t i;

	/* Adding base bdevs isn't supported (yet?). */
	assert(vbdev->base_bdevs_cnt == 0);

	vbdev->base_bdevs = malloc(cnt * sizeof(vbdev->base_bdevs[0]));
	if (!vbdev->base_bdevs) {
		SPDK_ERRLOG("%s - realloc() failed\n", vbdev->name);
		return -ENOMEM;
	}

	memcpy(vbdev->base_bdevs, base_bdevs, cnt * sizeof(vbdev->base_bdevs[0]));
	vbdev->base_bdevs_cnt = cnt;

	/* Iterate over base bdevs to add this vbdev to them. */
	for (i = 0; i < cnt; i++) {
		base = vbdev->base_bdevs[i];

		assert(base != NULL);
		assert(base->claim_module != NULL);

		vbdevs = realloc(base->vbdevs, (base->vbdevs_cnt + 1) * sizeof(vbdevs[0]));
		if (!vbdevs) {
			SPDK_ERRLOG("%s - realloc() failed\n", base->name);
			spdk_vbdev_remove_base_bdevs(vbdev);
			return -ENOMEM;
		}

		vbdevs[base->vbdevs_cnt] = vbdev;
		base->vbdevs = vbdevs;
		base->vbdevs_cnt++;
	}

	return 0;
}

int
spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
{
	int i, rc;
	int rc;

	rc = _spdk_bdev_register(vbdev);
	rc = spdk_bdev_init(vbdev);
	if (rc) {
		return rc;
	}

	for (i = 0; i < base_bdev_count; i++) {
		assert(base_bdevs[i] != NULL);
		assert(base_bdevs[i]->claim_module != NULL);
		TAILQ_INSERT_TAIL(&vbdev->base_bdevs, base_bdevs[i], base_bdev_link);
		TAILQ_INSERT_TAIL(&base_bdevs[i]->vbdevs, vbdev, vbdev_link);
	if (base_bdev_count == 0) {
		spdk_bdev_start(vbdev);
		return 0;
	}

	rc = spdk_vbdev_set_base_bdevs(vbdev, base_bdevs, base_bdev_count);
	if (rc) {
		spdk_bdev_fini(vbdev);
		return rc;
	}

	spdk_bdev_start(vbdev);
	return 0;

}

void
@@ -2443,17 +2552,12 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void
	struct spdk_bdev_desc	*desc, *tmp;
	int			rc;
	bool			do_destruct = true;
	struct spdk_bdev	*base_bdev;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list\n", bdev->name);

	pthread_mutex_lock(&bdev->mutex);

	if (!TAILQ_EMPTY(&bdev->base_bdevs)) {
		TAILQ_FOREACH(base_bdev, &bdev->base_bdevs, base_bdev_link) {
			TAILQ_REMOVE(&base_bdev->vbdevs, bdev, vbdev_link);
		}
	}
	spdk_vbdev_remove_base_bdevs(bdev);

	bdev->status = SPDK_BDEV_STATUS_REMOVING;
	bdev->unregister_cb = cb_fn;
@@ -2480,9 +2584,7 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void
	TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
	pthread_mutex_unlock(&bdev->mutex);

	pthread_mutex_destroy(&bdev->mutex);

	spdk_io_device_unregister(__bdev_to_io_dev(bdev), NULL);
	spdk_bdev_fini(bdev);

	rc = bdev->fn_table->destruct(bdev->ctxt);
	if (rc < 0) {
+122 −39
Original line number Diff line number Diff line
@@ -89,6 +89,45 @@ vbdev_ut_examine(struct spdk_bdev *bdev)
	spdk_bdev_module_examine_done(&vbdev_ut_if);
}

static bool
is_vbdev(struct spdk_bdev *base, struct spdk_bdev *vbdev)
{
	size_t i;
	int found = 0;

	for (i = 0; i < base->vbdevs_cnt; i++) {
		found += base->vbdevs[i] == vbdev;
	}

	CU_ASSERT(found <= 1);
	return !!found;
}

static bool
is_base_bdev(struct spdk_bdev *base, struct spdk_bdev *vbdev)
{
	size_t i;
	int found = 0;

	for (i = 0; i < vbdev->base_bdevs_cnt; i++) {
		found += vbdev->base_bdevs[i] == base;
	}

	CU_ASSERT(found <= 1);
	return !!found;
}

static bool
check_base_and_vbdev(struct spdk_bdev *base, struct spdk_bdev *vbdev)
{
	bool _is_vbdev = is_vbdev(base, vbdev);
	bool _is_base = is_base_bdev(base, vbdev);

	CU_ASSERT(_is_vbdev == _is_base);

	return _is_base && _is_vbdev;
}

static struct spdk_bdev *
allocate_bdev(char *name)
{
@@ -104,8 +143,8 @@ allocate_bdev(char *name)

	rc = spdk_bdev_register(bdev);
	CU_ASSERT(rc == 0);
	CU_ASSERT(TAILQ_EMPTY(&bdev->base_bdevs));
	CU_ASSERT(TAILQ_EMPTY(&bdev->vbdevs));
	CU_ASSERT(bdev->base_bdevs_cnt == 0);
	CU_ASSERT(bdev->vbdevs_cnt == 0);

	return bdev;
}
@@ -132,8 +171,14 @@ allocate_vbdev(char *name, struct spdk_bdev *base1, struct spdk_bdev *base2)

	rc = spdk_vbdev_register(bdev, array, base2 == NULL ? 1 : 2);
	CU_ASSERT(rc == 0);
	CU_ASSERT(!TAILQ_EMPTY(&bdev->base_bdevs));
	CU_ASSERT(TAILQ_EMPTY(&bdev->vbdevs));
	CU_ASSERT(bdev->base_bdevs_cnt > 0);
	CU_ASSERT(bdev->vbdevs_cnt == 0);

	CU_ASSERT(check_base_and_vbdev(base1, bdev) == true);

	if (base2) {
		CU_ASSERT(check_base_and_vbdev(base2, bdev) == true);
	}

	return bdev;
}
@@ -142,48 +187,55 @@ static void
free_bdev(struct spdk_bdev *bdev)
{
	spdk_bdev_unregister(bdev, NULL, NULL);
	memset(bdev, 0xFF, sizeof(*bdev));
	free(bdev);
}

static void
free_vbdev(struct spdk_bdev *bdev)
{
	CU_ASSERT(!TAILQ_EMPTY(&bdev->base_bdevs));
	CU_ASSERT(bdev->base_bdevs_cnt != 0);
	spdk_bdev_unregister(bdev, NULL, NULL);
	memset(bdev, 0xFF, sizeof(*bdev));
	free(bdev);
}

static void
open_write_test(void)
{
	struct spdk_bdev *bdev[8];
	struct spdk_bdev_desc *desc[8] = {};
	struct spdk_bdev *bdev[9];
	struct spdk_bdev_desc *desc[9] = {};
	int rc;

	/*
	 * Create a tree of bdevs to test various open w/ write cases.
	 *
	 * bdev0 through bdev2 are physical block devices, such as NVMe
	 * bdev0 through bdev3 are physical block devices, such as NVMe
	 * namespaces or Ceph block devices.
	 *
	 * bdev3 is a virtual bdev with multiple base bdevs.  This models
	 * bdev4 is a virtual bdev with multiple base bdevs.  This models
	 * caching or RAID use cases.
	 *
	 * bdev4 through bdev6 are all virtual bdevs with the same base
	 * bdev.  This models partitioning or logical volume use cases.
	 * bdev5 through bdev7 are all virtual bdevs with the same base
	 * bdev (except bdev7). This models partitioning or logical volume
	 * use cases.
	 *
	 * bdev7 is a virtual bdev with multiple base bdevs, but these
	 * bdev7 is a virtual bdev with multiple base bdevs. One of base bdevs
	 * (bdev2) is shared with other virtual bdevs: bdev5 and bdev6. This
	 * models caching, RAID, partitioning or logical volumes use cases.
	 *
	 * bdev8 is a virtual bdev with multiple base bdevs, but these
	 * base bdevs are themselves virtual bdevs.
	 *
	 *                bdev7
	 *                bdev8
	 *                  |
	 *            +----------+
	 *            |          |
	 *          bdev3      bdev4   bdev5   bdev6
	 *          bdev4      bdev5   bdev6   bdev7
	 *            |          |       |       |
	 *        +---+---+      +-------+-------+
	 *        |       |              |
	 *      bdev0   bdev1          bdev2
	 *        +---+---+      +---+   +   +---+---+
	 *        |       |           \  |  /         \
	 *      bdev0   bdev1          bdev2         bdev3
	 */

	bdev[0] = allocate_bdev("bdev0");
@@ -198,18 +250,48 @@ open_write_test(void)
	rc = spdk_bdev_module_claim_bdev(bdev[2], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[3] = allocate_vbdev("bdev3", bdev[0], bdev[1]);
	bdev[3] = allocate_bdev("bdev3");
	rc = spdk_bdev_module_claim_bdev(bdev[3], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[4] = allocate_vbdev("bdev4", bdev[2], NULL);
	bdev[4] = allocate_vbdev("bdev4", bdev[0], bdev[1]);
	rc = spdk_bdev_module_claim_bdev(bdev[4], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[5] = allocate_vbdev("bdev5", bdev[2], NULL);
	rc = spdk_bdev_module_claim_bdev(bdev[5], NULL, &bdev_ut_if);
	CU_ASSERT(rc == 0);

	bdev[6] = allocate_vbdev("bdev6", bdev[2], NULL);

	bdev[7] = allocate_vbdev("bdev7", bdev[3], bdev[4]);
	bdev[7] = allocate_vbdev("bdev7", bdev[2], bdev[3]);

	bdev[8] = allocate_vbdev("bdev8", bdev[4], bdev[5]);

	/* Check tree */
	CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[4]) == true);
	CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[5]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[6]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[7]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[0], bdev[8]) == false);

	CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[4]) == true);
	CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[5]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[6]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[7]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[1], bdev[8]) == false);

	CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[4]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[5]) == true);
	CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[6]) == true);
	CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[7]) == true);
	CU_ASSERT(check_base_and_vbdev(bdev[2], bdev[8]) == false);

	CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[4]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[5]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[6]) == false);
	CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[7]) == true);
	CU_ASSERT(check_base_and_vbdev(bdev[3], bdev[8]) == false);

	/* Open bdev0 read-only.  This should succeed. */
	rc = spdk_bdev_open(bdev[0], false, NULL, NULL, &desc[0]);
@@ -225,51 +307,52 @@ open_write_test(void)
	CU_ASSERT(rc == -EPERM);

	/*
	 * Open bdev3 read/write.  This should fail since bdev3 has been claimed
	 * Open bdev4 read/write.  This should fail since bdev3 has been claimed
	 * by a vbdev module.
	 */
	rc = spdk_bdev_open(bdev[3], true, NULL, NULL, &desc[3]);
	rc = spdk_bdev_open(bdev[4], true, NULL, NULL, &desc[4]);
	CU_ASSERT(rc == -EPERM);

	/* Open bdev3 read-only.  This should succeed. */
	rc = spdk_bdev_open(bdev[3], false, NULL, NULL, &desc[3]);
	/* Open bdev4 read-only.  This should succeed. */
	rc = spdk_bdev_open(bdev[4], false, NULL, NULL, &desc[4]);
	CU_ASSERT(rc == 0);
	CU_ASSERT(desc[3] != NULL);
	spdk_bdev_close(desc[3]);
	CU_ASSERT(desc[4] != NULL);
	spdk_bdev_close(desc[4]);

	/*
	 * Open bdev7 read/write.  This should succeed since it is a leaf
	 * Open bdev8 read/write.  This should succeed since it is a leaf
	 * bdev.
	 */
	rc = spdk_bdev_open(bdev[7], true, NULL, NULL, &desc[7]);
	rc = spdk_bdev_open(bdev[8], true, NULL, NULL, &desc[8]);
	CU_ASSERT(rc == 0);
	CU_ASSERT(desc[7] != NULL);
	spdk_bdev_close(desc[7]);
	CU_ASSERT(desc[8] != NULL);
	spdk_bdev_close(desc[8]);

	/*
	 * Open bdev4 read/write.  This should fail since bdev4 has been claimed
	 * Open bdev5 read/write.  This should fail since bdev4 has been claimed
	 * by a vbdev module.
	 */
	rc = spdk_bdev_open(bdev[4], true, NULL, NULL, &desc[4]);
	rc = spdk_bdev_open(bdev[5], true, NULL, NULL, &desc[5]);
	CU_ASSERT(rc == -EPERM);

	/* Open bdev4 read-only.  This should succeed. */
	rc = spdk_bdev_open(bdev[4], false, NULL, NULL, &desc[4]);
	rc = spdk_bdev_open(bdev[5], false, NULL, NULL, &desc[5]);
	CU_ASSERT(rc == 0);
	CU_ASSERT(desc[4] != NULL);
	spdk_bdev_close(desc[4]);
	CU_ASSERT(desc[5] != NULL);
	spdk_bdev_close(desc[5]);

	free_vbdev(bdev[7]);
	free_vbdev(bdev[8]);

	free_vbdev(bdev[3]);
	free_vbdev(bdev[4]);
	free_vbdev(bdev[5]);
	free_vbdev(bdev[6]);
	free_vbdev(bdev[7]);

	free_vbdev(bdev[4]);

	free_bdev(bdev[0]);
	free_bdev(bdev[1]);
	free_bdev(bdev[2]);

	free_bdev(bdev[3]);
}

static void
+3 −2
Original line number Diff line number Diff line
@@ -91,7 +91,8 @@ static void
part_test(void)
{
	struct spdk_bdev_part_base	*base;
	struct spdk_bdev_part		part1, part2;
	struct spdk_bdev_part		part1 = {};
	struct spdk_bdev_part		part2 = {};
	struct spdk_bdev		bdev_base = {};
	SPDK_BDEV_PART_TAILQ		tailq = TAILQ_HEAD_INITIALIZER(tailq);
	int rc;
@@ -116,7 +117,7 @@ part_test(void)
	 * The base device was removed - ensure that the partition vbdevs were
	 *  removed from the base's vbdev list.
	 */
	CU_ASSERT(TAILQ_EMPTY(&bdev_base.vbdevs));
	CU_ASSERT(bdev_base.vbdevs_cnt == 0);

	spdk_bdev_part_base_free(base);
	spdk_bdev_unregister(&bdev_base, NULL, NULL);