Commit bab34df6 authored by Jim Harris's avatar Jim Harris
Browse files

vhost: split iovs which span hugepage boundaries



A virtio descriptor may specify a buffer that spans
a huge page boundary or even a vhost memory region.
So modify spdk_vhost_vring_desc_to_iov() to take this
into account.

While here, also change spdk_vhost_vring_desc_to_iov
to return int instead of bool.  int return with -1 is
most standard to indicate failure.

Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: I71faefce0367dc9e44d70ea4429a28dd64f04c10

Reviewed-on: https://review.gerrithub.io/371756


Tested-by: default avatarSPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: default avatarDaniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: default avatarPiotr Pelpliński <piotr.pelplinski@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarDariusz Stojaczyk <dariuszx.stojaczyk@intel.com>
parent c41338ce
Loading
Loading
Loading
Loading
+28 −5
Original line number Diff line number Diff line
@@ -143,13 +143,36 @@ spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc)
	return !!(cur_desc->flags & VRING_DESC_F_WRITE);
}

bool
#define _2MB_OFFSET(ptr)	((ptr) & (0x200000 - 1))

int
spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov,
			     const struct vring_desc *desc)
			     uint16_t *iov_index, const struct vring_desc *desc)
{
	iov->iov_base =  spdk_vhost_gpa_to_vva(vdev, desc->addr);
	iov->iov_len = desc->len;
	return !iov->iov_base;
	uint32_t remaining = desc->len;
	uint32_t len;
	uintptr_t payload = desc->addr;
	uintptr_t vva;

	while (remaining) {
		if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
			SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX);
			return -1;
		}
		vva = (uintptr_t)spdk_vhost_gpa_to_vva(vdev, payload);
		if (vva == 0) {
			SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
			return -1;
		}
		len = spdk_min(remaining, 0x200000 - _2MB_OFFSET(payload));
		iov[*iov_index].iov_base = (void *)vva;
		iov[*iov_index].iov_len = len;
		remaining -= len;
		payload += len;
		(*iov_index)++;
	}

	return 0;
}

struct spdk_vhost_dev *
+2 −3
Original line number Diff line number Diff line
@@ -137,14 +137,13 @@ blk_iovs_setup(struct spdk_vhost_dev *vdev, struct rte_vhost_vring *vq, uint16_t
			return -1;
		}

		if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, &iovs[cnt], desc))) {
		if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &cnt, desc))) {
			SPDK_TRACELOG(SPDK_TRACE_VHOST_BLK, "Invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
				      req_idx, cnt);
			return -1;
		}

		len += iovs[cnt].iov_len;
		cnt++;
		len += desc->len;

		out_cnt += spdk_vhost_vring_desc_is_wr(desc);

+2 −2
Original line number Diff line number Diff line
@@ -109,8 +109,8 @@ struct vring_desc *spdk_vhost_vring_desc_get_next(struct vring_desc *vq_desc,
		struct vring_desc *cur_desc);
bool spdk_vhost_vring_desc_is_wr(struct vring_desc *cur_desc);

bool spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov,
				  const struct vring_desc *desc);
int spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov,
				 uint16_t *iov_index, const struct vring_desc *desc);

struct spdk_vhost_dev *spdk_vhost_dev_find_by_vid(int vid);

+8 −4
Original line number Diff line number Diff line
@@ -516,9 +516,11 @@ task_data_setup(struct spdk_vhost_scsi_task *task,

		/* All remaining descriptors are data. */
		while (iovcnt < iovcnt_max) {
			spdk_vhost_vring_desc_to_iov(vdev, &iovs[iovcnt], desc);
			if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &iovcnt, desc))) {
				task->resp = NULL;
				goto abort_task;
			}
			len += desc->len;
			iovcnt++;

			if (!spdk_vhost_vring_desc_has_next(desc))
				break;
@@ -539,9 +541,11 @@ task_data_setup(struct spdk_vhost_scsi_task *task,

		/* Process descriptors up to response. */
		while (!spdk_vhost_vring_desc_is_wr(desc) && iovcnt < iovcnt_max) {
			spdk_vhost_vring_desc_to_iov(vdev, &iovs[iovcnt], desc);
			if (spdk_unlikely(spdk_vhost_vring_desc_to_iov(vdev, iovs, &iovcnt, desc))) {
				task->resp = NULL;
				goto abort_task;
			}
			len += desc->len;
			iovcnt++;

			if (!spdk_vhost_vring_desc_has_next(desc)) {
				SPDK_WARNLOG("TO_DEV cmd: no response descriptor.\n");
+119 −2
Original line number Diff line number Diff line
@@ -74,9 +74,126 @@ test_setup(void)
	return 0;
}

static struct spdk_vhost_dev *
alloc_vdev(void)
{
	struct spdk_vhost_dev *vdev = NULL;
	int rc;

	/* spdk_vhost_dev must be allocated on a cache line boundary. */
	rc = posix_memalign((void **)&vdev, 64, sizeof(*vdev));
	CU_ASSERT(rc == 0);
	SPDK_CU_ASSERT_FATAL(vdev != NULL);

	vdev->mem = calloc(1, sizeof(*vdev->mem) + 2 * sizeof(struct rte_vhost_mem_region));
	SPDK_CU_ASSERT_FATAL(vdev->mem != NULL);
	vdev->mem->nregions = 2;
	vdev->mem->regions[0].guest_phys_addr = 0;
	vdev->mem->regions[0].size = 0x400000; /* 4 MB */
	vdev->mem->regions[0].host_user_addr = 0x1000000;
	vdev->mem->regions[1].guest_phys_addr = 0x400000;
	vdev->mem->regions[1].size = 0x400000; /* 4 MB */
	vdev->mem->regions[1].host_user_addr = 0x2000000;

	return vdev;
}

static void
null_test(void)
free_vdev(struct spdk_vhost_dev *vdev)
{
	free(vdev->mem);
	free(vdev);
}

static void
desc_to_iov_test(void)
{
	struct spdk_vhost_dev *vdev;
	struct iovec iov[SPDK_VHOST_IOVS_MAX];
	uint16_t iov_index;
	struct vring_desc desc;
	int rc;

	vdev = alloc_vdev();

	/* Test simple case where iov falls fully within a 2MB page. */
	desc.addr = 0x110000;
	desc.len = 0x1000;
	iov_index = 0;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(iov_index == 1);
	CU_ASSERT(iov[0].iov_base == (void *)0x1110000);
	CU_ASSERT(iov[0].iov_len == 0x1000);
	/*
	 * Always memset the iov to ensure each test validates data written by its call
	 * to the function under test.
	 */
	memset(iov, 0, sizeof(iov));

	/* Same test, but ensure it respects the non-zero starting iov_index. */
	iov_index = SPDK_VHOST_IOVS_MAX - 1;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(iov_index == SPDK_VHOST_IOVS_MAX);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x1110000);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x1000);
	memset(iov, 0, sizeof(iov));

	/* Test for failure if iov_index already equals SPDK_VHOST_IOVS_MAX. */
	iov_index = SPDK_VHOST_IOVS_MAX;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc != 0);
	memset(iov, 0, sizeof(iov));

	/* Test case where iov spans a 2MB boundary, but does not span a vhost memory region. */
	desc.addr = 0x1F0000;
	desc.len = 0x20000;
	iov_index = 0;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(iov_index == 2);
	CU_ASSERT(iov[0].iov_base == (void *)0x11F0000);
	CU_ASSERT(iov[0].iov_len == 0x10000);
	CU_ASSERT(iov[1].iov_base == (void *)0x1200000);
	CU_ASSERT(iov[1].iov_len == 0x10000);
	memset(iov, 0, sizeof(iov));

	/* Same test, but ensure it respects the non-zero starting iov_index. */
	iov_index = SPDK_VHOST_IOVS_MAX - 2;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(iov_index == SPDK_VHOST_IOVS_MAX);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_base == (void *)0x11F0000);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_len == 0x10000);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x1200000);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x10000);
	memset(iov, 0, sizeof(iov));

	/*
	 * This test should fail.  The first part of the descriptor will fit in the last
	 * iov, but the part after the 2MB boundary would overflow.
	 */
	iov_index = SPDK_VHOST_IOVS_MAX - 1;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc != 0);
	memset(iov, 0, sizeof(iov));

	/* Test case where iov spans a vhost memory region. */
	desc.addr = 0x3F0000;
	desc.len = 0x20000;
	iov_index = 0;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(iov_index == 2);
	CU_ASSERT(iov[0].iov_base == (void *)0x13F0000);
	CU_ASSERT(iov[0].iov_len == 0x10000);
	CU_ASSERT(iov[1].iov_base == (void *)0x2000000);
	CU_ASSERT(iov[1].iov_len == 0x10000);
	memset(iov, 0, sizeof(iov));

	free_vdev(vdev);

	CU_ASSERT(true);
}

@@ -97,7 +214,7 @@ main(int argc, char **argv)
	}

	if (
		CU_add_test(suite, "null test", null_test) == NULL
		CU_add_test(suite, "desc_to_iov", desc_to_iov_test) == NULL
	) {
		CU_cleanup_registry();
		return CU_get_error();