Commit f570aa65 authored by Jim Harris's avatar Jim Harris
Browse files

vhost: only split on 2MB boundaries when necessary



vhost I/O only need to be split on 2MB boundaries if
there is a break in the VM's memtable at that 2MB
boundary.

This should drastically reduce (if not eliminate)
the intermittent test pool failures seen recently.
virtio limits number of segments to 128, but this
2MB splitting could introduce additional segment
breaks which we do not allocate IOVs for.  In almost
all cases, there are no memtable breaks except at
low 2MB, so most of the extra segment breaks we are
adding are unnecessary.

Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: I12d85c289ad80c7bb65e3d2030a2405092b19deb

Reviewed-on: https://review.gerrithub.io/396058


Reviewed-by: default avatarDaniel Verkamp <daniel.verkamp@intel.com>
Tested-by: default avatarSPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarDariusz Stojaczyk <dariuszx.stojaczyk@intel.com>
parent e489ca69
Loading
Loading
Loading
Loading
+21 −1
Original line number Diff line number Diff line
@@ -329,6 +329,7 @@ spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov,
			     uint16_t *iov_index, const struct vring_desc *desc)
{
	uint32_t remaining = desc->len;
	uint32_t to_boundary;
	uint32_t len;
	uintptr_t payload = desc->addr;
	uintptr_t vva;
@@ -343,7 +344,26 @@ spdk_vhost_vring_desc_to_iov(struct spdk_vhost_dev *vdev, struct iovec *iov,
			SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
			return -1;
		}
		len = spdk_min(remaining, 0x200000 - _2MB_OFFSET(payload));
		to_boundary = 0x200000 - _2MB_OFFSET(payload);
		if (spdk_likely(remaining <= to_boundary)) {
			len = remaining;
		} else {
			/*
			 * Descriptor crosses a 2MB hugepage boundary.  vhost memory regions are allocated
			 *  from hugepage memory, so this means this descriptor may be described by
			 *  discontiguous vhost memory regions.  Do not blindly split on the 2MB boundary,
			 *  only split it if the two sides of the boundary do not map to the same vhost
			 *  memory region.  This helps ensure we do not exceed the max number of IOVs
			 *  defined by SPDK_VHOST_IOVS_MAX.
			 */
			len = to_boundary;
			while (len < remaining) {
				if (vva + len != (uintptr_t)spdk_vhost_gpa_to_vva(vdev, payload + len)) {
					break;
				}
				len += spdk_min(remaining - len, 0x200000);
			}
		}
		iov[*iov_index].iov_base = (void *)vva;
		iov[*iov_index].iov_len = len;
		remaining -= len;
+5 −18
Original line number Diff line number Diff line
@@ -164,31 +164,18 @@ desc_to_iov_test(void)
	iov_index = 0;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(iov_index == 2);
	CU_ASSERT(iov_index == 1);
	CU_ASSERT(iov[0].iov_base == (void *)0x11F0000);
	CU_ASSERT(iov[0].iov_len == 0x10000);
	CU_ASSERT(iov[1].iov_base == (void *)0x1200000);
	CU_ASSERT(iov[1].iov_len == 0x10000);
	CU_ASSERT(iov[0].iov_len == 0x20000);
	memset(iov, 0, sizeof(iov));

	/* Same test, but ensure it respects the non-zero starting iov_index. */
	iov_index = SPDK_VHOST_IOVS_MAX - 2;
	iov_index = SPDK_VHOST_IOVS_MAX - 1;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc == 0);
	CU_ASSERT(iov_index == SPDK_VHOST_IOVS_MAX);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_base == (void *)0x11F0000);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 2].iov_len == 0x10000);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x1200000);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x10000);
	memset(iov, 0, sizeof(iov));

	/*
	 * This test should fail.  The first part of the descriptor will fit in the last
	 * iov, but the part after the 2MB boundary would overflow.
	 */
	iov_index = SPDK_VHOST_IOVS_MAX - 1;
	rc = spdk_vhost_vring_desc_to_iov(vdev, iov, &iov_index, &desc);
	CU_ASSERT(rc != 0);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_base == (void *)0x11F0000);
	CU_ASSERT(iov[SPDK_VHOST_IOVS_MAX - 1].iov_len == 0x20000);
	memset(iov, 0, sizeof(iov));

	/* Test case where iov spans a vhost memory region. */