Commit 3d1b6055 authored by Jim Harris
Browse files

env: add spdk_vtophys_register/unregister



These APIs can be used to register/unregister regions
of pinned, huge page memory that are separate from
the huge page memory that DPDK allocates by default.
These APIs will be used by an upcoming
SPDK vhost-scsi target to enable SPDK to target
NVMe DMA operations directly to VM memory that has
been allocated by QEMU using pinned huge pages.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I649a4adeeb758b29bd29cd42c8872eed3d5d6ce9
parent e2abf192
Loading
Loading
Loading
Loading
+13 −0
Original line number Diff line number Diff line
@@ -154,6 +154,19 @@ void spdk_delay_us(unsigned int us);

uint64_t spdk_vtophys(void *buf);

/**
 * Register the specified memory region for vtophys address translation.
 * The memory region must map to pinned huge pages (2MB or greater).
 *
 * vaddr is the start of the region and len is its length in bytes.  The
 * implementation in this change rejects the call (prints a message and
 * returns without registering anything) when vaddr or len fails its
 * MASK_2MB check - presumably meaning both must be 2MB aligned - or when
 * vaddr is not a valid usermode address.
 *
 * Registrations are reference counted per 2MB page, so every successful
 * register should be balanced by a matching spdk_vtophys_unregister().
 *
 * This function returns no status; failures are only reported as printed
 * messages.
 */
void spdk_vtophys_register(void *vaddr, uint64_t len);

/**
 * Unregister the specified memory region from vtophys address translation.
 * The caller must ensure all in-flight DMA operations to this memory region
 *  are completed or cancelled before calling this function.
 *
 * vaddr and len are validated the same way as in spdk_vtophys_register().
 * Each 2MB page in the region has its reference count dropped by one; the
 * page's translation is invalidated once its count reaches zero.  A page
 * that is not currently registered is reported with a printed message and
 * otherwise left untouched.
 *
 * This function returns no status; failures are only reported as printed
 * messages.
 */
void spdk_vtophys_unregister(void *vaddr, uint64_t len);

enum spdk_pci_device_type {
	SPDK_PCI_DEVICE_NVME,
	SPDK_PCI_DEVICE_IOAT,
+138 −4
Original line number Diff line number Diff line
@@ -64,9 +64,16 @@
#define MAP_128TB_IDX(vfn_2mb)	((vfn_2mb) >> (SHIFT_1GB - SHIFT_2MB))
#define MAP_1GB_IDX(vfn_2mb)	((vfn_2mb) & ((1ULL << (SHIFT_1GB - SHIFT_2MB + 1)) - 1))

/*
 * Max value for a 48-bit PFN.
 * Used as the "no translation" sentinel in map_2mb.pfn_2mb: the register
 *  path populates an entry holding this value, the unregister path writes
 *  it back once the reference count reaches zero, and the lookup path
 *  treats it as a miss.
 */
#define INVALID_PFN	(0xFFFFFFFFFFFFULL)

/*
 * Max value for a 16-bit ref count.
 * Registration refuses to increment map_2mb.ref_count past this value.
 */
#define VTOPHYS_MAX_REF_COUNT (0xFFFF)

/*
 * Physical page frame number of a single 2MB page.
 * pfn_2mb and ref_count are bit-fields of 48 and 16 bits respectively.
 */
struct map_2mb {
	/*
	 * NOTE(review): this un-sized declaration duplicates the bit-field
	 *  declared right below it; it looks like the pre-change line of the
	 *  diff - confirm only the bit-field version remains in the real file.
	 */
	uint64_t pfn_2mb;
	/* 2MB page frame number, or INVALID_PFN when no translation is stored. */
	uint64_t pfn_2mb : 48;
	/* Number of outstanding registrations covering this 2MB page. */
	uint64_t ref_count : 16;
};

/* Second-level map table indexed by bits [21..29] of the virtual address.
@@ -123,6 +130,25 @@ vtophys_get_map(uint64_t vfn_2mb)
	return map_2mb;
}

/*
 * Look up the physical address backing vaddr through rte_mem_virt2phy().
 *
 * A result of 0 is treated as "address is valid but no backing page has
 *  been assigned yet"; in that case the page is read once to force one to
 *  be assigned and the lookup is repeated.  The result of the final lookup
 *  is returned unmodified, so callers still have to compare it against
 *  RTE_BAD_PHYS_ADDR.
 */
static uint64_t
vtophys_get_dpdk_paddr(void *vaddr)
{
	uintptr_t phys = rte_mem_virt2phy(vaddr);

	if (phys != 0) {
		return phys;
	}

	/* Touch the page so that it gets a backing page, then retry once. */
	rte_atomic64_read((rte_atomic64_t *)vaddr);
	phys = rte_mem_virt2phy(vaddr);

	return phys;
}

static uint64_t
vtophys_get_pfn_2mb(uint64_t vfn_2mb)
{
@@ -149,7 +175,115 @@ vtophys_get_pfn_2mb(uint64_t vfn_2mb)
	}

	fprintf(stderr, "could not find 2MB vfn 0x%jx in DPDK mem config\n", vfn_2mb);
	return -1;
	return INVALID_PFN;
}

/*
 * Take one reference on the translation entry of a single 2MB page.
 *
 * vfn_2mb is the virtual frame number of the page (virtual address shifted
 *  right by SHIFT_2MB).  If the entry does not hold a translation yet, the
 *  physical address is looked up and stored with a reference count of zero
 *  before the reference is taken.  Every failure is reported on stderr and
 *  leaves the reference count unchanged.
 */
static void
_spdk_vtophys_register_one(uint64_t vfn_2mb)
{
	void *page_va = (void *)(vfn_2mb << SHIFT_2MB);
	struct map_2mb *entry = vtophys_get_map(vfn_2mb);
	uint64_t page_pa;

	if (entry == NULL) {
		fprintf(stderr, "could not get vfn_2mb %p map\n", (void *)vfn_2mb);
		return;
	}

	/* First registration of this page: resolve and cache its translation. */
	if (entry->pfn_2mb == INVALID_PFN) {
		page_pa = vtophys_get_dpdk_paddr(page_va);
		if (page_pa == RTE_BAD_PHYS_ADDR) {
			fprintf(stderr, "could not get phys addr for %p\n", page_va);
			return;
		}

		entry->pfn_2mb = page_pa >> SHIFT_2MB;
		entry->ref_count = 0;
	}

	/* The counter is a 16-bit field; refuse to wrap it around. */
	if (entry->ref_count == VTOPHYS_MAX_REF_COUNT) {
		fprintf(stderr, "ref count for %p already at %d\n",
			page_va, VTOPHYS_MAX_REF_COUNT);
		return;
	}

	entry->ref_count += 1;
}

/*
 * Drop one reference on the translation entry of a single 2MB page.
 *
 * vfn_2mb is the virtual frame number of the page (virtual address shifted
 *  right by SHIFT_2MB).  When the last reference goes away the stored
 *  translation is reset to INVALID_PFN.  A missing map or a page that is
 *  not currently registered is reported on stderr and otherwise ignored.
 */
static void
_spdk_vtophys_unregister_one(uint64_t vfn_2mb)
{
	struct map_2mb *entry = vtophys_get_map(vfn_2mb);
	int registered;

	if (entry == NULL) {
		fprintf(stderr, "could not get vfn_2mb %p map\n", (void *)vfn_2mb);
		return;
	}

	registered = (entry->pfn_2mb != INVALID_PFN) && (entry->ref_count != 0);
	if (!registered) {
		fprintf(stderr, "vaddr %p not registered\n", (void *)(vfn_2mb << SHIFT_2MB));
		return;
	}

	entry->ref_count -= 1;
	if (entry->ref_count == 0) {
		/* Last user is gone: forget the translation. */
		entry->pfn_2mb = INVALID_PFN;
	}
}

/*
 * Register [vaddr, vaddr + len) for vtophys translation, one 2MB page at a
 *  time.
 *
 * vaddr must not have any bits set outside MASK_128TB, and neither vaddr
 *  nor len may have bits set in MASK_2MB; otherwise a diagnostic is written
 *  to stderr and nothing is registered.  A len of 0 is accepted and
 *  registers nothing.
 *
 * Per-page failures are reported by _spdk_vtophys_register_one() and do not
 *  stop the remaining pages from being processed.
 */
void
spdk_vtophys_register(void *vaddr, uint64_t len)
{
	uint64_t vfn_2mb;
	uint64_t num_pages;
	uint64_t i;

	if ((uintptr_t)vaddr & ~MASK_128TB) {
		/* Diagnostics go to stderr, like every other error in this file. */
		fprintf(stderr, "invalid usermode virtual address %p\n", vaddr);
		return;
	}

	if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) {
		/* %ju expects uintmax_t, so convert explicitly. */
		fprintf(stderr, "invalid %s parameters, vaddr=%p len=%ju\n",
			__func__, vaddr, (uintmax_t)len);
		return;
	}

	vfn_2mb = (uintptr_t)vaddr >> SHIFT_2MB;
	num_pages = len >> SHIFT_2MB;

	for (i = 0; i < num_pages; i++) {
		_spdk_vtophys_register_one(vfn_2mb + i);
	}
}

/*
 * Unregister [vaddr, vaddr + len) from vtophys translation, one 2MB page at
 *  a time.
 *
 * vaddr must not have any bits set outside MASK_128TB, and neither vaddr
 *  nor len may have bits set in MASK_2MB; otherwise a diagnostic is written
 *  to stderr and nothing is unregistered.  A len of 0 is accepted and
 *  unregisters nothing.
 *
 * Per-page failures (for example a page that was never registered) are
 *  reported by _spdk_vtophys_unregister_one() and do not stop the remaining
 *  pages from being processed.
 */
void
spdk_vtophys_unregister(void *vaddr, uint64_t len)
{
	uint64_t vfn_2mb;
	uint64_t num_pages;
	uint64_t i;

	if ((uintptr_t)vaddr & ~MASK_128TB) {
		/* Diagnostics go to stderr, like every other error in this file. */
		fprintf(stderr, "invalid usermode virtual address %p\n", vaddr);
		return;
	}

	if (((uintptr_t)vaddr & MASK_2MB) || (len & MASK_2MB)) {
		/* %ju expects uintmax_t, so convert explicitly. */
		fprintf(stderr, "invalid %s parameters, vaddr=%p len=%ju\n",
			__func__, vaddr, (uintmax_t)len);
		return;
	}

	vfn_2mb = (uintptr_t)vaddr >> SHIFT_2MB;
	num_pages = len >> SHIFT_2MB;

	for (i = 0; i < num_pages; i++) {
		_spdk_vtophys_unregister_one(vfn_2mb + i);
	}
}

uint64_t
@@ -172,9 +306,9 @@ spdk_vtophys(void *buf)
	}

	pfn_2mb = map_2mb->pfn_2mb;
	if (pfn_2mb == SPDK_VTOPHYS_ERROR) {
	if (pfn_2mb == INVALID_PFN) {
		pfn_2mb = vtophys_get_pfn_2mb(vfn_2mb);
		if (pfn_2mb == SPDK_VTOPHYS_ERROR) {
		if (pfn_2mb == INVALID_PFN) {
			return SPDK_VTOPHYS_ERROR;
		}
		map_2mb->pfn_2mb = pfn_2mb;