Commit fbc53ae3 authored by Changpeng Liu, committed by Darek Stojaczyk

vhost/nvme: add shared BAR space to support old Guest kernels



For some old Linux Guest kernels, the NVMe 1.3 shadow doorbell
buffer feature is not enabled. To support them, create a dummy BAR
region inside the slave target: when the Guest submits a new request,
the doorbell value is written to this memory shared between the Guest
and the vhost target. With this, the existing vhost target can support
both new Linux Guest kernels (4.12 and newer) and old Guest kernels.
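(Illustration, not from this commit: a minimal sketch of how a target
could poll SQ tail doorbells out of such a shared region. It assumes
the standard NVMe register layout with CAP.DSTRD == 0; the function
name and the hand-off comment are hypothetical.)

#include <stdint.h>

#define DOORBELL_BASE 0x1000 /* NVMe doorbell registers start here */
#define DB_STRIDE     4      /* 4 << CAP.DSTRD, assuming DSTRD == 0 */

/* Scan the SQ tail doorbells the Guest wrote into the shared BAR.
 * last_sq_tail must have num_sqs + 1 entries (qid 0 is the admin SQ,
 * skipped here). */
static void
poll_sq_doorbells(volatile uint8_t *shared_bar, uint16_t num_sqs,
		  uint16_t *last_sq_tail)
{
	for (uint16_t qid = 1; qid <= num_sqs; qid++) {
		/* SQ y tail doorbell: 0x1000 + (2 * y) * (4 << DSTRD) */
		volatile uint32_t *db = (volatile uint32_t *)
			(shared_bar + DOORBELL_BASE + 2 * qid * DB_STRIDE);
		uint16_t new_tail = (uint16_t)*db;

		if (new_tail != last_sq_tail[qid]) {
			last_sq_tail[qid] = new_tail;
			/* Hand new_tail to the existing submission path,
			 * as if it arrived via VHOST_USER_NVME_IO_CMD. */
		}
	}
}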

Also, the shared BAR space can be used in the future to move ADMIN
queue processing into the SPDK vhost target; with that feature, the
QEMU driver will become very small and easy to upstream.

Change-Id: I9463e9f13421368f43bfe4076facddd119f4552e
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/419157


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Darek Stojaczyk <dariusz.stojaczyk@intel.com>
parent 6569a529
+1 −0
@@ -106,6 +106,7 @@ struct vhost_device_ops {
	int (*features_changed)(int vid, uint64_t features);
	int (*vhost_nvme_admin_passthrough)(int vid, void *cmd, void *cqe, void *buf);
	int (*vhost_nvme_set_cq_call)(int vid, uint16_t qid, int fd);
	int (*vhost_nvme_set_bar_mr)(int vid, void *bar_addr, uint64_t bar_size);
	int (*vhost_nvme_get_cap)(int vid, uint64_t *cap);

	int (*new_connection)(int vid);
+2 −0
@@ -200,6 +200,8 @@ struct virtio_net {
	uint32_t		max_guest_pages;
	struct guest_page       *guest_pages;
	int                     has_new_mem_table;
	void			*bar_addr;
	uint64_t		bar_size;
	struct VhostUserMemory  mem_table;
	int                     mem_table_fds[VHOST_MEMORY_MAX_NREGIONS];
} __rte_cache_aligned;
+94 −1
@@ -84,7 +84,8 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
	[VHOST_USER_NVME_SET_CQ_CALL] = "VHOST_USER_NVME_SET_CQ_CALL",
	[VHOST_USER_NVME_GET_CAP] = "VHOST_USER_NVME_GET_CAP",
	[VHOST_USER_NVME_START_STOP] = "VHOST_USER_NVME_START_STOP",
	[VHOST_USER_NVME_IO_CMD] = "VHOST_USER_NVME_IO_CMD"
	[VHOST_USER_NVME_IO_CMD] = "VHOST_USER_NVME_IO_CMD",
	[VHOST_USER_NVME_SET_BAR_MR] = "VHOST_USER_NVME_SET_BAR_MR"
};

static uint64_t
@@ -139,6 +140,11 @@ vhost_backend_cleanup(struct virtio_net *dev)
		munmap((void *)(uintptr_t)dev->log_addr, dev->log_size);
		dev->log_addr = 0;
	}
	if (dev->bar_addr) {
		munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size);
		dev->bar_addr = NULL;
		dev->bar_size = 0;
	}
}

/*
@@ -1119,6 +1125,90 @@ vhost_user_nvme_get_cap(struct virtio_net *dev, uint64_t *cap)
	return -1;
}

static int
vhost_user_nvme_set_bar_mr(struct virtio_net *dev, struct VhostUserMsg *pmsg)
{
	struct VhostUserMemory mem_table;
	int fd = pmsg->fds[0];
	void *mmap_addr;
	uint64_t mmap_size;
	uint64_t mmap_offset;
	uint64_t alignment;
	struct rte_vhost_mem_region reg;
	int ret = 0;

	memcpy(&mem_table, &pmsg->payload.memory, sizeof(mem_table));

	reg.guest_phys_addr = mem_table.regions[0].guest_phys_addr;
	reg.guest_user_addr = mem_table.regions[0].userspace_addr;
	reg.size            = mem_table.regions[0].memory_size;
	reg.fd              = fd;
	mmap_offset = mem_table.regions[0].mmap_offset;
	mmap_size   = reg.size + mmap_offset;

	alignment = get_blk_size(fd);
	if (alignment == (uint64_t)-1) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"couldn't get hugepage size through fstat\n");
		return -1;
	}
	mmap_size = RTE_ALIGN_CEIL(mmap_size, alignment);

	mmap_addr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
			 MAP_SHARED | MAP_POPULATE, fd, 0);

	if (mmap_addr == MAP_FAILED) {
		RTE_LOG(ERR, VHOST_CONFIG,
			"mmap region failed.\n");
		return -1;
	}

	if (madvise(mmap_addr, mmap_size, MADV_DONTDUMP) != 0) {
		RTE_LOG(INFO, VHOST_CONFIG,
			"MADV_DONTDUMP advice setting failed.\n");
	}

	reg.mmap_addr = mmap_addr;
	reg.mmap_size = mmap_size;
	reg.host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
				      mmap_offset;

	RTE_LOG(INFO, VHOST_CONFIG,
			"BAR memory region %u, size: 0x%" PRIx64 "\n"
			"\t guest physical addr: 0x%" PRIx64 "\n"
			"\t guest virtual  addr: 0x%" PRIx64 "\n"
			"\t host  virtual  addr: 0x%" PRIx64 "\n"
			"\t mmap addr : 0x%" PRIx64 "\n"
			"\t mmap size : 0x%" PRIx64 "\n"
			"\t mmap align: 0x%" PRIx64 "\n"
			"\t mmap off  : 0x%" PRIx64 "\n",
			0, reg.size,
			reg.guest_phys_addr,
			reg.guest_user_addr,
			reg.host_user_addr,
			(uint64_t)(uintptr_t)mmap_addr,
			mmap_size,
			alignment,
			mmap_offset);

	if (dev->bar_addr) {
		munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size);
	}
	dev->bar_addr = (void *)(uintptr_t)reg.host_user_addr;
	dev->bar_size = reg.mmap_size;

	if (dev->notify_ops->vhost_nvme_set_bar_mr) {
		ret = dev->notify_ops->vhost_nvme_set_bar_mr(dev->vid, dev->bar_addr, dev->bar_size);
		if (ret) {
			munmap((void *)(uintptr_t)dev->bar_addr, dev->bar_size);
			dev->bar_addr = NULL;
			dev->bar_size = 0;
		}
	}

	return ret;
}

int
vhost_user_msg_handler(int vid, int fd)
{
@@ -1243,6 +1333,9 @@ vhost_user_msg_handler(int vid, int fd)
		is_submission_queue = (msg.payload.nvme_io.queue_type == VHOST_USER_NVME_SUBMISSION_QUEUE) ? true : false;
		vhost_user_nvme_io_request_passthrough(dev, qid, tail_head, is_submission_queue);
		break;
	case VHOST_USER_NVME_SET_BAR_MR:
		ret = vhost_user_nvme_set_bar_mr(dev, &msg);
		break;
	case VHOST_USER_GET_FEATURES:
		msg.payload.u64 = vhost_user_get_features(dev);
		msg.size = sizeof(msg.payload.u64);
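
(For context, a hedged sketch of the master/QEMU side of this
exchange. The three-u32 header and SCM_RIGHTS fd passing follow the
vhost-user protocol; send_set_bar_mr, its parameters, and the struct
names are illustrative, not code from this patch.)

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define VHOST_USER_NVME_SET_BAR_MR 85  /* matches the new request id */
#define VHOST_USER_VERSION         0x1

/* Fixed vhost-user header: request, flags (version), payload size. */
struct vhost_user_hdr {
	uint32_t request;
	uint32_t flags;
	uint32_t size;
} __attribute__((packed));

/* One-region payload mirroring struct VhostUserMemory. */
struct bar_mr_payload {
	uint32_t nregions;
	uint32_t padding;
	struct {
		uint64_t guest_phys_addr;
		uint64_t memory_size;
		uint64_t userspace_addr;
		uint64_t mmap_offset;
	} regions[1];
} __attribute__((packed));

static int
send_set_bar_mr(int sock, int bar_fd, uint64_t gpa, uint64_t uva,
		uint64_t size)
{
	struct {
		struct vhost_user_hdr hdr;
		struct bar_mr_payload pl;
	} __attribute__((packed)) msg = {
		.hdr = { VHOST_USER_NVME_SET_BAR_MR, VHOST_USER_VERSION,
			 sizeof(struct bar_mr_payload) },
		.pl = { .nregions = 1, .regions[0] = {
			.guest_phys_addr = gpa,
			.memory_size     = size,
			.userspace_addr  = uva,
			.mmap_offset     = 0 } },
	};
	struct iovec iov = { .iov_base = &msg, .iov_len = sizeof(msg) };
	union { char buf[CMSG_SPACE(sizeof(int))]; struct cmsghdr align; } u;
	struct msghdr mh = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = u.buf, .msg_controllen = sizeof(u.buf),
	};
	struct cmsghdr *cmsg;

	memset(&u, 0, sizeof(u));
	cmsg = CMSG_FIRSTHDR(&mh);
	/* Pass the BAR fd so the slave can mmap() it on its side. */
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type  = SCM_RIGHTS;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &bar_fd, sizeof(int));

	return sendmsg(sock, &mh, 0) < 0 ? -1 : 0;
}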
+1 −0
@@ -91,6 +91,7 @@ typedef enum VhostUserRequest {
	VHOST_USER_NVME_GET_CAP = 82,
	VHOST_USER_NVME_START_STOP = 83,
	VHOST_USER_NVME_IO_CMD = 84,
	VHOST_USER_NVME_SET_BAR_MR = 85,
	VHOST_USER_MAX
} VhostUserRequest;

+1 −0
@@ -82,6 +82,7 @@ const struct vhost_device_ops g_spdk_vhost_ops = {
	.vhost_nvme_admin_passthrough = spdk_vhost_nvme_admin_passthrough,
	.vhost_nvme_set_cq_call = spdk_vhost_nvme_set_cq_call,
	.vhost_nvme_get_cap = spdk_vhost_nvme_get_cap,
	.vhost_nvme_set_bar_mr = spdk_vhost_nvme_set_bar_mr,
};

static TAILQ_HEAD(, spdk_vhost_dev) g_spdk_vhost_devices = TAILQ_HEAD_INITIALIZER(
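
(The body of spdk_vhost_nvme_set_bar_mr is not part of this hunk;
below is only a rough sketch of the shape of such a callback — stash
the mapping so the I/O poller can read Guest doorbell writes. The
controller struct and lookup table are assumptions, not SPDK code.)

#include <stddef.h>
#include <stdint.h>

/* Hypothetical per-controller state; the real patch keeps this inside
 * the SPDK vhost-nvme controller structure. */
struct example_nvme_ctrlr {
	int vid;
	void *bar;
	uint64_t bar_size;
};

static struct example_nvme_ctrlr g_example_ctrlrs[8];

static struct example_nvme_ctrlr *
example_find_ctrlr(int vid)
{
	for (size_t i = 0; i < 8; i++) {
		if (g_example_ctrlrs[i].vid == vid) {
			return &g_example_ctrlrs[i];
		}
	}
	return NULL;
}

/* Shape of a vhost_nvme_set_bar_mr callback: remember the mapping so
 * the poller can read doorbell values the Guest writes into the BAR. */
static int
example_set_bar_mr(int vid, void *bar_addr, uint64_t bar_size)
{
	struct example_nvme_ctrlr *ctrlr = example_find_ctrlr(vid);

	if (ctrlr == NULL || bar_addr == NULL) {
		return -1;
	}
	ctrlr->bar = bar_addr;
	ctrlr->bar_size = bar_size;
	return 0;
}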