Commit 8d7acdaa authored by Pawel Wodkowski's avatar Pawel Wodkowski Committed by Jim Harris
Browse files

vhost: upgrade SPDK vhost code to DPDK 17.05



Also replace the internal DPDK v17.02-based rte_vhost
library with the patched DPDK v17.05-based version.

Change-Id: Ibec0b0746592a1a3911c31642a945ab65495e33e
Signed-off-by: Dariusz Stojaczyk <dariuszx.stojaczyk@intel.com>
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
parent d391647b
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -34,10 +34,11 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

CFLAGS += -I.
CFLAGS += $(ENV_CFLAGS)

# These are the DPDK vhost files copied (for now) into SPDK
C_SRCS += fd_man.c socket.c vhost_user.c virtio_net.c vhost.c
C_SRCS += fd_man.c socket.c vhost_user.c vhost.c

LIBNAME = rte_vhost

+111 −110
Original line number Diff line number Diff line
@@ -35,93 +35,95 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>

#include <rte_common.h>
#include <rte_log.h>

#include "fd_man.h"

/**
 * Returns the index in the fdset for a given fd.
 * If fd is -1, it means to search for a free entry.
 * @return
 *   index for the fd, or -1 if fd isn't in the fdset.
 */
#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)

static int
fdset_find_fd(struct fdset *pfdset, int fd)
get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
{
	int i;

	if (pfdset == NULL)
		return -1;

	for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++)
	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
		;

	return i ==  MAX_FDS ? -1 : i;
	return i;
}

static int
fdset_find_free_slot(struct fdset *pfdset)
static void
fdset_move(struct fdset *pfdset, int dst, int src)
{
	return fdset_find_fd(pfdset, -1);
	pfdset->fd[dst]    = pfdset->fd[src];
	pfdset->rwfds[dst] = pfdset->rwfds[src];
}

static int
fdset_add_fd(struct fdset  *pfdset, int idx, int fd,
	fd_cb rcb, fd_cb wcb, void *dat)
static void
fdset_shrink_nolock(struct fdset *pfdset)
{
	struct fdentry *pfdentry;
	int i;
	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);

	if (pfdset == NULL || idx >= MAX_FDS || fd >= FD_SETSIZE)
		return -1;
	for (i = 0; i < last_valid_idx; i++) {
		if (pfdset->fd[i].fd != -1)
			continue;

	pfdentry = &pfdset->fd[idx];
	pfdentry->fd = fd;
	pfdentry->rcb = rcb;
	pfdentry->wcb = wcb;
	pfdentry->dat = dat;
		fdset_move(pfdset, i, last_valid_idx);
		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
	}
	pfdset->num = last_valid_idx + 1;
}

	return 0;
/*
 * Compact the fdset by removing entries whose fd has been marked -1
 * (deleted).  Thread-safe wrapper: takes pfdset->fd_mutex and delegates
 * the actual compaction to fdset_shrink_nolock().
 *
 * NOTE(review): pfdset is not NULL-checked here — callers must pass a
 * valid fdset.
 */
static void
fdset_shrink(struct fdset *pfdset)
{
	pthread_mutex_lock(&pfdset->fd_mutex);
	fdset_shrink_nolock(pfdset);
	pthread_mutex_unlock(&pfdset->fd_mutex);
}

/**
 * Fill the read/write fd_set with the fds in the fdset.
 * Returns the index in the fdset for a given fd.
 * @return
 *  the maximum fds filled in the read/write fd_set.
 *   index for the fd, or -1 if fd isn't in the fdset.
 */
static int
fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset)
fdset_find_fd(struct fdset *pfdset, int fd)
{
	struct fdentry *pfdentry;
	int i, maxfds = -1;
	int num = MAX_FDS;
	int i;

	if (pfdset == NULL)
		return -1;
	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
		;

	for (i = 0; i < num; i++) {
		pfdentry = &pfdset->fd[i];
		if (pfdentry->fd != -1) {
			int added = 0;
			if (pfdentry->rcb && rfset) {
				FD_SET(pfdentry->fd, rfset);
				added = 1;
			}
			if (pfdentry->wcb && wfset) {
				FD_SET(pfdentry->fd, wfset);
				added = 1;
			}
			if (added)
				maxfds = pfdentry->fd < maxfds ?
					maxfds : pfdentry->fd;
		}
	return i == pfdset->num ? -1 : i;
}
	return maxfds;

/*
 * Install fd at slot idx of the fdset: record the read/write callbacks
 * and user data in the fdentry, and mirror the fd into the parallel
 * pollfd array with the matching POLLIN/POLLOUT interest bits.
 * Caller holds pfdset->fd_mutex (see fdset_add).
 */
static void
fdset_add_fd(struct fdset *pfdset, int idx, int fd,
	fd_cb rcb, fd_cb wcb, void *dat)
{
	struct fdentry *entry = &pfdset->fd[idx];
	struct pollfd *pollent = &pfdset->rwfds[idx];
	short events = 0;

	if (rcb)
		events |= POLLIN;
	if (wcb)
		events |= POLLOUT;

	entry->fd  = fd;
	entry->rcb = rcb;
	entry->wcb = wcb;
	entry->dat = dat;

	pollent->fd      = fd;
	pollent->events  = events;
	pollent->revents = 0;
}

void
@@ -151,16 +153,17 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
		return -1;

	pthread_mutex_lock(&pfdset->fd_mutex);

	/* Find a free slot in the list. */
	i = fdset_find_free_slot(pfdset);
	if (i == -1 || fdset_add_fd(pfdset, i, fd, rcb, wcb, dat) < 0) {
	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
	if (i == -1) {
		fdset_shrink_nolock(pfdset);
		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
		if (i == -1) {
			pthread_mutex_unlock(&pfdset->fd_mutex);
			return -2;
		}
	}

	pfdset->num++;

	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
	pthread_mutex_unlock(&pfdset->fd_mutex);

	return 0;
@@ -189,7 +192,6 @@ fdset_del(struct fdset *pfdset, int fd)
			pfdset->fd[i].fd = -1;
			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
			pfdset->fd[i].dat = NULL;
			pfdset->num--;
			i = -1;
		}
		pthread_mutex_unlock(&pfdset->fd_mutex);
@@ -198,24 +200,6 @@ fdset_del(struct fdset *pfdset, int fd)
	return dat;
}

/**
 * Unregister the fd at the specified slot from the fdset.
 *
 * Marks the slot free (fd = -1), clears its callbacks and user data,
 * and decrements the fdset's entry count, all under fd_mutex.
 * Silently ignores a NULL fdset or an out-of-range index.
 */
static void
fdset_del_slot(struct fdset *pfdset, int index)
{
	if (pfdset == NULL || index < 0 || index >= MAX_FDS)
		return;

	pthread_mutex_lock(&pfdset->fd_mutex);

	pfdset->fd[index].fd = -1;
	pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL;
	pfdset->fd[index].dat = NULL;
	pfdset->num--;

	pthread_mutex_unlock(&pfdset->fd_mutex);
}

/**
 * This functions runs in infinite blocking loop until there is no fd in
@@ -226,58 +210,68 @@ fdset_del_slot(struct fdset *pfdset, int index)
 * will wait until the flag is reset to zero(which indicates the callback is
 * finished), then it could free the context after fdset_del.
 */
void
fdset_event_dispatch(struct fdset *pfdset)
void *
fdset_event_dispatch(void *arg)
{
	fd_set rfds, wfds;
	int i, maxfds;
	int i;
	struct pollfd *pfd;
	struct fdentry *pfdentry;
	int num = MAX_FDS;
	fd_cb rcb, wcb;
	void *dat;
	int fd;
	int fd, numfds;
	int remove1, remove2;
	int ret;
	int need_shrink;
	struct fdset *pfdset = arg;

	if (pfdset == NULL)
		return;
		return NULL;

	while (1) {
		struct timeval tv;
		tv.tv_sec = 1;
		tv.tv_usec = 0;
		FD_ZERO(&rfds);
		FD_ZERO(&wfds);
		pthread_mutex_lock(&pfdset->fd_mutex);

		maxfds = fdset_fill(&rfds, &wfds, pfdset);

		pthread_mutex_unlock(&pfdset->fd_mutex);

		/*
		 * When select is blocked, other threads might unregister
		 * When poll is blocked, other threads might unregister
		 * listenfds from and register new listenfds into fdset.
		 * When select returns, the entries for listenfds in the fdset
		 * When poll returns, the entries for listenfds in the fdset
		 * might have been updated. It is ok if there is unwanted call
		 * for new listenfds.
		 */
		ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv);
		if (ret <= 0)
			continue;
		pthread_mutex_lock(&pfdset->fd_mutex);
		numfds = pfdset->num;
		pthread_mutex_unlock(&pfdset->fd_mutex);

		for (i = 0; i < num; i++) {
			remove1 = remove2 = 0;
		poll(pfdset->rwfds, numfds, 1000 /* millisecs */);

		need_shrink = 0;
		for (i = 0; i < numfds; i++) {
			pthread_mutex_lock(&pfdset->fd_mutex);

			pfdentry = &pfdset->fd[i];
			fd = pfdentry->fd;
			pfd = &pfdset->rwfds[i];

			if (fd < 0) {
				need_shrink = 1;
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			if (!pfd->revents) {
				pthread_mutex_unlock(&pfdset->fd_mutex);
				continue;
			}

			remove1 = remove2 = 0;

			rcb = pfdentry->rcb;
			wcb = pfdentry->wcb;
			dat = pfdentry->dat;
			pfdentry->busy = 1;

			pthread_mutex_unlock(&pfdset->fd_mutex);
			if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb)

			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
				rcb(fd, dat, &remove1);
			if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb)
			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
				wcb(fd, dat, &remove2);
			pfdentry->busy = 0;
			/*
@@ -292,8 +286,15 @@ fdset_event_dispatch(struct fdset *pfdset)
			 * listen fd in another thread, we couldn't call
			 * fd_set_del.
			 */
			if (remove1 || remove2)
				fdset_del_slot(pfdset, i);
			if (remove1 || remove2) {
				pfdentry->fd = -1;
				need_shrink = 1;
			}
		}

		if (need_shrink)
			fdset_shrink(pfdset);
	}

	return NULL;
}
+3 −1
Original line number Diff line number Diff line
@@ -35,6 +35,7 @@
#define _FD_MAN_H_
#include <stdint.h>
#include <pthread.h>
#include <poll.h>

#define MAX_FDS 1024

@@ -49,6 +50,7 @@ struct fdentry {
};

struct fdset {
	struct pollfd rwfds[MAX_FDS];
	struct fdentry fd[MAX_FDS];
	pthread_mutex_t fd_mutex;
	int num;	/* current fd number of this fdset */
@@ -62,6 +64,6 @@ int fdset_add(struct fdset *pfdset, int fd,

void *fdset_del(struct fdset *pfdset, int fd);

void fdset_event_dispatch(struct fdset *pfdset);
void *fdset_event_dispatch(void *arg);

#endif
+0 −193
Original line number Diff line number Diff line
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _VIRTIO_NET_H_
#define _VIRTIO_NET_H_

/**
 * @file
 * Interface to vhost net
 */

#include <stdint.h>
#include <linux/vhost.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_net.h>
#include <sys/eventfd.h>
#include <sys/socket.h>
#include <linux/if.h>

#include <rte_memory.h>
#include <rte_mempool.h>
#include <rte_ether.h>

#define RTE_VHOST_USER_CLIENT		(1ULL << 0)
#define RTE_VHOST_USER_NO_RECONNECT	(1ULL << 1)
#define RTE_VHOST_USER_DEQUEUE_ZERO_COPY	(1ULL << 2)

/* Enum for virtqueue management. */
enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

/**
 * Device and vring operations.
 */
struct virtio_net_device_ops {
	int (*new_device)(int vid);		/**< Add device. */
	void (*destroy_device)(int vid);	/**< Remove device. */

	int (*vring_state_changed)(int vid, uint16_t queue_id, int enable);	/**< triggered when a vring is enabled or disabled */

	void *reserved[5]; /**< Reserved for future extension */
};

/**
 *  Disable features in feature_mask. Returns 0 on success.
 */
int rte_vhost_feature_disable(uint64_t feature_mask);

/**
 *  Enable features in feature_mask. Returns 0 on success.
 */
int rte_vhost_feature_enable(uint64_t feature_mask);

/* Returns currently supported vhost features */
uint64_t rte_vhost_feature_get(void);

int rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable);

/**
 * Register vhost driver. path could be different for multiple
 * instance support.
 */
int rte_vhost_driver_register(const char *path, uint64_t flags);

/* Unregister vhost driver. This is only meaningful to vhost user. */
int rte_vhost_driver_unregister(const char *path);

/* Register callbacks. */
int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const);
/* Start vhost driver session blocking loop. */
int rte_vhost_driver_session_start(void);

/**
 * Get the numa node from which the virtio net device's memory
 * is allocated.
 *
 * @param vid
 *  virtio-net device ID
 *
 * @return
 *  The numa node, -1 on failure
 */
int rte_vhost_get_numa_node(int vid);

/**
 * Get the number of queues the device supports.
 *
 * @param vid
 *  virtio-net device ID
 *
 * @return
 *  The number of queues, 0 on failure
 */
uint32_t rte_vhost_get_queue_num(int vid);

/**
 * Get the virtio net device's ifname, which is the vhost-user socket
 * file path.
 *
 * @param vid
 *  virtio-net device ID
 * @param buf
 *  The buffer to stored the queried ifname
 * @param len
 *  The length of buf
 *
 * @return
 *  0 on success, -1 on failure
 */
int rte_vhost_get_ifname(int vid, char *buf, size_t len);

/**
 * Get how many avail entries are left in the queue
 *
 * @param vid
 *  virtio-net device ID
 * @param queue_id
 *  virtio queue index
 *
 * @return
 *  num of avail entries left
 */
uint16_t rte_vhost_avail_entries(int vid, uint16_t queue_id);

/**
 * This function adds buffers to the virtio devices RX virtqueue. Buffers can
 * be received from the physical port or from another virtual device. A packet
 * count is returned to indicate the number of packets that were successfully
 * added to the RX queue.
 * @param vid
 *  virtio-net device ID
 * @param queue_id
 *  virtio queue index in mq case
 * @param pkts
 *  array to contain packets to be enqueued
 * @param count
 *  packets num to be enqueued
 * @return
 *  num of packets enqueued
 */
uint16_t rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
	struct rte_mbuf **pkts, uint16_t count);

/**
 * This function gets guest buffers from the virtio device TX virtqueue,
 * construct host mbufs, copies guest buffer content to host mbufs and
 * store them in pkts to be processed.
 * @param vid
 *  virtio-net device
 * @param queue_id
 *  virtio queue index in mq case
 * @param mbuf_pool
 *  mbuf_pool where host mbuf is allocated.
 * @param pkts
 *  array to contain packets to be dequeued
 * @param count
 *  packets num to be dequeued
 * @return
 *  num of packets dequeued
 */
uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count);

#endif /* _VIRTIO_NET_H_ */
Loading