Commit 59237d22 authored by Jin Yu, committed by Tomasz Zawadzki
Browse files

lib: add a hotplug lib for device



The hotplug lib can be used for pcie devices
such as nvme, virtio_blk and virtio scsi.

For the sigbus handler, there is only one in a
process and it should handle all the devices.

And align nvme to the hotplug lib

Add the ADD uevent support for allowing the
device hotplug.

Change-Id: I82cd3b4af38ca24cee8b041a215a85c4a69e60f7
Signed-off-by: Jin Yu <jin.yu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/5653


Community-CI: Broadcom CI
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: <dongx.yi@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
parent 1c3bc9d6
Loading
Loading
Loading
Loading
+50 −0
Original line number Diff line number Diff line
@@ -1333,6 +1333,56 @@ int spdk_mem_reserve(void *vaddr, size_t len);
 */
int spdk_mem_get_fd_and_offset(void *vaddr, uint64_t *offset);

enum spdk_pci_event_type {
	SPDK_UEVENT_ADD = 0,
	SPDK_UEVENT_REMOVE = 1,
};

/**
 * A single hotplug event, as filled in by spdk_pci_get_event().
 */
struct spdk_pci_event {
	/** Whether the device was added or removed. */
	enum spdk_pci_event_type action;
	/** PCI address (BDF) of the device the event refers to. */
	struct spdk_pci_addr traddr;
};

/**
 * Callback invoked when the process receives a bus error (SIGBUS).
 *
 * \param info Signal information delivered with the SIGBUS.
 * \param ctx Context pointer supplied when the handler was registered.
 */
typedef void (*spdk_pci_error_handler)(siginfo_t *info, void *ctx);

/**
 * Begin listening for PCI bus events. This is used to detect hot-insert and
 * hot-remove events. Once the system is listening, events may be retrieved
 * by calling spdk_pci_get_event() periodically.
 *
 * \return negative errno on failure, otherwise a file descriptor that may
 * later be passed to spdk_pci_get_event().
 */
int spdk_pci_event_listen(void);

/**
 * Get the next PCI bus event.
 *
 * \param fd A file descriptor returned by spdk_pci_event_listen()
 * \param event Output: filled in with the event when one is returned
 *
 * \return Negative errno on failure. 0 if no event is available. A positive
 * number when an event has been returned in \c event.
 */
int spdk_pci_get_event(int fd, struct spdk_pci_event *event);

/**
 * Register a signal handler to handle bus errors on the PCI bus.
 *
 * \param sighandler Bus error (SIGBUS) handler to register
 * \param ctx Argument passed to the registered handler when it is invoked
 *
 * \return 0 on success, negative errno on failure (-EINVAL if sighandler is
 * NULL or already registered, -ENOMEM if the registration cannot be allocated)
 */
int spdk_pci_register_error_handler(spdk_pci_error_handler sighandler, void *ctx);

/**
 * Unregister a signal handler previously registered with
 * spdk_pci_register_error_handler().
 *
 * \param sighandler Bus error (SIGBUS) handler to unregister; nothing happens
 * if it is NULL or was never registered
 */
void spdk_pci_unregister_error_handler(spdk_pci_error_handler sighandler);

#ifdef __cplusplus
}
#endif
+1 −0
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@ SO_MINOR := 0
CFLAGS += $(ENV_CFLAGS)
C_SRCS = env.c memory.c pci.c init.c threads.c
C_SRCS += pci_ioat.c pci_virtio.c pci_vmd.c pci_idxd.c
C_SRCS += pci_event.c sigbus_handler.c
LIBNAME = env_dpdk

SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_env_dpdk.map)
+92 −63
Original line number Diff line number Diff line
@@ -35,8 +35,7 @@
#include "spdk/string.h"

#include "spdk/log.h"

#include "nvme_uevent.h"
#include "spdk/env.h"

#ifdef __linux__

@@ -46,12 +45,12 @@
#define SPDK_UEVENT_RECVBUF_SIZE 1024 * 1024

int
nvme_uevent_connect(void)
spdk_pci_event_listen(void)
{
	struct sockaddr_nl addr;
	int netlink_fd;
	int size = SPDK_UEVENT_RECVBUF_SIZE;
	int flag;
	int flag, rc;

	memset(&addr, 0, sizeof(addr));
	addr.nl_family = AF_NETLINK;
@@ -60,58 +59,79 @@ nvme_uevent_connect(void)

	netlink_fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_KOBJECT_UEVENT);
	if (netlink_fd < 0) {
		return -1;
		SPDK_ERRLOG("Failed to create netlink socket\n");
		return netlink_fd;
	}

	setsockopt(netlink_fd, SOL_SOCKET, SO_RCVBUFFORCE, &size, sizeof(size));
	if (setsockopt(netlink_fd, SOL_SOCKET, SO_RCVBUFFORCE, &size, sizeof(size)) < 0) {
		rc = errno;
		SPDK_ERRLOG("Failed to set socket option\n");
		return -rc;
	}

	flag = fcntl(netlink_fd, F_GETFL);
	if (flag < 0) {
		rc = errno;
		SPDK_ERRLOG("Failed to get socket flag, fd: %d\n", netlink_fd);
		close(netlink_fd);
		return -rc;
	}

	if (fcntl(netlink_fd, F_SETFL, flag | O_NONBLOCK) < 0) {
		SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%s)\n", netlink_fd,
			    spdk_strerror(errno));
		rc = errno;
		SPDK_ERRLOG("Fcntl can't set nonblocking mode for socket, fd: %d\n", netlink_fd);
		close(netlink_fd);
		return -1;
		return -rc;
	}

	if (bind(netlink_fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
		rc = errno;
		SPDK_ERRLOG("Failed to bind the netlink\n");
		close(netlink_fd);
		return -1;
		return -rc;
	}

	return netlink_fd;
}

/* Note: We only parse the event from uio subsystem and will ignore
/* Note: We parse the event from uio and vfio subsystem and will ignore
 *       all the event from other subsystem. the event from uio subsystem
 *       as below:
 *       action: "add" or "remove"
 *       subsystem: "uio"
 *       dev_path: "/devices/pci0000:80/0000:80:01.0/0000:81:00.0/uio/uio0"
 *       VFIO subsystem add event:
 *       ACTION=bind
 *       DRIVER=vfio-pci
 *       PCI_SLOT_NAME=0000:d8:00.0
 */
static int
parse_event(const char *buf, struct spdk_uevent *event)
parse_subsystem_event(const char *buf, struct spdk_pci_event *event)
{
	char action[SPDK_UEVENT_MSG_LEN];
	char subsystem[SPDK_UEVENT_MSG_LEN];
	char action[SPDK_UEVENT_MSG_LEN];
	char dev_path[SPDK_UEVENT_MSG_LEN];
	char driver[SPDK_UEVENT_MSG_LEN];
	char vfio_pci_addr[SPDK_UEVENT_MSG_LEN];
	char *pci_address, *tmp;
	int rc;

	memset(action, 0, SPDK_UEVENT_MSG_LEN);
	memset(subsystem, 0, SPDK_UEVENT_MSG_LEN);
	memset(action, 0, SPDK_UEVENT_MSG_LEN);
	memset(dev_path, 0, SPDK_UEVENT_MSG_LEN);
	memset(driver, 0, SPDK_UEVENT_MSG_LEN);
	memset(vfio_pci_addr, 0, SPDK_UEVENT_MSG_LEN);

	while (*buf) {
		if (!strncmp(buf, "ACTION=", 7)) {
		if (!strncmp(buf, "SUBSYSTEM=", 10)) {
			buf += 10;
			snprintf(subsystem, sizeof(subsystem), "%s", buf);
		} else if (!strncmp(buf, "ACTION=", 7)) {
			buf += 7;
			snprintf(action, sizeof(action), "%s", buf);
		} else if (!strncmp(buf, "DEVPATH=", 8)) {
			buf += 8;
			snprintf(dev_path, sizeof(dev_path), "%s", buf);
		} else if (!strncmp(buf, "SUBSYSTEM=", 10)) {
			buf += 10;
			snprintf(subsystem, sizeof(subsystem), "%s", buf);
		} else if (!strncmp(buf, "DRIVER=", 7)) {
			buf += 7;
			snprintf(driver, sizeof(driver), "%s", buf);
@@ -119,102 +139,111 @@ parse_event(const char *buf, struct spdk_uevent *event)
			buf += 14;
			snprintf(vfio_pci_addr, sizeof(vfio_pci_addr), "%s", buf);
		}

		while (*buf++)
			;
	}

	if (!strncmp(subsystem, "uio", 3)) {
		char *pci_address, *tmp;
		struct spdk_pci_addr pci_addr;

		event->subsystem = SPDK_NVME_UEVENT_SUBSYSTEM_UIO;
		if (!strncmp(action, "add", 3)) {
			event->action = SPDK_NVME_UEVENT_ADD;
		}
		if (!strncmp(action, "remove", 6)) {
			event->action = SPDK_NVME_UEVENT_REMOVE;
			event->action = SPDK_UEVENT_REMOVE;
		} else if (!strncmp(action, "add", 3)) {
			/* Support the ADD UEVENT for the device allow */
			event->action = SPDK_UEVENT_ADD;
		} else {
			return 0;
		}

		tmp = strstr(dev_path, "/uio/");
		if (!tmp) {
			SPDK_ERRLOG("Invalid format of uevent: %s\n", dev_path);
			return -1;
			return -EBADMSG;
		}
		memset(tmp, 0, SPDK_UEVENT_MSG_LEN - (tmp - dev_path));

		pci_address = strrchr(dev_path, '/');
		if (!pci_address) {
			SPDK_ERRLOG("Not found NVMe BDF in uevent: %s\n", dev_path);
			return -1;
			SPDK_ERRLOG("Not found PCI device BDF in uevent: %s\n", dev_path);
			return -EBADMSG;
		}
		pci_address++;
		if (spdk_pci_addr_parse(&pci_addr, pci_address) != 0) {
			SPDK_ERRLOG("Invalid format for NVMe BDF: %s\n", pci_address);
			return -1;
		}
		spdk_pci_addr_fmt(event->traddr, sizeof(event->traddr), &pci_addr);
	} else if (!strncmp(driver, "vfio-pci", 8)) {
		struct spdk_pci_addr pci_addr;

		event->subsystem = SPDK_NVME_UEVENT_SUBSYSTEM_VFIO;
		if (!strncmp(action, "bind", 4)) {
			event->action = SPDK_NVME_UEVENT_ADD;
		rc = spdk_pci_addr_parse(&event->traddr, pci_address);
		if (rc != 0) {
			SPDK_ERRLOG("Invalid format for PCI device BDF: %s\n", pci_address);
			return rc;
		}
		if (!strncmp(action, "remove", 6)) {
			event->action = SPDK_NVME_UEVENT_REMOVE;
		}
		if (spdk_pci_addr_parse(&pci_addr, vfio_pci_addr) != 0) {
			SPDK_ERRLOG("Invalid format for NVMe BDF: %s\n", vfio_pci_addr);
			return -1;

		return 1;
	}
		spdk_pci_addr_fmt(event->traddr, sizeof(event->traddr), &pci_addr);

	if (!strncmp(driver, "vfio-pci", 8)) {
		if (!strncmp(action, "bind", 4)) {
			/* Support the ADD UEVENT for the device allow */
			event->action = SPDK_UEVENT_ADD;
		} else {
		event->subsystem = SPDK_NVME_UEVENT_SUBSYSTEM_UNRECOGNIZED;
			/* Only need to support add event.
			 * VFIO hotplug interface is "pci.c:pci_device_rte_dev_event".
			 * VFIO informs the userspace hotplug through vfio req notifier interrupt.
			 * The app needs to free the device userspace driver resource first then
			 * the OS removes the device VFIO driver and broadcasts the VFIO uevent.
			 */
			return 0;
		}

		rc = spdk_pci_addr_parse(&event->traddr, vfio_pci_addr);
		if (rc != 0) {
			SPDK_ERRLOG("Invalid format for PCI device BDF: %s\n", vfio_pci_addr);
			return rc;
		}

		return 1;
	}

	return 0;
}

int
nvme_get_uevent(int fd, struct spdk_uevent *uevent)
spdk_pci_get_event(int fd, struct spdk_pci_event *event)
{
	int ret;
	char buf[SPDK_UEVENT_MSG_LEN];

	memset(uevent, 0, sizeof(struct spdk_uevent));
	memset(buf, 0, SPDK_UEVENT_MSG_LEN);
	memset(event, 0, sizeof(*event));

	ret = recv(fd, buf, SPDK_UEVENT_MSG_LEN - 1, MSG_DONTWAIT);
	if (ret > 0) {
		return parse_event(buf, uevent);
	}

	if (ret < 0) {
		return parse_subsystem_event(buf, event);
	} else if (ret < 0) {
		if (errno == EAGAIN || errno == EWOULDBLOCK) {
			return 0;
		} else {
			SPDK_ERRLOG("Socket read error(%d): %s\n", errno, spdk_strerror(errno));
			return -1;
		}
			ret = errno;
			SPDK_ERRLOG("Socket read error %d\n", errno);
			return -ret;
		}

	} else {
		/* connection closed */
	if (ret == 0) {
		return -1;
		return -ENOTCONN;
	}

	return 0;
}

#else /* Not Linux */

int
nvme_uevent_connect(void)
spdk_pci_event_listen(void)
{
	return -1;
	SPDK_ERRLOG("Non-Linux does not support this operation\n");
	return -ENOTSUP;
}

int
nvme_get_uevent(int fd, struct spdk_uevent *uevent)
spdk_pci_get_event(int fd, struct spdk_pci_event *event)
{
	return -1;
	SPDK_ERRLOG("Non-Linux does not support this operation\n");
	return -ENOTSUP;
}
#endif
+137 −0
Original line number Diff line number Diff line
@@ -31,32 +31,107 @@
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * SPDK uevent
 */

#include "spdk/stdinc.h"
#include "spdk/env.h"
#include "spdk/nvmf_spec.h"

#ifndef SPDK_UEVENT_H_
#define SPDK_UEVENT_H_
#include "spdk/log.h"

#define SPDK_NVME_UEVENT_SUBSYSTEM_UNRECOGNIZED 0
#define SPDK_NVME_UEVENT_SUBSYSTEM_UIO 1
#define SPDK_NVME_UEVENT_SUBSYSTEM_VFIO 2
/* One registered SIGBUS error handler; entries are linked into g_sigbus_handler. */
struct sigbus_handler {
	/* Callback invoked when SIGBUS is delivered. */
	spdk_pci_error_handler func;
	/* Opaque argument handed back to func on every invocation. */
	void *ctx;

/* NOTE(review): the next three lines look like removed-side diff residue from
 * the old nvme_uevent.h and are probably not part of this struct - confirm. */
enum spdk_nvme_uevent_action {
	SPDK_NVME_UEVENT_ADD = 0,
	SPDK_NVME_UEVENT_REMOVE = 1,
	/* Linkage for the handler list. */
	TAILQ_ENTRY(sigbus_handler) tailq;
};

struct spdk_uevent {
	enum spdk_nvme_uevent_action action;
	int subsystem;
	char traddr[SPDK_NVMF_TRADDR_MAX_LEN + 1];
};
/* Taken by the SIGBUS handler and by register/unregister around list accesses. */
static pthread_mutex_t g_sighandler_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Registered error handlers; new entries are appended at the tail. */
static TAILQ_HEAD(, sigbus_handler) g_sigbus_handler =
	TAILQ_HEAD_INITIALIZER(g_sigbus_handler);

/*
 * Process-wide SIGBUS handler. Walks the list of registered error handlers
 * from head to tail and passes each one the signal information together with
 * the context it was registered with. The list mutex is held for the walk.
 *
 * NOTE(review): pthread_mutex_lock() is not on the POSIX async-signal-safe
 * list; a SIGBUS raised on a thread that already holds g_sighandler_mutex
 * would self-deadlock here - confirm this is acceptable.
 */
static void
sigbus_fault_sighandler(int signum, siginfo_t *info, void *ctx)
{
	struct sigbus_handler *entry;

	pthread_mutex_lock(&g_sighandler_mutex);
	for (entry = TAILQ_FIRST(&g_sigbus_handler); entry != NULL;
	     entry = TAILQ_NEXT(entry, tailq)) {
		entry->func(info, entry->ctx);
	}
	pthread_mutex_unlock(&g_sighandler_mutex);
}

__attribute__((constructor)) static void
device_set_signal(void)
{
	struct sigaction sa;

	sa.sa_sigaction = sigbus_fault_sighandler;
	sigemptyset(&sa.sa_mask);
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);
}

__attribute__((destructor)) static void
device_destroy_signal(void)
{
	struct sigbus_handler *sigbus_handler, *tmp;

	TAILQ_FOREACH_SAFE(sigbus_handler, &g_sigbus_handler, tailq, tmp) {
		free(sigbus_handler);
	}
}

/*
 * Append a SIGBUS error handler to the list of registered handlers.
 *
 * \param sighandler Callback to invoke on SIGBUS; must not be NULL.
 * \param ctx Opaque pointer passed back to sighandler on every invocation.
 *
 * \return 0 on success, -EINVAL if sighandler is NULL or is already
 * registered, -ENOMEM if the list entry cannot be allocated.
 */
int
spdk_pci_register_error_handler(spdk_pci_error_handler sighandler, void *ctx)
{
	struct sigbus_handler *sigbus_handler;

	if (!sighandler) {
		SPDK_ERRLOG("Error handler is NULL\n");
		return -EINVAL;
	}

	/*
	 * The duplicate check and the insertion share one critical section;
	 * otherwise two threads could both pass the check and register the
	 * same handler twice.
	 */
	pthread_mutex_lock(&g_sighandler_mutex);
	TAILQ_FOREACH(sigbus_handler, &g_sigbus_handler, tailq) {
		if (sigbus_handler->func == sighandler) {
			pthread_mutex_unlock(&g_sighandler_mutex);
			SPDK_ERRLOG("Error handler has been registered\n");
			return -EINVAL;
		}
	}

	sigbus_handler = calloc(1, sizeof(*sigbus_handler));
	if (!sigbus_handler) {
		pthread_mutex_unlock(&g_sighandler_mutex);
		SPDK_ERRLOG("Failed to allocate sigbus handler\n");
		return -ENOMEM;
	}

	sigbus_handler->func = sighandler;
	sigbus_handler->ctx = ctx;

	TAILQ_INSERT_TAIL(&g_sigbus_handler, sigbus_handler, tailq);
	pthread_mutex_unlock(&g_sighandler_mutex);

	return 0;
}

/*
 * Remove the first registered handler whose callback equals sighandler and
 * free its list entry. Does nothing when sighandler is NULL or when no
 * matching handler is registered.
 */
void
spdk_pci_unregister_error_handler(spdk_pci_error_handler sighandler)
{
	struct sigbus_handler *sigbus_handler;

/* NOTE(review): the two prototypes below look like removed-side diff residue
 * from the old nvme_uevent.h rather than part of this function - confirm. */
int nvme_uevent_connect(void);
int nvme_get_uevent(int fd, struct spdk_uevent *uevent);
	if (!sighandler) {
		return;
	}

/* NOTE(review): this #endif also appears to be residue from nvme_uevent.h - confirm. */
#endif /* SPDK_UEVENT_H_ */
	pthread_mutex_lock(&g_sighandler_mutex);
	TAILQ_FOREACH(sigbus_handler, &g_sigbus_handler, tailq) {
		if (sigbus_handler->func == sighandler) {
			/* Returning right after the removal means the freed entry is never iterated past. */
			TAILQ_REMOVE(&g_sigbus_handler, sigbus_handler, tailq);
			free(sigbus_handler);
			pthread_mutex_unlock(&g_sighandler_mutex);
			return;
		}
	}
	pthread_mutex_unlock(&g_sighandler_mutex);
}
+4 −0
Original line number Diff line number Diff line
@@ -105,6 +105,10 @@
	spdk_mem_register;
	spdk_mem_unregister;
	spdk_mem_get_fd_and_offset;
	spdk_pci_event_listen;
	spdk_pci_get_event;
	spdk_pci_register_error_handler;
	spdk_pci_unregister_error_handler;

	# Public functions in env_dpdk.h
	spdk_env_dpdk_post_init;
Loading