Commit efb18b9b authored by Pawel Wodkowski, committed by Jim Harris
Browse files

vhost: add live migration support



This patch adds support for live migration for vhost-scsi and vhost-blk
backends.

Change-Id: Ibfc8a713dbba14ba8cb38377a71e28fd340b1487
Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Reviewed-on: https://review.gerrithub.io/394203


Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent f69503f1
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -1076,8 +1076,8 @@ vhost_user_msg_handler(int vid, int fd)
		return -1;
	}

	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
		vhost_message_str[msg.request]);
	RTE_LOG(INFO, VHOST_CONFIG, "%s: read message %s\n",
		dev->ifname, vhost_message_str[msg.request]);

	ret = vhost_user_check_and_alloc_queue_pair(dev, &msg);
	if (ret < 0) {
+69 −1
Original line number Diff line number Diff line
@@ -91,6 +91,71 @@ void *spdk_vhost_gpa_to_vva(struct spdk_vhost_dev *vdev, uint64_t addr)
	return (void *)rte_vhost_gpa_to_vva(vdev->mem, addr);
}

/*
 * Report the writable buffers of one request through rte_vhost_log_write().
 *
 * Returns immediately unless the device has the VHOST_F_LOG_ALL feature.
 * Otherwise the descriptor chain identified by req_id is looked up and the
 * address/length of every writable descriptor in it is handed to
 * rte_vhost_log_write(). If the chain cannot be looked up, an error is
 * printed and nothing is logged.
 */
static void
spdk_vhost_log_req_desc(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue,
			uint16_t req_id)
{
	struct vring_desc *cur, *table;
	uint32_t table_size;

	if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) {
		return;
	}

	if (spdk_unlikely(spdk_vhost_vq_get_desc(vdev, virtqueue, req_id, &cur, &table,
						 &table_size) != 0)) {
		SPDK_ERRLOG("Can't log used ring descriptors!\n");
		return;
	}

	for (;;) {
		if (spdk_vhost_vring_desc_is_wr(cur)) {
			/* Strictly speaking, only the pages that were really touched
			 * should be logged, but that would require tracking the writes
			 * in each backend. The backend will most likely touch all or
			 * most of those pages anyway, so for now assume every page
			 * passed to us as a writable buffer was modified. */
			rte_vhost_log_write(vdev->vid, cur->addr, cur->len);
		}

		/* Advance along the chain; a null descriptor ends the walk. */
		spdk_vhost_vring_desc_get_next(&cur, table, table_size);
		if (!cur) {
			break;
		}
	}
}

/*
 * Report a single used-ring entry through rte_vhost_log_used_vring().
 *
 * Returns immediately unless the device has the VHOST_F_LOG_ALL feature.
 * The reported region is the byte range of ring[idx] inside struct vring_used,
 * for the queue whose index is the position of virtqueue in vdev->virtqueue.
 */
static void
spdk_vhost_log_used_vring_elem(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue,
			       uint16_t idx)
{
	if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) {
		return;
	}

	/* Queue number is the element's position within the device's queue array. */
	const uint16_t queue_no = virtqueue - vdev->virtqueue;
	const uint64_t entry_off = offsetof(struct vring_used, ring[idx]);
	const uint64_t entry_len = sizeof(virtqueue->vring.used->ring[idx]);

	rte_vhost_log_used_vring(vdev->vid, queue_no, entry_off, entry_len);
}

/*
 * Report the used ring's idx field through rte_vhost_log_used_vring().
 *
 * Returns immediately unless the device has the VHOST_F_LOG_ALL feature.
 * The reported region is the byte range of the idx member inside
 * struct vring_used, for the queue whose index is the position of virtqueue
 * in vdev->virtqueue.
 */
static void
spdk_vhost_log_used_vring_idx(struct spdk_vhost_dev *vdev, struct spdk_vhost_virtqueue *virtqueue)
{
	if (spdk_likely(!spdk_vhost_dev_has_feature(vdev, VHOST_F_LOG_ALL))) {
		return;
	}

	/* Queue number is the element's position within the device's queue array. */
	const uint16_t queue_no = virtqueue - vdev->virtqueue;
	const uint64_t field_off = offsetof(struct vring_used, idx);
	const uint64_t field_len = sizeof(virtqueue->vring.used->idx);

	rte_vhost_log_used_vring(vdev->vid, queue_no, field_off, field_len);
}

/*
 * Get available requests from avail ring.
 */
@@ -290,13 +355,17 @@ spdk_vhost_vq_used_ring_enqueue(struct spdk_vhost_dev *vdev, struct spdk_vhost_v
		      "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
		      virtqueue - vdev->virtqueue, vring->last_used_idx, id, len);

	spdk_vhost_log_req_desc(vdev, virtqueue, id);

	vring->last_used_idx++;
	used->ring[last_idx].id = id;
	used->ring[last_idx].len = len;
	spdk_vhost_log_used_vring_elem(vdev, virtqueue, last_idx);

	/* Ensure the used ring is updated before we increment used->idx. */
	spdk_smp_wmb();
	* (volatile uint16_t *) &used->idx = vring->last_used_idx;
	spdk_vhost_log_used_vring_idx(vdev, virtqueue);

	/* Ensure all our used ring changes are visible to the guest at the time
	 * of interrupt.
@@ -970,7 +1039,6 @@ start_device(int vid)
			SPDK_ERRLOG("vhost device %d: Failed to disable guest notification on queue %"PRIu16"\n", vid, i);
			goto out;
		}

	}

	vdev->num_queues = num_queues;
+1 −2
Original line number Diff line number Diff line
@@ -90,8 +90,7 @@
	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))

#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VHOST_F_LOG_ALL) | \
	(1ULL << VIRTIO_RING_F_EVENT_IDX) | \
#define SPDK_VHOST_DISABLED_FEATURES ((1ULL << VIRTIO_RING_F_EVENT_IDX) | \
	(1ULL << VIRTIO_RING_F_INDIRECT_DESC))

enum spdk_vhost_dev_type {
+3 −0
Original line number Diff line number Diff line
@@ -81,6 +81,9 @@ DEFINE_STUB(rte_vhost_driver_callback_register, int,
DEFINE_STUB(rte_vhost_driver_disable_features, int, (const char *path, uint64_t features), 0);
DEFINE_STUB(rte_vhost_driver_set_features, int, (const char *path, uint64_t features), 0);
DEFINE_STUB(rte_vhost_driver_register, int, (const char *path, uint64_t flags), 0);
DEFINE_STUB_V(rte_vhost_log_used_vring, (int vid, uint16_t vring_idx, uint64_t offset,
		uint64_t len));
DEFINE_STUB_V(rte_vhost_log_write, (int vid, uint64_t addr, uint64_t len));
DEFINE_STUB(spdk_vhost_scsi_controller_construct, int, (void), 0);
DEFINE_STUB(spdk_vhost_blk_controller_construct, int, (void), 0);
DEFINE_STUB(rte_vhost_set_vhost_vring_last_idx, int,
+90 −19
Original line number Diff line number Diff line
@@ -388,7 +388,7 @@ function vm_shutdown_all()
	while [[ $timeo -gt 0 ]]; do
		all_vms_down=1
		for vm in $VM_BASE_DIR/[0-9]*; do
			if /bin/kill -0 "$(cat $vm/qemu.pid)"; then
			if [[ -r $vm/qemu.pid ]] && pkill -0 -F "$vm/qemu.pid"; then
				all_vms_down=0
				break
			fi
@@ -412,13 +412,16 @@ function vm_shutdown_all()
function vm_setup()
{
	local shell_restore_x="$( [[ "$-" =~ x ]] && echo 'set -x' )"
	local OPTIND optchar a
	local OPTIND optchar vm_num

	local os=""
	local os_mode=""
	local qemu_args=""
	local disk_type=NOT_DEFINED
	local disks=""
	local raw_cache=""
	local vm_incoming=""
	local vm_migrate_to=""
	local force_vm=""
	local guest_memory=1024
	local queue_number=""
@@ -435,6 +438,8 @@ function vm_setup()
				force=*) local force_vm=${OPTARG#*=} ;;
				memory=*) local guest_memory=${OPTARG#*=} ;;
				queue_num=*) local queue_number=${OPTARG#*=} ;;
				incoming=*) local vm_incoming="${OPTARG#*=}" ;;
				migrate-to=*) local vm_migrate_to="${OPTARG#*=}" ;;
				*)
					error "unknown argument $OPTARG"
					return 1
@@ -454,8 +459,6 @@ function vm_setup()
		vm_num_is_valid $vm_num || return 1
		local vm_dir="$VM_BASE_DIR/$vm_num"
		[[ -d $vm_dir ]] && warning "removing existing VM in '$vm_dir'"
		# FIXME: why this is just echo???
		echo "rm -rf $vm_dir"
	else
		local vm_dir=""

@@ -474,13 +477,45 @@ function vm_setup()
		return 1
	fi

	if [[ ! -z "$vm_migrate_to" && ! -z "$vm_incoming" ]]; then
		error "'--incoming' and '--migrate-to' cannot be used together"
		return 1
	elif [[ ! -z "$vm_incoming" ]]; then
		if [[ ! -z "$os_mode" || ! -z "$os_img" ]]; then
			error "'--incoming' can't be used together with '--os' nor '--os-mode'"
			return 1
		fi

		os_mode="original"
		os="$VM_BASE_DIR/$vm_incoming/os.qcow2"
	elif [[ ! -z "$vm_migrate_to" ]]; then
		[[ "$os_mode" != "backing" ]] && warning "Using 'backing' mode for OS since '--migrate-to' is used"
		os_mode=backing
	fi

	notice "Creating new VM in $vm_dir"
	mkdir -p $vm_dir
	if [[ ! -r $os ]]; then
		error "file not found: $os"

	if [[ "$os_mode" == "backing" ]]; then
		notice "Creating backing file for OS image file: $os"
		if ! $INSTALL_DIR/bin/qemu-img create -f qcow2 -b $os $vm_dir/os.qcow2; then
			error "Failed to create OS backing file in '$vm_dir/os.qcow2' using '$os'"
			return 1
		fi

		local os=$vm_dir/os.qcow2
	elif [[ "$os_mode" == "original" ]]; then
		warning "Using original OS image file: $os"
	elif [[ "$os_mode" != "snapshot" ]]; then
		if [[ -z "$os_mode" ]]; then
			notice "No '--os-mode' parameter provided - using 'snapshot'"
			os_mode="snapshot"
		else
			error "Invalid '--os-mode=$os_mode'"
			return 1
		fi
	fi

	# WARNING:
	# each cmd+= must contain ' ${eol}' at the end
	#
@@ -501,8 +536,8 @@ function vm_setup()

	local ssh_socket=$(( vm_socket_offset + 0 ))
	local fio_socket=$(( vm_socket_offset + 1 ))
	# vm_socket_offset + 2 - can be reused
	# vm_socket_offset + 3 - can be reused
	local monitor_port=$(( vm_socket_offset + 2 ))
	local migration_port=$(( vm_socket_offset + 3 ))
	local gdbserver_socket=$(( vm_socket_offset + 4 ))
	local vnc_socket=$(( 100 + vm_num ))
	local qemu_pid_file="$vm_dir/qemu.pid"
@@ -520,18 +555,19 @@ function vm_setup()

	$shell_restore_x

	#-cpu host
	local node_num=${!qemu_numa_node_param}
	notice "NUMA NODE: $node_num"
	cmd+="-m $guest_memory --enable-kvm -cpu host -smp $cpu_num -vga std -vnc :$vnc_socket -daemonize -snapshot ${eol}"
	cmd+="-m $guest_memory --enable-kvm -cpu host -smp $cpu_num -vga std -vnc :$vnc_socket -daemonize ${eol}"
	cmd+="-object memory-backend-file,id=mem,size=${guest_memory}M,mem-path=/dev/hugepages,share=on,prealloc=yes,host-nodes=$node_num,policy=bind ${eol}"
	[[ $os_mode == snapshot ]] && cmd+="-snapshot ${eol}"
	[[ ! -z "$vm_incoming" ]] && cmd+=" -incoming tcp:0:$migration_port ${eol}"
	cmd+="-monitor telnet:127.0.0.1:$monitor_port,server,nowait ${eol}"
	cmd+="-numa node,memdev=mem ${eol}"
	cmd+="-pidfile $qemu_pid_file ${eol}"
	cmd+="-serial file:$vm_dir/serial.log ${eol}"
	cmd+="-D $vm_dir/qemu.log ${eol}"
	cmd+="-net user,hostfwd=tcp::$ssh_socket-:22,hostfwd=tcp::$fio_socket-:8765 ${eol}"
	cmd+="-net nic ${eol}"

	cmd+="-drive file=$os,if=none,id=os_disk ${eol}"
	cmd+="-device ide-hd,drive=os_disk,bootindex=0 ${eol}"

@@ -608,7 +644,7 @@ function vm_setup()
	# remove last $eol
	cmd="${cmd%\\\\\\n  }"

	notice "Saving to $vm_dir/run.sh:"
	notice "Saving to $vm_dir/run.sh"
	(
	echo '#!/bin/bash'
	echo 'if [[ $EUID -ne 0 ]]; then '
@@ -638,8 +674,16 @@ function vm_setup()
	# Save generated sockets redirection
	echo $ssh_socket > $vm_dir/ssh_socket
	echo $fio_socket > $vm_dir/fio_socket
	echo $monitor_port > $vm_dir/monitor_port

	rm -f $vm_dir/migration_port
	[[ -z $vm_incoming ]] || echo $migration_port > $vm_dir/migration_port

	echo $gdbserver_socket > $vm_dir/gdbserver_socket
	echo $vnc_socket >> $vm_dir/vnc_socket

	[[ -z $vm_incoming ]] || ln -fs $VM_BASE_DIR/$vm_incoming $vm_dir/vm_incoming
	[[ -z $vm_migrate_to ]] || ln -fs $VM_BASE_DIR/$vm_migrate_to $vm_dir/vm_migrate_to
}

function vm_run()
@@ -840,16 +884,18 @@ function run_fio()
	local out=""
	local fio_disks=""
	local vm
	local run_server_mode=true

	for arg in $@; do
		case "$arg" in
			--job-file=*) local job_file="${arg#*=}" ;;
			--fio-bin=*) local fio_bin="--fio-bin=${arg#*=}" ;;
			--fio-bin=*) local fio_bin="${arg#*=}" ;;
			--vm=*) vms+=( "${arg#*=}" ) ;;
			--out=*)
				local out="$arg"
				mkdir -p ${out#*=}
				local out="${arg#*=}"
				mkdir -p $out
				;;
			--local) run_server_mode=false ;;
		*)
			error "Invalid argument '$arg'"
			return 1
@@ -857,8 +903,17 @@ function run_fio()
		esac
	done

	local job_fname=$(basename "$job_file")
	if [[ ! -z "$fio_bin" && ! -r "$fio_bin" ]]; then
		error "FIO binary '$fio_bin' does not exist"
		return 1
	fi

	if [[ ! -r "$job_file" ]]; then
		error "Fio job '$job_file' does not exist"
		return 1
	fi

	local job_fname=$(basename "$job_file")
	# prepare job file for each VM
	for vm in ${vms[@]}; do
		local vm_num=${vm%%:*}
@@ -868,9 +923,25 @@ function run_fio()
		fio_disks+="127.0.0.1:$(vm_fio_socket $vm_num):$vmdisks,"

		vm_ssh $vm_num cat /root/$job_fname
		if ! $run_server_mode; then
			if [[ ! -z "$fio_bin" ]]; then
				cat $fio_bin | vm_ssh $vm_num 'cat > /root/fio; chmod +x /root/fio'
			fi

			notice "Running local fio on VM $vm_num"
			vm_ssh $vm_num "nohup /root/fio /root/$job_fname 1>/root/$job_fname.out 2>/root/$job_fname.out </dev/null & echo \$! > /root/fio.pid"
		fi
	done

	python $SPDK_BUILD_DIR/test/vhost/common/run_fio.py --job-file=/root/$job_fname $fio_bin $out ${fio_disks%,}
	if ! $run_server_mode; then
		# Give FIO time to run
		sleep 0.5
		return 0
	fi

	python $SPDK_BUILD_DIR/test/vhost/common/run_fio.py --job-file=/root/$job_fname \
		$([[ ! -z "$fio_bin" ]] && echo "--fio-bin=$fio_bin") \
		--out=$out ${fio_disks%,}
}

# Shutdown or kill any running VM and SPDK APP.
Loading