Commit 232ca0e6 authored by Changpeng Liu's avatar Changpeng Liu
Browse files

vhost-blk: add discard/write zeroes commands support



Discard and Write Zeroes commands was supported with
commit 1f23816b in Linux virtio-blk driver.  While
here, also add the support in SPDK vhost target.

Change-Id: I425be2a4961eac04e27ff71151d40c8d799cd37d
Signed-off-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-on: https://review.gerrithub.io/c/431723


Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarDarek Stojaczyk <dariusz.stojaczyk@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent 0ab3fa89
Loading
Loading
Loading
Loading
+54 −0
Original line number Diff line number Diff line
@@ -38,6 +38,8 @@
#define VIRTIO_BLK_F_BLK_SIZE	6	/* Block size of disk is available */
#define VIRTIO_BLK_F_TOPOLOGY	10	/* Topology information is available */
#define VIRTIO_BLK_F_MQ		12	/* support more than one vq */
#define VIRTIO_BLK_F_DISCARD	13	/* DISCARD is supported */
#define VIRTIO_BLK_F_WRITE_ZEROES	14	/* WRITE ZEROES is supported */

/* Legacy feature bits */
#ifndef VIRTIO_BLK_NO_LEGACY
@@ -84,6 +86,39 @@ struct virtio_blk_config {

	/* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
	__u16 num_queues;

	/* the next 3 entries are guarded by VIRTIO_BLK_F_DISCARD */
	/*
	 * The maximum discard sectors (in 512-byte sectors) for
	 * one segment.
	 */
	__u32 max_discard_sectors;
	/*
	 * The maximum number of discard segments in a
	 * discard command.
	 */
	__u32 max_discard_seg;
	/* Discard commands must be aligned to this number of sectors. */
	__u32 discard_sector_alignment;

	/* the next 3 entries are guarded by VIRTIO_BLK_F_WRITE_ZEROES */
	/*
	 * The maximum number of write zeroes sectors (in 512-byte sectors) in
	 * one segment.
	 */
	__u32 max_write_zeroes_sectors;
	/*
	 * The maximum number of segments in a write zeroes
	 * command.
	 */
	__u32 max_write_zeroes_seg;
	/*
	 * Set if a VIRTIO_BLK_T_WRITE_ZEROES request may result in the
	 * deallocation of one or more of the sectors.
	 */
	__u8 write_zeroes_may_unmap;

	__u8 unused1[3];
} __attribute__((packed));

/*
@@ -112,6 +147,12 @@ struct virtio_blk_config {
/* Get device ID command */
#define VIRTIO_BLK_T_GET_ID    8

/* Discard command */
#define VIRTIO_BLK_T_DISCARD   11

/* Write zeroes command */
#define VIRTIO_BLK_T_WRITE_ZEROES   13

#ifndef VIRTIO_BLK_NO_LEGACY
/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER	0x80000000
@@ -131,6 +172,19 @@ struct virtio_blk_outhdr {
	__virtio64 sector;
};

/* Unmap this range (only valid for write zeroes command) */
#define VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP	0x00000001

/* Discard/write zeroes range for each request. */
struct virtio_blk_discard_write_zeroes {
	/* discard/write zeroes start sector */
	__le64 sector;
	/* number of discard/write zeroes sectors */
	__le32 num_sectors;
	/* flags for this range */
	__le32 flags;
};

#ifndef VIRTIO_BLK_NO_LEGACY
struct virtio_scsi_inhdr {
	__virtio32 errors;
+89 −17
Original line number Diff line number Diff line
@@ -227,6 +227,7 @@ process_blk_request(struct spdk_vhost_blk_task *task, struct spdk_vhost_blk_dev
		    struct spdk_vhost_virtqueue *vq)
{
	const struct virtio_blk_outhdr *req;
	struct virtio_blk_discard_write_zeroes *desc;
	struct iovec *iov;
	uint32_t type;
	uint32_t payload_len;
@@ -304,6 +305,55 @@ process_blk_request(struct spdk_vhost_blk_task *task, struct spdk_vhost_blk_dev
			}
		}
		break;
	case VIRTIO_BLK_T_DISCARD:
		desc = task->iovs[1].iov_base;
		if (payload_len != sizeof(*desc)) {
			SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len);
			invalid_blk_request(task, VIRTIO_BLK_S_IOERR);
			return -1;
		}

		rc = spdk_bdev_unmap(bvdev->bdev_desc, bvdev->bdev_io_channel,
				     desc->sector * 512, desc->num_sectors * 512,
				     blk_request_complete_cb, task);
		if (rc) {
			if (rc == -ENOMEM) {
				SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n");
				blk_request_queue_io(task);
			} else {
				invalid_blk_request(task, VIRTIO_BLK_S_IOERR);
				return -1;
			}
		}
		break;
	case VIRTIO_BLK_T_WRITE_ZEROES:
		desc = task->iovs[1].iov_base;
		if (payload_len != sizeof(*desc)) {
			SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len);
			invalid_blk_request(task, VIRTIO_BLK_S_IOERR);
			return -1;
		}

		/* Zeroed and Unmap the range, SPDK doen't support it. */
		if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			SPDK_NOTICELOG("Can't support Write Zeroes with Unmap flag\n");
			invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP);
			return -1;
		}

		rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, bvdev->bdev_io_channel,
					    desc->sector * 512, desc->num_sectors * 512,
					    blk_request_complete_cb, task);
		if (rc) {
			if (rc == -ENOMEM) {
				SPDK_DEBUGLOG(SPDK_LOG_VHOST_BLK, "No memory, start to queue io.\n");
				blk_request_queue_io(task);
			} else {
				invalid_blk_request(task, VIRTIO_BLK_S_IOERR);
				return -1;
			}
		}
		break;
	case VIRTIO_BLK_T_GET_ID:
		if (!task->iovcnt || !payload_len) {
			invalid_blk_request(task, VIRTIO_BLK_S_UNSUPP);
@@ -709,7 +759,7 @@ static int
spdk_vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config,
			  uint32_t len)
{
	struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config;
	struct virtio_blk_config blkcfg;
	struct spdk_vhost_blk_dev *bvdev;
	struct spdk_bdev *bdev;
	uint32_t blk_size;
@@ -721,10 +771,6 @@ spdk_vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config,
		return -1;
	}

	if (len < sizeof(*blkcfg)) {
		return -1;
	}

	bdev = bvdev->bdev;
	if (bdev == NULL) {
		/* We can't just return -1 here as this GET_CONFIG message might
@@ -745,17 +791,30 @@ spdk_vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config,
		blkcnt = spdk_bdev_get_num_blocks(bdev);
	}

	memset(blkcfg, 0, sizeof(*blkcfg));
	blkcfg->blk_size = blk_size;
	memset(&blkcfg, 0, sizeof(blkcfg));
	blkcfg.blk_size = blk_size;
	/* minimum I/O size in blocks */
	blkcfg->min_io_size = 1;
	blkcfg.min_io_size = 1;
	/* expressed in 512 Bytes sectors */
	blkcfg->capacity = (blkcnt * blk_size) / 512;
	blkcfg->size_max = 131072;
	blkcfg.capacity = (blkcnt * blk_size) / 512;
	blkcfg.size_max = 131072;
	/*  -2 for REQ and RESP and -1 for region boundary splitting */
	blkcfg->seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1;
	blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1;
	/* QEMU can overwrite this value when started */
	blkcfg->num_queues = SPDK_VHOST_MAX_VQUEUES;
	blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES;

	if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
		/* 16MiB, expressed in 512 Bytes */
		blkcfg.max_discard_sectors = 32768;
		blkcfg.max_discard_seg = 1;
		blkcfg.discard_sector_alignment = blk_size / 512;
	}
	if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
		blkcfg.max_write_zeroes_sectors = 32768;
		blkcfg.max_write_zeroes_seg = 1;
	}

	memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg)));

	return 0;
}
@@ -767,10 +826,12 @@ static const struct spdk_vhost_dev_backend vhost_blk_device_backend = {
	(1ULL << VIRTIO_BLK_F_BLK_SIZE) | (1ULL << VIRTIO_BLK_F_TOPOLOGY) |
	(1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI) |
	(1ULL << VIRTIO_BLK_F_FLUSH)    | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) |
	(1ULL << VIRTIO_BLK_F_MQ),
	(1ULL << VIRTIO_BLK_F_MQ)       | (1ULL << VIRTIO_BLK_F_DISCARD) |
	(1ULL << VIRTIO_BLK_F_WRITE_ZEROES),
	.disabled_features = SPDK_VHOST_DISABLED_FEATURES | (1ULL << VIRTIO_BLK_F_GEOMETRY) |
	(1ULL << VIRTIO_BLK_F_RO) | (1ULL << VIRTIO_BLK_F_FLUSH) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) |
	(1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI),
	(1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI) | (1ULL << VIRTIO_BLK_F_DISCARD) |
	(1ULL << VIRTIO_BLK_F_WRITE_ZEROES),
	.start_device =  spdk_vhost_blk_start,
	.stop_device = spdk_vhost_blk_stop,
	.vhost_get_config = spdk_vhost_blk_get_config,
@@ -827,6 +888,7 @@ spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_
{
	struct spdk_vhost_blk_dev *bvdev = NULL;
	struct spdk_bdev *bdev;
	uint64_t features = 0;
	int ret = 0;

	spdk_vhost_lock();
@@ -859,14 +921,24 @@ spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_
		goto out;
	}

	if (readonly && rte_vhost_driver_enable_features(bvdev->vdev.path, (1ULL << VIRTIO_BLK_F_RO))) {
		SPDK_ERRLOG("Controller %s: failed to set as a readonly\n", name);
		spdk_bdev_close(bvdev->bdev_desc);
	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
		features |= (1ULL << VIRTIO_BLK_F_DISCARD);
	}
	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
		features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
	}
	if (readonly) {
		features |= (1ULL << VIRTIO_BLK_F_RO);
	}

	if (features && rte_vhost_driver_enable_features(bvdev->vdev.path, features)) {
		SPDK_ERRLOG("Controller %s: failed to enable features 0x%"PRIx64"\n", name, features);

		if (spdk_vhost_dev_unregister(&bvdev->vdev) != 0) {
			SPDK_ERRLOG("Controller %s: failed to remove controller\n", name);
		}

		spdk_bdev_close(bvdev->bdev_desc);
		ret = -1;
		goto out;
	}