Commit 648a5414 authored by zhenwei pi, committed by Tomasz Zawadzki
Browse files

bdev/aio: support UNMAP



Support UNMAP by fallocate FALLOC_FL_PUNCH_HOLE.

Note that fallocate() is a synchronous syscall, so the reactor blocks
until the operation on disk completes. This could significantly hurt
performance. In general, AIO-based UNMAP (and even WRITE ZEROES) is
*not* recommended for production environments. It is intended for
testing UNMAP in a development environment backed by a regular file,
where an entire NVMe bdev is not needed.

As suggested by Changpeng, add an RPC option to enable *fallocate* for
a single AIO bdev (rather than enabling this feature for all AIO
bdevs at compile time); this seems appropriate enough.

Change-Id: Iced8b9506e75744ad2e45cecc9995c77224d077b
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/20454


Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <jim.harris@samsung.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
parent 05859ef1
Loading
Loading
Loading
Loading
+58 −1
Original line number Diff line number Diff line
@@ -67,6 +67,7 @@ struct file_disk {
	TAILQ_ENTRY(file_disk)  link;
	bool			block_size_override;
	bool			readonly;
	bool			fallocate;
};

/* For user space reaping of completions */
@@ -245,6 +246,38 @@ bdev_aio_flush(struct file_disk *fdisk, struct bdev_aio_task *aio_task)
	}
}

#ifndef __FreeBSD__
/*
 * Run fallocate(2) with the given mode over the byte range described by
 * bdev_io and complete the I/O with the outcome.
 *
 * The request is completed with -ENOTSUP when fallocate was not enabled for
 * this AIO bdev, with -errno when the syscall fails, and with
 * SPDK_BDEV_IO_STATUS_SUCCESS otherwise.
 *
 * Note: fallocate() is a synchronous call, so the calling thread is blocked
 * until it returns.
 */
static void
bdev_aio_fallocate(struct spdk_bdev_io *bdev_io, int mode)
{
	struct file_disk *disk = (struct file_disk *)bdev_io->bdev->ctxt;
	struct bdev_aio_task *task = (struct bdev_aio_task *)bdev_io->driver_ctx;
	uint64_t start_bytes;
	uint64_t span_bytes;

	/* fallocate support is opt-in per bdev; refuse the request when it is off. */
	if (!disk->fallocate) {
		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(task), -ENOTSUP);
		return;
	}

	/* Translate the block-addressed range into a byte-addressed one. */
	start_bytes = bdev_io->u.bdev.offset_blocks * bdev_io->bdev->blocklen;
	span_bytes = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;

	if (fallocate(disk->fd, mode, start_bytes, span_bytes) != 0) {
		/* errno is read right after the failing call, before anything can clobber it. */
		spdk_bdev_io_complete_aio_status(spdk_bdev_io_from_ctx(task), -errno);
		return;
	}

	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(task), SPDK_BDEV_IO_STATUS_SUCCESS);
}

/*
 * Handle an UNMAP request by punching a hole over the requested range.
 * FALLOC_FL_KEEP_SIZE is passed together with FALLOC_FL_PUNCH_HOLE so the
 * size of the backing file is left unchanged.
 */
static void
bdev_aio_unmap(struct spdk_bdev_io *bdev_io)
{
	bdev_aio_fallocate(bdev_io, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE);
}
#endif

static void
bdev_aio_destruct_cb(void *io_device)
{
@@ -577,6 +610,13 @@ _bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_i
		bdev_aio_reset((struct file_disk *)bdev_io->bdev->ctxt,
			       (struct bdev_aio_task *)bdev_io->driver_ctx);
		return 0;

#ifndef __FreeBSD__
	case SPDK_BDEV_IO_TYPE_UNMAP:
		bdev_aio_unmap(bdev_io);
		return 0;
#endif

	default:
		return -1;
	}
@@ -593,6 +633,8 @@ bdev_aio_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io
static bool
bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
{
	struct file_disk *fdisk = ctx;

	switch (io_type) {
	case SPDK_BDEV_IO_TYPE_READ:
	case SPDK_BDEV_IO_TYPE_WRITE:
@@ -600,6 +642,9 @@ bdev_aio_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
	case SPDK_BDEV_IO_TYPE_RESET:
		return true;

	case SPDK_BDEV_IO_TYPE_UNMAP:
		return fdisk->fallocate;

	default:
		return false;
	}
@@ -695,6 +740,8 @@ bdev_aio_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)

	spdk_json_write_named_bool(w, "readonly", fdisk->readonly);

	spdk_json_write_named_bool(w, "fallocate", fdisk->fallocate);

	spdk_json_write_object_end(w);

	return 0;
@@ -716,6 +763,7 @@ bdev_aio_write_json_config(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w
	}
	spdk_json_write_named_string(w, "filename", fdisk->filename);
	spdk_json_write_named_bool(w, "readonly", fdisk->readonly);
	spdk_json_write_named_bool(w, "fallocate", fdisk->fallocate);
	spdk_json_write_object_end(w);

	spdk_json_write_object_end(w);
@@ -814,19 +862,28 @@ bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
}

int
create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly)
create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly,
		bool fallocate)
{
	struct file_disk *fdisk;
	uint32_t detected_block_size;
	uint64_t disk_size;
	int rc;

#ifdef __FreeBSD__
	if (fallocate) {
		SPDK_ERRLOG("Unable to support fallocate on this platform\n");
		return -ENOTSUP;
	}
#endif

	fdisk = calloc(1, sizeof(*fdisk));
	if (!fdisk) {
		SPDK_ERRLOG("Unable to allocate enough memory for aio backend\n");
		return -ENOMEM;
	}
	fdisk->readonly = readonly;
	fdisk->fallocate = fallocate;

	fdisk->filename = strdup(filename);
	if (!fdisk->filename) {
+2 −1
Original line number Diff line number Diff line
@@ -11,7 +11,8 @@

typedef void (*delete_aio_bdev_complete)(void *cb_arg, int bdeverrno);

int create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly);
int create_aio_bdev(const char *name, const char *filename, uint32_t block_size, bool readonly,
		    bool falloc);

int bdev_aio_rescan(const char *name);
void bdev_aio_delete(const char *name, delete_aio_bdev_complete cb_fn, void *cb_arg);
+4 −1
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@ struct rpc_construct_aio {
	char *filename;
	uint32_t block_size;
	bool readonly;
	bool fallocate;
};

struct rpc_construct_aio_ctx {
@@ -34,6 +35,7 @@ static const struct spdk_json_object_decoder rpc_construct_aio_decoders[] = {
	{"filename", offsetof(struct rpc_construct_aio, filename), spdk_json_decode_string},
	{"block_size", offsetof(struct rpc_construct_aio, block_size), spdk_json_decode_uint32, true},
	{"readonly", offsetof(struct rpc_construct_aio, readonly), spdk_json_decode_bool, true},
	{"fallocate", offsetof(struct rpc_construct_aio, fallocate), spdk_json_decode_bool, true},
};

static void
@@ -73,7 +75,8 @@ rpc_bdev_aio_create(struct spdk_jsonrpc_request *request,
	}

	ctx->request = request;
	rc = create_aio_bdev(ctx->req.name, ctx->req.filename, ctx->req.block_size, ctx->req.readonly);
	rc = create_aio_bdev(ctx->req.name, ctx->req.filename, ctx->req.block_size,
			     ctx->req.readonly, ctx->req.fallocate);
	if (rc) {
		spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
		free_rpc_construct_aio(ctx);
+5 −1
Original line number Diff line number Diff line
@@ -486,7 +486,7 @@ def bdev_raid_remove_base_bdev(client, name):
    return client.call('bdev_raid_remove_base_bdev', params)


def bdev_aio_create(client, filename, name, block_size=None, readonly=False):
def bdev_aio_create(client, filename, name, block_size=None, readonly=False, fallocate=False):
    """Construct a Linux AIO block device.

    Args:
@@ -494,6 +494,7 @@ def bdev_aio_create(client, filename, name, block_size=None, readonly=False):
        name: name of block device
        block_size: block size of device (optional; autodetected if omitted)
        readonly: set aio bdev as read-only
        fallocate: enable fallocate for UNMAP support (note that fallocate syscall would block reactor)

    Returns:
        Name of created block device.
@@ -507,6 +508,9 @@ def bdev_aio_create(client, filename, name, block_size=None, readonly=False):
    if readonly:
        params['readonly'] = readonly

    if fallocate:
        params['fallocate'] = fallocate

    return client.call('bdev_aio_create', params)


+3 −1
Original line number Diff line number Diff line
@@ -479,13 +479,15 @@ if __name__ == "__main__":
                                            filename=args.filename,
                                            name=args.name,
                                            block_size=args.block_size,
                                            readonly=args.readonly))
                                            readonly=args.readonly,
                                            fallocate=args.fallocate))

    p = subparsers.add_parser('bdev_aio_create', help='Add a bdev with aio backend')
    p.add_argument('filename', help='Path to device or file (ex: /dev/sda)')
    p.add_argument('name', help='Block device name')
    p.add_argument('block_size', help='Block size for this bdev', type=int, nargs='?')
    p.add_argument("-r", "--readonly", action='store_true', help='Set this bdev as read-only')
    p.add_argument("--fallocate", action='store_true', help='Support unmap by fallocate')
    p.set_defaults(func=bdev_aio_create)

    def bdev_aio_rescan(args):
Loading