Commit c1f1a876 authored by Piotr Pelplinski's avatar Piotr Pelplinski Committed by Jim Harris
Browse files

bdev: double buffering for unaligned buffers



Now that _spdk_bdev_io_get_buf can allocate aligned buffers, add the
ability to store the original buffer and replace it with an aligned
one for the duration of the I/O.

Signed-off-by: default avatarPiotr Pelplinski <piotr.pelplinski@intel.com>
Change-Id: If0ed306175631613c0f9310dccaae6615364fb49

Reviewed-on: https://review.gerrithub.io/429754


Chandler-Test-Pool: SPDK Automated Test System <sys_sgsw@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
parent 3e75e90a
Loading
Loading
Loading
Loading
+13 −4
Original line number Diff line number Diff line
@@ -489,6 +489,11 @@ struct spdk_bdev_io {
		/** requested size of the buffer associated with this I/O */
		uint64_t buf_len;

		/** if the request is double buffered, store original request iovs here */
		struct iovec  bounce_iov;
		struct iovec *orig_iovs;
		int           orig_iovcnt;

		/** Callback for when buf is allocated */
		spdk_bdev_io_get_buf_cb get_buf_cb;

@@ -650,10 +655,14 @@ const struct spdk_bdev_aliases_list *spdk_bdev_get_aliases(const struct spdk_bde

/**
 * Allocate a buffer for given bdev_io.  Allocation will happen
 * only if the bdev_io has no assigned SGL yet. The buffer will be
 * freed automatically on \c spdk_bdev_free_io() call. This call
 * will never fail - in case of lack of memory given callback \c cb
 * will be deferred until enough memory is freed.
 * only if the bdev_io has no assigned SGL yet or SGL is not
 * aligned to \c bdev->required_alignment.  If SGL is not aligned,
 * this call will cause copy from SGL to bounce buffer on write
 * path or copy from bounce buffer to SGL before completion
 * callback on read path.  The buffer will be freed automatically
 * on \c spdk_bdev_free_io() call. This call will never fail.
 * In case of lack of memory given callback \c cb will be deferred
 * until enough memory is freed.
 *
 * \param bdev_io I/O to allocate buffer for.
 * \param cb callback to be called when the buffer is allocated
+115 −7
Original line number Diff line number Diff line
@@ -411,6 +411,78 @@ spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
	iovs[0].iov_len = len;
}

/* Return true if the I/O already has a data buffer attached.
 * Only the first iovec is inspected: a populated SGL always has a
 * non-NULL base in entry 0.  A NULL iovs array is treated the same
 * as "no buffer allocated" so callers need not special-case it. */
static bool
_is_buf_allocated(struct iovec *iovs)
{
	return iovs != NULL && iovs[0].iov_base != NULL;
}

/* Check that the base address of every iovec in the list satisfies
 * the given alignment.  The alignment is expected to be a power of
 * two; an alignment of 1 means "no requirement" and short-circuits
 * to true. */
static bool
_are_iovs_aligned(struct iovec *iovs, int iovcnt, uint32_t alignment)
{
	uintptr_t mask = (uintptr_t)alignment - 1;
	int idx;

	if (spdk_likely(alignment == 1)) {
		return true;
	}

	for (idx = 0; idx < iovcnt; idx++) {
		if (((uintptr_t)iovs[idx].iov_base & mask) != 0) {
			return false;
		}
	}

	return true;
}

/* Gather the contents of an iovec list into a single contiguous buffer.
 * At most buf_len bytes are copied; any iovec data beyond that is
 * silently dropped (the caller sizes the buffer to the I/O length).
 *
 * Uses an explicit byte pointer for the destination cursor: advancing a
 * void * is a GNU extension, not standard C.  The loop also stops as
 * soon as the destination is full instead of issuing zero-length
 * memcpy() calls for the remaining iovecs. */
static void
_copy_iovs_to_buf(void *buf, size_t buf_len, struct iovec *iovs, int iovcnt)
{
	uint8_t *dst = buf;
	size_t len;
	int i;

	for (i = 0; i < iovcnt && buf_len > 0; i++) {
		len = iovs[i].iov_len < buf_len ? iovs[i].iov_len : buf_len;
		memcpy(dst, iovs[i].iov_base, len);
		dst += len;
		buf_len -= len;
	}
}

/* Scatter a contiguous buffer back into an iovec list.  At most
 * buf_len bytes are copied; each iovec is filled up to its own length
 * until the source is exhausted.
 *
 * Uses an explicit byte pointer for the source cursor: advancing a
 * void * is a GNU extension, not standard C.  The loop also stops as
 * soon as the source is drained instead of issuing zero-length
 * memcpy() calls for the remaining iovecs. */
static void
_copy_buf_to_iovs(struct iovec *iovs, int iovcnt, void *buf, size_t buf_len)
{
	const uint8_t *src = buf;
	size_t len;
	int i;

	for (i = 0; i < iovcnt && buf_len > 0; i++) {
		len = iovs[i].iov_len < buf_len ? iovs[i].iov_len : buf_len;
		memcpy(iovs[i].iov_base, src, len);
		src += len;
		buf_len -= len;
	}
}

/* Swap in an internal, aligned bounce buffer for an I/O whose
 * caller-supplied iovecs do not meet the bdev's alignment requirement.
 * The caller's iovec list is stashed in internal.orig_iovs/orig_iovcnt
 * so it can be restored (and, for reads, filled) at completion time by
 * _bdev_io_unset_bounce_buf(). */
static void
_bdev_io_set_bounce_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
{
	/* stash the caller's iovec list for completion time */
	bdev_io->internal.orig_iovs = bdev_io->u.bdev.iovs;
	bdev_io->internal.orig_iovcnt = bdev_io->u.bdev.iovcnt;

	/* describe the bounce buffer and point the I/O at it */
	bdev_io->internal.bounce_iov.iov_base = buf;
	bdev_io->internal.bounce_iov.iov_len = len;
	bdev_io->u.bdev.iovs = &bdev_io->internal.bounce_iov;
	bdev_io->u.bdev.iovcnt = 1;

	/* a write must have its payload staged into the bounce buffer up front */
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		_copy_iovs_to_buf(buf, len, bdev_io->internal.orig_iovs, bdev_io->internal.orig_iovcnt);
	}
}

static void
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
@@ -421,8 +493,7 @@ spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
	struct spdk_bdev_mgmt_channel *ch;
	uint64_t buf_len;
	uint64_t alignment;

	assert(bdev_io->u.bdev.iovcnt == 1);
	bool buf_allocated;

	buf = bdev_io->internal.buf;
	buf_len = bdev_io->internal.buf_len;
@@ -445,9 +516,15 @@ spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
		tmp = STAILQ_FIRST(stailq);

		alignment = spdk_bdev_get_buf_align(tmp->bdev);
		buf_allocated = _is_buf_allocated(tmp->u.bdev.iovs);

		aligned_buf = (void *)(((uintptr_t)buf +
					(alignment - 1)) & ~(alignment - 1));
		if (buf_allocated) {
			_bdev_io_set_bounce_buf(tmp, aligned_buf, tmp->internal.buf_len);
		} else {
			spdk_bdev_io_set_buf(tmp, aligned_buf, tmp->internal.buf_len);
		}

		STAILQ_REMOVE_HEAD(stailq, internal.buf_link);
		tmp->internal.buf = buf;
@@ -455,6 +532,25 @@ spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
	}
}

/* Undo _bdev_io_set_bounce_buf(): for a successful read, copy the
 * bounced data back into the caller's iovecs, then restore the original
 * iovec list and release the bounce buffer to the pool. */
static void
_bdev_io_unset_bounce_buf(struct spdk_bdev_io *bdev_io)
{
	bool successful_read = bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
			       bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS;

	if (successful_read) {
		/* read path: move the data out of the bounce buffer */
		_copy_buf_to_iovs(bdev_io->internal.orig_iovs, bdev_io->internal.orig_iovcnt,
				  bdev_io->internal.bounce_iov.iov_base,
				  bdev_io->internal.bounce_iov.iov_len);
	}

	/* hand the caller's original iovec list back to the I/O */
	bdev_io->u.bdev.iovs = bdev_io->internal.orig_iovs;
	bdev_io->u.bdev.iovcnt = bdev_io->internal.orig_iovcnt;

	/* mark the I/O as no longer double buffered */
	bdev_io->internal.orig_iovs = NULL;
	bdev_io->internal.orig_iovcnt = 0;

	/* return the bounce buffer to the pool */
	spdk_bdev_io_put_buf(bdev_io);
}

void
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len)
{
@@ -463,14 +559,17 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u
	void *buf, *aligned_buf;
	struct spdk_bdev_mgmt_channel *mgmt_ch;
	uint64_t alignment;
	bool buf_allocated;

	assert(cb != NULL);
	assert(bdev_io->u.bdev.iovs != NULL);

	alignment = spdk_bdev_get_buf_align(bdev_io->bdev);
	buf_allocated = _is_buf_allocated(bdev_io->u.bdev.iovs);

	if (spdk_unlikely(bdev_io->u.bdev.iovs[0].iov_base != NULL)) {
		/* Buffer already present */
	if (buf_allocated &&
	    _are_iovs_aligned(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, alignment)) {
		/* Buffer already present and aligned */
		cb(bdev_io->internal.ch->channel, bdev_io);
		return;
	}
@@ -496,8 +595,11 @@ spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, u
	} else {
		aligned_buf = (void *)(((uintptr_t)buf + (alignment - 1)) & ~(alignment - 1));

		if (buf_allocated) {
			_bdev_io_set_bounce_buf(bdev_io, aligned_buf, len);
		} else {
			spdk_bdev_io_set_buf(bdev_io, aligned_buf, len);

		}
		bdev_io->internal.buf = buf;
		bdev_io->internal.get_buf_cb(bdev_io->internal.ch->channel, bdev_io);
	}
@@ -1499,6 +1601,8 @@ spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
	bdev_io->internal.in_submit_request = false;
	bdev_io->internal.buf = NULL;
	bdev_io->internal.io_submit_ch = NULL;
	bdev_io->internal.orig_iovs = NULL;
	bdev_io->internal.orig_iovcnt = 0;
}

static bool
@@ -3020,6 +3124,10 @@ spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status sta
			return;
		}
	} else {
		if (spdk_unlikely(bdev_io->internal.orig_iovcnt > 0)) {
			_bdev_io_unset_bounce_buf(bdev_io);
		}

		assert(bdev_ch->io_outstanding > 0);
		assert(shared_resource->io_outstanding > 0);
		bdev_ch->io_outstanding--;