Commit a91d250f authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Jim Harris
Browse files

bdev: Insert metadata using bounce/accel buffer if I/O is not aware of metadata



Add DIF insert/strip into the generic bdev layer.

Use bounce buffer if I/O allcates it regardless that I/O is not aware of
metadata. Allocate and use bounce buffer if I/O is not aware of metadata,
and it does not use memory domain or bdev module does not support accel sequence.
Allocate and use accel buffer if I/O uses memory domain, bdev module supports
accel sequence, and I/O is not aware of metadata.

When accel buffer is used, join the existing code path to pull data.

Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: I057d64ab4a1f48c838e1cddd08f3e12cc595817b
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/25087


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <jim.harris@nvidia.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
parent ff173863
Loading
Loading
Loading
Loading
+157 −13
Original line number Diff line number Diff line
@@ -388,6 +388,7 @@ enum bdev_io_retry_state {
	BDEV_IO_RETRY_STATE_SUBMIT,
	BDEV_IO_RETRY_STATE_PUSH,
	BDEV_IO_RETRY_STATE_PUSH_MD,
	BDEV_IO_RETRY_STATE_GET_ACCEL_BUF,
};

#define __bdev_to_io_dev(bdev)		(((char *)bdev) + 1)
@@ -399,6 +400,7 @@ static inline void bdev_io_complete(void *ctx);
static inline void bdev_io_complete_unsubmitted(struct spdk_bdev_io *bdev_io);
static void bdev_io_push_bounce_md_buf(struct spdk_bdev_io *bdev_io);
static void bdev_io_push_bounce_data(struct spdk_bdev_io *bdev_io);
static void _bdev_io_get_accel_buf(struct spdk_bdev_io *bdev_io);

static void bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
static int bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io);
@@ -1322,15 +1324,58 @@ static void
bdev_io_pull_data(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_channel *ch = bdev_io->internal.ch;
	struct spdk_bdev_desc *desc = bdev_io->internal.desc;
	int rc = 0;

	assert(bdev_io->internal.f.has_bounce_buf);

	if (bdev_io_needs_metadata(desc, bdev_io)) {
		assert(bdev_io->bdev->md_interleave);

		if (!bdev_io_use_accel_sequence(bdev_io)) {
			bdev_io->internal.accel_sequence = NULL;
		}

		if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
			rc = spdk_accel_append_dif_generate_copy(&bdev_io->internal.accel_sequence, ch->accel_channel,
					bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					bdev_io->u.bdev.memory_domain,
					bdev_io->u.bdev.memory_domain_ctx,
					bdev_io->internal.bounce_buf.orig_iovs,
					bdev_io->internal.bounce_buf.orig_iovcnt,
					bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
					bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
					bdev_io->u.bdev.num_blocks,
					&bdev_io->u.bdev.dif_ctx,
					NULL, NULL);
		} else {
			assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
			rc = spdk_accel_append_dif_verify_copy(&bdev_io->internal.accel_sequence, ch->accel_channel,
							       bdev_io->internal.bounce_buf.orig_iovs,
							       bdev_io->internal.bounce_buf.orig_iovcnt,
							       bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
							       bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
							       bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
							       bdev_io->u.bdev.memory_domain,
							       bdev_io->u.bdev.memory_domain_ctx,
							       bdev_io->u.bdev.num_blocks,
							       &bdev_io->u.bdev.dif_ctx,
							       &bdev_io->u.bdev.dif_err,
							       NULL, NULL);
		}

		if (spdk_likely(rc == 0)) {
			bdev_io->internal.f.has_accel_sequence = true;
			bdev_io->u.bdev.accel_sequence = bdev_io->internal.accel_sequence;
		} else if (rc != -ENOMEM) {
			SPDK_ERRLOG("Failed to append generate/verify_copy to accel sequence: %p\n",
				    bdev_io->internal.accel_sequence);
		}
	} else if (bdev_io_needs_sequence_exec(desc, bdev_io) ||
		   (bdev_io_use_accel_sequence(bdev_io) && bdev_io_use_memory_domain(bdev_io))) {
		/* If we need to exec an accel sequence or the IO uses a memory domain buffer and has a
		 * sequence, append a copy operation making accel change the src/dst buffers of the previous
		 * operation */
	if (bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io) ||
	    (bdev_io_use_accel_sequence(bdev_io) && bdev_io_use_memory_domain(bdev_io))) {
		assert(bdev_io_use_accel_sequence(bdev_io));
		if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
			rc = spdk_accel_append_copy(&bdev_io->internal.accel_sequence, ch->accel_channel,
@@ -1474,6 +1519,17 @@ bdev_io_get_max_buf_len(struct spdk_bdev_io *bdev_io, uint64_t len)
	return len + alignment + md_len;
}

static void
bdev_io_put_accel_buf(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_channel *ch = bdev_io->internal.ch;

	spdk_accel_put_buf(ch->accel_channel,
			   bdev_io->internal.buf.ptr,
			   bdev_io->u.bdev.memory_domain,
			   bdev_io->u.bdev.memory_domain_ctx);
}

static void
_bdev_io_put_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t buf_len)
{
@@ -1487,7 +1543,14 @@ static void
bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
	assert(bdev_io->internal.f.has_buf);
	_bdev_io_put_buf(bdev_io, bdev_io->internal.buf.ptr, bdev_io->internal.buf.len);

	if (bdev_io->u.bdev.memory_domain == spdk_accel_get_memory_domain()) {
		bdev_io_put_accel_buf(bdev_io);
	} else {
		assert(bdev_io->u.bdev.memory_domain == NULL);
		_bdev_io_put_buf(bdev_io, bdev_io->internal.buf.ptr,
				 bdev_io->internal.buf.len);
	}
	bdev_io->internal.buf.ptr = NULL;
	bdev_io->internal.f.has_buf = false;
}
@@ -1570,6 +1633,9 @@ bdev_shared_ch_retry_io(struct spdk_bdev_shared_resource *shared_resource)
		case BDEV_IO_RETRY_STATE_PUSH_MD:
			bdev_io_push_bounce_md_buf(bdev_io);
			break;
		case BDEV_IO_RETRY_STATE_GET_ACCEL_BUF:
			_bdev_io_get_accel_buf(bdev_io);
			break;
		default:
			assert(0 && "invalid retry state");
			break;
@@ -1900,6 +1966,36 @@ _bdev_io_get_bounce_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb
	bdev_io_get_buf(bdev_io, len);
}

static void
_bdev_io_get_accel_buf(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_channel *ch = bdev_io->internal.ch;
	void *buf;
	int rc;

	rc = spdk_accel_get_buf(ch->accel_channel,
				bdev_io->internal.buf.len,
				&buf,
				&bdev_io->u.bdev.memory_domain,
				&bdev_io->u.bdev.memory_domain_ctx);
	if (rc != 0) {
		bdev_queue_nomem_io_tail(ch->shared_resource, bdev_io,
					 BDEV_IO_RETRY_STATE_GET_ACCEL_BUF);
		return;
	}

	_bdev_io_set_buf(bdev_io, buf, bdev_io->internal.buf.len);
}

static inline void
bdev_io_get_accel_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb,
		      uint64_t len)
{
	bdev_io->internal.buf.len = len;
	bdev_io->internal.get_buf_cb = cb;

	_bdev_io_get_accel_buf(bdev_io);
}

SPDK_LOG_DEPRECATION_REGISTER(spdk_bdev_io_get_aux_buf,
			      "spdk_bdev_io_get_aux_buf is deprecated", "v25.01", 0);
@@ -3721,18 +3817,40 @@ _bdev_io_ext_use_bounce_buffer(struct spdk_bdev_io *bdev_io)
	 * be accessed directly. It is needed to allocate buffers before issuing IO operation.
	 * For write operation we need to pull buffers from memory domain before submitting IO.
	 * Once read operation completes, we need to use memory_domain push functionality to
	 * update data in original memory domain IO buffer
	 * update data in original memory domain IO buffer.
	 *
	 * If this I/O request is not aware of metadata, buffers in thsi IO request can't be
	 * accessed directly too. It is needed to allocate buffers before issuing IO operation.
	 * For write operation we need to insert metadata before submitting IO. Once read
	 * operation completes, we need to strip metadata in original IO buffer.
	 *
	 * This IO request will go through a regular IO flow, so clear memory domains pointers */
	assert(bdev_io->internal.f.has_memory_domain);
	assert(bdev_io_use_memory_domain(bdev_io) ||
	       bdev_io_needs_metadata(bdev_io->internal.desc, bdev_io));

	bdev_io->u.bdev.memory_domain = NULL;
	bdev_io->u.bdev.memory_domain_ctx = NULL;
	_bdev_io_get_bounce_buf(bdev_io, _bdev_memory_domain_get_io_cb,
				bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
}

/* We need to allocate bounce buffer if bdev doesn't support memory domains, or if it does
 * support them, but we need to execute an accel sequence and the data buffer is from accel
 * memory domain (to avoid doing a push/pull from that domain).
static inline void
_bdev_io_ext_use_accel_buffer(struct spdk_bdev_io *bdev_io)
{
	assert(bdev_io_use_memory_domain(bdev_io));
	assert(bdev_io_needs_metadata(bdev_io->internal.desc, bdev_io));

	bdev_io->u.bdev.memory_domain = NULL;
	bdev_io->u.bdev.memory_domain_ctx = NULL;
	bdev_io_get_accel_buf(bdev_io, _bdev_memory_domain_get_io_cb,
			      bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
}

/* We need to allocate bounce buffer
 * - if bdev doesn't support memory domains,
 * -  if it does support them, but we need to execute an accel sequence and the data buffer is
 *    from accel memory domain (to avoid doing a push/pull from that domain), or
 * - if IO is not aware of metadata.
 */
static inline bool
bdev_io_needs_bounce_buffer(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bdev_io)
@@ -3740,9 +3858,30 @@ bdev_io_needs_bounce_buffer(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bd
	if (bdev_io_use_memory_domain(bdev_io)) {
		if (!desc->memory_domains_supported ||
		    (bdev_io_needs_sequence_exec(desc, bdev_io) &&
		     bdev_io->internal.memory_domain == spdk_accel_get_memory_domain())) {
		     (bdev_io->internal.memory_domain == spdk_accel_get_memory_domain() ||
		      bdev_io_needs_metadata(desc, bdev_io)))) {
			return true;
		}

		return false;
	}

	if (bdev_io_needs_metadata(desc, bdev_io)) {
		return true;
	}

	return false;
}

/* We need to allocate fake accel buffer if bdev supports memory domains but IO is not
 * aware of metadata.
 */
static inline bool
bdev_io_needs_accel_buffer(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bdev_io)
{
	if (bdev_io_needs_metadata(desc, bdev_io)) {
		assert(bdev_io_use_memory_domain(bdev_io));
		return true;
	}

	return false;
@@ -3774,6 +3913,11 @@ _bdev_io_submit_ext(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bdev_io)
		return;
	}

	if (bdev_io_needs_accel_buffer(desc, bdev_io)) {
		_bdev_io_ext_use_accel_buffer(bdev_io);
		return;
	}

	if (bdev_io_needs_sequence_exec(desc, bdev_io)) {
		if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
			bdev_io_exec_sequence(bdev_io, bdev_io_submit_sequence_cb);
@@ -5605,7 +5749,7 @@ spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channe
		return -EINVAL;
	}

	if (md_buf && !_is_buf_allocated(&iov)) {
	if ((md_buf || desc->opts.hide_metadata) && !_is_buf_allocated(&iov)) {
		return -EINVAL;
	}

+22 −0
Original line number Diff line number Diff line
@@ -19,4 +19,26 @@ DEFINE_STUB(spdk_accel_append_copy, int,
	     uint32_t dst_iovcnt, struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
	     struct iovec *src_iovs, uint32_t src_iovcnt, struct spdk_memory_domain *src_domain,
	     void *src_domain_ctx, spdk_accel_step_cb cb_fn, void *cb_arg), 0);
DEFINE_STUB(spdk_accel_append_dif_verify_copy, int,
	    (struct spdk_accel_sequence **seq, struct spdk_io_channel *ch,
	     struct iovec *dst_iovs, size_t dst_iovcnt,
	     struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
	     struct iovec *src_iovs, size_t src_iovcnt,
	     struct spdk_memory_domain *src_domain, void *src_domain_ctx,
	     uint32_t num_blocks,
	     const struct spdk_dif_ctx *ctx, struct spdk_dif_error *err,
	     spdk_accel_step_cb cb_fn, void *cb_arg), 0);
DEFINE_STUB(spdk_accel_append_dif_generate_copy, int,
	    (struct spdk_accel_sequence **seq,
	     struct spdk_io_channel *ch,
	     struct iovec *dst_iovs, size_t dst_iovcnt,
	     struct spdk_memory_domain *dst_domain, void *dst_domain_ctx,
	     struct iovec *src_iovs, size_t src_iovcnt,
	     struct spdk_memory_domain *src_domain, void *src_domain_ctx,
	     uint32_t num_blocks, const struct spdk_dif_ctx *ctx,
	     spdk_accel_step_cb cb_fn, void *cb_arg), 0);
DEFINE_STUB(spdk_accel_get_memory_domain, struct spdk_memory_domain *, (void), NULL);
DEFINE_STUB(spdk_accel_get_buf, int, (struct spdk_io_channel *ch, uint64_t len, void **buf,
				      struct spdk_memory_domain **domain, void **domain_ctx), 0);
DEFINE_STUB_V(spdk_accel_put_buf, (struct spdk_io_channel *ch, void *buf,
				   struct spdk_memory_domain *domain, void *domain_ctx));