Commit 28bcf6a7 authored by Konrad Sztyber, committed by Jim Harris
Browse files

bdev: retry IOs on ENOMEM from pull/append_copy



The IOs will now be retried after ENOMEM is received when doing memory
domain pull or appending an accel copy.  The retries are performed using
the mechanism that's already in place for IOs completed with
SPDK_BDEV_IO_STATUS_NOMEM.

Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Change-Id: I284643bf9971338094e14617974f7511f745f24e
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17761


Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 7952ef88
Loading
Loading
Loading
Loading
+53 −9
Original line number Diff line number Diff line
@@ -368,6 +368,8 @@ struct spdk_bdev_io_error_stat {

enum bdev_io_retry_state {
	BDEV_IO_RETRY_STATE_INVALID,
	BDEV_IO_RETRY_STATE_PULL,
	BDEV_IO_RETRY_STATE_PULL_MD,
	BDEV_IO_RETRY_STATE_SUBMIT,
};

@@ -414,6 +416,8 @@ static bool claim_type_is_v2(enum spdk_bdev_claim_type type);
static void bdev_desc_release_claims(struct spdk_bdev_desc *desc);
static void claim_reset(struct spdk_bdev *bdev);

static void bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch);

#define bdev_get_ext_io_opt(opts, field, defval) \
	(((opts) != NULL && offsetof(struct spdk_bdev_ext_io_opts, field) + \
	 sizeof((opts)->field) <= sizeof(*(opts))) ? (opts)->field : (defval))
@@ -1061,6 +1065,11 @@ bdev_io_exec_sequence_cb(void *ctx, int status)

	TAILQ_REMOVE(&bdev_io->internal.ch->io_accel_exec, bdev_io, internal.link);
	bdev_io_decrement_outstanding(ch, ch->shared_resource);

	if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
		bdev_ch_retry_io(ch);
	}

	bdev_io->internal.data_transfer_cpl(bdev_io, status);
}

@@ -1126,6 +1135,11 @@ bdev_io_pull_md_buf_done(void *ctx, int status)

	TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
	bdev_io_decrement_outstanding(ch, ch->shared_resource);

	if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
		bdev_ch_retry_io(ch);
	}

	assert(bdev_io->internal.data_transfer_cpl);
	bdev_io->internal.data_transfer_cpl(bdev_io, status);
}
@@ -1151,8 +1165,11 @@ bdev_io_pull_md_buf(struct spdk_bdev_io *bdev_io)
			}
			bdev_io_decrement_outstanding(ch, ch->shared_resource);
			TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
			if (rc != -ENOMEM) {
				SPDK_ERRLOG("Failed to pull data from memory domain %s, rc %d\n",
				    spdk_memory_domain_get_dma_device_id(bdev_io->internal.memory_domain), rc);
					    spdk_memory_domain_get_dma_device_id(
						    bdev_io->internal.memory_domain), rc);
			}
		} else {
			memcpy(bdev_io->internal.bounce_md_iov.iov_base,
			       bdev_io->internal.orig_md_iov.iov_base,
@@ -1160,9 +1177,13 @@ bdev_io_pull_md_buf(struct spdk_bdev_io *bdev_io)
		}
	}

	if (spdk_unlikely(rc == -ENOMEM)) {
		bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL_MD);
	} else {
		assert(bdev_io->internal.data_transfer_cpl);
		bdev_io->internal.data_transfer_cpl(bdev_io, rc);
	}
}

static void
_bdev_io_pull_bounce_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
@@ -1228,6 +1249,10 @@ _bdev_io_pull_bounce_data_buf_done(void *ctx, int status)
	TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
	bdev_io_decrement_outstanding(ch, ch->shared_resource);

	if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
		bdev_ch_retry_io(ch);
	}

	bdev_io_pull_bounce_data_buf_done(ctx, status);
}

@@ -1261,7 +1286,7 @@ bdev_io_pull_data(struct spdk_bdev_io *bdev_io)
						    NULL, NULL, 0, NULL, NULL);
		}

		if (spdk_unlikely(rc != 0)) {
		if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
			SPDK_ERRLOG("Failed to append copy to accel sequence: %p\n",
				    bdev_io->internal.accel_sequence);
		}
@@ -1283,8 +1308,11 @@ bdev_io_pull_data(struct spdk_bdev_io *bdev_io)
			}
			TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
			bdev_io_decrement_outstanding(ch, ch->shared_resource);
			if (rc != -ENOMEM) {
				SPDK_ERRLOG("Failed to pull data from memory domain %s\n",
				    spdk_memory_domain_get_dma_device_id(bdev_io->internal.memory_domain));
					    spdk_memory_domain_get_dma_device_id(
						    bdev_io->internal.memory_domain));
			}
		} else {
			assert(bdev_io->u.bdev.iovcnt == 1);
			spdk_copy_iovs_to_buf(bdev_io->u.bdev.iovs[0].iov_base,
@@ -1294,13 +1322,19 @@ bdev_io_pull_data(struct spdk_bdev_io *bdev_io)
		}
	}

	if (spdk_unlikely(rc == -ENOMEM)) {
		bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL);
	} else {
		bdev_io_pull_bounce_data_buf_done(bdev_io, rc);
	}
}

static void
_bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len,
			      bdev_copy_bounce_buffer_cpl cpl_cb)
{
	struct spdk_bdev_shared_resource *shared_resource = bdev_io->internal.ch->shared_resource;

	bdev_io->internal.data_transfer_cpl = cpl_cb;
	/* save original iovec */
	bdev_io->internal.orig_iovs = bdev_io->u.bdev.iovs;
@@ -1312,8 +1346,12 @@ _bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t le
	bdev_io->u.bdev.iovs[0].iov_base = buf;
	bdev_io->u.bdev.iovs[0].iov_len = len;

	if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) {
		bdev_queue_nomem_io_tail(shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL);
	} else {
		bdev_io_pull_data(bdev_io);
	}
}

static void
_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len)
@@ -1438,6 +1476,12 @@ bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch)
		case BDEV_IO_RETRY_STATE_SUBMIT:
			bdev_ch_resubmit_io(bdev_ch, bdev_io);
			break;
		case BDEV_IO_RETRY_STATE_PULL:
			bdev_io_pull_data(bdev_io);
			break;
		case BDEV_IO_RETRY_STATE_PULL_MD:
			bdev_io_pull_md_buf(bdev_io);
			break;
		default:
			assert(0 && "invalid retry state");
			break;
+33 −1
Original line number Diff line number Diff line
@@ -5795,7 +5795,7 @@ bdev_io_ext_bounce_buffer(void)
	struct spdk_io_channel *io_ch;
	char io_buf[512];
	struct iovec iov = { .iov_base = io_buf, .iov_len = 512 };
	struct ut_expected_io *expected_io;
	struct ut_expected_io *expected_io, *aux_io;
	struct spdk_bdev_ext_io_opts ext_io_opts = {
		.metadata = (void *)0xFF000000,
		.size = sizeof(ext_io_opts)
@@ -5849,6 +5849,38 @@ bdev_io_ext_bounce_buffer(void)
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);

	/* Verify the request is queued after receiving ENOMEM from pull */
	g_io_done = false;
	aux_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
	ut_expected_io_set_iov(aux_io, 0, iov.iov_base, iov.iov_len);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, aux_io, link);
	rc = spdk_bdev_writev_blocks(desc, io_ch, &iov, 1, 32, 14, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);

	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_WRITE, 32, 14, 1);
	ut_expected_io_set_iov(expected_io, 0, iov.iov_base, iov.iov_len);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	MOCK_SET(spdk_memory_domain_pull_data, -ENOMEM);
	rc = spdk_bdev_writev_blocks_ext(desc, io_ch, &iov, 1, 32, 14, io_done, NULL, &ext_io_opts);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);
	/* The second IO has been queued */
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);

	MOCK_CLEAR(spdk_memory_domain_pull_data);
	g_memory_domain_pull_data_called = false;
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_memory_domain_pull_data_called == true);
	/* The second IO should be submitted now */
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	g_io_done = false;
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);

	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);
	free_bdev(bdev);