Commit ed6827ed authored by Jim Harris's avatar Jim Harris
Browse files

bdev: prepopulate per-thread bdev_io cache



This helps prevent starvation in the case where a thread
is started but remains idle, while other threads consume
all of the spdk_bdev_io buffers in the global pool.

This starvation issue is fairly theoretical at this point,
but future patches will be adding the ability for callers
to be notified when an spdk_bdev_io becomes available if
the pool is exhausted.  We will add tests to stress
pool exhaustion at which point this patch will become
much more important.

While here, increase the minimum bdev_io_pool_size to
account for the mgmt_ch getting destroyed and then
immediately created again on the master core.  In this
case there is a window where both channels exist at
once - the one being destroyed won't free its cached
spdk_bdev_ios until the deferred spdk_put_io_channel
event executes.

Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: I3a2fc80bc2bfd78b098bcbfce456d7a433cd64e9

Reviewed-on: https://review.gerrithub.io/415039


Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Tested-by: default avatarSPDK Automated Test System <sys_sgsw@intel.com>
parent b10f6b1c
Loading
Loading
Loading
Loading
+21 −2
Original line number Diff line number Diff line
@@ -264,10 +264,19 @@ spdk_bdev_get_opts(struct spdk_bdev_opts *opts)
int
spdk_bdev_set_opts(struct spdk_bdev_opts *opts)
{
	if (opts->bdev_io_pool_size < opts->bdev_io_cache_size * spdk_thread_get_count()) {
	uint32_t min_pool_size;

	/*
	 * Add 1 to the thread count to account for the extra mgmt_ch that gets created during subsystem
	 *  initialization.  A second mgmt_ch will be created on the same thread when the application starts
	 *  but before the deferred put_io_channel event is executed for the first mgmt_ch.
	 */
	min_pool_size = opts->bdev_io_cache_size * (spdk_thread_get_count() + 1);
	if (opts->bdev_io_pool_size < min_pool_size) {
		SPDK_ERRLOG("bdev_io_pool_size %" PRIu32 " is not compatible with bdev_io_cache_size %" PRIu32
			    " and %" PRIu32 " threads\n", opts->bdev_io_pool_size, opts->bdev_io_cache_size,
			    spdk_thread_get_count());
		SPDK_ERRLOG("bdev_io_pool_size must be at least %" PRIu32 "\n", min_pool_size);
		return -1;
	}

@@ -512,14 +521,24 @@ static int
spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;
	struct spdk_bdev_io *bdev_io;
	uint32_t i;

	STAILQ_INIT(&ch->need_buf_small);
	STAILQ_INIT(&ch->need_buf_large);

	STAILQ_INIT(&ch->per_thread_cache);
	ch->per_thread_cache_count = 0;
	ch->bdev_io_cache_size = g_bdev_opts.bdev_io_cache_size;

	/* Pre-populate bdev_io cache to ensure this thread cannot be starved. */
	ch->per_thread_cache_count = 0;
	for (i = 0; i < ch->bdev_io_cache_size; i++) {
		bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
		assert(bdev_io != NULL);
		ch->per_thread_cache_count++;
		STAILQ_INSERT_TAIL(&ch->per_thread_cache, bdev_io, internal.buf_link);
	}

	TAILQ_INIT(&ch->shared_resources);

	return 0;