Commit 98eca6fa authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Jim Harris
Browse files

lib/thread: Add API to register a post poller handler



This patch aims to improve doorbell management.
SPDK pollers are executed one after other, there is no
way to control its execution time.
In a case when some application has 3 active pollers
where each poller represents a HW queue (e.g. nvmf_tgt
with nvme_tcp backend and accel module), time interval
between 2 poller invocations might be much more than
average request latency.
That makes doorbell management more complicated because
the rule of thumb is to submit a batch of commands
to a HW and typical design is to ring doorbells once
per poller invocation to flush any tasks submitted
by other pollers. With this approach (flush once per
poll) HW stalls for significant amount of time.
The best solution would be to flush requests to the
HW once a batch of requests is in the submission
queue. But that is another poller which submits
work to the queue, e.g. nvmf_tgt reaps a new request
and submits it to bdev and bdev submits a crypto task.
That is done in the context of nvmf_tgt poller but
task is eventually submitted to HW in the context of
accel poller. We need a mechanism to notify accel
module that nvmf_tgt finishes a batch in order to
flush submission queue. But we can go in another
direction - code in accel module may ask nvmf_tgt
poller to flush doorbells once it submits the
last request in the batch. With some abstraction
we can achieve it by introducing generic API
which registers a one-shot callback which a poller
calls once it exits its routine task. In that
case accel module can register a callback to flush
SQ if a new request is submitted. This patch adds
such an API.
It allows ringing doorbells (and in the future
flush sockets) in more optimal way and significantly
improve performance in highly loaded applications.

Signed-off-by: default avatarAlexey Marchuk <alexeymar@nvidia.com>
Change-Id: I026cf931f6e56bfbb66422738a90f71525516bf7
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/24703


Reviewed-by: default avatarJim Harris <jim.harris@nvidia.com>
Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Community-CI: Community CI Samsung <spdk.community.ci.samsung@gmail.com>
parent 2c140f58
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -1292,6 +1292,17 @@ typedef void (*spdk_iobuf_get_stats_cb)(struct spdk_iobuf_module_stats *modules,
 */
int spdk_iobuf_get_stats(spdk_iobuf_get_stats_cb cb_fn, void *cb_arg);

typedef void (*spdk_post_poller_fn)(void *fn_arg);

/**
 * Register a function to be called after the current SPDK poller has completed. Once called,
 * this function is de-registered and won't be called until the next registration call.
 *
 * \param fn Function to call
 * \param fn_arg Function argument
 */
void spdk_thread_register_post_poller_handler(spdk_post_poller_fn fn, void *fn_arg);

#ifdef __cplusplus
}
#endif
+2 −1
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@
	spdk_poller_pause;
	spdk_poller_resume;
	spdk_poller_register_interrupt;
	spdk_thread_register_post_poller_handler;
	spdk_io_device_register;
	spdk_io_device_unregister;
	spdk_get_io_channel;
+46 −1
Original line number Diff line number Diff line
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2016 Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *   Copyright (c) 2022, 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

#include "spdk/stdinc.h"
@@ -103,6 +103,13 @@ enum spdk_thread_state {
	SPDK_THREAD_STATE_EXITED,
};

/* One-shot callback slot executed after a poller returns (see
 * spdk_thread_register_post_poller_handler). Cleared after each invocation. */
struct spdk_thread_post_poller_handler {
	spdk_post_poller_fn fn;		/* Callback to run; NULL when the slot is free. */
	void *fn_arg;			/* Opaque argument passed back to fn. */
};

/* Fixed capacity of the per-thread post-poller handler table. */
#define SPDK_THREAD_MAX_POST_POLLER_HANDLERS (4)

struct spdk_thread {
	uint64_t			tsc_last;
	struct spdk_thread_stats	stats;
@@ -124,7 +131,9 @@ struct spdk_thread {
	 * queues) or unregistered.
	 */
	TAILQ_HEAD(paused_pollers_head, spdk_poller)	paused_pollers;
	struct spdk_thread_post_poller_handler		pp_handlers[SPDK_THREAD_MAX_POST_POLLER_HANDLERS];
	struct spdk_ring		*messages;
	uint8_t				num_pp_handlers;
	int				msg_fd;
	SLIST_HEAD(, spdk_msg)		msg_cache;
	size_t				msg_cache_count;
@@ -1075,6 +1084,22 @@ thread_execute_timed_poller(struct spdk_thread *thread, struct spdk_poller *poll
	return rc;
}

static inline void
thread_run_pp_handlers(struct spdk_thread *thread)
{
	uint8_t i, count = thread->num_pp_handlers;

	/* Set to max value to prevent new handlers registration within the callback */
	thread->num_pp_handlers = SPDK_THREAD_MAX_POST_POLLER_HANDLERS;

	for (i = 0; i < count; i++) {
		thread->pp_handlers[i].fn(thread->pp_handlers[i].fn_arg);
		thread->pp_handlers[i].fn = NULL;
	}

	thread->num_pp_handlers = 0;
}

static int
thread_poll(struct spdk_thread *thread, uint32_t max_msgs, uint64_t now)
{
@@ -1105,6 +1130,9 @@ thread_poll(struct spdk_thread *thread, uint32_t max_msgs, uint64_t now)
		if (poller_rc > rc) {
			rc = poller_rc;
		}
		if (thread->num_pp_handlers) {
			thread_run_pp_handlers(thread);
		}
	}

	poller = thread->first_timed_poller;
@@ -3151,4 +3179,21 @@ spdk_spin_held(struct spdk_spinlock *sspin)
	return sspin->thread == thread;
}

/* Register a one-shot handler that the current thread runs right after the
 * active poller returns (see thread_poll). The handler is dropped once called,
 * so callers re-register on every use. Must be called from an SPDK thread.
 *
 * \param fn Function to invoke after the current poller completes.
 * \param fn_arg Opaque argument forwarded to fn.
 *
 * If the per-thread table (SPDK_THREAD_MAX_POST_POLLER_HANDLERS slots) is
 * already full, the registration is silently dropped after logging an error.
 */
void
spdk_thread_register_post_poller_handler(spdk_post_poller_fn fn, void *fn_arg)
{
	struct spdk_thread *thr;

	thr = _get_thread();
	assert(thr);
	if (spdk_unlikely(thr->num_pp_handlers == SPDK_THREAD_MAX_POST_POLLER_HANDLERS)) {
		/* SPDK log macros do not append a newline; include it explicitly. */
		SPDK_ERRLOG("Too many handlers registered\n");
		return;
	}

	thr->pp_handlers[thr->num_pp_handlers].fn = fn;
	thr->pp_handlers[thr->num_pp_handlers].fn_arg = fn_arg;
	thr->num_pp_handlers++;
}

SPDK_LOG_REGISTER_COMPONENT(thread)