Commit 10808e45 authored by paul luse's avatar paul luse Committed by Jim Harris
Browse files

idxd: refactor flow control for idxd engine



Recent work identified race conditions having to do with the
dynamic flow control mechanism for the idxd engine. In order
to both address the issue and simplify the code a new scheme
is now in place.  Essentially every DSA device will be allowed
to accommodate 8 channels and each channel will get a fixed 1/8
the number of work queue entries regardless of how many
channels there are.  Assignment of channels to devices is round
robin and if/when no more channels can be accommodated the get
channel request will fail.

The performance tests also revealed another issue that was
masked before; it is a one-line fix, so it is included in this
patch for convenience. In the idxd poller we limit the number of
completions processed during one run to prevent the poller thread
from starving other threads, since operations that complete on
this thread are immediately replaced up to the limit for the channel.

Signed-off-by: default avatarpaul luse <paul.e.luse@intel.com>
Change-Id: I913e809a934b562feb495815a9b9c605d622285c
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8171


Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent 570c8bb4
Loading
Loading
Loading
Loading
+5 −15
Original line number Diff line number Diff line
@@ -69,14 +69,6 @@ struct idxd_batch;
 */
int spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan);

/**
 * Reconfigures this channel based on how many current channels there are.
 *
 * \param chan IDXD channel to be set.
 * \return 0 on success, negative errno on failure.
 */
int spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan);

/**
 * Signature for callback function invoked when a request is completed.
 *
@@ -431,7 +423,6 @@ int spdk_idxd_process_events(struct spdk_idxd_io_channel *chan);
 * Returns an IDXD channel for a given IDXD device.
 *
 * \param idxd IDXD device to get a channel for.
 *
 * \return pointer to an IDXD channel.
 */
struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd);
@@ -440,17 +431,16 @@ struct spdk_idxd_io_channel *spdk_idxd_get_channel(struct spdk_idxd_device *idxd
 * Free an IDXD channel.
 *
 * \param chan IDXD channel to free.
 * \return true if the underlying device needs a rebalance
 */
bool spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan);
void spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan);

/**
 * Determine if the idxd device needs rebalancing.
 * Get the max number of outstanding operations supported by this channel.
 *
 * \param idxd IDXD device.
 * \return true if rebalance is needed, false if not.
 * \param chan IDXD channel to communicate on.
 * \return max number of operations supported.
 */
bool spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd);
int spdk_idxd_chan_get_max_operations(struct spdk_idxd_io_channel *chan);

#ifdef __cplusplus
}
+36 −91
Original line number Diff line number Diff line
@@ -46,6 +46,12 @@

#define ALIGN_4K 0x1000
#define USERSPACE_DRIVER_NAME "user"
#define CHAN_PER_DEVICE(total_wq_size) ((total_wq_size >= 128) ? 8 : 4)
/*
 * Need to limit how many completions we reap in one poller to avoid starving
 * other threads as callers can submit new operations on the polling thread.
 */
#define MAX_COMPLETIONS_PER_POLL 16

static STAILQ_HEAD(, spdk_idxd_impl) g_idxd_impls = STAILQ_HEAD_INITIALIZER(g_idxd_impls);
static struct spdk_idxd_impl *g_idxd_impl;
@@ -75,12 +81,6 @@ struct device_config g_dev_cfg1 = {
	.total_engines = 4,
};

bool
spdk_idxd_device_needs_rebalance(struct spdk_idxd_device *idxd)
{
	return idxd->needs_rebalance;
}

static uint64_t
idxd_read_8(struct spdk_idxd_device *idxd, void *portal, uint32_t offset)
{
@@ -99,11 +99,6 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
		SPDK_ERRLOG("Failed to allocate idxd chan\n");
		return NULL;
	}
	chan->idxd = idxd;

	TAILQ_INIT(&chan->batches);
	TAILQ_INIT(&chan->batch_pool);
	TAILQ_INIT(&chan->comp_ctx_oustanding);

	chan->batch_base = calloc(NUM_BATCHES_PER_CHANNEL, sizeof(struct idxd_batch));
	if (chan->batch_base == NULL) {
@@ -112,36 +107,39 @@ spdk_idxd_get_channel(struct spdk_idxd_device *idxd)
		return NULL;
	}

	pthread_mutex_lock(&idxd->num_channels_lock);
	if (idxd->num_channels == CHAN_PER_DEVICE(idxd->total_wq_size)) {
		/* too many channels sharing this device */
		pthread_mutex_unlock(&idxd->num_channels_lock);
		free(chan->batch_base);
		free(chan);
		return NULL;
	}
	idxd->num_channels++;
	pthread_mutex_unlock(&idxd->num_channels_lock);

	chan->idxd = idxd;
	TAILQ_INIT(&chan->batches);
	TAILQ_INIT(&chan->batch_pool);
	TAILQ_INIT(&chan->comp_ctx_oustanding);

	batch = chan->batch_base;
	for (i = 0 ; i < NUM_BATCHES_PER_CHANNEL ; i++) {
		TAILQ_INSERT_TAIL(&chan->batch_pool, batch, link);
		batch++;
	}

	pthread_mutex_lock(&chan->idxd->num_channels_lock);
	chan->idxd->num_channels++;
	if (chan->idxd->num_channels > 1) {
		chan->idxd->needs_rebalance = true;
	} else {
		chan->idxd->needs_rebalance = false;
	}
	pthread_mutex_unlock(&chan->idxd->num_channels_lock);

	return chan;
}

bool
void
spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
{
	struct idxd_batch *batch;
	bool rebalance = false;

	pthread_mutex_lock(&chan->idxd->num_channels_lock);
	assert(chan->idxd->num_channels > 0);
	chan->idxd->num_channels--;
	if (chan->idxd->num_channels > 0) {
		rebalance = true;
	}
	pthread_mutex_unlock(&chan->idxd->num_channels_lock);

	spdk_free(chan->completions);
@@ -154,8 +152,13 @@ spdk_idxd_put_channel(struct spdk_idxd_io_channel *chan)
	}
	free(chan->batch_base);
	free(chan);
}

	return rebalance;
/* returns the total max operations for channel. */
int
spdk_idxd_chan_get_max_operations(struct spdk_idxd_io_channel *chan)
{
	return chan->idxd->total_wq_size / CHAN_PER_DEVICE(chan->idxd->total_wq_size);
}

int
@@ -170,9 +173,8 @@ spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan)
		chan->idxd->wq_id = 0;
	}

	pthread_mutex_lock(&chan->idxd->num_channels_lock);
	num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / chan->idxd->num_channels;
	pthread_mutex_unlock(&chan->idxd->num_channels_lock);
	num_ring_slots = chan->idxd->queues[chan->idxd->wq_id].wqcfg.wq_size / CHAN_PER_DEVICE(
				 chan->idxd->total_wq_size);

	chan->ring_slots = spdk_bit_array_create(num_ring_slots);
	if (chan->ring_slots == NULL) {
@@ -180,13 +182,7 @@ spdk_idxd_configure_chan(struct spdk_idxd_io_channel *chan)
		return -ENOMEM;
	}

	/*
	 * max ring slots can change as channels come and go but we
	 * start off getting all of the slots for this work queue.
	 */
	chan->max_ring_slots = num_ring_slots;

	/* Store the original size of the ring. */
	/* Store the size of the ring. */
	chan->ring_size = num_ring_slots;

	chan->desc = spdk_zmalloc(num_ring_slots * sizeof(struct idxd_hw_desc),
@@ -250,61 +246,6 @@ err_desc:
	return rc;
}

static void
_idxd_drain(struct spdk_idxd_io_channel *chan)
{
	uint32_t index;
	int set = 0;

	do {
		spdk_idxd_process_events(chan);
		set = 0;
		for (index = 0; index < chan->max_ring_slots; index++) {
			set |= spdk_bit_array_get(chan->ring_slots, index);
		}
	} while (set);
}

int
spdk_idxd_reconfigure_chan(struct spdk_idxd_io_channel *chan)
{
	uint32_t num_ring_slots;
	int rc;

	_idxd_drain(chan);

	assert(spdk_bit_array_count_set(chan->ring_slots) == 0);

	pthread_mutex_lock(&chan->idxd->num_channels_lock);
	assert(chan->idxd->num_channels > 0);
	num_ring_slots = chan->ring_size / chan->idxd->num_channels;
	/* If no change (ie this was a call from another thread doing its for_each_channel,
	 * then we can just bail now.
	 */
	if (num_ring_slots == chan->max_ring_slots) {
		pthread_mutex_unlock(&chan->idxd->num_channels_lock);
		return 0;
	}
	pthread_mutex_unlock(&chan->idxd->num_channels_lock);

	/* re-allocate our descriptor ring for hw flow control. */
	rc = spdk_bit_array_resize(&chan->ring_slots, num_ring_slots);
	if (rc < 0) {
		SPDK_ERRLOG("Unable to resize channel bit array\n");
		return -ENOMEM;
	}

	chan->max_ring_slots = num_ring_slots;

	/*
	 * Note: The batch descriptor ring does not change with the
	 * number of channels as descriptors on this ring do not
	 * "count" for flow control.
	 */

	return rc;
}

static inline struct spdk_idxd_impl *
idxd_get_impl_by_name(const char *impl_name)
{
@@ -1118,6 +1059,10 @@ spdk_idxd_process_events(struct spdk_idxd_io_channel *chan)
	int rc = 0;

	TAILQ_FOREACH_SAFE(comp_ctx, &chan->comp_ctx_oustanding, link, tmp) {
		if (rc == MAX_COMPLETIONS_PER_POLL) {
			break;
		}

		if (IDXD_COMPLETION(comp_ctx->hw.status)) {

			TAILQ_REMOVE(&chan->comp_ctx_oustanding, comp_ctx, link);
+1 −8
Original line number Diff line number Diff line
@@ -119,15 +119,8 @@ struct spdk_idxd_io_channel {
	 * We use one bit array to track ring slots for both
	 * desc and completions.
	 *
	 * TODO: We can get rid of the bit array and just use a uint
	 * to manage flow control as the current implementation saves
	 * enough info in comp_ctx that it doesn't need the index. Keeping
	 * the bit arrays for now as (a) they provide some extra debug benefit
	 * until we have silicon and (b) they may still be needed depending on
	 * polling implementation experiments that we need to run with real silicon.
	 */
	struct spdk_bit_array		*ring_slots;
	uint32_t			max_ring_slots;

	/* Lists of batches, free and in use. */
	TAILQ_HEAD(, idxd_batch)	batch_pool;
@@ -197,7 +190,7 @@ struct spdk_idxd_device {
	void				*portals;
	int				wq_id;
	uint32_t			num_channels;
	bool				needs_rebalance;
	uint32_t			total_wq_size;
	pthread_mutex_t			num_channels_lock;

	struct idxd_group		*groups;
+4 −3
Original line number Diff line number Diff line
@@ -279,6 +279,7 @@ idxd_wq_config(struct spdk_user_idxd_device *user_idxd)
	assert(LOG2_WQ_MAX_BATCH <= user_idxd->registers.gencap.max_batch_shift);
	assert(LOG2_WQ_MAX_XFER <= user_idxd->registers.gencap.max_xfer_shift);

	idxd->total_wq_size = user_idxd->registers.wqcap.total_wq_size;
	idxd->queues = calloc(1, user_idxd->registers.wqcap.num_wqs * sizeof(struct idxd_wq));
	if (idxd->queues == NULL) {
		SPDK_ERRLOG("Failed to allocate queue memory\n");
+1 −2
Original line number Diff line number Diff line
@@ -2,9 +2,8 @@
	global:

	# public functions
	spdk_idxd_device_needs_rebalance;
	spdk_idxd_chan_get_max_operations;
	spdk_idxd_configure_chan;
	spdk_idxd_reconfigure_chan;
	spdk_idxd_probe;
	spdk_idxd_detach;
	spdk_idxd_batch_prep_copy;
Loading