Commit 055de83a authored by Anton Nayshtut, committed by Ben Walker
Browse files

bdev: multiple QoS queues with atomic-based QoS quota



Here's a comparison of the legacy and the atomic-based QoS.

Tested with bdevperf + 1 null or malloc bdev. When QoS is enabled, the limit
is set to a value higher than the setup is capable of achieving
(900,000,000 IOPS).

| bdev   | CPU  |    No QoS    | Legacy QoS | Atomic QoS  | LP  | AP  |
|        | Mask |              |   (IOPS)   |    (IOPS)   | (%) | (%) |
|--------|------|--------------|------------|-------------|-----|-----|
| null   | 1    |  10807210.39 | 8720802.93 |  8883633.79 |  19 |  18 |
| null   | 3    |  21065293.41 | 7798114.51 |  8605514.14 |  63 |  59 |
| null   | 7    |  31941220.35 | 8026022.82 | 11057120.85 |  75 |  65 |
| null   | F    |  42510017.40 | 7655892.00 | 11832259.16 |  82 |  72 |
| null   | FF   |  80864836.44 | 7598421.52 | 13096449.71 |  91 |  84 |
| null   | FFF  | 121780455.70 | 7609117.22 | 11030412.77 |  94 |  91 |
| null   | FFFF | 162071300.70 | 7558077.12 | 10826030.57 |  95 |  93 |
| malloc | 1    |   1948096.14 | 1863817.94 |  1881245.75 |   4 |   3 |
| malloc | 3    |   3803549.11 | 2068897.67 |  3155528.57 |  46 |  17 |
| malloc | 7    |   5593798.24 | 2227017.22 |  4387343.78 |  60 |  22 |
| malloc | F    |   7231720.95 | 2281762.70 |  5797832.38 |  68 |  20 |
| malloc | FF   |  12070891.12 | 2614674.11 |  9690367.72 |  78 |  20 |
| malloc | FFF  |  13989626.48 | 2630881.18 | 12840411.99 |  81 |   8 |
| malloc | FFFF |  14405443.97 | 2600775.13 |  9992956.98 |  82 |  31 |

Change-Id: Ia91706b761e085b4231a2fb9a08565c305735b7d
Signed-off-by: Anton Nayshtut <anayshtut@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/18914


Reviewed-by: Ben Walker <ben@nvidia.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <jim.harris@samsung.com>
parent debc2429
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -911,9 +911,6 @@ struct spdk_bdev_io {
		/** The bdev I/O channel that this was handled on. */
		struct spdk_bdev_channel *ch;

		/** The bdev I/O channel that this was submitted on. */
		struct spdk_bdev_channel *io_submit_ch;

		/** The bdev descriptor that was used when submitting this I/O. */
		struct spdk_bdev_desc *desc;

+1 −0
Original line number Diff line number Diff line
#  SPDX-License-Identifier: BSD-3-Clause
#  Copyright (C) 2015 Intel Corporation.
#  Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
#  All rights reserved.
#

+149 −127
Original line number Diff line number Diff line
@@ -176,11 +176,15 @@ struct spdk_bdev_qos_limit {
	/** Maximum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */
	uint32_t max_per_timeslice;

	/** Function to check whether to queue the IO. */
	bool (*queue_io)(const struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);
	/** Function to check whether to queue the IO.
	 * If The IO is allowed to pass, the quota will be reduced correspondingly.
	 */
	bool (*queue_io)(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);

	/** Function to update for the submitted IO. */
	void (*update_quota)(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);
	/** Function to rewind the quota once the IO was allowed to be sent by this
	 * limit but queued due to one of the further limits.
	 */
	void (*rewind_quota)(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);
};

struct spdk_bdev_qos {
@@ -193,9 +197,6 @@ struct spdk_bdev_qos {
	/** The thread on which the poller is running. */
	struct spdk_thread *thread;

	/** Queue of I/O waiting to be issued. */
	bdev_io_tailq_t queued;

	/** Size of a timeslice in tsc ticks. */
	uint64_t timeslice_size;

@@ -315,6 +316,9 @@ struct spdk_bdev_channel {
	bdev_io_tailq_t		queued_resets;

	lba_range_tailq_t	locked_ranges;

	/** List of I/Os queued by QoS. */
	bdev_io_tailq_t		qos_queued_io;
};

struct media_event_entry {
@@ -2567,66 +2571,101 @@ bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io)
	}
}

static bool
bdev_qos_rw_queue_io(const struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
static inline bool
bdev_qos_rw_queue_io(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io, uint64_t delta)
{
	if (limit->max_per_timeslice > 0 && limit->remaining_this_timeslice <= 0) {
		return true;
	} else {
	int64_t remaining_this_timeslice;

	if (!limit->max_per_timeslice) {
		/* The QoS is disabled */
		return false;
	}
}

static bool
bdev_qos_r_queue_io(const struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	if (bdev_is_read_io(io) == false) {
	remaining_this_timeslice = __atomic_sub_fetch(&limit->remaining_this_timeslice, delta,
				   __ATOMIC_RELAXED);
	if (remaining_this_timeslice + (int64_t)delta > 0) {
		/* There was still a quota for this delta -> the IO shouldn't be queued
		 *
		 * We allow a slight quota overrun here so an IO bigger than the per-timeslice
		 * quota can be allowed once a while. Such overrun then taken into account in
		 * the QoS poller, where the next timeslice quota is calculated.
		 */
		return false;
	}

	return bdev_qos_rw_queue_io(limit, io);
	/* There was no quota for this delta -> the IO should be queued
	 * The remaining_this_timeslice must be rewinded so it reflects the real
	 * amount of IOs or bytes allowed.
	 */
	__atomic_add_fetch(
		&limit->remaining_this_timeslice, delta, __ATOMIC_RELAXED);
	return true;
}

static bool
bdev_qos_w_queue_io(const struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
static inline void
bdev_qos_rw_rewind_io(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io, uint64_t delta)
{
	if (bdev_is_read_io(io) == true) {
		return false;
	__atomic_add_fetch(&limit->remaining_this_timeslice, delta, __ATOMIC_RELAXED);
}

	return bdev_qos_rw_queue_io(limit, io);
static bool
bdev_qos_rw_iops_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	return bdev_qos_rw_queue_io(limit, io, 1);
}

static void
bdev_qos_rw_iops_update_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
bdev_qos_rw_iops_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	limit->remaining_this_timeslice--;
	bdev_qos_rw_rewind_io(limit, io, 1);
}

static void
bdev_qos_rw_bps_update_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
static bool
bdev_qos_rw_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	limit->remaining_this_timeslice -= bdev_get_io_size_in_byte(io);
	return bdev_qos_rw_queue_io(limit, io, bdev_get_io_size_in_byte(io));
}

static void
bdev_qos_r_bps_update_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
bdev_qos_rw_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
}

static bool
bdev_qos_r_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	if (bdev_is_read_io(io) == false) {
		return;
		return false;
	}

	return bdev_qos_rw_bps_update_quota(limit, io);
	return bdev_qos_rw_bps_queue(limit, io);
}

static void
bdev_qos_w_bps_update_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
bdev_qos_r_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	if (bdev_is_read_io(io) != false) {
		bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
	}
}

static bool
bdev_qos_w_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	if (bdev_is_read_io(io) == true) {
		return;
		return false;
	}

	return bdev_qos_rw_bps_queue(limit, io);
}

	return bdev_qos_rw_bps_update_quota(limit, io);
static void
bdev_qos_w_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	if (bdev_is_read_io(io) != true) {
		bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
	}
}

static void
@@ -2637,26 +2676,25 @@ bdev_qos_set_ops(struct spdk_bdev_qos *qos)
	for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
		if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
			qos->rate_limits[i].queue_io = NULL;
			qos->rate_limits[i].update_quota = NULL;
			continue;
		}

		switch (i) {
		case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
			qos->rate_limits[i].queue_io = bdev_qos_rw_queue_io;
			qos->rate_limits[i].update_quota = bdev_qos_rw_iops_update_quota;
			qos->rate_limits[i].queue_io = bdev_qos_rw_iops_queue;
			qos->rate_limits[i].rewind_quota = bdev_qos_rw_iops_rewind_quota;
			break;
		case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
			qos->rate_limits[i].queue_io = bdev_qos_rw_queue_io;
			qos->rate_limits[i].update_quota = bdev_qos_rw_bps_update_quota;
			qos->rate_limits[i].queue_io = bdev_qos_rw_bps_queue;
			qos->rate_limits[i].rewind_quota = bdev_qos_rw_bps_rewind_quota;
			break;
		case SPDK_BDEV_QOS_R_BPS_RATE_LIMIT:
			qos->rate_limits[i].queue_io = bdev_qos_r_queue_io;
			qos->rate_limits[i].update_quota = bdev_qos_r_bps_update_quota;
			qos->rate_limits[i].queue_io = bdev_qos_r_bps_queue;
			qos->rate_limits[i].rewind_quota = bdev_qos_r_bps_rewind_quota;
			break;
		case SPDK_BDEV_QOS_W_BPS_RATE_LIMIT:
			qos->rate_limits[i].queue_io = bdev_qos_w_queue_io;
			qos->rate_limits[i].update_quota = bdev_qos_w_bps_update_quota;
			qos->rate_limits[i].queue_io = bdev_qos_w_bps_queue;
			qos->rate_limits[i].rewind_quota = bdev_qos_w_bps_rewind_quota;
			break;
		default:
			break;
@@ -2731,28 +2769,19 @@ bdev_qos_queue_io(struct spdk_bdev_qos *qos, struct spdk_bdev_io *bdev_io)

			if (qos->rate_limits[i].queue_io(&qos->rate_limits[i],
							 bdev_io) == true) {
				return true;
			}
		}
		for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
			if (!qos->rate_limits[i].update_quota) {
				for (i -= 1; i >= 0 ; i--) {
					if (!qos->rate_limits[i].queue_io) {
						continue;
					}

			qos->rate_limits[i].update_quota(&qos->rate_limits[i], bdev_io);
					qos->rate_limits[i].rewind_quota(&qos->rate_limits[i], bdev_io);
				}
				return true;
			}
		}

	return false;
	}

static inline void
_bdev_io_do_submit(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;
	struct spdk_bdev_channel *ch = bdev_io->internal.ch;

	bdev_io_do_submit(ch, bdev_io);
	return false;
}

static int
@@ -2761,19 +2790,10 @@ bdev_qos_io_submit(struct spdk_bdev_channel *ch, struct spdk_bdev_qos *qos)
	struct spdk_bdev_io		*bdev_io = NULL, *tmp = NULL;
	int				submitted_ios = 0;

	TAILQ_FOREACH_SAFE(bdev_io, &qos->queued, internal.link, tmp) {
	TAILQ_FOREACH_SAFE(bdev_io, &ch->qos_queued_io, internal.link, tmp) {
		if (!bdev_qos_queue_io(qos, bdev_io)) {
			TAILQ_REMOVE(&qos->queued, bdev_io, internal.link);

			if (bdev_io->internal.io_submit_ch) {
				/* Send back the IO to the original thread for the actual processing. */
				bdev_io->internal.ch = bdev_io->internal.io_submit_ch;
				bdev_io->internal.io_submit_ch = NULL;
				spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
						     _bdev_io_do_submit, bdev_io);
			} else {
			TAILQ_REMOVE(&ch->qos_queued_io, bdev_io, internal.link);
			bdev_io_do_submit(ch, bdev_io);
			}

			submitted_ios++;
		}
@@ -3427,10 +3447,10 @@ _bdev_io_submit(void *ctx)
		_bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
	} else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) {
		if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) &&
		    bdev_abort_queued_io(&bdev->internal.qos->queued, bdev_io->u.abort.bio_to_abort)) {
		    bdev_abort_queued_io(&bdev_ch->qos_queued_io, bdev_io->u.abort.bio_to_abort)) {
			_bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
		} else {
			TAILQ_INSERT_TAIL(&bdev->internal.qos->queued, bdev_io, internal.link);
			TAILQ_INSERT_TAIL(&bdev_ch->qos_queued_io, bdev_io, internal.link);
			bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
		}
	} else {
@@ -3500,10 +3520,8 @@ void
bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_thread *thread = spdk_bdev_io_get_thread(bdev_io);
	struct spdk_bdev_channel *ch = bdev_io->internal.ch;

	assert(thread != NULL);
	assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);

	if (!TAILQ_EMPTY(&ch->locked_ranges)) {
@@ -3530,18 +3548,8 @@ bdev_io_submit(struct spdk_bdev_io *bdev_io)
		return;
	}

	if (ch->flags & BDEV_CH_QOS_ENABLED) {
		if ((thread == bdev->internal.qos->thread) || !bdev->internal.qos->thread) {
			_bdev_io_submit(bdev_io);
		} else {
			bdev_io->internal.io_submit_ch = ch;
			bdev_io->internal.ch = bdev->internal.qos->ch;
			spdk_thread_send_msg(bdev->internal.qos->thread, _bdev_io_submit, bdev_io);
		}
	} else {
	_bdev_io_submit(bdev_io);
}
}

static inline void
_bdev_io_ext_use_bounce_buffer(struct spdk_bdev_io *bdev_io)
@@ -3619,7 +3627,6 @@ bdev_io_init(struct spdk_bdev_io *bdev_io,
	bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->internal.in_submit_request = false;
	bdev_io->internal.buf = NULL;
	bdev_io->internal.io_submit_ch = NULL;
	bdev_io->internal.orig_iovs = NULL;
	bdev_io->internal.orig_iovcnt = 0;
	bdev_io->internal.orig_md_iov.iov_base = NULL;
@@ -3696,18 +3703,44 @@ bdev_qos_update_max_quota_per_timeslice(struct spdk_bdev_qos *qos)
		qos->rate_limits[i].max_per_timeslice = spdk_max(max_per_timeslice,
							qos->rate_limits[i].min_per_timeslice);

		qos->rate_limits[i].remaining_this_timeslice = qos->rate_limits[i].max_per_timeslice;
		__atomic_store_n(&qos->rate_limits[i].remaining_this_timeslice,
				 qos->rate_limits[i].max_per_timeslice, __ATOMIC_RELEASE);
	}

	bdev_qos_set_ops(qos);
}

static void
bdev_channel_submit_qos_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
			   struct spdk_io_channel *io_ch, void *ctx)
{
	struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
	int status;

	bdev_qos_io_submit(bdev_ch, bdev->internal.qos);

	/* if all IOs were sent then continue the iteration, otherwise - stop it */
	/* TODO: channels round robing */
	status = TAILQ_EMPTY(&bdev_ch->qos_queued_io) ? 0 : 1;

	spdk_bdev_for_each_channel_continue(i, status);
}


static void
bdev_channel_submit_qos_io_done(struct spdk_bdev *bdev, void *ctx, int status)
{

}

static int
bdev_channel_poll_qos(void *arg)
{
	struct spdk_bdev_qos *qos = arg;
	struct spdk_bdev *bdev = arg;
	struct spdk_bdev_qos *qos = bdev->internal.qos;
	uint64_t now = spdk_get_ticks();
	int i;
	int64_t remaining_last_timeslice;

	if (now < (qos->last_timeslice + qos->timeslice_size)) {
		/* We received our callback earlier than expected - return
@@ -3725,20 +3758,30 @@ bdev_channel_poll_qos(void *arg)
		 * here, we'll account for the overrun so that the next timeslice will
		 * be appropriately reduced.
		 */
		if (qos->rate_limits[i].remaining_this_timeslice > 0) {
			qos->rate_limits[i].remaining_this_timeslice = 0;
		remaining_last_timeslice = __atomic_exchange_n(&qos->rate_limits[i].remaining_this_timeslice,
					   0, __ATOMIC_RELAXED);
		if (remaining_last_timeslice < 0) {
			/* There could be a race condition here as both bdev_qos_rw_queue_io() and bdev_channel_poll_qos()
			 * potentially use 2 atomic ops each, so they can intertwine.
			 * This race can potentialy cause the limits to be a little fuzzy but won't cause any real damage.
			 */
			__atomic_store_n(&qos->rate_limits[i].remaining_this_timeslice,
					 remaining_last_timeslice, __ATOMIC_RELAXED);
		}
	}

	while (now >= (qos->last_timeslice + qos->timeslice_size)) {
		qos->last_timeslice += qos->timeslice_size;
		for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
			qos->rate_limits[i].remaining_this_timeslice +=
				qos->rate_limits[i].max_per_timeslice;
			__atomic_add_fetch(&qos->rate_limits[i].remaining_this_timeslice,
					   qos->rate_limits[i].max_per_timeslice, __ATOMIC_RELAXED);
		}
	}

	return bdev_qos_io_submit(qos->ch, qos);
	spdk_bdev_for_each_channel(bdev, bdev_channel_submit_qos_io, qos,
				   bdev_channel_submit_qos_io_done);

	return SPDK_POLLER_BUSY;
}

static void
@@ -3804,8 +3847,6 @@ bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch)

			qos->thread = spdk_io_channel_get_thread(io_ch);

			TAILQ_INIT(&qos->queued);

			for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
				if (bdev_qos_is_iops_rate_limit(i) == true) {
					qos->rate_limits[i].min_per_timeslice =
@@ -3824,7 +3865,7 @@ bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch)
				SPDK_BDEV_QOS_TIMESLICE_IN_USEC * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
			qos->last_timeslice = spdk_get_ticks();
			qos->poller = SPDK_POLLER_REGISTER(bdev_channel_poll_qos,
							   qos,
							   bdev,
							   SPDK_BDEV_QOS_TIMESLICE_IN_USEC);
		}

@@ -4034,6 +4075,7 @@ bdev_channel_create(void *io_device, void *ctx_buf)
	ch->io_outstanding = 0;
	TAILQ_INIT(&ch->queued_resets);
	TAILQ_INIT(&ch->locked_ranges);
	TAILQ_INIT(&ch->qos_queued_io);
	ch->flags = 0;
	ch->shared_resource = shared_resource;

@@ -4244,7 +4286,6 @@ bdev_qos_destroy(struct spdk_bdev *bdev)
	new_qos->ch = NULL;
	new_qos->thread = NULL;
	new_qos->poller = NULL;
	TAILQ_INIT(&new_qos->queued);
	/*
	 * The limit member of spdk_bdev_qos_limit structure is not zeroed.
	 * It will be used later for the new QoS structure.
@@ -6335,15 +6376,7 @@ bdev_reset_freeze_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bd
	channel->flags |= BDEV_CH_RESET_IN_PROGRESS;

	if ((channel->flags & BDEV_CH_QOS_ENABLED) != 0) {
		/* The QoS object is always valid and readable while
		 * the channel flag is set, so the lock here should not
		 * be necessary. We're not in the fast path though, so
		 * just take it anyway. */
		spdk_spin_lock(&channel->bdev->internal.spinlock);
		if (channel->bdev->internal.qos->ch == channel) {
			TAILQ_SWAP(&channel->bdev->internal.qos->queued, &tmp_queued, spdk_bdev_io, internal.link);
		}
		spdk_spin_unlock(&channel->bdev->internal.spinlock);
		TAILQ_SWAP(&channel->qos_queued_io, &tmp_queued, spdk_bdev_io, internal.link);
	}

	bdev_abort_all_queued_io(&shared_resource->nomem_io, channel);
@@ -8951,7 +8984,6 @@ bdev_disable_qos_done(void *cb_arg)
{
	struct set_qos_limit_ctx *ctx = cb_arg;
	struct spdk_bdev *bdev = ctx->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_qos *qos;

	spdk_spin_lock(&bdev->internal.spinlock);
@@ -8959,24 +8991,6 @@ bdev_disable_qos_done(void *cb_arg)
	bdev->internal.qos = NULL;
	spdk_spin_unlock(&bdev->internal.spinlock);

	while (!TAILQ_EMPTY(&qos->queued)) {
		/* Send queued I/O back to their original thread for resubmission. */
		bdev_io = TAILQ_FIRST(&qos->queued);
		TAILQ_REMOVE(&qos->queued, bdev_io, internal.link);

		if (bdev_io->internal.io_submit_ch) {
			/*
			 * Channel was changed when sending it to the QoS thread - change it back
			 *  before sending it back to the original thread.
			 */
			bdev_io->internal.ch = bdev_io->internal.io_submit_ch;
			bdev_io->internal.io_submit_ch = NULL;
		}

		spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
				     _bdev_io_submit, bdev_io);
	}

	if (qos->thread != NULL) {
		spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
		spdk_poller_unregister(&qos->poller);
@@ -9009,9 +9023,17 @@ bdev_disable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
		     struct spdk_io_channel *ch, void *_ctx)
{
	struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(ch);
	struct spdk_bdev_io *bdev_io;

	bdev_ch->flags &= ~BDEV_CH_QOS_ENABLED;

	while (!TAILQ_EMPTY(&bdev_ch->qos_queued_io)) {
		/* Re-submit the queued I/O. */
		bdev_io = TAILQ_FIRST(&bdev_ch->qos_queued_io);
		TAILQ_REMOVE(&bdev_ch->qos_queued_io, bdev_io, internal.link);
		_bdev_io_submit(bdev_io);
	}

	spdk_bdev_for_each_channel_continue(i, 0);
}

+2 −3
Original line number Diff line number Diff line
@@ -1000,7 +1000,6 @@ basic_qos(void)
	bdev = &g_bdev.bdev;
	bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
	SPDK_CU_ASSERT_FATAL(bdev->internal.qos != NULL);
	TAILQ_INIT(&bdev->internal.qos->queued);
	/*
	 * Enable read/write IOPS, read only byte per second and
	 * read/write byte per second rate limits.
@@ -1175,7 +1174,7 @@ io_during_qos_queue(void)
	bdev = &g_bdev.bdev;
	bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
	SPDK_CU_ASSERT_FATAL(bdev->internal.qos != NULL);
	TAILQ_INIT(&bdev->internal.qos->queued);

	/*
	 * Enable read/write IOPS, read only byte per sec, write only
	 * byte per sec and read/write byte per sec rate limits.
@@ -1278,7 +1277,7 @@ io_during_qos_reset(void)
	bdev = &g_bdev.bdev;
	bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
	SPDK_CU_ASSERT_FATAL(bdev->internal.qos != NULL);
	TAILQ_INIT(&bdev->internal.qos->queued);

	/*
	 * Enable read/write IOPS, write only byte per sec and
	 * read/write byte per second rate limits.