Commit 48ce2c97 authored by GangCao's avatar GangCao Committed by Jim Harris
Browse files

Bdev: remove the QD poller at the time of Bdev unregister



Fix issue: #2561

The issue here is that in the bdev_set_qd_sampling_period RPC
command, the QD sampling period has been set. Then later the
related Desc is closed and in the bdev_close() function the
QD sampling period is reset to 0.

A new QD desc is added so that the QD sampling period update can
be handled properly.

Meanwhile, a new QD Poll In Progress flag is also added so as
to indicate there are ongoing events of QD sampling and the
Bdev unregister will be handled in the proper way.

Related test case and unit test also updated for this change.

Change-Id: Iac86c2c6447fe338c7480cf468897fc8f41f8741
Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Signed-off-by: default avatarGangCao <gang.cao@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/13016


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
parent c7db171c
Loading
Loading
Loading
Loading
+9 −0
Original line number Diff line number Diff line
@@ -478,9 +478,15 @@ struct spdk_bdev {
		/** poller for tracking the queue_depth of a device, NULL if not tracking */
		struct spdk_poller *qd_poller;

		/** open descriptor to use qd_poller safely */
		struct spdk_bdev_desc *qd_desc;

		/** period at which we poll for queue depth information */
		uint64_t period;

		/** new period to be used to poll for queue depth information */
		uint64_t new_period;

		/** used to aggregate queue depth while iterating across the bdev's open channels */
		uint64_t temporary_queue_depth;

@@ -496,6 +502,9 @@ struct spdk_bdev {
		/** accumulated I/O statistics for previously deleted channels of this bdev */
		struct spdk_bdev_io_stat stat;

		/** true if tracking the queue_depth of a device is in progress */
		bool	qd_poll_in_progress;

		/** histogram enabled on this bdev */
		bool	histogram_enabled;
		bool	histogram_in_progress;
+1 −1
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

SO_VER := 9
SO_VER := 10
SO_MINOR := 0

ifeq ($(CONFIG_VTUNE),y)
+67 −15
Original line number Diff line number Diff line
@@ -3925,6 +3925,8 @@ spdk_bdev_get_io_time(const struct spdk_bdev *bdev)
	return bdev->internal.io_time;
}

static void bdev_update_qd_sampling_period(void *ctx);

static void
_calculate_measured_qd_cpl(struct spdk_io_channel_iter *i, int status)
{
@@ -3936,6 +3938,10 @@ _calculate_measured_qd_cpl(struct spdk_io_channel_iter *i, int status)
		bdev->internal.io_time += bdev->internal.period;
		bdev->internal.weighted_io_time += bdev->internal.period * bdev->internal.measured_queue_depth;
	}

	bdev->internal.qd_poll_in_progress = false;

	bdev_update_qd_sampling_period(bdev);
}

static void
@@ -3953,26 +3959,75 @@ static int
bdev_calculate_measured_queue_depth(void *ctx)
{
	struct spdk_bdev *bdev = ctx;

	/* Reset the aggregate before walking the channels; the completion
	 * callback publishes the final value and clears the in-progress flag.
	 */
	bdev->internal.temporary_queue_depth = 0;
	bdev->internal.qd_poll_in_progress = true;

	spdk_for_each_channel(__bdev_to_io_dev(bdev), _calculate_measured_qd, bdev,
			      _calculate_measured_qd_cpl);

	return SPDK_POLLER_BUSY;
}

static void
bdev_update_qd_sampling_period(void *ctx)
{
	struct spdk_bdev *bdev = ctx;

	if (bdev->internal.period == bdev->internal.new_period) {
		return;
	}

	if (bdev->internal.qd_poll_in_progress) {
		return;
	}

	bdev->internal.period = bdev->internal.new_period;

	spdk_poller_unregister(&bdev->internal.qd_poller);
	if (bdev->internal.period != 0) {
		bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth,
					   bdev, bdev->internal.period);
	} else {
		spdk_bdev_close(bdev->internal.qd_desc);
		bdev->internal.qd_desc = NULL;
	}
}

/* Event callback for the descriptor the bdev layer opens internally for
 * QD sampling (see spdk_bdev_set_qd_sampling_period). No events are
 * expected on that descriptor, so any arrival is merely logged.
 */
static void
_tmp_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
	SPDK_NOTICELOG("Unexpected event type: %d\n", type);
}

void
spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period)
{
	bdev->internal.period = period;
	int rc;

	if (bdev->internal.qd_poller != NULL) {
		spdk_poller_unregister(&bdev->internal.qd_poller);
		bdev->internal.measured_queue_depth = UINT64_MAX;
	if (bdev->internal.new_period == period) {
		return;
	}

	if (period != 0) {
		bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth, bdev,
					   period);
	bdev->internal.new_period = period;

	if (bdev->internal.qd_desc != NULL) {
		assert(bdev->internal.period != 0);

		spdk_thread_send_msg(bdev->internal.qd_desc->thread,
				     bdev_update_qd_sampling_period, bdev);
		return;
	}

	assert(bdev->internal.period == 0);

	rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false, _tmp_bdev_event_cb,
				NULL, &bdev->internal.qd_desc);
	if (rc != 0) {
		return;
	}

	bdev->internal.period = period;
	bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth,
				   bdev, period);
}

static void
@@ -6071,6 +6126,9 @@ bdev_register(struct spdk_bdev *bdev)
	}

	bdev->internal.reset_in_progress = NULL;
	bdev->internal.qd_poll_in_progress = false;
	bdev->internal.period = 0;
	bdev->internal.new_period = 0;

	spdk_io_device_register(__bdev_to_io_dev(bdev),
				bdev_channel_create, bdev_channel_destroy,
@@ -6280,18 +6338,14 @@ spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void
	pthread_mutex_unlock(&bdev->internal.mutex);
	pthread_mutex_unlock(&g_bdev_mgr.mutex);

	spdk_bdev_set_qd_sampling_period(bdev, 0);

	spdk_for_each_channel(__bdev_to_io_dev(bdev),
			      bdev_unregister_abort_channel,
			      bdev,
			      bdev_unregister);
}

static void
_tmp_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
{
	SPDK_NOTICELOG("Unexpected event type: %d\n", type);
}

int
spdk_bdev_unregister_by_name(const char *bdev_name, struct spdk_bdev_module *module,
			     spdk_bdev_unregister_cb cb_fn, void *cb_arg)
@@ -6505,8 +6559,6 @@ bdev_close(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc)
		}
	}

	spdk_bdev_set_qd_sampling_period(bdev, 0);

	if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->internal.open_descs)) {
		rc = bdev_unregister_unsafe(bdev);
		pthread_mutex_unlock(&bdev->internal.mutex);
+41 −0
Original line number Diff line number Diff line
@@ -353,6 +353,46 @@ function qos_test_suite() {
	trap - SIGINT SIGTERM EXIT
}

# Enable queue depth sampling on the given bdev and verify the configured
# period is reported back through bdev_get_iostat. On mismatch, tear down
# the bdev and the bdevperf process, then fail the test.
function qd_sampling_function_test() {
	local bdev_name=$1
	local sampling_period=10
	local iostats
	local qd_sampling_period

	$rpc_py bdev_set_qd_sampling_period "$bdev_name" $sampling_period

	iostats=$($rpc_py bdev_get_iostat -b "$bdev_name")

	qd_sampling_period=$(jq -r '.bdevs[0].queue_depth_polling_period' <<< "$iostats")

	# jq prints "null" when the key is absent; quote the variable so the
	# test does not break on an empty value.
	if [ "$qd_sampling_period" = null ] || [ "$qd_sampling_period" -ne $sampling_period ]; then
		echo "Queue depth polling period is not right"
		$rpc_py bdev_malloc_delete "$bdev_name"
		killprocess $QD_PID
		exit 1
	fi
}

# Spin up bdevperf, create a malloc bdev, run I/O against it, and verify the
# queue depth sampling period via qd_sampling_function_test. The bdevperf
# process is always killed on interrupt/exit via the trap.
function qd_sampling_test_suite() {
	QD_DEV="Malloc_QD"

	# Start bdevperf and wait for its RPC server before creating the bdev.
	"$testdir/bdevperf/bdevperf" -z -m 0x3 -q 256 -o 4096 -w randread -t 5 -C "$env_ctx" &
	QD_PID=$!
	echo "Process bdev QD sampling period testing pid: $QD_PID"
	trap 'cleanup; killprocess $QD_PID; exit 1' SIGINT SIGTERM EXIT
	waitforlisten $QD_PID

	$rpc_py bdev_malloc_create -b $QD_DEV 128 512
	waitforbdev $QD_DEV

	# Kick off I/O in the background and give it time to ramp up before
	# checking that the sampling period is applied and reported.
	$rootdir/test/bdev/bdevperf/bdevperf.py perform_tests &
	sleep 2
	qd_sampling_function_test $QD_DEV

	$rpc_py bdev_malloc_delete $QD_DEV
	killprocess $QD_PID
	trap - SIGINT SIGTERM EXIT
}

# Initial bdev creation and configuration
#-----------------------------------------------------
QOS_DEV_1="Malloc_0"
@@ -457,6 +497,7 @@ run_test "bdev_write_zeroes" $testdir/bdevperf/bdevperf --json "$conf_file" -q 1

if [[ $test_type == bdev ]]; then
	run_test "bdev_qos" qos_test_suite "$env_ctx"
	run_test "bdev_qd_sampling" qd_sampling_test_suite "$env_ctx"
fi

# Temporarily disabled - infinite loop
+106 −0
Original line number Diff line number Diff line
@@ -4095,6 +4095,111 @@ bdev_set_io_timeout(void)
	poll_threads();
}

/* Unit test for per-bdev queue depth sampling: verifies the channel's
 * io_submitted list, the qd poller/descriptor lifecycle across period
 * changes, and that sampling coexists with outstanding I/O and reset.
 */
static void
bdev_set_qd_sampling(void)
{
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *desc = NULL;
	struct spdk_io_channel *io_ch = NULL;
	struct spdk_bdev_channel *bdev_ch = NULL;
	struct timeout_io_cb_arg cb_arg;

	spdk_bdev_initialize(bdev_init_cb, NULL);

	bdev = allocate_bdev("bdev");

	CU_ASSERT(spdk_bdev_open_ext("bdev", true, bdev_ut_event_cb, NULL, &desc) == 0);
	SPDK_CU_ASSERT_FATAL(desc != NULL);
	CU_ASSERT(bdev == spdk_bdev_desc_get_bdev(desc));

	io_ch = spdk_bdev_get_io_channel(desc);
	CU_ASSERT(io_ch != NULL);

	bdev_ch = spdk_io_channel_get_ctx(io_ch);
	CU_ASSERT(TAILQ_EMPTY(&bdev_ch->io_submitted));

	/* Part 1:
	 * Check the bdev_ch->io_submitted list
	 * to make sure that it links only the user-submitted I/Os.
	 */
	CU_ASSERT(spdk_bdev_read(desc, io_ch, (void *)0x1000, 0, 4096, io_done, NULL) == 0);
	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
	CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 2);
	stub_complete_io(1);
	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);
	stub_complete_io(1);
	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 0);

	/* Part 2:
	 * Test the bdev's qd poller registration.
	 */
	/* 1st: successfully set the qd sampling period; the internal qd desc
	 * is opened and the poller is registered once threads are polled. */
	spdk_bdev_set_qd_sampling_period(bdev, 10);
	CU_ASSERT(bdev->internal.new_period == 10);
	CU_ASSERT(bdev->internal.period == 10);
	CU_ASSERT(bdev->internal.qd_desc != NULL);
	poll_threads();
	CU_ASSERT(bdev->internal.qd_poller != NULL);

	/* 2nd: change the qd sampling period; the new period only takes effect
	 * after the qd desc's thread processes the update message. */
	spdk_bdev_set_qd_sampling_period(bdev, 20);
	CU_ASSERT(bdev->internal.new_period == 20);
	CU_ASSERT(bdev->internal.period == 10);
	CU_ASSERT(bdev->internal.qd_desc != NULL);
	poll_threads();
	CU_ASSERT(bdev->internal.qd_poller != NULL);
	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);

	/* 3rd: change the qd sampling period while a sampling pass is in
	 * flight and verify qd_poll_in_progress defers the switch. */
	spdk_delay_us(20);
	poll_thread_times(0, 1);
	CU_ASSERT(bdev->internal.qd_poll_in_progress == true);
	spdk_bdev_set_qd_sampling_period(bdev, 30);
	CU_ASSERT(bdev->internal.new_period == 30);
	CU_ASSERT(bdev->internal.period == 20);
	poll_threads();
	CU_ASSERT(bdev->internal.qd_poll_in_progress == false);
	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);

	/* 4th: disable the qd sampling period; the poller is unregistered and
	 * the internal qd desc is closed. */
	spdk_bdev_set_qd_sampling_period(bdev, 0);
	CU_ASSERT(bdev->internal.new_period == 0);
	CU_ASSERT(bdev->internal.period == 30);
	poll_threads();
	CU_ASSERT(bdev->internal.qd_poller == NULL);
	CU_ASSERT(bdev->internal.period == bdev->internal.new_period);
	CU_ASSERT(bdev->internal.qd_desc == NULL);

	/* Part 3:
	 * Test that submitted I/O and reset work
	 * properly with qd sampling enabled.
	 */
	memset(&cb_arg, 0, sizeof(cb_arg));
	spdk_bdev_set_qd_sampling_period(bdev, 1);
	poll_threads();

	CU_ASSERT(spdk_bdev_write(desc, io_ch, (void *)0x2000, 0, 4096, io_done, NULL) == 0);
	CU_ASSERT(bdev_channel_count_submitted_io(bdev_ch) == 1);

	/* Also include the reset IO */
	memset(&cb_arg, 0, sizeof(cb_arg));
	CU_ASSERT(spdk_bdev_reset(desc, io_ch, io_done, NULL) == 0);
	poll_threads();

	/* Close the desc while the write and reset are still outstanding */
	spdk_put_io_channel(io_ch);
	spdk_bdev_close(desc);

	/* Complete the submitted IO and reset */
	stub_complete_io(2);
	poll_threads();

	free_bdev(bdev);
	spdk_bdev_finish(bdev_fini_cb, NULL);
	poll_threads();
}

static void
lba_range_overlap(void)
{
@@ -5371,6 +5476,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, bdev_open_ext);
	CU_ADD_TEST(suite, bdev_open_ext_unregister);
	CU_ADD_TEST(suite, bdev_set_io_timeout);
	CU_ADD_TEST(suite, bdev_set_qd_sampling);
	CU_ADD_TEST(suite, lba_range_overlap);
	CU_ADD_TEST(suite, lock_lba_range_check_ranges);
	CU_ADD_TEST(suite, lock_lba_range_with_io_outstanding);