Commit 38df3ad5 authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Tomasz Zawadzki
Browse files

bdev: Add spdk_bdev_open_async() and use it for bdev_get_bdevs RPC



When the bdev library is being finished, a bdev_get_bdevs RPC call may still
be in progress. We should abort it to avoid a memory leak.

The poller was created in bdev_rpc.c. If bdev.c aborted the poller owned by
bdev_rpc.c, a cross-reference between the two files would occur, and this
would cause a lot of cumbersome changes in the unit tests.

Hence, add a new API spdk_bdev_open_async() and move the poller from
bdev_rpc.c to bdev.c. This simplifies the fix a lot.

Together with the previous patch, this fixes issue #3062.

Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: I1e131732d149c8742e9584b2c9e588f62a0c0c56
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/19358


Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: default avatarJim Harris <jim.harris@gmail.com>
Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent 8e2e3f3c
Loading
Loading
Loading
Loading
+48 −1
Original line number Diff line number Diff line
/*   SPDX-License-Identifier: BSD-3-Clause
 *   Copyright (C) 2016 Intel Corporation. All rights reserved.
 *   Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
 *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *   Copyright (c) 2021, 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 */

/** \file
@@ -380,6 +380,53 @@ struct spdk_bdev *spdk_bdev_next_leaf(struct spdk_bdev *prev);
int spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
		       void *event_ctx, struct spdk_bdev_desc **desc);

/**
 * Block device asynchronous open callback.
 *
 * Invoked on the same thread that called spdk_bdev_open_async() once the open
 * attempt completes — successfully, with -ETIMEDOUT on timeout, or with
 * -ESHUTDOWN if the bdev library is finished while the open is still pending.
 *
 * \param desc Output parameter for the descriptor when operation is successful.
 * \param rc 0 if block device is opened successfully or negated errno if failed.
 * \param cb_arg Callback argument.
 */
typedef void (*spdk_bdev_open_async_cb_t)(struct spdk_bdev_desc *desc, int rc, void *cb_arg);

/**
 * Structure with optional asynchronous bdev open parameters.
 *
 * Callers set \c size to sizeof(struct spdk_bdev_open_async_opts) so the
 * library can copy only the fields the caller's binary knows about.
 */
struct spdk_bdev_open_async_opts {
	/* Size of this structure in bytes. */
	size_t size;
	/*
	 * Time in milliseconds to wait for the block device to appear.
	 *
	 * When the block device does not exist, wait until the block device appears
	 * or the timeout is expired if nonzero, or return immediately otherwise.
	 *
	 * Default value is zero and is used when options are omitted.
	 */
	uint64_t timeout_ms;
};
SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_open_async_opts) == 16, "Incorrect size");

/**
 * Open a block device for I/O operations asynchronously with options.
 *
 * \param bdev_name Block device name to open.
 * \param write true if read/write access requested, false if read-only
 * \param event_cb Notification callback to be called when the bdev triggers
 * asynchronous event such as bdev removal. This will always be called on the
 * same thread that spdk_bdev_open_async() was called on. In case of removal event
 * the descriptor will have to be manually closed to make the bdev unregister
 * proceed.
 * \param event_ctx param for event_cb.
 * \param opts Options for asynchronous block device open. If NULL, default values are used.
 * \param open_cb Open callback. Always called on the same thread that called
 * spdk_bdev_open_async(), after this function has returned.
 * \param open_cb_arg Parameter for open_cb.
 * \return 0 if operation started successfully, negated errno value otherwise
 */
int spdk_bdev_open_async(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
			 void *event_ctx, struct spdk_bdev_open_async_opts *opts,
			 spdk_bdev_open_async_cb_t open_cb, void *open_cb_arg);

/**
 * Close a previously opened block device.
 *
+224 −0
Original line number Diff line number Diff line
@@ -108,6 +108,8 @@ struct spdk_bdev_mgr {

	struct spdk_spinlock spinlock;

	TAILQ_HEAD(, spdk_bdev_open_async_ctx) async_bdev_opens;

#ifdef SPDK_CONFIG_VTUNE
	__itt_domain	*domain;
#endif
@@ -119,6 +121,7 @@ static struct spdk_bdev_mgr g_bdev_mgr = {
	.bdev_names = RB_INITIALIZER(g_bdev_mgr.bdev_names),
	.init_complete = false,
	.module_init_complete = false,
	.async_bdev_opens = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.async_bdev_opens),
};

static void
@@ -2338,6 +2341,8 @@ bdev_finish_wait_for_examine_done(void *cb_arg)
	bdev_module_fini_start_iter(NULL);
}

static void bdev_open_async_fini(void);

void
spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg)
{
@@ -2350,6 +2355,8 @@ spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg)
	g_fini_cb_fn = cb_fn;
	g_fini_cb_arg = cb_arg;

	bdev_open_async_fini();

	rc = spdk_bdev_wait_for_examine(bdev_finish_wait_for_examine_done, NULL);
	if (rc != 0) {
		SPDK_ERRLOG("wait_for_examine failed: %s\n", spdk_strerror(-rc));
@@ -7875,6 +7882,223 @@ spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event
	return rc;
}

/* Context for one in-flight spdk_bdev_open_async() request. Linked on
 * g_bdev_mgr.async_bdev_opens while the open is being retried by the poller,
 * freed in bdev_open_async_done().
 */
struct spdk_bdev_open_async_ctx {
	/* Heap copy of the bdev name to open; freed with the context. */
	char					*bdev_name;
	/* Event callback and argument forwarded to bdev_open_ext(). */
	spdk_bdev_event_cb_t			event_cb;
	void					*event_ctx;
	/* true for read/write access, false for read-only. */
	bool					write;
	/* Open result; -ESHUTDOWN marks a context canceled at library shutdown. */
	int					rc;
	/* User completion callback and its argument. */
	spdk_bdev_open_async_cb_t		cb_fn;
	void					*cb_arg;
	/* Descriptor produced by a successful open. */
	struct spdk_bdev_desc			*desc;
	/* Private copy of the open options (defaults applied when omitted). */
	struct spdk_bdev_open_async_opts	opts;
	/* Tick count at the first open attempt; used to enforce timeout_ms. */
	uint64_t				start_ticks;
	/* Thread that called spdk_bdev_open_async(); callbacks run there. */
	struct spdk_thread			*orig_thread;
	/* Poller retrying the open until success, timeout, or shutdown. */
	struct spdk_poller			*poller;
	TAILQ_ENTRY(spdk_bdev_open_async_ctx)	tailq;
};

/* Deliver the final open result to the user's callback on the original
 * thread, then release the context and its duplicated bdev name.
 */
static void
bdev_open_async_done(void *arg)
{
	struct spdk_bdev_open_async_ctx *open_ctx = arg;
	char *name = open_ctx->bdev_name;

	open_ctx->cb_fn(open_ctx->desc, open_ctx->rc, open_ctx->cb_arg);

	free(name);
	free(open_ctx);
}

/* Runs on ctx->orig_thread via spdk_thread_send_msg() from
 * bdev_open_async_fini(). The poller must be unregistered on the thread that
 * owns it, which is why cancellation is dispatched here.
 */
static void
bdev_open_async_cancel(void *arg)
{
	struct spdk_bdev_open_async_ctx *ctx = arg;

	/* rc was set to -ESHUTDOWN before this message was sent; that value also
	 * tells a concurrently firing _bdev_open_async() to do nothing, so
	 * bdev_open_async_done() cannot run twice.
	 */
	assert(ctx->rc == -ESHUTDOWN);

	spdk_poller_unregister(&ctx->poller);

	bdev_open_async_done(ctx);
}

/* This is called when the bdev library finishes at shutdown (from
 * spdk_bdev_finish()). Cancels every still-pending async open so that neither
 * the contexts nor their pollers are leaked.
 */
static void
bdev_open_async_fini(void)
{
	struct spdk_bdev_open_async_ctx *ctx, *tmp_ctx;

	spdk_spin_lock(&g_bdev_mgr.spinlock);
	TAILQ_FOREACH_SAFE(ctx, &g_bdev_mgr.async_bdev_opens, tailq, tmp_ctx) {
		TAILQ_REMOVE(&g_bdev_mgr.async_bdev_opens, ctx, tailq);
		/*
		 * We have to move to ctx->orig_thread to unregister ctx->poller.
		 * However, there is a chance that ctx->poller is executed before
		 * message is executed, which could result in bdev_open_async_done()
		 * being called twice. To avoid such race condition, set ctx->rc to
		 * -ESHUTDOWN.
		 */
		ctx->rc = -ESHUTDOWN;
		spdk_thread_send_msg(ctx->orig_thread, bdev_open_async_cancel, ctx);
	}
	spdk_spin_unlock(&g_bdev_mgr.spinlock);
}

static int bdev_open_async(void *arg);

/* Make one open attempt. Must be called with g_bdev_mgr.spinlock held;
 * invoked from spdk_bdev_open_async() for the first attempt and from the
 * bdev_open_async() poller for retries. On success, timeout, or immediate
 * failure with no timeout configured, stops the poller, unlinks the context,
 * and schedules the completion callback on the originating thread.
 */
static void
_bdev_open_async(struct spdk_bdev_open_async_ctx *ctx)
{
	uint64_t timeout_ticks;

	if (ctx->rc == -ESHUTDOWN) {
		/* This context is being canceled. Do nothing. */
		return;
	}

	ctx->rc = bdev_open_ext(ctx->bdev_name, ctx->write, ctx->event_cb, ctx->event_ctx,
				&ctx->desc);
	if (ctx->rc == 0 || ctx->opts.timeout_ms == 0) {
		/* Opened successfully, or failed with no retry window requested. */
		goto exit;
	}

	/* Deadline in ticks derived from the first attempt and timeout_ms. */
	timeout_ticks = ctx->start_ticks + ctx->opts.timeout_ms * spdk_get_ticks_hz() / 1000ull;
	if (spdk_get_ticks() >= timeout_ticks) {
		SPDK_ERRLOG("Timed out while waiting for bdev '%s' to appear\n", ctx->bdev_name);
		ctx->rc = -ETIMEDOUT;
		goto exit;
	}

	/* The bdev has not appeared yet; keep polling until it does or we time out. */
	return;

exit:
	spdk_poller_unregister(&ctx->poller);
	TAILQ_REMOVE(&g_bdev_mgr.async_bdev_opens, ctx, tailq);

	/* Completion callback is processed after stack unwinding. */
	spdk_thread_send_msg(ctx->orig_thread, bdev_open_async_done, ctx);
}

/* Poller callback: retry the pending open with the bdev manager lock held. */
static int
bdev_open_async(void *arg)
{
	struct spdk_bdev_open_async_ctx *open_ctx = arg;

	spdk_spin_lock(&g_bdev_mgr.spinlock);
	_bdev_open_async(open_ctx);
	spdk_spin_unlock(&g_bdev_mgr.spinlock);

	return SPDK_POLLER_BUSY;
}

/* Copy fields from opts_src into opts, honoring the caller-declared size so
 * that binaries built against an older, smaller options struct keep working.
 * Called from spdk_bdev_open_async() when the caller supplied options.
 */
static void
bdev_open_async_opts_copy(struct spdk_bdev_open_async_opts *opts,
			  struct spdk_bdev_open_async_opts *opts_src,
			  size_t size)
{
	assert(opts);
	assert(opts_src);

	opts->size = size;

	/* Copy a field only if it lies entirely within the declared size. */
#define SET_FIELD(field) \
	if (offsetof(struct spdk_bdev_open_async_opts, field) + sizeof(opts->field) <= size) { \
		opts->field = opts_src->field; \
	} \

	SET_FIELD(timeout_ms);

	/* Do not remove this statement, you should always update this statement when you adding a new field,
	 * and do not forget to add the SET_FIELD statement for your added field. */
	SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_open_async_opts) == 16, "Incorrect size");

#undef SET_FIELD
}

/* Fill opts with default values, touching only the fields that fit within
 * the caller-declared size (keeps old binaries with smaller structs working).
 */
static void
bdev_open_async_opts_get_default(struct spdk_bdev_open_async_opts *opts, size_t size)
{
	assert(opts != NULL);

	opts->size = size;

#define SET_FIELD(field, value) \
	do { \
		if (offsetof(struct spdk_bdev_open_async_opts, field) + \
		    sizeof(opts->field) <= size) { \
			opts->field = value; \
		} \
	} while (0)

	SET_FIELD(timeout_ms, 0);

#undef SET_FIELD
}

/* Open a bdev asynchronously, retrying via a poller until the bdev appears or
 * opts->timeout_ms expires. open_cb always runs on the calling thread.
 * Returns 0 when the operation was started, or a negated errno on immediate
 * failure (in which case open_cb is never called).
 */
int
spdk_bdev_open_async(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
		     void *event_ctx, struct spdk_bdev_open_async_opts *opts,
		     spdk_bdev_open_async_cb_t open_cb, void *open_cb_arg)
{
	struct spdk_bdev_open_async_ctx *ctx;

	if (event_cb == NULL) {
		SPDK_ERRLOG("Missing event callback function\n");
		return -EINVAL;
	}

	if (open_cb == NULL) {
		SPDK_ERRLOG("Missing open callback function\n");
		return -EINVAL;
	}

	if (opts != NULL && opts->size == 0) {
		SPDK_ERRLOG("size in the options structure should not be zero\n");
		return -EINVAL;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		SPDK_ERRLOG("Failed to allocate open context\n");
		return -ENOMEM;
	}

	ctx->bdev_name = strdup(bdev_name);
	if (ctx->bdev_name == NULL) {
		SPDK_ERRLOG("Failed to duplicate bdev_name\n");
		free(ctx);
		return -ENOMEM;
	}

	/* Retry period is 100 * 1000 microseconds, i.e. every 100 ms. The poller
	 * cannot fire before this function returns to the reactor, so it is safe
	 * to finish initializing ctx below.
	 */
	ctx->poller = SPDK_POLLER_REGISTER(bdev_open_async, ctx, 100 * 1000);
	if (ctx->poller == NULL) {
		SPDK_ERRLOG("Failed to register bdev_open_async poller\n");
		free(ctx->bdev_name);
		free(ctx);
		return -ENOMEM;
	}

	ctx->cb_fn = open_cb;
	ctx->cb_arg = open_cb_arg;
	ctx->write = write;
	ctx->event_cb = event_cb;
	ctx->event_ctx = event_ctx;
	ctx->orig_thread = spdk_get_thread();
	ctx->start_ticks = spdk_get_ticks();

	/* Start from defaults, then overlay whatever fields the caller provided. */
	bdev_open_async_opts_get_default(&ctx->opts, sizeof(ctx->opts));
	if (opts != NULL) {
		bdev_open_async_opts_copy(&ctx->opts, opts, opts->size);
	}

	spdk_spin_lock(&g_bdev_mgr.spinlock);

	/* Queue the context for shutdown cleanup and make the first attempt. */
	TAILQ_INSERT_TAIL(&g_bdev_mgr.async_bdev_opens, ctx, tailq);
	_bdev_open_async(ctx);

	spdk_spin_unlock(&g_bdev_mgr.spinlock);

	return 0;
}

static void
bdev_close(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc)
{
+21 −59
Original line number Diff line number Diff line
@@ -795,37 +795,24 @@ static const struct spdk_json_object_decoder rpc_bdev_get_bdevs_decoders[] = {
	{"timeout", offsetof(struct rpc_bdev_get_bdevs, timeout), spdk_json_decode_uint64, true},
};

static int
get_bdevs_poller(void *_ctx)
static void
rpc_bdev_get_bdev_cb(struct spdk_bdev_desc *desc, int rc, void *cb_arg)
{
	struct rpc_bdev_get_bdevs_ctx *ctx = _ctx;
	struct spdk_jsonrpc_request *request = cb_arg;
	struct spdk_json_write_ctx *w;
	struct spdk_bdev_desc *desc;
	int rc;

	rc = spdk_bdev_open_ext(ctx->rpc.name, false, dummy_bdev_event_cb, NULL, &desc);
	if (rc != 0 && spdk_get_ticks() < ctx->timeout_ticks) {
		return SPDK_POLLER_BUSY;
	}
	if (rc == 0) {
		w = spdk_jsonrpc_begin_result(request);

	if (rc != 0) {
		SPDK_ERRLOG("Timed out while waiting for bdev '%s' to appear\n", ctx->rpc.name);
		spdk_jsonrpc_send_error_response(ctx->request, -ENODEV, spdk_strerror(ENODEV));
	} else {
		w = spdk_jsonrpc_begin_result(ctx->request);
		spdk_json_write_array_begin(w);
		rpc_dump_bdev_info(w, spdk_bdev_desc_get_bdev(desc));
		spdk_json_write_array_end(w);
		spdk_jsonrpc_end_result(ctx->request, w);
		spdk_jsonrpc_end_result(request, w);

		spdk_bdev_close(desc);
	} else {
		spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
	}

	spdk_poller_unregister(&ctx->poller);
	free_rpc_bdev_get_bdevs(&ctx->rpc);
	free(ctx);

	return SPDK_POLLER_BUSY;
}

static void
@@ -833,9 +820,8 @@ rpc_bdev_get_bdevs(struct spdk_jsonrpc_request *request,
		   const struct spdk_json_val *params)
{
	struct rpc_bdev_get_bdevs req = {};
	struct rpc_bdev_get_bdevs_ctx *ctx;
	struct spdk_bdev_open_async_opts opts = {};
	struct spdk_json_write_ctx *w;
	struct spdk_bdev_desc *desc = NULL;
	int rc;

	if (params && spdk_json_decode_object(params, rpc_bdev_get_bdevs_decoders,
@@ -849,50 +835,26 @@ rpc_bdev_get_bdevs(struct spdk_jsonrpc_request *request,
	}

	if (req.name) {
		rc = spdk_bdev_open_ext(req.name, false, dummy_bdev_event_cb, NULL, &desc);
		opts.size = sizeof(opts);
		opts.timeout_ms = req.timeout;

		rc = spdk_bdev_open_async(req.name, false, dummy_bdev_event_cb, NULL, &opts,
					  rpc_bdev_get_bdev_cb, request);
		if (rc != 0) {
			if (req.timeout == 0) {
				SPDK_ERRLOG("bdev '%s' does not exist\n", req.name);
				spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV));
				free_rpc_bdev_get_bdevs(&req);
				return;
			SPDK_ERRLOG("spdk_bdev_open_async failed for '%s': rc=%d\n", req.name, rc);
			spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
		}

			ctx = calloc(1, sizeof(*ctx));
			if (ctx == NULL) {
				SPDK_ERRLOG("Failed to allocate bdev_get_bdevs context\n");
				spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
		free_rpc_bdev_get_bdevs(&req);
		return;
	}

			ctx->poller = SPDK_POLLER_REGISTER(get_bdevs_poller, ctx, 10 * 1000);
			if (ctx->poller == NULL) {
				SPDK_ERRLOG("Failed to register bdev_get_bdevs poller\n");
				spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
	free_rpc_bdev_get_bdevs(&req);
				free(ctx);
				return;
			}

			memcpy(&ctx->rpc, &req, sizeof(req));
			ctx->timeout_ticks = spdk_get_ticks() + req.timeout *
					     spdk_get_ticks_hz() / 1000ull;
			ctx->request = request;
			return;
		}
	}

	free_rpc_bdev_get_bdevs(&req);
	w = spdk_jsonrpc_begin_result(request);
	spdk_json_write_array_begin(w);

	if (desc != NULL) {
		rpc_dump_bdev_info(w, spdk_bdev_desc_get_bdev(desc));
		spdk_bdev_close(desc);
	} else {
	spdk_for_each_bdev(w, rpc_dump_bdev_info);
	}

	spdk_json_write_array_end(w);

+1 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
	spdk_for_each_bdev;
	spdk_for_each_bdev_leaf;
	spdk_bdev_open_ext;
	spdk_bdev_open_async;
	spdk_bdev_close;
	spdk_bdev_desc_get_bdev;
	spdk_bdev_set_timeout;