Commit b47cee6c authored by Mike Gerdts's avatar Mike Gerdts Committed by Jim Harris
Browse files

blob: add IO channels for esnap clones



The channel passed to blob IO operations is useful for tracking
operations within the blobstore and the bs_dev that the blobstore
resides on. Esnap clone blobs perform reads from other bs_devs and
require per-thread, per-bs_dev channels.

This commit augments struct spdk_bs_channel with a tree containing
channels for the external snapshot bs_devs. The tree is indexed by blob
ID. These "esnap channels" are lazily created on the first read from an
external snapshot via each bs_channel. They are removed as bs_channels
are destroyed and blobs are closed.

Change-Id: I97aebe5a2f3584bfbf3a10ede8f3128448d30d6e
Signed-off-by: default avatarMike Gerdts <mgerdts@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/14974


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
parent d0516312
Loading
Loading
Loading
Loading
+12 −0
Original line number Diff line number Diff line
@@ -129,6 +129,11 @@ Channels are an SPDK-wide abstraction and with Blobstore the best way to think a
required in order to do IO.  The application will perform IO to the channel and channels are best thought of as being
associated 1:1 with a thread.

With external snapshots (see @ref blob_pg_esnap_and_esnap_clone), a read from a blob may lead to
reading from the device containing the blobstore or an external snapshot device. To support this,
each blobstore IO channel maintains a tree of channels to be used when reading from external
snapshot devices.

### Blob Identifiers

When an application creates a blob, it does not provide a name as is the case with many other similar
@@ -465,6 +470,13 @@ of IO. They are an internal construct only and are pre-allocated on a per channe
earlier). They are removed from a channel associated linked list when the set (sequence or batch) is started and
then returned to the list when completed.

Each request set maintains a reference to a `channel` and a `back_channel`. The `channel` is used
for performing IO on the blobstore device. The `back_channel` is used for performing IO on the
blob's back device, `blob->back_bs_dev`. For blobs that are not esnap clones, `channel` and
`back_channel` reference an IO channel used with the device that contains the blobstore.  For blobs
that are esnap clones, `channel` is the same as with any other blob and `back_channel` is an IO
channel for the external snapshot device.

### Key Internal Structures

`blobstore.h` contains many of the key structures for the internal workings of Blobstore. Only a few notable ones
+232 −5
Original line number Diff line number Diff line
@@ -40,6 +40,24 @@ static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool inte
static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
				   struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);

/*
 * External snapshots require a channel per thread per esnap bdev.  The tree
 * is populated lazily as blob IOs are handled by the back_bs_dev. When this
 * channel is destroyed, all the channels in the tree are destroyed.
 */

struct blob_esnap_channel {
	RB_ENTRY(blob_esnap_channel)	node;
	spdk_blob_id			blob_id;
	struct spdk_io_channel		*channel;
};

static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob,
		spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)

static inline bool
blob_is_esnap_clone(const struct spdk_blob *blob)
{
@@ -339,10 +357,33 @@ blob_free(struct spdk_blob *blob)
	free(blob);
}

static void
blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
{
	struct spdk_bs_dev	*bs_dev = ctx;

	if (bserrno != 0) {
		/*
		 * This is probably due to a memory allocation failure when creating the
		 * blob_esnap_destroy_ctx before iterating threads.
		 */
		SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
			    blob->id, bserrno);
		assert(false);
	}

	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
	bs_dev->destroy(bs_dev);
}

static void
blob_back_bs_destroy(struct spdk_blob *blob)
{
	blob->back_bs_dev->destroy(blob->back_bs_dev);
	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
		      blob->id);

	blob_esnap_destroy_bs_dev_channels(blob, blob_back_bs_destroy_esnap_done,
					   blob->back_bs_dev);
	blob->back_bs_dev = NULL;
}

@@ -2526,7 +2567,7 @@ blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *bas
{
	uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);

	return (blob->bs->dev->copy != NULL) &&
	return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
	       blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
}

@@ -2862,7 +2903,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
	case SPDK_BLOB_READ: {
		spdk_bs_batch_t *batch;

		batch = bs_batch_open(_ch, &cpl);
		batch = bs_batch_open(_ch, &cpl, blob);
		if (!batch) {
			cb_fn(cb_arg, -ENOMEM);
			return;
@@ -2890,7 +2931,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
				return;
			}

			batch = bs_batch_open(_ch, &cpl);
			batch = bs_batch_open(_ch, &cpl, blob);
			if (!batch) {
				cb_fn(cb_arg, -ENOMEM);
				return;
@@ -2920,7 +2961,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
	case SPDK_BLOB_UNMAP: {
		spdk_bs_batch_t *batch;

		batch = bs_batch_open(_ch, &cpl);
		batch = bs_batch_open(_ch, &cpl, blob);
		if (!batch) {
			cb_fn(cb_arg, -ENOMEM);
			return;
@@ -3287,6 +3328,7 @@ bs_channel_create(void *io_device, void *ctx_buf)

	TAILQ_INIT(&channel->need_cluster_alloc);
	TAILQ_INIT(&channel->queued_io);
	RB_INIT(&channel->esnap_channels);

	return 0;
}
@@ -3309,6 +3351,8 @@ bs_channel_destroy(void *io_device, void *ctx_buf)
		bs_user_op_abort(op, -EIO);
	}

	blob_esnap_destroy_bs_channel(channel);

	free(channel->req_mem);
	spdk_free(channel->new_cluster_page);
	channel->dev->destroy_channel(channel->dev, channel->dev_channel);
@@ -7788,6 +7832,24 @@ blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
	bs_sequence_finish(seq, bserrno);
}

static void
blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
{
	spdk_bs_sequence_t	*seq = cb_arg;

	if (bserrno != 0) {
		SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
			      blob->id, bserrno);
		bs_sequence_finish(seq, bserrno);
		return;
	}

	SPDK_DEBUGLOG(blob_esnap, "blob %" PRIx64 ": closed, syncing metadata\n", blob->id);

	/* Sync metadata */
	blob_persist(seq, blob, blob_close_cpl, blob);
}

void
spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
{
@@ -7813,6 +7875,11 @@ spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_ar
		return;
	}

	if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
		blob_esnap_destroy_bs_dev_channels(blob, blob_close_esnap_done, seq);
		return;
	}

	/* Sync metadata */
	blob_persist(seq, blob, blob_close_cpl, blob);
}
@@ -7827,6 +7894,7 @@ struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
void
spdk_bs_free_io_channel(struct spdk_io_channel *channel)
{
	blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
	spdk_put_io_channel(channel);
}

@@ -8574,5 +8642,164 @@ spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
	return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
}

struct spdk_io_channel *
blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
{
	struct spdk_bs_channel		*bs_channel = spdk_io_channel_get_ctx(ch);
	struct spdk_bs_dev		*bs_dev = blob->back_bs_dev;
	struct blob_esnap_channel	find = {};
	struct blob_esnap_channel	*esnap_channel, *existing;

	find.blob_id = blob->id;
	esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
	if (spdk_likely(esnap_channel != NULL)) {
		SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
			      blob->id, spdk_thread_get_name(spdk_get_thread()));
		return esnap_channel->channel;
	}

	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
		      blob->id, spdk_thread_get_name(spdk_get_thread()));

	esnap_channel = calloc(1, sizeof(*esnap_channel));
	if (esnap_channel == NULL) {
		SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
			       find.blob_id);
		return NULL;
	}
	esnap_channel->channel = bs_dev->create_channel(bs_dev);
	if (esnap_channel->channel == NULL) {
		SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
		free(esnap_channel);
		return NULL;
	}
	esnap_channel->blob_id = find.blob_id;
	existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
	if (spdk_unlikely(existing != NULL)) {
		/*
		 * This should be unreachable: all modifications to this tree happen on this thread.
		 */
		SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
		assert(false);

		bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
		free(esnap_channel);

		return existing->channel;
	}

	return esnap_channel->channel;
}

static int
blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
{
	return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
}

struct blob_esnap_destroy_ctx {
	spdk_blob_op_with_handle_complete	cb_fn;
	void					*cb_arg;
	struct spdk_blob			*blob;
	struct spdk_bs_dev			*back_bs_dev;
};

static void
blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
{
	struct blob_esnap_destroy_ctx	*ctx = spdk_io_channel_iter_get_ctx(i);
	struct spdk_blob		*blob = ctx->blob;

	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
		      blob->id);

	ctx->cb_fn(ctx->cb_arg, blob, status);
	free(ctx);
}

static void
blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
{
	struct blob_esnap_destroy_ctx	*ctx = spdk_io_channel_iter_get_ctx(i);
	struct spdk_blob		*blob = ctx->blob;
	struct spdk_bs_dev		*bs_dev = ctx->back_bs_dev;
	struct spdk_io_channel		*channel = spdk_io_channel_iter_get_channel(i);
	struct spdk_bs_channel		*bs_channel = spdk_io_channel_get_ctx(channel);
	struct blob_esnap_channel	*esnap_channel;
	struct blob_esnap_channel	find = {};

	assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));

	find.blob_id = blob->id;
	esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
	if (esnap_channel != NULL) {
		SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
			      blob->id, spdk_thread_get_name(spdk_get_thread()));
		RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
		bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
		free(esnap_channel);
	}

	spdk_for_each_channel_continue(i, 0);
}

/*
 * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
 * used when closing an esnap clone blob and after decoupling from the parent.
 */
static void
blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, spdk_blob_op_with_handle_complete cb_fn,
				   void *cb_arg)
{
	struct blob_esnap_destroy_ctx	*ctx;

	if (!blob_is_esnap_clone(blob)) {
		cb_fn(cb_arg, blob, 0);
		return;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		cb_fn(cb_arg, blob, -ENOMEM);
		return;
	}
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;
	ctx->blob = blob;
	ctx->back_bs_dev = blob->back_bs_dev;

	SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
		      blob->id);

	spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
			      blob_esnap_destroy_channels_done);
}

/*
 * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
 * bs_channel is destroyed.
 */
static void
blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
{
	struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;

	assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));

	SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
		      spdk_thread_get_name(spdk_get_thread()));
	RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
			esnap_channel_tmp) {
		SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
			      ": destroying one channel in thread %s\n",
			      esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
		RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
		spdk_put_io_channel(esnap_channel->channel);
		free(esnap_channel);
	}
	SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
		      spdk_thread_get_name(spdk_get_thread()));
}

SPDK_LOG_REGISTER_COMPONENT(blob)
SPDK_LOG_REGISTER_COMPONENT(blob_esnap)
+4 −0
Original line number Diff line number Diff line
@@ -205,6 +205,8 @@ struct spdk_bs_channel {

	TAILQ_HEAD(, spdk_bs_request_set) need_cluster_alloc;
	TAILQ_HEAD(, spdk_bs_request_set) queued_io;

	RB_HEAD(blob_esnap_channel_tree, blob_esnap_channel) esnap_channels;
};

/** operation type */
@@ -420,6 +422,8 @@ SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_super_block) == 0x1000, "Invalid super

struct spdk_bs_dev *bs_create_zeroes_dev(void);
struct spdk_bs_dev *bs_create_blob_bs_dev(struct spdk_blob *blob);
struct spdk_io_channel *blob_esnap_get_io_channel(struct spdk_io_channel *ch,
		struct spdk_blob *blob);

/* Unit Conversions
 *
+33 −9
Original line number Diff line number Diff line
@@ -72,9 +72,9 @@ bs_sequence_completion(struct spdk_io_channel *channel, void *cb_arg, int bserrn
	set->u.sequence.cb_fn((spdk_bs_sequence_t *)set, set->u.sequence.cb_arg, bserrno);
}

static spdk_bs_sequence_t *
bs_sequence_start(struct spdk_io_channel *_channel,
		  struct spdk_bs_cpl *cpl)
static inline spdk_bs_sequence_t *
bs_sequence_start(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl,
		  struct spdk_io_channel *back_channel)
{
	struct spdk_bs_channel		*channel;
	struct spdk_bs_request_set	*set;
@@ -90,7 +90,7 @@ bs_sequence_start(struct spdk_io_channel *_channel,
	set->cpl = *cpl;
	set->bserrno = 0;
	set->channel = channel;
	set->back_channel = _channel;
	set->back_channel = back_channel;

	set->cb_args.cb_fn = bs_sequence_completion;
	set->cb_args.cb_arg = set;
@@ -104,7 +104,7 @@ bs_sequence_start(struct spdk_io_channel *_channel,
spdk_bs_sequence_t *
bs_sequence_start_bs(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl)
{
	return bs_sequence_start(_channel, cpl);
	return bs_sequence_start(_channel, cpl, _channel);
}

/* Use when performing IO on a blob. */
@@ -112,7 +112,24 @@ spdk_bs_sequence_t *
bs_sequence_start_blob(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl,
		       struct spdk_blob *blob)
{
	return bs_sequence_start(_channel, cpl);
	struct spdk_io_channel	*esnap_ch = _channel;

	if (spdk_blob_is_esnap_clone(blob)) {
		esnap_ch = blob_esnap_get_io_channel(_channel, blob);
		if (esnap_ch == NULL) {
			/*
			 * The most likely reason we are here is because of some logic error
			 * elsewhere that caused channel allocations to fail. We could get here due
			 * to being out of memory as well. If we are out of memory, the process is
			 * this will be just one of many problems that this process will be having.
			 * Killing it off debug builds now due to logic errors is the right thing to
			 * do and killing it off due to ENOMEM is no big loss.
			 */
			assert(false);
			return NULL;
		}
	}
	return bs_sequence_start(_channel, cpl, esnap_ch);
}

void
@@ -308,11 +325,18 @@ bs_batch_completion(struct spdk_io_channel *_channel,
}

spdk_bs_batch_t *
bs_batch_open(struct spdk_io_channel *_channel,
	      struct spdk_bs_cpl *cpl)
bs_batch_open(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl, struct spdk_blob *blob)
{
	struct spdk_bs_channel		*channel;
	struct spdk_bs_request_set	*set;
	struct spdk_io_channel		*back_channel = _channel;

	if (spdk_blob_is_esnap_clone(blob)) {
		back_channel = blob_esnap_get_io_channel(_channel, blob);
		if (back_channel == NULL) {
			return NULL;
		}
	}

	channel = spdk_io_channel_get_ctx(_channel);
	assert(channel != NULL);
@@ -325,7 +349,7 @@ bs_batch_open(struct spdk_io_channel *_channel,
	set->cpl = *cpl;
	set->bserrno = 0;
	set->channel = channel;
	set->back_channel = _channel;
	set->back_channel = back_channel;

	set->u.batch.cb_fn = NULL;
	set->u.batch.cb_arg = NULL;
+6 −3
Original line number Diff line number Diff line
@@ -91,8 +91,8 @@ struct spdk_bs_request_set {
	 */
	struct spdk_bs_channel		*channel;
	/*
	 * The channel used by the blobstore to perform IO on back_bs_dev.
	 * For now, back_channel == spdk_io_channel_get_ctx(set->channel).
	 * The channel used by the blobstore to perform IO on back_bs_dev. Unless the blob
	 * is an esnap clone, back_channel == spdk_io_channel_get_ctx(set->channel).
	 */
	struct spdk_io_channel		*back_channel;

@@ -135,6 +135,9 @@ spdk_bs_sequence_t *bs_sequence_start_bs(struct spdk_io_channel *channel,
spdk_bs_sequence_t *bs_sequence_start_blob(struct spdk_io_channel *channel,
		struct spdk_bs_cpl *cpl, struct spdk_blob *blob);

spdk_bs_sequence_t *bs_sequence_start_esnap(struct spdk_io_channel *channel,
		struct spdk_bs_cpl *cpl, struct spdk_blob *blob);

void bs_sequence_read_bs_dev(spdk_bs_sequence_t *seq, struct spdk_bs_dev *bs_dev,
			     void *payload, uint64_t lba, uint32_t lba_count,
			     spdk_bs_sequence_cpl cb_fn, void *cb_arg);
@@ -172,7 +175,7 @@ void bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno);
void bs_user_op_sequence_finish(void *cb_arg, int bserrno);

spdk_bs_batch_t *bs_batch_open(struct spdk_io_channel *channel,
			       struct spdk_bs_cpl *cpl);
			       struct spdk_bs_cpl *cpl, struct spdk_blob *blob);

void bs_batch_read_bs_dev(spdk_bs_batch_t *batch, struct spdk_bs_dev *bs_dev,
			  void *payload, uint64_t lba, uint32_t lba_count);
Loading