Commit 1eca87c3 authored by Alexey Marchuk's avatar Alexey Marchuk Committed by Tomasz Zawadzki
Browse files

blobstore: Preallocate md_page for new cluster



When a new cluster is added to a thin provisioned blob,
md_page is allocated to update extents in the base dev.
This memory allocation reduces performance; it can
take 250usec - 1 msec on ARM platform.

Since we may have only 1 outstanding cluster
allocation per io_channel, we can preallocate md_page
on each channel and remove dynamic memory allocation.

With this change blob_write_extent_page() expects
that md_page is given by the caller. Since this function
is also used during snapshot deletion, this patch also
updates this process. Now we allocate a single page
and reuse it for each extent in the snapshot.

Signed-off-by: default avatarAlexey Marchuk <alexeymar@mellanox.com>
Change-Id: I815a4c8c69bd38d8eff4f45c088e5d05215b9e57
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12129


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarTomasz Zawadzki <tomasz.zawadzki@intel.com>
parent 3b06c975
Loading
Loading
Loading
Loading
+40 −20
Original line number Diff line number Diff line
@@ -56,7 +56,8 @@ static int bs_register_md_thread(struct spdk_blob_store *bs);
static int bs_unregister_md_thread(struct spdk_blob_store *bs);
static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
		uint64_t cluster, uint32_t extent, spdk_blob_op_complete cb_fn, void *cb_arg);
		uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
		spdk_blob_op_complete cb_fn, void *cb_arg);

static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
			  uint16_t value_len, bool internal);
@@ -65,7 +66,7 @@ static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);

static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
				   spdk_blob_op_complete cb_fn, void *cb_arg);
				   struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);

static int
blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
@@ -2330,6 +2331,7 @@ struct spdk_blob_copy_cluster_ctx {
	uint64_t new_cluster;
	uint32_t new_extent_page;
	spdk_bs_sequence_t *seq;
	struct spdk_blob_md_page *new_cluster_page;
};

static void
@@ -2395,7 +2397,7 @@ blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
	cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);

	blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
					 ctx->new_extent_page, blob_insert_cluster_cpl, ctx);
					 ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
}

static void
@@ -2455,6 +2457,8 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob,

	ctx->blob = blob;
	ctx->page = cluster_start_page;
	ctx->new_cluster_page = ch->new_cluster_page;
	memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);

	if (blob->parent_id != SPDK_BLOBID_INVALID) {
		ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
@@ -2505,7 +2509,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob,
					blob_write_copy, ctx);
	} else {
		blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
						 ctx->new_extent_page, blob_insert_cluster_cpl, ctx);
						 ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
	}
}

@@ -3135,6 +3139,15 @@ bs_channel_create(void *io_device, void *ctx_buf)
		return -1;
	}

	channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
				    SPDK_MALLOC_DMA);
	if (!channel->new_cluster_page) {
		SPDK_ERRLOG("Failed to allocate new cluster page\n");
		free(channel->req_mem);
		channel->dev->destroy_channel(channel->dev, channel->dev_channel);
		return -1;
	}

	TAILQ_INIT(&channel->need_cluster_alloc);
	TAILQ_INIT(&channel->queued_io);

@@ -3160,6 +3173,7 @@ bs_channel_destroy(void *io_device, void *ctx_buf)
	}

	free(channel->req_mem);
	spdk_free(channel->new_cluster_page);
	channel->dev->destroy_channel(channel->dev, channel->dev_channel);
}

@@ -6635,6 +6649,7 @@ bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
struct delete_snapshot_ctx {
	struct spdk_blob_list *parent_snapshot_entry;
	struct spdk_blob *snapshot;
	struct spdk_blob_md_page *page;
	bool snapshot_md_ro;
	struct spdk_blob *clone;
	bool clone_md_ro;
@@ -6660,6 +6675,7 @@ delete_blob_cleanup_finish(void *cb_arg, int bserrno)
	}

	ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
	spdk_free(ctx->page);
	free(ctx);
}

@@ -6859,8 +6875,9 @@ delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
		/* Clone and snapshot both contain partially filled matching extent pages.
		 * Update the clone extent page in place with cluster map containing the mix of both. */
		ctx->next_extent_page = i + 1;
		memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);

		blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP,
		blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
				       delete_snapshot_update_extent_pages, ctx);
		return;
	}
@@ -7105,6 +7122,12 @@ bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
	RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);

	if (update_clone) {
		ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
		if (!ctx->page) {
			ctx->bserrno = -ENOMEM;
			spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
			return;
		}
		/* This blob is a snapshot with active clone - update clone first */
		update_clone_on_snapshot_deletion(blob, ctx);
	} else {
@@ -7338,6 +7361,7 @@ struct spdk_blob_insert_cluster_ctx {
	uint32_t		cluster_num;	/* cluster index in blob */
	uint32_t		cluster;	/* cluster on disk */
	uint32_t		extent_page;	/* extent page on disk */
	struct spdk_blob_md_page *page; /* preallocated extent page */
	int			rc;
	spdk_blob_op_complete	cb_fn;
	void			*cb_arg;
@@ -7376,21 +7400,15 @@ blob_insert_new_ep_cb(void *arg, int bserrno)
static void
blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
	struct spdk_blob_md_page        *page = cb_arg;

	bs_sequence_finish(seq, bserrno);
	spdk_free(page);
}

static void
blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
		       spdk_blob_op_complete cb_fn, void *cb_arg)
		       struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
{
	spdk_bs_sequence_t		*seq;
	struct spdk_bs_cpl		cpl;
	struct spdk_blob_md_page	*page = NULL;
	uint32_t			page_count = 0;
	int				rc;

	cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
	cpl.u.blob_basic.cb_fn = cb_fn;
@@ -7401,11 +7419,11 @@ blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster
		cb_fn(cb_arg, -ENOMEM);
		return;
	}
	rc = blob_serialize_add_page(blob, &page, &page_count, &page);
	if (rc < 0) {
		bs_sequence_finish(seq, rc);
		return;
	}

	assert(page);
	page->next = SPDK_INVALID_MD_PAGE;
	page->id = blob->id;
	page->sequence_num = 0;

	blob_serialize_extent_page(blob, cluster_num, page);

@@ -7443,7 +7461,7 @@ blob_insert_cluster_msg(void *arg)
		 * It was already claimed in the used_md_pages map and placed in ctx. */
		assert(ctx->extent_page != 0);
		assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
		blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num,
		blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
				       blob_insert_new_ep_cb, ctx);
	} else {
		/* It is possible for original thread to allocate extent page for
@@ -7456,14 +7474,15 @@ blob_insert_cluster_msg(void *arg)
		}
		/* Extent page already allocated.
		 * Every cluster allocation, requires just an update of single extent page. */
		blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num,
		blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
				       blob_insert_cluster_msg_cb, ctx);
	}
}

static void
blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
				 uint64_t cluster, uint32_t extent_page, spdk_blob_op_complete cb_fn, void *cb_arg)
				 uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
				 spdk_blob_op_complete cb_fn, void *cb_arg)
{
	struct spdk_blob_insert_cluster_ctx *ctx;

@@ -7478,6 +7497,7 @@ blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
	ctx->cluster_num = cluster_num;
	ctx->cluster = cluster;
	ctx->extent_page = extent_page;
	ctx->page = page;
	ctx->cb_fn = cb_fn;
	ctx->cb_arg = cb_arg;

+4 −0
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *   Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
@@ -223,6 +224,9 @@ struct spdk_bs_channel {
	struct spdk_bs_dev		*dev;
	struct spdk_io_channel		*dev_channel;

	/* This page is only used during insert of a new cluster. */
	struct spdk_blob_md_page	*new_cluster_page;

	TAILQ_HEAD(, spdk_bs_request_set) need_cluster_alloc;
	TAILQ_HEAD(, spdk_bs_request_set) queued_io;
};
+2 −1
Original line number Diff line number Diff line
@@ -3827,6 +3827,7 @@ blob_insert_cluster_msg_test(void)
	struct spdk_blob_store *bs = g_bs;
	struct spdk_blob *blob;
	struct spdk_blob_opts opts;
	struct spdk_blob_md_page page = {};
	spdk_blob_id blobid;
	uint64_t free_clusters;
	uint64_t new_cluster = 0;
@@ -3854,7 +3855,7 @@ blob_insert_cluster_msg_test(void)
	bs_allocate_cluster(blob, cluster_num, &new_cluster, &extent_page, false);
	CU_ASSERT(blob->active.clusters[cluster_num] == 0);

	blob_insert_cluster_on_md_thread(blob, cluster_num, new_cluster, extent_page,
	blob_insert_cluster_on_md_thread(blob, cluster_num, new_cluster, extent_page, &page,
					 blob_op_complete, NULL);
	poll_threads();