Commit f01146ae authored by Jim Harris's avatar Jim Harris Committed by Tomasz Zawadzki
Browse files

blob: use uint64_t for unmap and write_zeroes lba count



Previous patches (5363eb3c) tried to work around the
32-bit unmap and write_zeroes LBA counts by breaking
up larger operations into smaller chunks of max size
UINT32_MAX lba chunks.

But some SSDs may just ignore unmap operations that
are not aligned to full physical block boundaries -
and a UINT32_MAX lba unmap on a 512B logical /
4KiB physical SSD would not be aligned.  If the SSD
decided to ignore the unmap/deallocate (which it is
allowed to do according to NVMe spec), we could end
up with not unmapping *any* blocks.  Probably SSDs
should always be trying hard to unmap as many
blocks as possible, but let's not try to depend on
that in blobstore.

So one option would be to break them into chunks
close to UINT32_MAX which are still aligned to
4KiB boundaries.  But the better fix is to just
change the unmap and write_zeroes APIs to take
64-bit arguments, and then we can avoid the
chunking altogether.

Fixes issue #2190.

Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: I23998e493a764d466927c3520c7a8c7f943000a6
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9737


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarXiaodong Liu <xiaodong.liu@intel.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: default avatarDong Yi <dongx.yi@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarTomasz Zawadzki <tomasz.zawadzki@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
parent 6dba57eb
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -184,11 +184,11 @@ struct spdk_bs_dev {
		      struct spdk_bs_dev_cb_args *cb_args);

	void (*write_zeroes)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
			     uint64_t lba, uint32_t lba_count,
			     uint64_t lba, uint64_t lba_count,
			     struct spdk_bs_dev_cb_args *cb_args);

	void (*unmap)(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		      uint64_t lba, uint32_t lba_count,
		      uint64_t lba, uint64_t lba_count,
		      struct spdk_bs_dev_cb_args *cb_args);

	struct spdk_bdev *(*get_base_bdev)(struct spdk_bs_dev *dev);
+1 −1
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

SO_VER := 5
SO_VER := 6
SO_MINOR := 0

C_SRCS = blobstore.c request.c zeroes.c blob_bs_dev.c
+2 −2
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ blob_bs_dev_writev(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,

static void
blob_bs_dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
			 uint64_t lba, uint32_t lba_count,
			 uint64_t lba, uint64_t lba_count,
			 struct spdk_bs_dev_cb_args *cb_args)
{
	cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM);
@@ -66,7 +66,7 @@ blob_bs_dev_write_zeroes(struct spdk_bs_dev *dev, struct spdk_io_channel *channe

static void
blob_bs_dev_unmap(struct spdk_bs_dev *dev, struct spdk_io_channel *channel,
		  uint64_t lba, uint32_t lba_count,
		  uint64_t lba, uint64_t lba_count,
		  struct spdk_bs_dev_cb_args *cb_args)
{
	cb_args->cb_fn(cb_args->channel, cb_args->cb_arg, -EPERM);
+21 −24
Original line number Diff line number Diff line
@@ -1607,7 +1607,7 @@ struct spdk_blob_persist_ctx {

static void
bs_batch_clear_dev(struct spdk_blob_persist_ctx *ctx, spdk_bs_batch_t *batch, uint64_t lba,
		   uint32_t lba_count)
		   uint64_t lba_count)
{
	switch (ctx->blob->clear_method) {
	case BLOB_CLEAR_WITH_DEFAULT:
@@ -1715,7 +1715,7 @@ blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx
	struct spdk_blob_store		*bs = blob->bs;
	size_t				i;
	uint64_t                        lba;
	uint32_t                        lba_count;
	uint64_t                        lba_count;
	spdk_bs_batch_t                 *batch;

	batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
@@ -1787,7 +1787,7 @@ blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ct
	spdk_bs_batch_t			*batch;
	size_t				i;
	uint64_t			lba;
	uint32_t			lba_count;
	uint64_t			lba_count;

	/* Clusters don't move around in blobs. The list shrinks or grows
	 * at the end, but no changes ever occur in the middle of the list.
@@ -1800,7 +1800,7 @@ blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ct
	lba_count = 0;
	for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
		uint64_t next_lba = blob->active.clusters[i];
		uint32_t next_lba_count = bs_cluster_to_lba(bs, 1);
		uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);

		if (next_lba > 0 && (lba + lba_count) == next_lba) {
			/* This cluster is contiguous with the previous one. */
@@ -1873,7 +1873,7 @@ blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
	struct spdk_blob		*blob = ctx->blob;
	struct spdk_blob_store		*bs = blob->bs;
	uint64_t			lba;
	uint32_t			lba_count;
	uint64_t			lba_count;
	spdk_bs_batch_t			*batch;
	size_t				i;

@@ -2499,7 +2499,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob,

static inline bool
blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
				 uint64_t *lba,	uint32_t *lba_count)
				 uint64_t *lba,	uint64_t *lba_count)
{
	*lba_count = length;

@@ -2624,7 +2624,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
{
	struct spdk_bs_cpl cpl;
	uint64_t lba;
	uint32_t lba_count;
	uint64_t lba_count;
	bool is_allocated;

	assert(blob != NULL);
@@ -2894,7 +2894,7 @@ blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_chan
	 *  when the batch was completed, to allow for freeing the memory for the iov arrays.
	 */
	if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
		uint32_t lba_count;
		uint64_t lba_count;
		uint64_t lba;
		bool is_allocated;

@@ -4952,8 +4952,7 @@ spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
	bs_batch_write_zeroes_dev(batch, 0, num_md_lba);

	lba = num_md_lba;
	while (lba < ctx->bs->dev->blockcnt) {
		lba_count = spdk_min(UINT32_MAX, ctx->bs->dev->blockcnt - lba);
	lba_count = ctx->bs->dev->blockcnt - lba;
	switch (opts.clear_method) {
	case BS_CLEAR_WITH_UNMAP:
		/* Trim data clusters */
@@ -4967,8 +4966,6 @@ spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
	default:
		break;
	}
		lba += lba_count;
	}

	bs_batch_close(batch);
}
+6 −6
Original line number Diff line number Diff line
@@ -231,13 +231,13 @@ bs_sequence_writev_dev(spdk_bs_sequence_t *seq, struct iovec *iov, int iovcnt,

void
bs_sequence_write_zeroes_dev(spdk_bs_sequence_t *seq,
			     uint64_t lba, uint32_t lba_count,
			     uint64_t lba, uint64_t lba_count,
			     spdk_bs_sequence_cpl cb_fn, void *cb_arg)
{
	struct spdk_bs_request_set      *set = (struct spdk_bs_request_set *)seq;
	struct spdk_bs_channel       *channel = set->channel;

	SPDK_DEBUGLOG(blob_rw, "writing zeroes to %" PRIu32 " blocks at LBA %" PRIu64 "\n",
	SPDK_DEBUGLOG(blob_rw, "writing zeroes to %" PRIu64 " blocks at LBA %" PRIu64 "\n",
		      lba_count, lba);

	set->u.sequence.cb_fn = cb_fn;
@@ -360,12 +360,12 @@ bs_batch_write_dev(spdk_bs_batch_t *batch, void *payload,

void
bs_batch_unmap_dev(spdk_bs_batch_t *batch,
		   uint64_t lba, uint32_t lba_count)
		   uint64_t lba, uint64_t lba_count)
{
	struct spdk_bs_request_set	*set = (struct spdk_bs_request_set *)batch;
	struct spdk_bs_channel		*channel = set->channel;

	SPDK_DEBUGLOG(blob_rw, "Unmapping %" PRIu32 " blocks at LBA %" PRIu64 "\n", lba_count,
	SPDK_DEBUGLOG(blob_rw, "Unmapping %" PRIu64 " blocks at LBA %" PRIu64 "\n", lba_count,
		      lba);

	set->u.batch.outstanding_ops++;
@@ -375,12 +375,12 @@ bs_batch_unmap_dev(spdk_bs_batch_t *batch,

void
bs_batch_write_zeroes_dev(spdk_bs_batch_t *batch,
			  uint64_t lba, uint32_t lba_count)
			  uint64_t lba, uint64_t lba_count)
{
	struct spdk_bs_request_set	*set = (struct spdk_bs_request_set *)batch;
	struct spdk_bs_channel		*channel = set->channel;

	SPDK_DEBUGLOG(blob_rw, "Zeroing %" PRIu32 " blocks at LBA %" PRIu64 "\n", lba_count, lba);
	SPDK_DEBUGLOG(blob_rw, "Zeroing %" PRIu64 " blocks at LBA %" PRIu64 "\n", lba_count, lba);

	set->u.batch.outstanding_ops++;
	channel->dev->write_zeroes(channel->dev, channel->dev_channel, lba, lba_count,
Loading