Commit 4132ac52 authored by Maciej Szwed's avatar Maciej Szwed Committed by Jim Harris
Browse files

blob: support for thin provisioned reads and writes



Signed-off-by: default avatarMaciej Szwed <maciej.szwed@intel.com>
Change-Id: Ibc9609ad36188006e9454e5c799bccd8a92d7991
Reviewed-on: https://review.gerrithub.io/391422


Tested-by: default avatarSPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarDaniel Verkamp <daniel.verkamp@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
parent 8970f868
Loading
Loading
Loading
Loading
+313 −36
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ _spdk_blob_insert_cluster(struct spdk_blob_data *blob, uint32_t cluster_num, uin

static int
_spdk_bs_allocate_cluster(struct spdk_blob_data *blob, uint32_t cluster_num,
			  uint64_t *lowest_free_cluster)
			  uint64_t *lowest_free_cluster, bool update_map)
{
	*lowest_free_cluster = spdk_bit_array_find_first_clear(blob->bs->used_clusters,
			       *lowest_free_cluster);
@@ -100,7 +100,10 @@ _spdk_bs_allocate_cluster(struct spdk_blob_data *blob, uint32_t cluster_num,

	SPDK_DEBUGLOG(SPDK_LOG_BLOB, "Claiming cluster %lu for blob %lu\n", *lowest_free_cluster, blob->id);
	_spdk_bs_claim_cluster(blob->bs, *lowest_free_cluster);

	if (update_map) {
		_spdk_blob_insert_cluster(blob, cluster_num, *lowest_free_cluster);
	}

	return 0;
}
@@ -741,6 +744,10 @@ _spdk_blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
		return;
	}

	if (spdk_blob_is_thin_provisioned(blob) == true) {
		blob->back_bs_dev = spdk_bs_create_zeroes_dev();
	}

	_spdk_blob_mark_clean(blob);

	ctx->cb_fn(seq, ctx->cb_arg, rc);
@@ -1103,7 +1110,7 @@ _spdk_resize_blob(struct spdk_blob_data *blob, uint64_t sz)
	if (spdk_blob_is_thin_provisioned(blob) == false) {
		lfc = 0;
		for (i = num_clusters; i < sz; i++) {
			_spdk_bs_allocate_cluster(blob, i, &lfc);
			_spdk_bs_allocate_cluster(blob, i, &lfc, true);
			lfc++;
		}
	}
@@ -1210,6 +1217,190 @@ _spdk_blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob_data *blob,
	_spdk_blob_persist_write_page_chain(seq, ctx, 0);
}

/* Context carried through the allocate-and-copy-cluster sequence used to
 * service a write to an unallocated cluster of a thin-provisioned blob. */
struct spdk_blob_copy_cluster_ctx {
	struct spdk_blob_data *blob;	/* Blob the cluster is being allocated for */
	uint8_t *buf;			/* Cluster-sized DMA bounce buffer for the copy */
	uint64_t page;			/* First page of the cluster being allocated */
	uint64_t new_cluster;		/* Index of the newly claimed cluster */
	spdk_bs_sequence_t *seq;	/* Sequence driving read -> write -> md insert */
};

/* Final completion for an allocate-and-copy-cluster sequence.
 * Drains every user op that was queued on the channel while this cluster
 * allocation was outstanding: on success each op is re-executed (and will
 * now see the allocated cluster), on failure each op is aborted. The copy
 * context and its bounce buffer are then freed. */
static void
_spdk_blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
{
	struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
	struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
	TAILQ_HEAD(, spdk_bs_request_set) requests;
	spdk_bs_user_op_t *op;

	/* Move the pending ops onto a local list first - re-executing an op may
	 * queue new entries back onto need_cluster_alloc. */
	TAILQ_INIT(&requests);
	TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);

	while (!TAILQ_EMPTY(&requests)) {
		op = TAILQ_FIRST(&requests);
		TAILQ_REMOVE(&requests, op, link);
		if (bserrno == 0) {
			spdk_bs_user_op_execute(op);
		} else {
			spdk_bs_user_op_abort(op);
		}
	}

	spdk_dma_free(ctx->buf);
	free(ctx);
}

/* Completion of inserting the new cluster into the blob's cluster map on the
 * metadata thread. On any failure the cluster claimed earlier is released
 * back to the blobstore; an -EEXIST failure (another thread raced us and
 * mapped a cluster there first) is downgraded to success, since the write
 * can proceed against the winner's cluster. */
static void
_spdk_blob_insert_cluster_cpl(void *cb_arg, int bserrno)
{
	struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;

	if (bserrno) {
		uint32_t cluster_number;

		if (bserrno == -EEXIST) {
			/* The metadata insert failed because another thread
			 * allocated the cluster first. Free our cluster
			 * but continue without error. */
			bserrno = 0;
		}

		/* Return the unused cluster to the free pool. */
		cluster_number = _spdk_bs_page_to_cluster(ctx->blob->bs, ctx->page);
		_spdk_bs_release_cluster(ctx->blob->bs, cluster_number);
	}

	/* Finishing the sequence fires _spdk_blob_allocate_and_copy_cluster_cpl. */
	spdk_bs_sequence_finish(ctx->seq, bserrno);
}

/* Called when the write of the copied cluster data to the newly allocated
 * cluster completes. On success, hands off to the metadata thread to insert
 * the cluster into the blob's cluster map. */
static void
_spdk_blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
	struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
	uint32_t cluster_number;

	if (bserrno) {
		/* The write failed, so jump to the final completion handler */
		spdk_bs_sequence_finish(seq, bserrno);
		return;
	}

	cluster_number = _spdk_bs_page_to_cluster(ctx->blob->bs, ctx->page);

	_spdk_blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
					       _spdk_blob_insert_cluster_cpl, ctx);
}

/* Despite the name, this is the completion of the backing-device *read*:
 * ctx->buf now holds the cluster's old contents, which are written out to
 * the newly allocated cluster on the blobstore device. */
static void
_spdk_blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
{
	struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;

	if (bserrno != 0) {
		/* The read failed, so jump to the final completion handler */
		spdk_bs_sequence_finish(seq, bserrno);
		return;
	}

	/* Write whole cluster */
	spdk_bs_sequence_write_dev(seq, ctx->buf,
				   _spdk_bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
				   _spdk_bs_cluster_to_lba(ctx->blob->bs, 1),
				   _spdk_blob_write_copy_cpl, ctx);
}

/* Service a write to an unallocated cluster of a thin-provisioned blob:
 * claim a free cluster (without updating the cluster map yet), copy the
 * cluster's current contents from the backing device into it, then insert
 * it into the blob's metadata on the md thread. The user op is queued on
 * the channel and re-executed once the whole sequence completes.
 *
 * NOTE(review): only one cluster allocation is in flight per channel; any
 * op arriving while one is outstanding is queued behind it, even if it
 * targets a different cluster - confirm this serialization is intended. */
static void
_spdk_bs_allocate_and_copy_cluster(struct spdk_blob_data *blob,
				   struct spdk_io_channel *_ch,
				   uint64_t offset, spdk_bs_user_op_t *op)
{
	struct spdk_bs_cpl cpl;
	struct spdk_bs_channel *ch;
	struct spdk_blob_copy_cluster_ctx *ctx;
	uint32_t cluster_start_page;
	uint32_t cluster_number;
	int rc;

	ch = spdk_io_channel_get_ctx(_ch);

	if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
		/* There are already operations pending. Queue this user op
		 * and return because it will be re-executed when the outstanding
		 * cluster allocation completes. */
		TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
		return;
	}

	/* Round the page offset down to the first page in the cluster */
	cluster_start_page = _spdk_bs_page_to_cluster_start(blob, offset);

	/* Calculate which index in the metadata cluster array the corresponding
	 * cluster is supposed to be at. */
	cluster_number = _spdk_bs_page_to_cluster(blob->bs, cluster_start_page);

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		spdk_bs_user_op_abort(op);
		return;
	}

	/* The copy buffer below is sized and aligned in backing-device blocks. */
	assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);

	ctx->blob = blob;
	ctx->page = cluster_start_page;

	ctx->buf = spdk_dma_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen, NULL);
	if (!ctx->buf) {
		free(ctx);
		spdk_bs_user_op_abort(op);
		return;
	}

	/* update_map == false: the cluster map is only updated on the md thread
	 * after the copy succeeds (see _spdk_blob_write_copy_cpl). */
	rc = _spdk_bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, false);
	if (rc != 0) {
		spdk_dma_free(ctx->buf);
		free(ctx);
		spdk_bs_user_op_abort(op);
		return;
	}

	cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
	cpl.u.blob_basic.cb_fn = _spdk_blob_allocate_and_copy_cluster_cpl;
	cpl.u.blob_basic.cb_arg = ctx;

	ctx->seq = spdk_bs_sequence_start(_ch, &cpl);
	if (!ctx->seq) {
		/* Unwind the cluster claim made above. */
		_spdk_bs_release_cluster(blob->bs, ctx->new_cluster);
		spdk_dma_free(ctx->buf);
		free(ctx);
		spdk_bs_user_op_abort(op);
		return;
	}

	/* Queue the user op to block other incoming operations */
	TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);

	/* Read cluster from backing device */
	spdk_bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
				     _spdk_bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
				     _spdk_bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
				     _spdk_blob_write_copy, ctx);
}

/* Translate a blob page offset and page-length into an LBA and LBA count.
 * If the page lives in an allocated cluster, the LBA is on the blobstore
 * device. If not (thin-provisioned hole), the LBA is on the backing device
 * and the count is converted into backing-device block units. */
static void
_spdk_blob_calculate_lba_and_lba_count(struct spdk_blob_data *blob, uint64_t page, uint64_t length,
				       uint64_t *lba,	uint32_t *lba_count)
{
	*lba_count = _spdk_bs_page_to_lba(blob->bs, length);

	if (!_spdk_bs_page_is_allocated(blob, page)) {
		assert(blob->back_bs_dev != NULL);
		*lba = _spdk_bs_dev_page_to_lba(blob->back_bs_dev, page);
		/* Re-express the count in backing-device blocks. */
		*lba_count = _spdk_bs_blob_lba_to_back_dev_lba(blob, *lba_count);
	} else {
		*lba = _spdk_bs_blob_page_to_lba(blob, page);
	}
}

static void
_spdk_blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *_blob,
				   void *payload, uint64_t offset, uint64_t length,
@@ -1272,7 +1463,6 @@ _spdk_blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blo
				    spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
{
	struct spdk_blob_data *blob = __blob_to_data(_blob);
	spdk_bs_batch_t *batch;
	struct spdk_bs_cpl cpl;
	uint64_t lba;
	uint32_t lba_count;
@@ -1283,35 +1473,84 @@ _spdk_blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blo
	cpl.u.blob_basic.cb_fn = cb_fn;
	cpl.u.blob_basic.cb_arg = cb_arg;

	_spdk_blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);

	switch (op_type) {
	case SPDK_BLOB_READ: {
		spdk_bs_batch_t *batch;

		batch = spdk_bs_batch_open(_ch, &cpl);
		if (!batch) {
			cb_fn(cb_arg, -ENOMEM);
			return;
		}

	lba = _spdk_bs_blob_page_to_lba(blob, offset);
	lba_count = _spdk_bs_page_to_lba(blob->bs, length);

	switch (op_type) {
	case SPDK_BLOB_READ:
		if (_spdk_bs_page_is_allocated(blob, offset)) {
			/* Read from the blob */
			spdk_bs_batch_read_dev(batch, payload, lba, lba_count);
		} else {
			/* Read from the backing block device */
			spdk_bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
		}

		spdk_bs_batch_close(batch);
		break;
	}
	case SPDK_BLOB_WRITE:
	case SPDK_BLOB_WRITE_ZEROES: {
		if (_spdk_bs_page_is_allocated(blob, offset)) {
			/* Write to the blob */
			spdk_bs_batch_t *batch;

			batch = spdk_bs_batch_open(_ch, &cpl);
			if (!batch) {
				cb_fn(cb_arg, -ENOMEM);
				return;
			}

			if (op_type == SPDK_BLOB_WRITE) {
				spdk_bs_batch_write_dev(batch, payload, lba, lba_count);
			} else {
				spdk_bs_batch_write_zeroes_dev(batch, lba, lba_count);
			}

			spdk_bs_batch_close(batch);
		} else {
			/* Queue this operation and allocate the cluster */
			spdk_bs_user_op_t *op;

			op = spdk_bs_user_op_alloc(_ch, &cpl, op_type, _blob, payload, 0, offset, length);
			if (!op) {
				cb_fn(cb_arg, -ENOMEM);
				return;
			}

			_spdk_bs_allocate_and_copy_cluster(blob, _ch, offset, op);
		}
		break;
	case SPDK_BLOB_UNMAP:
	}
	case SPDK_BLOB_UNMAP: {
		spdk_bs_batch_t *batch;

		batch = spdk_bs_batch_open(_ch, &cpl);
		if (!batch) {
			cb_fn(cb_arg, -ENOMEM);
			return;
		}

		if (_spdk_bs_page_is_allocated(blob, offset)) {
			spdk_bs_batch_unmap_dev(batch, lba, lba_count);
		}

		spdk_bs_batch_close(batch);
		break;
	case SPDK_BLOB_WRITE_ZEROES:
		spdk_bs_batch_write_zeroes_dev(batch, lba, lba_count);
		break;
	}
	case SPDK_BLOB_READV:
	case SPDK_BLOB_WRITEV:
		SPDK_ERRLOG("readv/write not valid for %s\n", __func__);
		SPDK_ERRLOG("readv/write not valid\n");
		cb_fn(cb_arg, -EINVAL);
		break;
	}

	spdk_bs_batch_close(batch);
}

static void
@@ -1439,7 +1678,6 @@ _spdk_blob_request_submit_rw_iov(struct spdk_blob *_blob, struct spdk_io_channel
				 spdk_blob_op_complete cb_fn, void *cb_arg, bool read)
{
	struct spdk_blob_data	*blob = __blob_to_data(_blob);
	spdk_bs_sequence_t		*seq;
	struct spdk_bs_cpl	cpl;

	assert(blob != NULL);
@@ -1474,23 +1712,53 @@ _spdk_blob_request_submit_rw_iov(struct spdk_blob *_blob, struct spdk_io_channel
	 *  when the batch was completed, to allow for freeing the memory for the iov arrays.
	 */
	if (spdk_likely(length <= _spdk_bs_num_pages_to_cluster_boundary(blob, offset))) {
		uint64_t lba = _spdk_bs_blob_page_to_lba(blob, offset);
		uint32_t lba_count = _spdk_bs_page_to_lba(blob->bs, length);
		uint32_t lba_count;
		uint64_t lba;

		_spdk_blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);

		cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
		cpl.u.blob_basic.cb_fn = cb_fn;
		cpl.u.blob_basic.cb_arg = cb_arg;

		if (read) {
			spdk_bs_sequence_t *seq;

			seq = spdk_bs_sequence_start(_channel, &cpl);
			if (!seq) {
				cb_fn(cb_arg, -ENOMEM);
				return;
			}

		if (read) {
			if (_spdk_bs_page_is_allocated(blob, offset)) {
				spdk_bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, _spdk_rw_iov_done, NULL);
			} else {
				spdk_bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
							      _spdk_rw_iov_done, NULL);
			}
		} else {
			if (_spdk_bs_page_is_allocated(blob, offset)) {
				spdk_bs_sequence_t *seq;

				seq = spdk_bs_sequence_start(_channel, &cpl);
				if (!seq) {
					cb_fn(cb_arg, -ENOMEM);
					return;
				}

				spdk_bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, _spdk_rw_iov_done, NULL);
			} else {
				/* Queue this operation and allocate the cluster */
				spdk_bs_user_op_t *op;

				op = spdk_bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, _blob, iov, iovcnt, offset, length);
				if (!op) {
					cb_fn(cb_arg, -ENOMEM);
					return;
				}

				_spdk_bs_allocate_and_copy_cluster(blob, _channel, offset, op);
			}
		}
	} else {
		struct rw_iov_ctx *ctx;
@@ -1562,6 +1830,8 @@ _spdk_bs_channel_create(void *io_device, void *ctx_buf)
		return -1;
	}

	TAILQ_INIT(&channel->need_cluster_alloc);

	return 0;
}

@@ -1569,6 +1839,13 @@ static void
_spdk_bs_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bs_channel *channel = ctx_buf;
	spdk_bs_user_op_t *op;

	while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
		op = TAILQ_FIRST(&channel->need_cluster_alloc);
		TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
		spdk_bs_user_op_abort(op);
	}

	free(channel->req_mem);
	channel->dev->destroy_channel(channel->dev, channel->dev_channel);
+74 −0
Original line number Diff line number Diff line
@@ -133,6 +133,8 @@ struct spdk_blob_data {
	uint64_t	data_ro_flags;
	uint64_t	md_ro_flags;

	struct spdk_bs_dev *back_bs_dev;

	/* TODO: The xattrs are mutable, but we don't want to be
	 * copying them unecessarily. Figure this out.
	 */
@@ -182,6 +184,8 @@ struct spdk_bs_channel {

	struct spdk_bs_dev		*dev;
	struct spdk_io_channel		*dev_channel;

	TAILQ_HEAD(, spdk_bs_request_set) need_cluster_alloc;
};

/** operation type */
@@ -352,6 +356,14 @@ _spdk_bs_byte_to_lba(struct spdk_blob_store *bs, uint64_t length)
	return length / bs->dev->blocklen;
}

/* Convert a byte length into an LBA count on the given bs_dev.
 * The length must be an exact multiple of the device block size. */
static inline uint64_t
_spdk_bs_dev_byte_to_lba(struct spdk_bs_dev *bs_dev, uint64_t length)
{
	uint64_t block_size = bs_dev->blocklen;

	assert(length % block_size == 0);
	return length / block_size;
}

static inline uint64_t
_spdk_bs_lba_to_byte(struct spdk_blob_store *bs, uint64_t lba)
{
@@ -364,6 +376,12 @@ _spdk_bs_page_to_lba(struct spdk_blob_store *bs, uint64_t page)
	return page * SPDK_BS_PAGE_SIZE / bs->dev->blocklen;
}

/* Convert a blobstore page offset into an LBA on the given bs_dev. */
static inline uint64_t
_spdk_bs_dev_page_to_lba(struct spdk_bs_dev *bs_dev, uint64_t page)
{
	uint64_t byte_offset = page * SPDK_BS_PAGE_SIZE;

	return byte_offset / bs_dev->blocklen;
}

static inline uint32_t
_spdk_bs_lba_to_page(struct spdk_blob_store *bs, uint64_t lba)
{
@@ -376,6 +394,18 @@ _spdk_bs_lba_to_page(struct spdk_blob_store *bs, uint64_t lba)
	return lba / lbas_per_page;
}

/* Convert an LBA on the given bs_dev into a blobstore page offset.
 * The LBA must fall on a page boundary. */
static inline uint64_t
_spdk_bs_dev_lba_to_page(struct spdk_bs_dev *bs_dev, uint64_t lba)
{
	uint64_t lbas_per_page = SPDK_BS_PAGE_SIZE / bs_dev->blocklen;

	assert(lba % lbas_per_page == 0);
	return lba / lbas_per_page;
}

static inline uint64_t
_spdk_bs_cluster_to_page(struct spdk_blob_store *bs, uint32_t cluster)
{
@@ -404,6 +434,18 @@ _spdk_bs_lba_to_cluster(struct spdk_blob_store *bs, uint64_t lba)
	return lba / (bs->cluster_sz / bs->dev->blocklen);
}

/* Re-express a blobstore-device LBA count/offset in backing-device blocks. */
static inline uint64_t
_spdk_bs_blob_lba_to_back_dev_lba(struct spdk_blob_data *blob, uint64_t lba)
{
	uint64_t byte_offset = lba * blob->bs->dev->blocklen;

	return byte_offset / blob->back_bs_dev->blocklen;
}

/* Re-express a backing-device LBA count/offset in blobstore-device blocks. */
static inline uint64_t
_spdk_bs_blob_lba_from_back_dev_lba(struct spdk_blob_data *blob, uint64_t lba)
{
	uint64_t byte_offset = lba * blob->back_bs_dev->blocklen;

	return byte_offset / blob->bs->dev->blocklen;
}

/* End basic conversions */

static inline uint32_t
@@ -454,4 +496,36 @@ _spdk_bs_num_pages_to_cluster_boundary(struct spdk_blob_data *blob, uint32_t pag
	return pages_per_cluster - (page % pages_per_cluster);
}

/* Given a page offset into a blob, return the page offset of the first page
 * of the cluster that contains it. */
static inline uint32_t
_spdk_bs_page_to_cluster_start(struct spdk_blob_data *blob, uint32_t page)
{
	uint32_t ppc = blob->bs->pages_per_cluster;

	return (page / ppc) * ppc;
}

/* Return true if the given page offset into a blob falls inside an
 * allocated cluster. A zero entry in the cluster map means the cluster is
 * unallocated, which only happens for thin-provisioned blobs. */
static inline bool
_spdk_bs_page_is_allocated(struct spdk_blob_data *blob, uint32_t page)
{
	uint32_t ppc = blob->bs->pages_per_cluster;
	uint64_t cluster_lba;

	assert(page < blob->active.num_clusters * ppc);

	cluster_lba = blob->active.clusters[page / ppc];
	if (cluster_lba != 0) {
		return true;
	}

	assert(spdk_blob_is_thin_provisioned(blob));
	return false;
}

#endif
+49 −0
Original line number Diff line number Diff line
@@ -256,6 +256,14 @@ spdk_bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno)
	spdk_bs_request_set_complete((struct spdk_bs_request_set *)seq);
}

void
spdk_bs_user_op_sequence_finish(void *cb_arg, int bserrno)
{
	spdk_bs_sequence_t *seq = cb_arg;

	spdk_bs_sequence_finish(seq, bserrno);
}

static void
spdk_bs_batch_completion(struct spdk_io_channel *_channel,
			 void *cb_arg, int bserrno)
@@ -438,6 +446,14 @@ spdk_bs_batch_write_zeroes_blob(spdk_bs_batch_t *batch, struct spdk_blob *blob,
				     spdk_bs_batch_blob_op_complete, set);
}

/* Record an error code on the batch; it is reported when the batch completes. */
void
spdk_bs_batch_set_errno(spdk_bs_batch_t *batch, int bserrno)
{
	((struct spdk_bs_request_set *)batch)->bserrno = bserrno;
}

void
spdk_bs_batch_close(spdk_bs_batch_t *batch)
{
@@ -470,6 +486,25 @@ spdk_bs_sequence_to_batch(spdk_bs_sequence_t *seq, spdk_bs_sequence_cpl cb_fn, v
	return set;
}

/* Convert an open batch into a sequence that the batch will wait on.
 * The batch gains one outstanding op, which is retired by
 * spdk_bs_sequence_to_batch_completion when the returned sequence finishes,
 * so closing the batch does not complete before the sequence does. */
spdk_bs_sequence_t *
spdk_bs_batch_to_sequence(spdk_bs_batch_t *batch)
{
	struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)batch;

	set->u.batch.outstanding_ops++;

	/* Route the sequence's completion back into the batch's accounting. */
	set->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
	set->cpl.u.blob_basic.cb_fn = spdk_bs_sequence_to_batch_completion;
	set->cpl.u.blob_basic.cb_arg = set;
	set->bserrno = 0;

	set->cb_args.cb_fn = spdk_bs_sequence_completion;
	set->cb_args.cb_arg = set;
	set->cb_args.channel = set->channel->dev_channel;

	return (spdk_bs_sequence_t *)set;
}

spdk_bs_user_op_t *
spdk_bs_user_op_alloc(struct spdk_io_channel *_channel, struct spdk_bs_cpl *cpl,
		      enum spdk_blob_op_type op_type, struct spdk_blob *blob,
@@ -555,4 +590,18 @@ spdk_bs_user_op_abort(spdk_bs_user_op_t *op)
	TAILQ_INSERT_TAIL(&set->channel->reqs, set, link);
}

void
spdk_bs_sequence_to_batch_completion(void *cb_arg, int bserrno)
{
	struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)cb_arg;

	set->u.batch.outstanding_ops--;

	if (set->u.batch.outstanding_ops == 0 && set->u.batch.batch_closed) {
		if (set->cb_args.cb_fn) {
			set->cb_args.cb_fn(set->cb_args.channel, set->cb_args.cb_arg, bserrno);
		}
	}
}

SPDK_LOG_REGISTER_COMPONENT("blob_rw", SPDK_LOG_BLOB_RW)
+8 −0
Original line number Diff line number Diff line
@@ -182,6 +182,8 @@ void spdk_bs_sequence_write_zeroes_dev(spdk_bs_sequence_t *seq,

void spdk_bs_sequence_finish(spdk_bs_sequence_t *seq, int bserrno);

void spdk_bs_user_op_sequence_finish(void *cb_arg, int bserrno);

spdk_bs_batch_t *spdk_bs_batch_open(struct spdk_io_channel *channel,
				    struct spdk_bs_cpl *cpl);

@@ -212,12 +214,16 @@ void spdk_bs_batch_unmap_blob(spdk_bs_batch_t *batch, struct spdk_blob *blob,
void spdk_bs_batch_write_zeroes_blob(spdk_bs_batch_t *batch, struct spdk_blob *blob,
				     uint64_t offset, uint64_t length);

void spdk_bs_batch_set_errno(spdk_bs_batch_t *batch, int bserrno);

void spdk_bs_batch_close(spdk_bs_batch_t *batch);

spdk_bs_batch_t *spdk_bs_sequence_to_batch(spdk_bs_sequence_t *seq,
		spdk_bs_sequence_cpl cb_fn,
		void *cb_arg);

spdk_bs_sequence_t *spdk_bs_batch_to_sequence(spdk_bs_batch_t *batch);

spdk_bs_user_op_t *spdk_bs_user_op_alloc(struct spdk_io_channel *channel, struct spdk_bs_cpl *cpl,
		enum spdk_blob_op_type op_type, struct spdk_blob *blob,
		void *payload, int iovcnt, uint64_t offset, uint64_t length);
@@ -226,4 +232,6 @@ void spdk_bs_user_op_execute(spdk_bs_user_op_t *op);

void spdk_bs_user_op_abort(spdk_bs_user_op_t *op);

void spdk_bs_sequence_to_batch_completion(void *cb_arg, int bserrno);

#endif
+199 −2
Original line number Diff line number Diff line
@@ -40,6 +40,7 @@
#include "../bs_dev_common.c"
#include "blobstore.c"
#include "request.c"
#include "zeroes.c"

struct spdk_blob_store *g_bs;
spdk_blob_id g_blobid;
@@ -2763,7 +2764,7 @@ blob_insert_cluster_msg(void)

	spdk_bs_open_blob(g_bs, blobid, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blob != NULL);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;
	blob_data = __blob_to_data(blob);

@@ -2780,6 +2781,200 @@ blob_insert_cluster_msg(void)
	g_bs = NULL;
}

/* Unit test: reads from a thin-provisioned blob's unallocated clusters
 * return zeroes, and the first write triggers cluster allocation.
 * NOTE(review): the bs_dev_common.c test device completes I/O synchronously,
 * which is presumably why g_bserrno is checked right after each call -
 * confirm against the test harness. */
static void
blob_thin_prov_rw(void)
{
	static const uint8_t zero[10 * 4096] = { 0 };
	struct spdk_blob_store *bs;
	struct spdk_bs_dev *dev;
	struct spdk_blob *blob;
	struct spdk_blob_data *blob_data;
	struct spdk_io_channel *channel;
	struct spdk_blob_opts 	opts;
	spdk_blob_id blobid;
	uint64_t free_clusters;
	uint8_t payload_read[10 * 4096];
	uint8_t payload_write[10 * 4096];
	int rc;

	dev = init_dev();

	spdk_bs_init(dev, NULL, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);
	bs = g_bs;
	free_clusters = spdk_bs_free_cluster_count(bs);

	channel = spdk_bs_alloc_io_channel(bs);
	CU_ASSERT(channel != NULL);

	spdk_blob_opts_init(&opts);
	opts.thin_provision = true;

	/* Creating a thin-provisioned blob must not consume any clusters. */
	spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs));
	blobid = g_blobid;

	spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;
	blob_data = __blob_to_data(blob);

	CU_ASSERT(blob_data->active.num_clusters == 0);

	/* The blob started at 0 clusters. Resize it to be 5, but still unallocated. */
	rc = spdk_blob_resize(blob, 5);
	CU_ASSERT(rc == 0);
	CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs));
	CU_ASSERT(blob_data->active.num_clusters == 5);

	spdk_blob_sync_md(blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	/* Sync must not change anything */
	CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs));
	CU_ASSERT(blob_data->active.num_clusters == 5);

	/* Payload should be all zeros from unallocated clusters */
	memset(payload_read, 0xFF, sizeof(payload_read));
	spdk_bs_io_read_blob(blob, channel, payload_read, 4, 10, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(memcmp(zero, payload_read, 10 * 4096) == 0);

	/* Writing must allocate at least one cluster. */
	memset(payload_write, 0xE5, sizeof(payload_write));
	spdk_bs_io_write_blob(blob, channel, payload_write, 4, 10, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(free_clusters != spdk_bs_free_cluster_count(bs));

	/* Read back what was just written. */
	spdk_bs_io_read_blob(blob, channel, payload_read, 4, 10, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(memcmp(payload_write, payload_read, 10 * 4096) == 0);

	spdk_blob_close(blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);

	/* Deleting the blob must return its clusters to the free pool. */
	spdk_bs_delete_blob(bs, blobid, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs));

	spdk_bs_free_io_channel(channel);

	/* Unload the blob store */
	spdk_bs_unload(g_bs, bs_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	g_bs = NULL;
	g_blob = NULL;
	g_blobid = 0;
}

/* Unit test: same as blob_thin_prov_rw but via the readv/writev path, using
 * a 3-element iovec split of a 10-page payload at page offset 250. */
static void
blob_thin_prov_rw_iov(void)
{
	static const uint8_t zero[10 * 4096] = { 0 };
	struct spdk_blob_store *bs;
	struct spdk_bs_dev *dev;
	struct spdk_blob *blob;
	struct spdk_blob_data *blob_data;
	struct spdk_io_channel *channel;
	struct spdk_blob_opts 	opts;
	spdk_blob_id blobid;
	uint64_t free_clusters;
	uint8_t payload_read[10 * 4096];
	uint8_t payload_write[10 * 4096];
	struct iovec iov_read[3];
	struct iovec iov_write[3];

	int rc;

	dev = init_dev();

	spdk_bs_init(dev, NULL, bs_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_bs != NULL);
	bs = g_bs;
	free_clusters = spdk_bs_free_cluster_count(bs);

	channel = spdk_bs_alloc_io_channel(bs);
	CU_ASSERT(channel != NULL);

	spdk_blob_opts_init(&opts);
	opts.thin_provision = true;

	/* Creating a thin-provisioned blob must not consume any clusters. */
	spdk_bs_create_blob_ext(bs, &opts, blob_op_with_id_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(g_blobid != SPDK_BLOBID_INVALID);
	CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs));
	blobid = g_blobid;

	spdk_bs_open_blob(bs, blobid, blob_op_with_handle_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	SPDK_CU_ASSERT_FATAL(g_blob != NULL);
	blob = g_blob;
	blob_data = __blob_to_data(blob);

	CU_ASSERT(blob_data->active.num_clusters == 0);

	/* The blob started at 0 clusters. Resize it to be 5, but still unallocated. */
	rc = spdk_blob_resize(blob, 5);
	CU_ASSERT(rc == 0);
	CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs));
	CU_ASSERT(blob_data->active.num_clusters == 5);

	spdk_blob_sync_md(blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	/* Sync must not change anything */
	CU_ASSERT(free_clusters == spdk_bs_free_cluster_count(bs));
	CU_ASSERT(blob_data->active.num_clusters == 5);

	/* Payload should be all zeros from unallocated clusters */
	memset(payload_read, 0xAA, sizeof(payload_read));
	iov_read[0].iov_base = payload_read;
	iov_read[0].iov_len = 3 * 4096;
	iov_read[1].iov_base = payload_read + 3 * 4096;
	iov_read[1].iov_len = 4 * 4096;
	iov_read[2].iov_base = payload_read + 7 * 4096;
	iov_read[2].iov_len = 3 * 4096;
	spdk_bs_io_readv_blob(blob, channel, iov_read, 3, 250, 10, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(memcmp(zero, payload_read, 10 * 4096) == 0);

	/* Write through a differently-split iovec at the same offset. */
	memset(payload_write, 0xE5, sizeof(payload_write));
	iov_write[0].iov_base = payload_write;
	iov_write[0].iov_len = 1 * 4096;
	iov_write[1].iov_base = payload_write + 1 * 4096;
	iov_write[1].iov_len = 5 * 4096;
	iov_write[2].iov_base = payload_write + 6 * 4096;
	iov_write[2].iov_len = 4 * 4096;

	spdk_bs_io_writev_blob(blob, channel, iov_write, 3, 250, 10, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);

	/* Read back and compare against the flat write payload. */
	memset(payload_read, 0xAA, sizeof(payload_read));
	iov_read[0].iov_base = payload_read;
	iov_read[0].iov_len = 3 * 4096;
	iov_read[1].iov_base = payload_read + 3 * 4096;
	iov_read[1].iov_len = 4 * 4096;
	iov_read[2].iov_base = payload_read + 7 * 4096;
	iov_read[2].iov_len = 3 * 4096;
	spdk_bs_io_readv_blob(blob, channel, iov_read, 3, 250, 10, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	CU_ASSERT(memcmp(payload_write, payload_read, 10 * 4096) == 0);

	spdk_blob_close(blob, blob_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);

	spdk_bs_free_io_channel(channel);

	/* Unload the blob store */
	spdk_bs_unload(g_bs, bs_op_complete, NULL);
	CU_ASSERT(g_bserrno == 0);
	g_bs = NULL;
	g_blob = NULL;
	g_blobid = 0;
}

int main(int argc, char **argv)
{
	CU_pSuite	suite = NULL;
@@ -2829,7 +3024,9 @@ int main(int argc, char **argv)
		CU_add_test(suite, "bs_version", bs_version) == NULL ||
		CU_add_test(suite, "blob_set_xattrs", blob_set_xattrs) == NULL ||
		CU_add_test(suite, "blob_thin_prov_alloc", blob_thin_prov_alloc) == NULL ||
		CU_add_test(suite, "blob_insert_cluster_msg", blob_insert_cluster_msg) == NULL
		CU_add_test(suite, "blob_insert_cluster_msg", blob_insert_cluster_msg) == NULL ||
		CU_add_test(suite, "blob_thin_prov_rw", blob_thin_prov_rw) == NULL ||
		CU_add_test(suite, "blob_thin_prov_rw_iov", blob_thin_prov_rw_iov) == NULL
	) {
		CU_cleanup_registry();
		return CU_get_error();