Commit db3d1201 authored by Sochin Jiang's avatar Sochin Jiang Committed by Tomasz Zawadzki
Browse files

lib/blob: fix a data corruption bug



There is a fatal bug that could easily cause data corruption when using
thin-provisioned blobs. In blob_request_submit_rw_iov(), we first get the
lba by calling blob_calculate_lba_and_lba_count(), which calculates
different lbas according to the return value of
bs_io_unit_is_allocated(). Later, we call bs_io_unit_is_allocated()
again to judge whether the specific cluster is allocated; the problem is that
the cluster may have been allocated by this point even though it was not
allocated when blob_calculate_lba_and_lba_count() was called earlier. To
ensure the correctness of the lba, we can either recalculate the lba when
bs_io_unit_is_allocated() returns true, or make
blob_calculate_lba_and_lba_count() return the result of
bs_io_unit_is_allocated(); this patch uses the second solution.

When more than one CPU core is configured, the md thread runs in a separate
SPDK thread; under that configuration, this data corruption scenario can
easily be reproduced by running fio verify in VMs that use thin-provisioned
Lvols as block devices.

Signed-off-by: default avatarSochin Jiang <jiangxiaoqing.sochin@bytedance.com>
Change-Id: I099865ff291ea42d5d49b693cc53f64b60881684
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3318


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: default avatarTomasz Zawadzki <tomasz.zawadzki@intel.com>
parent 0d3cc15a
Loading
Loading
Loading
Loading
+12 −8
Original line number Diff line number Diff line
@@ -2354,7 +2354,7 @@ bs_allocate_and_copy_cluster(struct spdk_blob *blob,
	}
}

static inline void
static inline bool
blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
				 uint64_t *lba,	uint32_t *lba_count)
{
@@ -2364,8 +2364,10 @@ blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint6
		assert(blob->back_bs_dev != NULL);
		*lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
		*lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
		return false;
	} else {
		*lba = bs_blob_io_unit_to_lba(blob, io_unit);
		return true;
	}
}

@@ -2480,6 +2482,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
	struct spdk_bs_cpl cpl;
	uint64_t lba;
	uint32_t lba_count;
	bool is_allocated;

	assert(blob != NULL);

@@ -2487,7 +2490,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
	cpl.u.blob_basic.cb_fn = cb_fn;
	cpl.u.blob_basic.cb_arg = cb_arg;

	blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
	is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);

	if (blob->frozen_refcnt) {
		/* This blob I/O is frozen */
@@ -2515,7 +2518,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
			return;
		}

		if (bs_io_unit_is_allocated(blob, offset)) {
		if (is_allocated) {
			/* Read from the blob */
			bs_batch_read_dev(batch, payload, lba, lba_count);
		} else {
@@ -2528,7 +2531,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
	}
	case SPDK_BLOB_WRITE:
	case SPDK_BLOB_WRITE_ZEROES: {
		if (bs_io_unit_is_allocated(blob, offset)) {
		if (is_allocated) {
			/* Write to the blob */
			spdk_bs_batch_t *batch;

@@ -2573,7 +2576,7 @@ blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blo
			return;
		}

		if (bs_io_unit_is_allocated(blob, offset)) {
		if (is_allocated) {
			bs_batch_unmap_dev(batch, lba, lba_count);
		}

@@ -2745,6 +2748,7 @@ blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_chan
	if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
		uint32_t lba_count;
		uint64_t lba;
		bool is_allocated;

		cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
		cpl.u.blob_basic.cb_fn = cb_fn;
@@ -2768,7 +2772,7 @@ blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_chan
			return;
		}

		blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
		is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);

		if (read) {
			spdk_bs_sequence_t *seq;
@@ -2779,14 +2783,14 @@ blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_chan
				return;
			}

			if (bs_io_unit_is_allocated(blob, offset)) {
			if (is_allocated) {
				bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
			} else {
				bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
							 rw_iov_done, NULL);
			}
		} else {
			if (bs_io_unit_is_allocated(blob, offset)) {
			if (is_allocated) {
				spdk_bs_sequence_t *seq;

				seq = bs_sequence_start(_channel, &cpl);