Commit 3deaf005 authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Changpeng Liu
Browse files

bdev: Fix wrong IO split when child iovs run out



When we ran out of child_iov space, ensure the iovs to be aligned
with block size. However the calculation was wrong.

(to_next_boundary_bytes % blocklen) meant not to_last_block but to_next_block.
So calculate to_last_block_size by reducing to_last_block_size from blocklen.

The data was collected when the issue occured. So add unit test
by using the data.

    Fixes #979

Reported-by: default avatarGeoffrey McRae <geoff@hostfission.com>

Signed-off-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: I62a50bada450288ea7c60aec0e557c2a53cd8916
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/470806


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarPaul Luse <paul.e.luse@intel.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Reviewed-by: default avatarJim Harris <james.r.harris@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
parent b0400403
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -1696,7 +1696,8 @@ _spdk_bdev_io_split(void *_bdev_io)
				uint32_t child_iovpos = child_iovcnt - 1;
				/* don't decrease child_iovcnt so the loop will naturally end */

				to_next_boundary_bytes += _to_next_boundary(to_next_boundary_bytes, blocklen);
				to_last_block_bytes = blocklen - to_last_block_bytes;
				to_next_boundary_bytes += to_last_block_bytes;
				while (to_last_block_bytes > 0 && iovcnt > 0) {
					iov_len = spdk_min(to_last_block_bytes,
							   bdev_io->child_iov[child_iovpos].iov_len);
+178 −0
Original line number Diff line number Diff line
@@ -1259,6 +1259,184 @@ bdev_io_split(void)
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);

	/* Test multi vector command that needs to be split due to the IO boundary and
	 * the capacity of child iovs. Especially test the case when the command is
	 * split due to the capacity of child iovs, the tail address is not aligned with
	 * block size and is rewinded to the aligned address.
	 *
	 * The iovecs used in read request is complex but is based on the data
	 * collected in the real issue. We change the base addresses but keep the lengths
	 * not to loose the credibility of the test.
	 */
	bdev->optimal_io_boundary = 128;
	g_io_done = false;
	g_io_status = 0;

	for (i = 0; i < 31; i++) {
		iov[i].iov_base = (void *)(0xFEED0000000 + (i << 20));
		iov[i].iov_len = 1024;
	}
	iov[31].iov_base = (void *)0xFEED1F00000;
	iov[31].iov_len = 32768;
	iov[32].iov_base = (void *)0xFEED2000000;
	iov[32].iov_len = 160;
	iov[33].iov_base = (void *)0xFEED2100000;
	iov[33].iov_len = 4096;
	iov[34].iov_base = (void *)0xFEED2200000;
	iov[34].iov_len = 4096;
	iov[35].iov_base = (void *)0xFEED2300000;
	iov[35].iov_len = 4096;
	iov[36].iov_base = (void *)0xFEED2400000;
	iov[36].iov_len = 4096;
	iov[37].iov_base = (void *)0xFEED2500000;
	iov[37].iov_len = 4096;
	iov[38].iov_base = (void *)0xFEED2600000;
	iov[38].iov_len = 4096;
	iov[39].iov_base = (void *)0xFEED2700000;
	iov[39].iov_len = 4096;
	iov[40].iov_base = (void *)0xFEED2800000;
	iov[40].iov_len = 4096;
	iov[41].iov_base = (void *)0xFEED2900000;
	iov[41].iov_len = 4096;
	iov[42].iov_base = (void *)0xFEED2A00000;
	iov[42].iov_len = 4096;
	iov[43].iov_base = (void *)0xFEED2B00000;
	iov[43].iov_len = 12288;
	iov[44].iov_base = (void *)0xFEED2C00000;
	iov[44].iov_len = 8192;
	iov[45].iov_base = (void *)0xFEED2F00000;
	iov[45].iov_len = 4096;
	iov[46].iov_base = (void *)0xFEED3000000;
	iov[46].iov_len = 4096;
	iov[47].iov_base = (void *)0xFEED3100000;
	iov[47].iov_len = 4096;
	iov[48].iov_base = (void *)0xFEED3200000;
	iov[48].iov_len = 24576;
	iov[49].iov_base = (void *)0xFEED3300000;
	iov[49].iov_len = 16384;
	iov[50].iov_base = (void *)0xFEED3400000;
	iov[50].iov_len = 12288;
	iov[51].iov_base = (void *)0xFEED3500000;
	iov[51].iov_len = 4096;
	iov[52].iov_base = (void *)0xFEED3600000;
	iov[52].iov_len = 4096;
	iov[53].iov_base = (void *)0xFEED3700000;
	iov[53].iov_len = 4096;
	iov[54].iov_base = (void *)0xFEED3800000;
	iov[54].iov_len = 28672;
	iov[55].iov_base = (void *)0xFEED3900000;
	iov[55].iov_len = 20480;
	iov[56].iov_base = (void *)0xFEED3A00000;
	iov[56].iov_len = 4096;
	iov[57].iov_base = (void *)0xFEED3B00000;
	iov[57].iov_len = 12288;
	iov[58].iov_base = (void *)0xFEED3C00000;
	iov[58].iov_len = 4096;
	iov[59].iov_base = (void *)0xFEED3D00000;
	iov[59].iov_len = 4096;
	iov[60].iov_base = (void *)0xFEED3E00000;
	iov[60].iov_len = 352;

	/* The 1st child IO must be from iov[0] to iov[31] split by the capacity
	 * of child iovs,
	 */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 0, 126, 32);
	for (i = 0; i < 32; i++) {
		ut_expected_io_set_iov(expected_io, i, iov[i].iov_base, iov[i].iov_len);
	}
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	/* The 2nd child IO must be from iov[32] to the first 864 bytes of iov[33]
	 * split by the IO boundary requirement.
	 */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 126, 2, 2);
	ut_expected_io_set_iov(expected_io, 0, iov[32].iov_base, iov[32].iov_len);
	ut_expected_io_set_iov(expected_io, 1, iov[33].iov_base, 864);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	/* The 3rd child IO must be from the remaining 3232 bytes of iov[33] to
	 * the first 864 bytes of iov[46] split by the IO boundary requirement.
	 */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 128, 128, 14);
	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[33].iov_base + 864),
			       iov[33].iov_len - 864);
	ut_expected_io_set_iov(expected_io, 1, iov[34].iov_base, iov[34].iov_len);
	ut_expected_io_set_iov(expected_io, 2, iov[35].iov_base, iov[35].iov_len);
	ut_expected_io_set_iov(expected_io, 3, iov[36].iov_base, iov[36].iov_len);
	ut_expected_io_set_iov(expected_io, 4, iov[37].iov_base, iov[37].iov_len);
	ut_expected_io_set_iov(expected_io, 5, iov[38].iov_base, iov[38].iov_len);
	ut_expected_io_set_iov(expected_io, 6, iov[39].iov_base, iov[39].iov_len);
	ut_expected_io_set_iov(expected_io, 7, iov[40].iov_base, iov[40].iov_len);
	ut_expected_io_set_iov(expected_io, 8, iov[41].iov_base, iov[41].iov_len);
	ut_expected_io_set_iov(expected_io, 9, iov[42].iov_base, iov[42].iov_len);
	ut_expected_io_set_iov(expected_io, 10, iov[43].iov_base, iov[43].iov_len);
	ut_expected_io_set_iov(expected_io, 11, iov[44].iov_base, iov[44].iov_len);
	ut_expected_io_set_iov(expected_io, 12, iov[45].iov_base, iov[45].iov_len);
	ut_expected_io_set_iov(expected_io, 13, iov[46].iov_base, 864);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	/* The 4th child IO must be from the remaining 3232 bytes of iov[46] to the
	 * first 864 bytes of iov[52] split by the IO boundary requirement.
	 */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 256, 128, 7);
	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[46].iov_base + 864),
			       iov[46].iov_len - 864);
	ut_expected_io_set_iov(expected_io, 1, iov[47].iov_base, iov[47].iov_len);
	ut_expected_io_set_iov(expected_io, 2, iov[48].iov_base, iov[48].iov_len);
	ut_expected_io_set_iov(expected_io, 3, iov[49].iov_base, iov[49].iov_len);
	ut_expected_io_set_iov(expected_io, 4, iov[50].iov_base, iov[50].iov_len);
	ut_expected_io_set_iov(expected_io, 5, iov[51].iov_base, iov[51].iov_len);
	ut_expected_io_set_iov(expected_io, 6, iov[52].iov_base, 864);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	/* The 5th child IO must be from the remaining 3232 bytes of iov[52] to
	 * the first 4096 bytes of iov[57] split by the IO boundary requirement.
	 */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 384, 128, 6);
	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[52].iov_base + 864),
			       iov[52].iov_len - 864);
	ut_expected_io_set_iov(expected_io, 1, iov[53].iov_base, iov[53].iov_len);
	ut_expected_io_set_iov(expected_io, 2, iov[54].iov_base, iov[54].iov_len);
	ut_expected_io_set_iov(expected_io, 3, iov[55].iov_base, iov[55].iov_len);
	ut_expected_io_set_iov(expected_io, 4, iov[56].iov_base, iov[56].iov_len);
	ut_expected_io_set_iov(expected_io, 5, iov[57].iov_base, 4960);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	/* The 6th child IO must be from the remaining 7328 bytes of iov[57]
	 * to the first 3936 bytes of iov[58] split by the capacity of child iovs.
	 */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 512, 30, 3);
	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[57].iov_base + 4960),
			       iov[57].iov_len - 4960);
	ut_expected_io_set_iov(expected_io, 1, iov[58].iov_base, iov[58].iov_len);
	ut_expected_io_set_iov(expected_io, 2, iov[59].iov_base, 3936);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	/* The 7th child IO is from the remaining 160 bytes of iov[59] and iov[60]. */
	expected_io = ut_alloc_expected_io(SPDK_BDEV_IO_TYPE_READ, 542, 1, 2);
	ut_expected_io_set_iov(expected_io, 0, (void *)((uintptr_t)iov[59].iov_base + 3936),
			       iov[59].iov_len - 3936);
	ut_expected_io_set_iov(expected_io, 1, iov[60].iov_base, iov[60].iov_len);
	TAILQ_INSERT_TAIL(&g_bdev_ut_channel->expected_io, expected_io, link);

	rc = spdk_bdev_readv_blocks(desc, io_ch, iov, 61, 0, 543, io_done, NULL);
	CU_ASSERT(rc == 0);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 5);
	stub_complete_io(5);
	CU_ASSERT(g_io_done == false);

	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 1);
	stub_complete_io(1);
	CU_ASSERT(g_io_done == true);
	CU_ASSERT(g_bdev_ut_channel->outstanding_io_count == 0);
	CU_ASSERT(g_io_status == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* Test a WRITE_ZEROES that would span an I/O boundary.  WRITE_ZEROES should not be
	 * split, so test that.
	 */