Commit b735c429 authored by zhenwei pi's avatar zhenwei pi Committed by Tomasz Zawadzki
Browse files

scsi: Improve child IO split mechanism



Improve the child IO split mechanism with the points:
- simplify error handing for splitting loop.
- ctx->fn() does not have to touch any counter. then
  ctx->remaining_count is removed from _bdev_scsi_unmap and
  _bdev_scsi_write_same.

Change-Id: I543fcbb6eede662ec248e68c2d377d3b3a488961
Signed-off-by: default avatarzhenwei pi <pizhenwei@bytedance.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/20559


Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Community-CI: Mellanox Build Bot
parent 355312bf
Loading
Loading
Loading
Loading
+31 −34
Original line number Diff line number Diff line
@@ -1357,6 +1357,7 @@ struct spdk_bdev_scsi_split_ctx {
	uint16_t			remaining_count;
	uint16_t			current_count;
	uint16_t			outstanding_count;
	/* Return 0 on successly submitting child IO, 1 on skipping child IO, negative number on failure */
	int	(*fn)(struct spdk_bdev_scsi_split_ctx *ctx);
};

@@ -1379,23 +1380,16 @@ bdev_scsi_split(struct spdk_bdev_scsi_split_ctx *ctx)

	while (ctx->remaining_count != 0) {
		rc = ctx->fn(ctx);
		if (rc == 0) {
		if (rc >= 0) {
			ctx->current_count++;
			ctx->remaining_count--;
		} else {
			ctx->outstanding_count--;
			if (rc == -ENOMEM) {
				if (ctx->outstanding_count == 0) {
					/*
					 * If there are outstanding child I/Os, the last
					 * completion will call this function again to try
					 * the next split. Hence, queue the parent task only
					 * if there is no outstanding child I/O.
					 */
					bdev_scsi_queue_io(task, bdev_scsi_split_resubmit, ctx);
			/* 0 on success, 1 on skipping child IO. !rc is good enough */
			ctx->outstanding_count += !rc;
			continue;
		} else if (rc == -ENOMEM) {
			break;
		}
				return SPDK_SCSI_TASK_PENDING;
			} else {

		SPDK_ERRLOG("SCSI %s failed\n", spdk_scsi_sbc_opcode_string(opcode, 0));
		spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION,
					  SPDK_SCSI_SENSE_NO_SENSE,
@@ -1404,21 +1398,26 @@ bdev_scsi_split(struct spdk_bdev_scsi_split_ctx *ctx)
		/* If any child I/O failed, stop further splitting process. */
		ctx->current_count += ctx->remaining_count;
		ctx->remaining_count = 0;
				/* We can't complete here - we may have to wait for previously
				 * submitted child I/Os to complete */
		break;
	}

	if (ctx->outstanding_count != 0) {
		/* We can't complete here - we may have to wait for previously
		 * submitted child I/Os to complete */
		return SPDK_SCSI_TASK_PENDING;
	}

	if (rc == -ENOMEM) {
		/* none outstanding child IO submitted, no callback would be involked.
		   this is the last chance to resubmit on -ENOMEM */
		bdev_scsi_queue_io(task, bdev_scsi_split_resubmit, ctx);
		return SPDK_SCSI_TASK_PENDING;
	}

	if (ctx->outstanding_count == 0) {
	free(ctx);
	return SPDK_SCSI_TASK_COMPLETE;
}

	return SPDK_SCSI_TASK_PENDING;
}

static void
bdev_scsi_task_complete_split_cmd(struct spdk_bdev_io *bdev_io, bool success,
				  void *cb_arg)
@@ -1504,10 +1503,9 @@ _bdev_scsi_unmap(struct spdk_bdev_scsi_split_ctx *ctx)
	num_blocks = from_be32(&desc->block_count);

	if (num_blocks == 0) {
		return 0;
		return 1;
	}

	ctx->outstanding_count++;
	return spdk_bdev_unmap_blocks(lun->bdev_desc,
				      lun->io_channel,
				      offset_blocks,
@@ -1575,7 +1573,6 @@ _bdev_scsi_write_same(struct spdk_bdev_scsi_split_ctx *ctx)
	struct spdk_scsi_lun *lun = task->lun;
	uint64_t offset_blocks;

	ctx->outstanding_count++;
	offset_blocks = ctx->start_offset_blocks + ctx->current_count;
	return spdk_bdev_writev_blocks(lun->bdev_desc, lun->io_channel, task->iovs, task->iovcnt,
				       offset_blocks, 1, bdev_scsi_task_complete_split_cmd, ctx);
+21 −5
Original line number Diff line number Diff line
@@ -1045,28 +1045,30 @@ unmap_split_test(void)
	/* Test block device size of 512 MiB */
	g_test_bdev_num_blocks = 512 * 1024 * 1024;

	/* Unmap 5 blocks using 5 descriptors. bdev_io pool size is 2.
	/* Unmap 5 blocks using 6 descriptors(descriptor 2 has 0 blocks). bdev_io pool size is 2.
	 * Hence, unmap should be done by 3 iterations.
	 * 1st - 2 unmaps, 2nd - 2 unmaps, and 3rd - 1 unmap.
	 * 1st - 2 unmaps(0, 1), 2nd - 3 unmaps(2, 3, 4), and 3rd - 1 unmap(5).
	 */
	ut_init_task(&task);
	task.lun = &lun;
	task.cdb = cdb;
	memset(cdb, 0, sizeof(cdb));
	cdb[0] = 0x42; /* UNMAP */
	to_be16(&data[7], 5); /* 5 parameters in list */
	to_be16(&data[7], 6); /* 6 parameters in list */
	memset(data, 0, sizeof(data));
	to_be16(&data[2], 80); /* 2 descriptors */
	to_be16(&data[2], 16 * 6); /* 6 descriptors */
	to_be64(&data[8], 1); /* LBA 1 */
	to_be32(&data[16], 2); /* 2 blocks */
	to_be64(&data[24], 5); /* LBA 5 */
	to_be32(&data[32], 3); /* 3 blocks */
	to_be64(&data[40], 10); /* LBA 10 */
	to_be32(&data[48], 1); /* 1 block */
	to_be32(&data[48], 0); /* 0 block */
	to_be64(&data[56], 15); /* LBA 15 */
	to_be32(&data[64], 4); /* 4 blocks */
	to_be64(&data[72], 30); /* LBA 30 */
	to_be32(&data[80], 1); /* 1 block */
	to_be64(&data[88], 40); /* LBA 40 */
	to_be32(&data[96], 1); /* 1 block */
	spdk_scsi_task_set_data(&task, data, sizeof(data));
	task.status = SPDK_SCSI_STATUS_GOOD;

@@ -1082,6 +1084,7 @@ unmap_split_test(void)
	g_bdev_io_pool_count = 2;
	ut_bdev_io_retry();

	/* descriptor 0 and 1 */
	CU_ASSERT(g_outstanding_bdev_io_count == 2);
	CU_ASSERT(g_pending_bdev_io_count == 0);

@@ -1093,9 +1096,22 @@ unmap_split_test(void)

	ut_bdev_io_retry();

	/* descriptor 3 and 4. (descriptor 2 should be ignored) */
	CU_ASSERT(g_outstanding_bdev_io_count == 2);
	CU_ASSERT(g_pending_bdev_io_count == 0);

	g_bdev_io_pool_full = true;
	ut_bdev_io_complete();

	CU_ASSERT(g_outstanding_bdev_io_count == 0);
	CU_ASSERT(g_pending_bdev_io_count == 1);

	ut_bdev_io_retry();

	/* descriptor 5 */
	CU_ASSERT(g_outstanding_bdev_io_count == 1);
	CU_ASSERT(g_pending_bdev_io_count == 0);

	ut_bdev_io_complete();

	CU_ASSERT(task.status == SPDK_SCSI_STATUS_GOOD);