Commit 73bc324b authored by Shuhei Matsumoto's avatar Shuhei Matsumoto Committed by Jim Harris
Browse files

scsi: Refine split processing for unmap



If any child I/O failed, we should set the status of the SCSI task
to failed and stop further split processing.

However, ctx->count meant number of outstanding children I/Os. We
could not stop further split processing. We updated the status of the
SCSI task only if child I/O submission failed.

Fix these in this patch. Verify the changes by refining unit tests
except for error cases.

This idea was derived from the splitting process of the generic bdev
layer.

Signed-off-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Change-Id: Ifc74c09c349567b0311fe2a7435a5452d4c68251
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/19437


Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Reviewed-by: default avatarChangpeng Liu <changpeng.liu@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
parent 9910e8d7
Loading
Loading
Loading
Loading
+64 −37
Original line number Diff line number Diff line
@@ -1328,8 +1328,9 @@ check_condition:
struct spdk_bdev_scsi_unmap_ctx {
	struct spdk_scsi_task		*task;
	struct spdk_scsi_unmap_bdesc	desc[DEFAULT_MAX_UNMAP_BLOCK_DESCRIPTOR_COUNT];
	uint32_t			desc_count;
	uint32_t			count;
	uint16_t			remaining_count;
	uint16_t			current_count;
	uint16_t			outstanding_count;
};

static int _bdev_scsi_unmap(struct spdk_bdev_scsi_unmap_ctx *ctx);
@@ -1340,21 +1341,34 @@ bdev_scsi_task_complete_unmap_cmd(struct spdk_bdev_io *bdev_io, bool success,
{
	struct spdk_bdev_scsi_unmap_ctx *ctx = cb_arg;
	struct spdk_scsi_task *task = ctx->task;
	int sc, sk, asc, ascq;

	ctx->count--;
	spdk_bdev_free_io(bdev_io);

	if (task->status == SPDK_SCSI_STATUS_GOOD) {
		spdk_bdev_io_get_scsi_status(bdev_io, &sc, &sk, &asc, &ascq);
		spdk_scsi_task_set_status(task, sc, sk, asc, ascq);
	if (!success) {
		spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION,
					  SPDK_SCSI_SENSE_NO_SENSE,
					  SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE,
					  SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
		/* If any child I/O failed, stop further splitting process. */
		ctx->current_count += ctx->remaining_count;
		ctx->remaining_count = 0;
	}

	spdk_bdev_free_io(bdev_io);
	ctx->outstanding_count--;
	if (ctx->outstanding_count != 0) {
		/* Any child I/O is still outstanding. */
		return;
	}

	if (ctx->count == 0) {
	if (ctx->remaining_count == 0) {
		/* SCSI task finishes when all descriptors are consumed. */
		scsi_lun_complete_task(task->lun, task);
		free(ctx);
		return;
	}

	/* Continue with splitting process. */
	_bdev_scsi_unmap(ctx);
}

static int
@@ -1404,51 +1418,63 @@ _bdev_scsi_unmap(struct spdk_bdev_scsi_unmap_ctx *ctx)
{
	struct spdk_scsi_task *task = ctx->task;
	struct spdk_scsi_lun *lun = task->lun;
	uint32_t i;
	int rc;

	for (i = ctx->count; i < ctx->desc_count; i++) {
	while (ctx->remaining_count != 0) {
		struct spdk_scsi_unmap_bdesc	*desc;
		uint64_t offset_blocks;
		uint64_t num_blocks;

		desc = &ctx->desc[i];
		desc = &ctx->desc[ctx->current_count];

		offset_blocks = from_be64(&desc->lba);
		num_blocks = from_be32(&desc->block_count);

		if (num_blocks == 0) {
			continue;
		}

		ctx->count++;
			rc = 0;
		} else {
			ctx->outstanding_count++;
			rc = spdk_bdev_unmap_blocks(lun->bdev_desc,
						    lun->io_channel,
						    offset_blocks,
						    num_blocks,
						    bdev_scsi_task_complete_unmap_cmd,
						    ctx);
		}

		if (rc) {
		if (rc == 0) {
			ctx->current_count++;
			ctx->remaining_count--;
		} else {
			ctx->outstanding_count--;
			if (rc == -ENOMEM) {
				if (ctx->outstanding_count == 0) {
					/*
					 * If there are outstanding child I/Os, the last
					 * completion will call this function again to try
					 * the next split. Hence, queue the parent task only
					 * if there is no outstanding child I/O.
					 */
					bdev_scsi_queue_io(task, bdev_scsi_unmap_resubmit, ctx);
				/* Unmap was not yet submitted to bdev */
				ctx->count--;
				return SPDK_SCSI_TASK_PENDING;
				}
				return SPDK_SCSI_TASK_PENDING;
			} else {
				SPDK_ERRLOG("SCSI Unmapping failed\n");
				spdk_scsi_task_set_status(task, SPDK_SCSI_STATUS_CHECK_CONDITION,
							  SPDK_SCSI_SENSE_NO_SENSE,
							  SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE,
							  SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE);
			ctx->count--;
				/* If any child I/O failed, stop further splitting process. */
				ctx->current_count += ctx->remaining_count;
				ctx->remaining_count = 0;
				/* We can't complete here - we may have to wait for previously
			 * submitted unmaps to complete */
				 * submitted child I/Os to complete */
				break;
			}
		}
	}

	if (ctx->count == 0) {
	if (ctx->outstanding_count == 0) {
		free(ctx);
		return SPDK_SCSI_TASK_COMPLETE;
	}
@@ -1476,7 +1502,8 @@ bdev_scsi_unmap(struct spdk_bdev *bdev, struct spdk_scsi_task *task)
	}

	ctx->task = task;
	ctx->count = 0;
	ctx->current_count = 0;
	ctx->outstanding_count = 0;

	if (task->iovcnt == 1) {
		data = (uint8_t *)task->iovs[0].iov_base;
@@ -1499,7 +1526,7 @@ bdev_scsi_unmap(struct spdk_bdev *bdev, struct spdk_scsi_task *task)
		return SPDK_SCSI_TASK_COMPLETE;
	}

	ctx->desc_count = desc_count;
	ctx->remaining_count = desc_count;

	return _bdev_scsi_unmap(ctx);
}
+120 −13
Original line number Diff line number Diff line
@@ -19,10 +19,13 @@ SPDK_LOG_REGISTER_COMPONENT(scsi)
static uint64_t g_test_bdev_num_blocks;

TAILQ_HEAD(, spdk_bdev_io) g_bdev_io_queue;
int g_outstanding_bdev_io_count = 0;
int g_scsi_cb_called = 0;

TAILQ_HEAD(, spdk_bdev_io_wait_entry) g_io_wait_queue;
int g_pending_bdev_io_count  = 0;
bool g_bdev_io_pool_full = false;
int g_bdev_io_pool_count = -1;

bool
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
@@ -156,25 +159,42 @@ spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *i
}

static void
ut_bdev_io_flush(void)
ut_bdev_io_complete(void)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_io_wait_entry *entry;

	while (!TAILQ_EMPTY(&g_bdev_io_queue) || !TAILQ_EMPTY(&g_io_wait_queue)) {
	while (!TAILQ_EMPTY(&g_bdev_io_queue)) {
		bdev_io = TAILQ_FIRST(&g_bdev_io_queue);
		TAILQ_REMOVE(&g_bdev_io_queue, bdev_io, internal.link);
		bdev_io->internal.cb(bdev_io, true, bdev_io->internal.caller_ctx);
		free(bdev_io);
		g_outstanding_bdev_io_count--;
		if (g_bdev_io_pool_count != -1) {
			g_bdev_io_pool_count++;
		}
	}
}

static void
ut_bdev_io_retry(void)
{
	struct spdk_bdev_io_wait_entry *entry;

	while (!TAILQ_EMPTY(&g_io_wait_queue)) {
		entry = TAILQ_FIRST(&g_io_wait_queue);
		TAILQ_REMOVE(&g_io_wait_queue, entry, link);
		g_pending_bdev_io_count--;
		entry->cb_fn(entry->cb_arg);
	}
}

static void
ut_bdev_io_flush(void)
{
	while (!TAILQ_EMPTY(&g_bdev_io_queue) || !TAILQ_EMPTY(&g_io_wait_queue)) {
		ut_bdev_io_complete();
		ut_bdev_io_retry();
	}
}

static int
@@ -185,6 +205,10 @@ _spdk_bdev_io_op(spdk_bdev_io_completion_cb cb, void *cb_arg)
	if (g_bdev_io_pool_full) {
		g_bdev_io_pool_full = false;
		return -ENOMEM;
	} else if (g_bdev_io_pool_count == 0) {
		return -ENOMEM;
	} else if (g_bdev_io_pool_count != -1) {
		g_bdev_io_pool_count--;
	}

	bdev_io = calloc(1, sizeof(*bdev_io));
@@ -194,6 +218,7 @@ _spdk_bdev_io_op(spdk_bdev_io_completion_cb cb, void *cb_arg)
	bdev_io->internal.caller_ctx = cb_arg;

	TAILQ_INSERT_TAIL(&g_bdev_io_queue, bdev_io, internal.link);
	g_outstanding_bdev_io_count++;

	return 0;
}
@@ -244,6 +269,7 @@ spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
			struct spdk_bdev_io_wait_entry *entry)
{
	TAILQ_INSERT_TAIL(&g_io_wait_queue, entry, link);
	g_pending_bdev_io_count++;
	return 0;
}

@@ -982,6 +1008,86 @@ get_dif_ctx_test(void)
	CU_ASSERT(dif_ctx.init_ref_tag + dif_ctx.ref_tag_offset == 0x12345678);
}

static void
unmap_split_test(void)
{
	struct spdk_bdev bdev = { .blocklen = 512 };
	struct spdk_scsi_lun lun;
	struct spdk_scsi_task task;
	uint8_t cdb[16];
	char data[4096];
	int rc;

	lun.bdev = &bdev;

	/* Test block device size of 512 MiB */
	g_test_bdev_num_blocks = 512 * 1024 * 1024;

	/* Unmap 5 blocks using 5 descriptors. bdev_io pool size is 2.
	 * Hence, unmap should be done by 3 iterations.
	 * 1st - 2 unmaps, 2nd - 2 unmaps, and 3rd - 1 unmap.
	 */
	ut_init_task(&task);
	task.lun = &lun;
	task.cdb = cdb;
	memset(cdb, 0, sizeof(cdb));
	cdb[0] = 0x42; /* UNMAP */
	to_be16(&data[7], 5); /* 5 parameters in list */
	memset(data, 0, sizeof(data));
	to_be16(&data[2], 80); /* 2 descriptors */
	to_be64(&data[8], 1); /* LBA 1 */
	to_be32(&data[16], 2); /* 2 blocks */
	to_be64(&data[24], 5); /* LBA 5 */
	to_be32(&data[32], 3); /* 3 blocks */
	to_be64(&data[40], 10); /* LBA 10 */
	to_be32(&data[48], 1); /* 1 block */
	to_be64(&data[56], 15); /* LBA 15 */
	to_be32(&data[64], 4); /* 4 blocks */
	to_be64(&data[72], 30); /* LBA 30 */
	to_be32(&data[80], 1); /* 1 block */
	spdk_scsi_task_set_data(&task, data, sizeof(data));
	task.status = SPDK_SCSI_STATUS_GOOD;

	g_bdev_io_pool_full = true;

	rc = bdev_scsi_execute(&task);
	CU_ASSERT(rc == SPDK_SCSI_TASK_PENDING);
	CU_ASSERT(task.status == SPDK_SCSI_STATUS_GOOD);
	CU_ASSERT(g_scsi_cb_called == 0);
	CU_ASSERT(g_outstanding_bdev_io_count == 0);
	CU_ASSERT(g_pending_bdev_io_count == 1);

	g_bdev_io_pool_count = 2;
	ut_bdev_io_retry();

	CU_ASSERT(g_outstanding_bdev_io_count == 2);
	CU_ASSERT(g_pending_bdev_io_count == 0);

	g_bdev_io_pool_full = true;
	ut_bdev_io_complete();

	CU_ASSERT(g_outstanding_bdev_io_count == 0);
	CU_ASSERT(g_pending_bdev_io_count == 1);

	ut_bdev_io_retry();

	CU_ASSERT(g_outstanding_bdev_io_count == 2);
	CU_ASSERT(g_pending_bdev_io_count == 0);

	ut_bdev_io_complete();

	CU_ASSERT(task.status == SPDK_SCSI_STATUS_GOOD);
	CU_ASSERT(g_scsi_cb_called == 1);

	g_scsi_cb_called = 0;
	g_bdev_io_pool_count = -1;

	SPDK_CU_ASSERT_FATAL(TAILQ_EMPTY(&g_bdev_io_queue));
	SPDK_CU_ASSERT_FATAL(TAILQ_EMPTY(&g_io_wait_queue));

	ut_put_task(&task);
}

int
main(int argc, char **argv)
{
@@ -1008,6 +1114,7 @@ main(int argc, char **argv)
	CU_ADD_TEST(suite, xfer_test);
	CU_ADD_TEST(suite, scsi_name_padding_test);
	CU_ADD_TEST(suite, get_dif_ctx_test);
	CU_ADD_TEST(suite, unmap_split_test);

	num_failures = spdk_ut_run_tests(argc, argv, NULL);
	CU_cleanup_registry();