Commit c5d80a8b authored by Jim Harris's avatar Jim Harris
Browse files

blob: avoid recursion when split IO immmediately complete



In some scenarios, a split IO can immediately complete.  For
example, a very large unmap operation to a newly thin-provisioned
blob has no operations to perform, so the batch for its operation
immediately completes.

But if it immediately completes, we can't recursively submit
the next split IO.  So use variables in the context structure
to detect when an operation immediately completes, to allow
it to unwind and submit the next operation without recursing.

Fixes issue #2347.

Signed-off-by: default avatarJim Harris <james.r.harris@intel.com>
Change-Id: I8e4c121190c7d08152aa8de20cf6abc55b5edc46
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11388


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
parent b6992a90
Loading
Loading
Loading
Loading
+59 −7
Original line number Diff line number Diff line
@@ -2533,6 +2533,9 @@ struct op_split_ctx {
	void *curr_payload;
	enum spdk_blob_op_type op_type;
	spdk_bs_sequence_t *seq;
	bool in_submit_ctx;
	bool completed_in_submit_ctx;
	bool done;
};

static void
@@ -2542,18 +2545,40 @@ blob_request_submit_op_split_next(void *cb_arg, int bserrno)
	struct spdk_blob	*blob = ctx->blob;
	struct spdk_io_channel	*ch = ctx->channel;
	enum spdk_blob_op_type	op_type = ctx->op_type;
	uint8_t			*buf = ctx->curr_payload;
	uint64_t		offset = ctx->io_unit_offset;
	uint64_t		length = ctx->io_units_remaining;
	uint8_t			*buf;
	uint64_t		offset;
	uint64_t		length;
	uint64_t		op_length;

	if (bserrno != 0 || ctx->io_units_remaining == 0) {
		bs_sequence_finish(ctx->seq, bserrno);
		if (ctx->in_submit_ctx) {
			/* Defer freeing of the ctx object, since it will be
			 * accessed when this unwinds back to the submisison
			 * context.
			 */
			ctx->done = true;
		} else {
			free(ctx);
		}
		return;
	}

	do {
	if (ctx->in_submit_ctx) {
		/* If this split operation completed in the context
		 * of its submission, mark the flag and return immediately
		 * to avoid recursion.
		 */
		ctx->completed_in_submit_ctx = true;
		return;
	}

	while (true) {
		ctx->completed_in_submit_ctx = false;

		offset = ctx->io_unit_offset;
		length = ctx->io_units_remaining;
		buf = ctx->curr_payload;
		op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
				     offset));

@@ -2564,6 +2589,9 @@ blob_request_submit_op_split_next(void *cb_arg, int bserrno)
			ctx->curr_payload += op_length * blob->bs->io_unit_size;
		}

		assert(!ctx->in_submit_ctx);
		ctx->in_submit_ctx = true;

		switch (op_type) {
		case SPDK_BLOB_READ:
			spdk_blob_io_read(blob, ch, buf, offset, op_length,
@@ -2586,9 +2614,33 @@ blob_request_submit_op_split_next(void *cb_arg, int bserrno)
			SPDK_ERRLOG("readv/write not valid\n");
			bs_sequence_finish(ctx->seq, -EINVAL);
			free(ctx);
			return;
		}

#ifndef __clang_analyzer__
		/* scan-build reports a false positive around accessing the ctx here. It
		 * forms a path that recursively calls this function, but then says
		 * "assuming ctx->in_submit_ctx is false", when that isn't possible.
		 * This path does free(ctx), returns to here, and reports a use-after-free
		 * bug.  Wrapping this bit of code so that scan-build doesn't see it
		 * works around the scan-build bug.
		 */
		assert(ctx->in_submit_ctx);
		ctx->in_submit_ctx = false;

		/* If the operation completed immediately, loop back and submit the
		 * next operation.  Otherwise we can return and the next split
		 * operation will get submitted when this current operation is
		 * later completed asynchronously.
		 */
		if (ctx->completed_in_submit_ctx) {
			continue;
		} else if (ctx->done) {
			free(ctx);
		}
#endif
		break;
	}
	} while (false);
}

static void