Commit 0e3ac7d3 authored by Ben Walker's avatar Ben Walker Committed by Konrad Sztyber
Browse files

bdev: Merge spdk_bdev_io::split into new flags region



Reduce the number of cache lines that must be touched in the case where
there wasn't any splitting.

Change-Id: I2c0301632856501c95371aa0e2f2a84d70a2c191
Signed-off-by: default avatarBen Walker <ben@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/21946


Reviewed-by: default avatarJim Harris <jim.harris@samsung.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
parent cea819ac
Loading
Loading
Loading
Loading
+16 −12
Original line number Diff line number Diff line
@@ -935,8 +935,13 @@ struct spdk_bdev_io_internal_fields {

	union {
		struct {

			/** Whether the accel_sequence member is valid */
			uint8_t has_accel_sequence		: 1;
			uint8_t reserved			: 7;

			/** Whether the split data structure is valid */
			uint8_t split				: 1;
			uint8_t reserved			: 6;
		};
		uint8_t raw;
	} f;
@@ -983,23 +988,22 @@ struct spdk_bdev_io_internal_fields {
	/** Status for the IO */
	int8_t status;

	/** Indicates whether the IO is split */
	bool split;

	/** Retry state (resubmit, re-pull, re-push, etc.) */
	uint8_t retry_state;

	struct {
		/** stored user callback in case we split the I/O and use a temporary callback */
		spdk_bdev_io_completion_cb stored_user_cb;

		/** number of blocks remaining in a split i/o */
	uint64_t split_remaining_num_blocks;
		uint64_t remaining_num_blocks;

		/** current offset of the split I/O in the bdev */
	uint64_t split_current_offset_blocks;
		uint64_t current_offset_blocks;

		/** count of outstanding batched split I/Os */
	uint32_t split_outstanding;
		uint32_t outstanding;
	} split;

	/** bdev allocated memory associated with this request */
	void *buf;
+50 −33
Original line number Diff line number Diff line
@@ -1028,7 +1028,7 @@ bdev_io_needs_sequence_exec(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bd

	/* For now, we don't allow splitting IOs with an accel sequence and will treat them as if
	 * bdev module didn't support accel sequences */
	return !desc->accel_sequence_supported[bdev_io->type] || bdev_io->internal.split;
	return !desc->accel_sequence_supported[bdev_io->type] || bdev_io->internal.f.split;
}

static inline void
@@ -3019,7 +3019,9 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
	current_offset = *offset;
	current_remaining = *remaining;

	bdev_io->internal.split_outstanding++;
	assert(bdev_io->internal.f.split);

	bdev_io->internal.split.outstanding++;

	io_wait_fn = _bdev_rw_split;
	switch (bdev_io->type) {
@@ -3077,20 +3079,20 @@ bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt
	if (rc == 0) {
		current_offset += num_blocks;
		current_remaining -= num_blocks;
		bdev_io->internal.split_current_offset_blocks = current_offset;
		bdev_io->internal.split_remaining_num_blocks = current_remaining;
		bdev_io->internal.split.current_offset_blocks = current_offset;
		bdev_io->internal.split.remaining_num_blocks = current_remaining;
		*offset = current_offset;
		*remaining = current_remaining;
	} else {
		bdev_io->internal.split_outstanding--;
		bdev_io->internal.split.outstanding--;
		if (rc == -ENOMEM) {
			if (bdev_io->internal.split_outstanding == 0) {
			if (bdev_io->internal.split.outstanding == 0) {
				/* No I/O is outstanding. Hence we should wait here. */
				bdev_queue_io_wait_with_cb(bdev_io, io_wait_fn);
			}
		} else {
			bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
			if (bdev_io->internal.split_outstanding == 0) {
			if (bdev_io->internal.split.outstanding == 0) {
				bdev_ch_remove_from_io_submitted(bdev_io);
				spdk_trace_record(TRACE_BDEV_IO_DONE, bdev_io->internal.ch->trace_id,
						  0, (uintptr_t)bdev_io, bdev_io->internal.caller_ctx,
@@ -3134,8 +3136,10 @@ _bdev_rw_split(void *_bdev_io)
		io_boundary = UINT32_MAX;
	}

	remaining = bdev_io->internal.split_remaining_num_blocks;
	current_offset = bdev_io->internal.split_current_offset_blocks;
	assert(bdev_io->internal.f.split);

	remaining = bdev_io->internal.split.remaining_num_blocks;
	current_offset = bdev_io->internal.split.current_offset_blocks;
	parent_offset = bdev_io->u.bdev.offset_blocks;
	parent_iov_offset = (current_offset - parent_offset) * blocklen;
	parent_iovcnt = bdev_io->u.bdev.iovcnt;
@@ -3216,7 +3220,7 @@ _bdev_rw_split(void *_bdev_io)
							 * If the first child IO of any split round is less than
							 * a block size, an error exit.
							 */
							if (bdev_io->internal.split_outstanding == 0) {
							if (bdev_io->internal.split.outstanding == 0) {
								SPDK_ERRLOG("The first child io was less than a block size\n");
								bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
								bdev_ch_remove_from_io_submitted(bdev_io);
@@ -3259,8 +3263,10 @@ bdev_unmap_split(struct spdk_bdev_io *bdev_io)
	uint32_t num_children_reqs = 0;
	int rc;

	offset = bdev_io->internal.split_current_offset_blocks;
	remaining = bdev_io->internal.split_remaining_num_blocks;
	assert(bdev_io->internal.f.split);

	offset = bdev_io->internal.split.current_offset_blocks;
	remaining = bdev_io->internal.split.remaining_num_blocks;
	max_unmap_blocks = bdev_io->bdev->max_unmap * bdev_io->bdev->max_unmap_segments;

	while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS)) {
@@ -3283,8 +3289,10 @@ bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io)
	uint32_t num_children_reqs = 0;
	int rc;

	offset = bdev_io->internal.split_current_offset_blocks;
	remaining = bdev_io->internal.split_remaining_num_blocks;
	assert(bdev_io->internal.f.split);

	offset = bdev_io->internal.split.current_offset_blocks;
	remaining = bdev_io->internal.split.remaining_num_blocks;

	while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS)) {
		write_zeroes_blocks = spdk_min(remaining, bdev_io->bdev->max_write_zeroes);
@@ -3306,8 +3314,10 @@ bdev_copy_split(struct spdk_bdev_io *bdev_io)
	uint32_t num_children_reqs = 0;
	int rc;

	offset = bdev_io->internal.split_current_offset_blocks;
	remaining = bdev_io->internal.split_remaining_num_blocks;
	assert(bdev_io->internal.f.split);

	offset = bdev_io->internal.split.current_offset_blocks;
	remaining = bdev_io->internal.split.remaining_num_blocks;

	assert(bdev_io->bdev->max_copy != 0);
	while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_COPY_REQS)) {
@@ -3360,21 +3370,23 @@ bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)

	spdk_bdev_free_io(bdev_io);

	assert(parent_io->internal.f.split);

	if (!success) {
		parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
		/* If any child I/O failed, stop further splitting process. */
		parent_io->internal.split_current_offset_blocks += parent_io->internal.split_remaining_num_blocks;
		parent_io->internal.split_remaining_num_blocks = 0;
		parent_io->internal.split.current_offset_blocks += parent_io->internal.split.remaining_num_blocks;
		parent_io->internal.split.remaining_num_blocks = 0;
	}
	parent_io->internal.split_outstanding--;
	if (parent_io->internal.split_outstanding != 0) {
	parent_io->internal.split.outstanding--;
	if (parent_io->internal.split.outstanding != 0) {
		return;
	}

	/*
	 * Parent I/O finishes when all blocks are consumed.
	 */
	if (parent_io->internal.split_remaining_num_blocks == 0) {
	if (parent_io->internal.split.remaining_num_blocks == 0) {
		assert(parent_io->internal.cb != bdev_io_split_done);
		bdev_ch_remove_from_io_submitted(parent_io);
		spdk_trace_record(TRACE_BDEV_IO_DONE, parent_io->internal.ch->trace_id,
@@ -3428,10 +3440,11 @@ static void
bdev_io_split(struct spdk_bdev_io *bdev_io)
{
	assert(bdev_io_should_split(bdev_io));
	assert(bdev_io->internal.f.split);

	bdev_io->internal.split_current_offset_blocks = bdev_io->u.bdev.offset_blocks;
	bdev_io->internal.split_remaining_num_blocks = bdev_io->u.bdev.num_blocks;
	bdev_io->internal.split_outstanding = 0;
	bdev_io->internal.split.current_offset_blocks = bdev_io->u.bdev.offset_blocks;
	bdev_io->internal.split.remaining_num_blocks = bdev_io->u.bdev.num_blocks;
	bdev_io->internal.split.outstanding = 0;
	bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;

	switch (bdev_io->type) {
@@ -3590,7 +3603,7 @@ bdev_io_submit(struct spdk_bdev_io *bdev_io)
			      (uintptr_t)bdev_io, (uint64_t)bdev_io->type, bdev_io->internal.caller_ctx,
			      bdev_io->u.bdev.offset_blocks, ch->queue_depth);

	if (bdev_io->internal.split) {
	if (bdev_io->internal.f.split) {
		bdev_io_split(bdev_io);
		return;
	}
@@ -3685,7 +3698,7 @@ bdev_io_init(struct spdk_bdev_io *bdev_io,
	bdev_io->internal.memory_domain = NULL;
	bdev_io->internal.memory_domain_ctx = NULL;
	bdev_io->internal.data_transfer_cpl = NULL;
	bdev_io->internal.split = bdev_io_should_split(bdev_io);
	bdev_io->internal.f.split = bdev_io_should_split(bdev_io);
}

static bool
@@ -6302,7 +6315,7 @@ spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channe
	 * or emulate it using regular write request otherwise.
	 */
	if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES) ||
	    bdev_io->internal.split) {
	    bdev_io->internal.f.split) {
		bdev_io_submit(bdev_io);
		return 0;
	}
@@ -6939,8 +6952,10 @@ bdev_abort_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
		}
	}

	parent_io->internal.split_outstanding--;
	if (parent_io->internal.split_outstanding == 0) {
	assert(parent_io->internal.f.split);

	parent_io->internal.split.outstanding--;
	if (parent_io->internal.split.outstanding == 0) {
		if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
			bdev_abort_retry(parent_io);
		} else {
@@ -6973,7 +6988,7 @@ bdev_abort_io(struct spdk_bdev_desc *desc, struct spdk_bdev_channel *channel,
	bdev_io->type = SPDK_BDEV_IO_TYPE_ABORT;
	bdev_io_init(bdev_io, bdev, cb_arg, cb);

	if (bdev->split_on_optimal_io_boundary && bio_to_abort->internal.split) {
	if (bdev->split_on_optimal_io_boundary && bio_to_abort->internal.f.split) {
		assert(bdev_io_should_split(bio_to_abort));
		bdev_io->u.bdev.abort.bio_cb_arg = bio_to_abort;

@@ -7088,7 +7103,8 @@ bdev_abort_retry(void *ctx)
	}

	/* Use split_outstanding to manage the progress of aborting I/Os. */
	parent_io->internal.split_outstanding = matched_ios;
	parent_io->internal.f.split = true;
	parent_io->internal.split.outstanding = matched_ios;
}

static void
@@ -7110,7 +7126,8 @@ bdev_abort(struct spdk_bdev_io *parent_io)
	}

	/* Use split_outstanding to manage the progress of aborting I/Os. */
	parent_io->internal.split_outstanding = matched_ios;
	parent_io->internal.f.split = true;
	parent_io->internal.split.outstanding = matched_ios;
}

int
@@ -10524,7 +10541,7 @@ spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
	 * emulate it using regular read and write requests otherwise.
	 */
	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY) ||
	    bdev_io->internal.split) {
	    bdev_io->internal.f.split) {
		bdev_io_submit(bdev_io);
		return 0;
	}
+6 −6
Original line number Diff line number Diff line
@@ -245,7 +245,6 @@ bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *part_io = cb_arg;
	uint32_t offset, remapped_offset;
	spdk_bdev_io_completion_cb cb;
	int rc;

	switch (bdev_io->type) {
@@ -268,10 +267,8 @@ bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
		break;
	}


	cb = part_io->internal.stored_user_cb;
	if (cb != NULL) {
		cb(part_io, success, NULL);
	if (part_io->internal.f.split) {
		part_io->internal.split.stored_user_cb(part_io, success, NULL);
	} else {
		spdk_bdev_io_complete_base_io_status(part_io, bdev_io);
	}
@@ -300,7 +297,10 @@ spdk_bdev_part_submit_request_ext(struct spdk_bdev_part_channel *ch, struct spdk
	uint64_t offset, remapped_offset, remapped_src_offset;
	int rc = 0;

	bdev_io->internal.stored_user_cb = cb;
	if (cb != NULL) {
		bdev_io->internal.f.split = true;
		bdev_io->internal.split.stored_user_cb = cb;
	}

	offset = bdev_io->u.bdev.offset_blocks;
	remapped_offset = offset + part->internal.offset_blocks;