Commit fc9ca1d0 authored by Artur Paszkiewicz, committed by Jim Harris
Browse files

module/raid: move raid0 functions to separate file



Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Change-Id: Icf74bda40303b3c38a7c58a5331589f3ec229634
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/471078


Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom SPDK FC-NVMe CI <spdk-ci.pdl@broadcom.com>
parent 6e6d04b6
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -35,7 +35,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

CFLAGS += -I$(SPDK_ROOT_DIR)/lib/bdev/
C_SRCS = bdev_raid.c bdev_raid_rpc.c
C_SRCS = bdev_raid.c bdev_raid_rpc.c raid0.c
LIBNAME = bdev_raid

include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
+3 −313
Original line number Diff line number Diff line
@@ -256,31 +256,6 @@ raid_bdev_destruct(void *ctxt)
	return 0;
}

/*
 * brief:
 * raid_bdev_io_completion is the completion callback passed to the base bdev
 * read/write calls. It releases the child io and completes the parent io
 * with a status that mirrors the outcome of the child io.
 * params:
 * bdev_io - child bdev io that has finished on the base bdev
 * success - true if the child io succeeded, false otherwise
 * cb_arg - callback context; the parent bdev io
 * returns:
 * none
 */
static void
raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *parent_io = cb_arg;

	/* The child io is released first, then the parent io is completed. */
	spdk_bdev_free_io(bdev_io);
	spdk_bdev_io_complete(parent_io,
			      success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
}

/*
 * brief:
 * raid_bdev_base_io_completion is the completion callback for member disk requests
@@ -291,7 +266,7 @@ raid_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg
 * returns:
 * none
 */
static void
void
raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *parent_io = cb_arg;
@@ -309,134 +284,6 @@ raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *c
	}
}

static void
raid_bdev_queue_io_wait(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx,
			spdk_bdev_io_wait_cb cb_fn, int ret);

static void
raid0_waitq_io_process(void *ctx);

/*
 * brief:
 * raid0_submit_rw_request function is used to submit I/O to the correct
 * member disk for raid0 bdevs.
 * params:
 * bdev_io - parent bdev io
 * start_strip - start strip number of this io
 * returns:
 * none
 */
static void
raid0_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip)
{
	struct raid_bdev_io		*raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	struct raid_bdev_io_channel	*raid_ch = spdk_io_channel_get_ctx(raid_io->ch);
	struct raid_bdev		*raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
	uint64_t			pd_strip;
	uint32_t			offset_in_strip;
	uint64_t			pd_lba;
	uint64_t			pd_blocks;
	uint8_t				pd_idx;
	int				ret = 0;

	pd_strip = start_strip / raid_bdev->num_base_bdevs;
	pd_idx = start_strip % raid_bdev->num_base_bdevs;
	offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
	pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
	pd_blocks = bdev_io->u.bdev.num_blocks;
	if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit child io to bdev layer with using base bdev descriptors, base
	 * bdev lba, base bdev child io length in blocks, buffer, completion
	 * function and function callback context
	 */
	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
					     raid_ch->base_channel[pd_idx],
					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     pd_lba, pd_blocks, raid_bdev_io_completion,
					     bdev_io);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
					      raid_ch->base_channel[pd_idx],
					      bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					      pd_lba, pd_blocks, raid_bdev_io_completion,
					      bdev_io);
	} else {
		SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
		assert(0);
	}

	if (ret != 0) {
		raid_bdev_queue_io_wait(bdev_io, pd_idx, raid0_waitq_io_process, ret);
	}
}

/*
 * brief:
 * raid0_waitq_io_process function is the callback handed to
 * raid_bdev_queue_io_wait for raid0 read/write requests whose submission
 * returned an error. It recomputes the start strip of the parent io and
 * submits the request again.
 * params:
 * ctx - pointer to the parent bdev_io
 * returns:
 * none
 */
static void
raid0_waitq_io_process(void *ctx)
{
	struct spdk_bdev_io	*parent_io = ctx;
	struct raid_bdev	*raid = (struct raid_bdev *)parent_io->bdev->ctxt;

	/* The start strip is derived from the io offset and the strip size shift. */
	raid0_submit_rw_request(parent_io,
				parent_io->u.bdev.offset_blocks >> raid->strip_size_shift);
}

/*
 * brief:
 * raid0_start_rw_request function is the submit_request function for
 * read/write requests for raid0 bdevs. It records the io channel in the
 * raid io context and forwards the request to raid0_submit_rw_request.
 * A request whose first and last block fall into different strips is
 * completed with SPDK_BDEV_IO_STATUS_FAILED when the raid bdev has more
 * than one base bdev.
 * params:
 * ch - pointer to raid bdev io channel
 * bdev_io - pointer to parent bdev_io on raid bdev device
 * returns:
 * none
 */
static void
raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct raid_bdev_io		*raid_io;
	struct raid_bdev		*raid_bdev;
	uint64_t			start_strip;
	uint64_t			end_strip;

	raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	raid_io->ch = ch;
	start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
	end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >>
		    raid_bdev->strip_size_shift;
	if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) {
		/* Log before asserting so the reason is visible when the assert aborts. */
		SPDK_ERRLOG("I/O spans strip boundary!\n");
		assert(false);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
	raid0_submit_rw_request(bdev_io, start_strip);
}

/*
 * brief:
 * raid_bdev_queue_io_wait function processes the IO which failed to submit.
@@ -449,7 +296,7 @@ raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
 * returns:
 * none
 */
static void
void
raid_bdev_queue_io_wait(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx,
			spdk_bdev_io_wait_cb cb_fn, int ret)
{
@@ -539,163 +386,6 @@ _raid_bdev_submit_reset_request(struct spdk_io_channel *ch, struct spdk_bdev_io
	_raid_bdev_submit_reset_request_next(bdev_io);
}

/*
 * brief:
 * _raid0_get_io_range function fills io_range with the position of an io,
 * given in raid0 bdev blocks, relative to the strips and base bdevs.
 * params:
 * io_range - output range description
 * num_base_bdevs - number of base bdevs the strips are distributed over
 * strip_size - strip size in blocks
 * strip_size_shift - shift that converts a block offset into a strip index
 * offset_blocks - first block of the io
 * num_blocks - number of blocks in the io
 * returns:
 * none
 */
static inline void
_raid0_get_io_range(struct raid_bdev_io_range *io_range,
		    uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift,
		    uint64_t offset_blocks, uint64_t num_blocks)
{
	/* Last block touched by the io, in raid0 bdev scope */
	const uint64_t last_block = offset_blocks + num_blocks - 1;
	/* The start and end strip index in raid0 bdev scope */
	const uint64_t first_strip = offset_blocks >> strip_size_shift;
	const uint64_t last_strip = last_block >> strip_size_shift;

	io_range->strip_size = strip_size;

	/* Where the io starts: base bdev, strip within that bdev, block within the strip.
	 * The start offset within the strip may be unaligned.
	 */
	io_range->start_disk = first_strip % num_base_bdevs;
	io_range->start_strip_in_disk = first_strip / num_base_bdevs;
	io_range->start_offset_in_strip = offset_blocks % strip_size;

	/* Where the io ends; the end offset within the strip may be unaligned too.
	 * Strips in between are covered from boundary to boundary.
	 */
	io_range->end_disk = last_strip % num_base_bdevs;
	io_range->end_strip_in_disk = last_strip / num_base_bdevs;
	io_range->end_offset_in_strip = last_block % strip_size;

	/* Between 1 and num_base_bdevs base bdevs take part in the io;
	 * exactly 1 when the first and the last strip are the same one.
	 */
	io_range->n_disks_involved = spdk_min((last_strip - first_strip + 1), num_base_bdevs);
}

/*
 * brief:
 * _raid0_split_io_range function computes the part of an io range that
 * lands on one base bdev.
 * params:
 * io_range - range description filled by _raid0_get_io_range
 * disk_idx - index of the base bdev to compute the part for
 * _offset_in_disk - output, first block of the part on the base bdev
 * _nblocks_in_disk - output, number of blocks of the part on the base bdev
 * returns:
 * none
 */
static inline void
_raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx,
		      uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk)
{
	uint64_t start_strip_in_disk = io_range->start_strip_in_disk;
	uint64_t end_strip_in_disk = io_range->end_strip_in_disk;
	uint64_t strip_cnt;
	uint64_t first_off;
	uint64_t last_off;
	uint64_t disk_offset;
	uint64_t disk_nblocks;

	/* A base bdev with a lower index than start_disk begins one strip later. */
	if (disk_idx < io_range->start_disk) {
		start_strip_in_disk += 1;
	}

	/* A base bdev with a higher index than end_disk ends one strip earlier. */
	if (disk_idx > io_range->end_disk) {
		end_strip_in_disk -= 1;
	}

	assert(end_strip_in_disk >= start_strip_in_disk);
	strip_cnt = end_strip_in_disk - start_strip_in_disk + 1;

	/* Only the start and end base bdevs can have a partially covered strip. */
	first_off = (disk_idx == io_range->start_disk) ? io_range->start_offset_in_strip : 0;
	last_off = (disk_idx == io_range->end_disk) ? io_range->end_offset_in_strip :
		   io_range->strip_size - 1;

	disk_offset = first_off + start_strip_in_disk * io_range->strip_size;
	disk_nblocks = (strip_cnt - 1) * io_range->strip_size + last_off - first_off + 1;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID,
		      "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n",
		      io_range->strip_size, disk_idx, disk_offset, disk_nblocks);

	*_offset_in_disk = disk_offset;
	*_nblocks_in_disk = disk_nblocks;
}

/*
 * brief:
 * _raid_bdev_submit_null_payload_request_next function submits the next batch of
 * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
 * it will submit as many as possible unless one base io request fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * bdev_io - pointer to parent bdev_io on raid bdev device
 * returns:
 * none
 */
static void
_raid_bdev_submit_null_payload_request_next(void *_bdev_io)
{
	struct spdk_bdev_io		*bdev_io = _bdev_io;
	struct raid_bdev_io		*raid_io;
	struct raid_bdev		*raid_bdev;
	struct raid_bdev_io_channel	*raid_ch;
	struct raid_bdev_io_range	io_range;
	int				ret;

	raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	raid_ch = spdk_io_channel_get_ctx(raid_io->ch);

	_raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs,
			    raid_bdev->strip_size, raid_bdev->strip_size_shift,
			    bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);

	raid_io->base_bdev_io_expected = io_range.n_disks_involved;

	while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) {
		uint8_t disk_idx;
		uint64_t offset_in_disk;
		uint64_t nblocks_in_disk;

		/* base_bdev is started from start_disk to end_disk.
		 * It is possible that index of start_disk is larger than end_disk's.
		 */
		disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;

		_raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk);

		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc,
						     raid_ch->base_channel[disk_idx],
						     offset_in_disk, nblocks_in_disk,
						     raid_bdev_base_io_completion, bdev_io);
			break;

		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc,
						     raid_ch->base_channel[disk_idx],
						     offset_in_disk, nblocks_in_disk,
						     raid_bdev_base_io_completion, bdev_io);
			break;

		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}

		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else {
			raid_bdev_queue_io_wait(bdev_io, disk_idx,
						_raid_bdev_submit_null_payload_request_next, ret);
			return;
		}
	}
}

/*
 * brief:
 * _raid_bdev_submit_null_payload_request function is the submit_request function
@@ -720,7 +410,7 @@ _raid_bdev_submit_null_payload_request(struct spdk_io_channel *ch, struct spdk_b
	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID, "raid_bdev: type %d, range (0x%lx, 0x%lx)\n",
		      bdev_io->type, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);

	_raid_bdev_submit_null_payload_request_next(bdev_io);
	raid0_submit_null_payload_request(bdev_io);
}

/*
+10 −12
Original line number Diff line number Diff line
@@ -100,18 +100,6 @@ struct raid_bdev_io {
	uint8_t				base_bdev_io_status;
};

/*
 * raid0 IO range: position of an io relative to the strips and base bdevs,
 * as filled in by _raid0_get_io_range.
 */
struct raid_bdev_io_range {
	/* Strip size in blocks */
	uint64_t	strip_size;
	/* Index, within its base bdev, of the strip holding the first block */
	uint64_t	start_strip_in_disk;
	/* Index, within its base bdev, of the strip holding the last block */
	uint64_t	end_strip_in_disk;
	/* Offset in blocks of the first block inside its strip */
	uint64_t	start_offset_in_strip;
	/* Offset in blocks of the last block inside its strip */
	uint64_t	end_offset_in_strip;
	/* Index of the base bdev holding the first strip */
	uint8_t		start_disk;
	/* Index of the base bdev holding the last strip */
	uint8_t		end_disk;
	/* Number of base bdevs touched by the io, at most the number of base bdevs */
	uint8_t		n_disks_involved;
};

/*
 * raid_bdev is the single entity structure which contains SPDK block device
 * and the information related to any raid bdev either configured or
@@ -247,4 +235,14 @@ int raid_bdev_config_add_base_bdev(struct raid_bdev_config *raid_cfg,
void raid_bdev_config_cleanup(struct raid_bdev_config *raid_cfg);
struct raid_bdev_config *raid_bdev_config_find_by_name(const char *raid_name);

void
raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io);
void
raid0_submit_null_payload_request(void *_bdev_io);
void
raid_bdev_base_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
void
raid_bdev_queue_io_wait(struct spdk_bdev_io *raid_bdev_io, uint8_t pd_idx,
			spdk_bdev_io_wait_cb cb_fn, int ret);

#endif /* SPDK_BDEV_RAID_INTERNAL_H */
+361 −0
Original line number Diff line number Diff line
/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "bdev_raid.h"

#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/string.h"
#include "spdk/util.h"

#include "spdk_internal/log.h"

/*
 * brief:
 * raid0_bdev_io_completion is the completion callback passed to the base
 * bdev read/write calls issued for raid0. It releases the child io and
 * completes the parent io with a status that mirrors the outcome of the
 * child io.
 * params:
 * bdev_io - child bdev io that has finished on the base bdev
 * success - true if the child io succeeded, false otherwise
 * cb_arg - callback context; the parent bdev io
 * returns:
 * none
 */
static void
raid0_bdev_io_completion(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *parent_io = cb_arg;

	/* The child io is released first, then the parent io is completed. */
	spdk_bdev_free_io(bdev_io);
	spdk_bdev_io_complete(parent_io,
			      success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
}

static void
raid0_waitq_io_process(void *ctx);

/*
 * brief:
 * raid0_submit_rw_request function is used to submit I/O to the correct
 * member disk for raid0 bdevs.
 * params:
 * bdev_io - parent bdev io
 * start_strip - start strip number of this io
 * returns:
 * none
 */
static void
raid0_submit_rw_request(struct spdk_bdev_io *bdev_io, uint64_t start_strip)
{
	struct raid_bdev_io		*raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	struct raid_bdev_io_channel	*raid_ch = spdk_io_channel_get_ctx(raid_io->ch);
	struct raid_bdev		*raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
	uint64_t			pd_strip;
	uint32_t			offset_in_strip;
	uint64_t			pd_lba;
	uint64_t			pd_blocks;
	uint8_t				pd_idx;
	int				ret = 0;

	pd_strip = start_strip / raid_bdev->num_base_bdevs;
	pd_idx = start_strip % raid_bdev->num_base_bdevs;
	offset_in_strip = bdev_io->u.bdev.offset_blocks & (raid_bdev->strip_size - 1);
	pd_lba = (pd_strip << raid_bdev->strip_size_shift) + offset_in_strip;
	pd_blocks = bdev_io->u.bdev.num_blocks;
	if (raid_bdev->base_bdev_info[pd_idx].desc == NULL) {
		SPDK_ERRLOG("base bdev desc null for pd_idx %u\n", pd_idx);
		assert(0);
	}

	/*
	 * Submit child io to bdev layer with using base bdev descriptors, base
	 * bdev lba, base bdev child io length in blocks, buffer, completion
	 * function and function callback context
	 */
	assert(raid_ch != NULL);
	assert(raid_ch->base_channel);
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
		ret = spdk_bdev_readv_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
					     raid_ch->base_channel[pd_idx],
					     bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					     pd_lba, pd_blocks, raid0_bdev_io_completion,
					     bdev_io);
	} else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
		ret = spdk_bdev_writev_blocks(raid_bdev->base_bdev_info[pd_idx].desc,
					      raid_ch->base_channel[pd_idx],
					      bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
					      pd_lba, pd_blocks, raid0_bdev_io_completion,
					      bdev_io);
	} else {
		SPDK_ERRLOG("Recvd not supported io type %u\n", bdev_io->type);
		assert(0);
	}

	if (ret) {
		raid_bdev_queue_io_wait(bdev_io, pd_idx, raid0_waitq_io_process, ret);
	}
}

/*
 * brief:
 * raid0_waitq_io_process function is the callback handed to
 * raid_bdev_queue_io_wait for raid0 read/write requests whose submission
 * returned an error. It recomputes the start strip of the parent io and
 * submits the request again.
 * params:
 * ctx - pointer to the parent bdev_io
 * returns:
 * none
 */
static void
raid0_waitq_io_process(void *ctx)
{
	struct spdk_bdev_io	*parent_io = ctx;
	struct raid_bdev	*raid = (struct raid_bdev *)parent_io->bdev->ctxt;

	/* The start strip is derived from the io offset and the strip size shift. */
	raid0_submit_rw_request(parent_io,
				parent_io->u.bdev.offset_blocks >> raid->strip_size_shift);
}

/*
 * brief:
 * raid0_start_rw_request function is the submit_request function for
 * read/write requests for raid0 bdevs. It records the io channel in the
 * raid io context and forwards the request to raid0_submit_rw_request.
 * A request whose first and last block fall into different strips is
 * completed with SPDK_BDEV_IO_STATUS_FAILED when the raid bdev has more
 * than one base bdev.
 * params:
 * ch - pointer to raid bdev io channel
 * bdev_io - pointer to parent bdev_io on raid bdev device
 * returns:
 * none
 */
void
raid0_start_rw_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct raid_bdev_io		*raid_io;
	struct raid_bdev		*raid_bdev;
	uint64_t			start_strip;
	uint64_t			end_strip;

	raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	raid_io->ch = ch;
	start_strip = bdev_io->u.bdev.offset_blocks >> raid_bdev->strip_size_shift;
	end_strip = (bdev_io->u.bdev.offset_blocks + bdev_io->u.bdev.num_blocks - 1) >>
		    raid_bdev->strip_size_shift;
	if (start_strip != end_strip && raid_bdev->num_base_bdevs > 1) {
		/* Log before asserting so the reason is visible when the assert aborts. */
		SPDK_ERRLOG("I/O spans strip boundary!\n");
		assert(false);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}
	raid0_submit_rw_request(bdev_io, start_strip);
}

/*
 * raid0 IO range: position of an io relative to the strips and base bdevs,
 * as filled in by _raid0_get_io_range.
 */
struct raid_bdev_io_range {
	/* Strip size in blocks */
	uint64_t	strip_size;
	/* Index, within its base bdev, of the strip holding the first block */
	uint64_t	start_strip_in_disk;
	/* Index, within its base bdev, of the strip holding the last block */
	uint64_t	end_strip_in_disk;
	/* Offset in blocks of the first block inside its strip */
	uint64_t	start_offset_in_strip;
	/* Offset in blocks of the last block inside its strip */
	uint64_t	end_offset_in_strip;
	/* Index of the base bdev holding the first strip */
	uint8_t		start_disk;
	/* Index of the base bdev holding the last strip */
	uint8_t		end_disk;
	/* Number of base bdevs touched by the io, at most the number of base bdevs */
	uint8_t		n_disks_involved;
};

/*
 * brief:
 * _raid0_get_io_range function fills io_range with the position of an io,
 * given in raid0 bdev blocks, relative to the strips and base bdevs.
 * params:
 * io_range - output range description
 * num_base_bdevs - number of base bdevs the strips are distributed over
 * strip_size - strip size in blocks
 * strip_size_shift - shift that converts a block offset into a strip index
 * offset_blocks - first block of the io
 * num_blocks - number of blocks in the io
 * returns:
 * none
 */
static inline void
_raid0_get_io_range(struct raid_bdev_io_range *io_range,
		    uint8_t num_base_bdevs, uint64_t strip_size, uint64_t strip_size_shift,
		    uint64_t offset_blocks, uint64_t num_blocks)
{
	/* Last block touched by the io, in raid0 bdev scope */
	const uint64_t last_block = offset_blocks + num_blocks - 1;
	/* The start and end strip index in raid0 bdev scope */
	const uint64_t first_strip = offset_blocks >> strip_size_shift;
	const uint64_t last_strip = last_block >> strip_size_shift;

	io_range->strip_size = strip_size;

	/* Where the io starts: base bdev, strip within that bdev, block within the strip.
	 * The start offset within the strip may be unaligned.
	 */
	io_range->start_disk = first_strip % num_base_bdevs;
	io_range->start_strip_in_disk = first_strip / num_base_bdevs;
	io_range->start_offset_in_strip = offset_blocks % strip_size;

	/* Where the io ends; the end offset within the strip may be unaligned too.
	 * Strips in between are covered from boundary to boundary.
	 */
	io_range->end_disk = last_strip % num_base_bdevs;
	io_range->end_strip_in_disk = last_strip / num_base_bdevs;
	io_range->end_offset_in_strip = last_block % strip_size;

	/* Between 1 and num_base_bdevs base bdevs take part in the io;
	 * exactly 1 when the first and the last strip are the same one.
	 */
	io_range->n_disks_involved = spdk_min((last_strip - first_strip + 1), num_base_bdevs);
}

/*
 * brief:
 * _raid0_split_io_range function computes the part of an io range that
 * lands on one base bdev.
 * params:
 * io_range - range description filled by _raid0_get_io_range
 * disk_idx - index of the base bdev to compute the part for
 * _offset_in_disk - output, first block of the part on the base bdev
 * _nblocks_in_disk - output, number of blocks of the part on the base bdev
 * returns:
 * none
 */
static inline void
_raid0_split_io_range(struct raid_bdev_io_range *io_range, uint8_t disk_idx,
		      uint64_t *_offset_in_disk, uint64_t *_nblocks_in_disk)
{
	uint64_t start_strip_in_disk = io_range->start_strip_in_disk;
	uint64_t end_strip_in_disk = io_range->end_strip_in_disk;
	uint64_t strip_cnt;
	uint64_t first_off;
	uint64_t last_off;
	uint64_t disk_offset;
	uint64_t disk_nblocks;

	/* A base bdev with a lower index than start_disk begins one strip later. */
	if (disk_idx < io_range->start_disk) {
		start_strip_in_disk += 1;
	}

	/* A base bdev with a higher index than end_disk ends one strip earlier. */
	if (disk_idx > io_range->end_disk) {
		end_strip_in_disk -= 1;
	}

	assert(end_strip_in_disk >= start_strip_in_disk);
	strip_cnt = end_strip_in_disk - start_strip_in_disk + 1;

	/* Only the start and end base bdevs can have a partially covered strip. */
	first_off = (disk_idx == io_range->start_disk) ? io_range->start_offset_in_strip : 0;
	last_off = (disk_idx == io_range->end_disk) ? io_range->end_offset_in_strip :
		   io_range->strip_size - 1;

	disk_offset = first_off + start_strip_in_disk * io_range->strip_size;
	disk_nblocks = (strip_cnt - 1) * io_range->strip_size + last_off - first_off + 1;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV_RAID0,
		      "raid_bdev (strip_size 0x%lx) splits IO to base_bdev (%u) at (0x%lx, 0x%lx).\n",
		      io_range->strip_size, disk_idx, disk_offset, disk_nblocks);

	*_offset_in_disk = disk_offset;
	*_nblocks_in_disk = disk_nblocks;
}

/*
 * brief:
 * raid0_submit_null_payload_request function submits the next batch of
 * io requests with range but without payload, like FLUSH and UNMAP, to member disks;
 * it will submit as many as possible unless one base io request fails with -ENOMEM,
 * in which case it will queue itself for later submission.
 * params:
 * bdev_io - pointer to parent bdev_io on raid bdev device
 * returns:
 * none
 */
void
raid0_submit_null_payload_request(void *_bdev_io)
{
	struct spdk_bdev_io		*bdev_io = _bdev_io;
	struct raid_bdev_io		*raid_io;
	struct raid_bdev		*raid_bdev;
	struct raid_bdev_io_channel	*raid_ch;
	struct raid_bdev_io_range	io_range;
	int				ret;

	raid_bdev = (struct raid_bdev *)bdev_io->bdev->ctxt;
	raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
	raid_ch = spdk_io_channel_get_ctx(raid_io->ch);

	_raid0_get_io_range(&io_range, raid_bdev->num_base_bdevs,
			    raid_bdev->strip_size, raid_bdev->strip_size_shift,
			    bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks);

	raid_io->base_bdev_io_expected = io_range.n_disks_involved;

	while (raid_io->base_bdev_io_submitted < raid_io->base_bdev_io_expected) {
		uint8_t disk_idx;
		uint64_t offset_in_disk;
		uint64_t nblocks_in_disk;

		/* base_bdev is started from start_disk to end_disk.
		 * It is possible that index of start_disk is larger than end_disk's.
		 */
		disk_idx = (io_range.start_disk + raid_io->base_bdev_io_submitted) % raid_bdev->num_base_bdevs;

		_raid0_split_io_range(&io_range, disk_idx, &offset_in_disk, &nblocks_in_disk);

		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_UNMAP:
			ret = spdk_bdev_unmap_blocks(raid_bdev->base_bdev_info[disk_idx].desc,
						     raid_ch->base_channel[disk_idx],
						     offset_in_disk, nblocks_in_disk,
						     raid_bdev_base_io_completion, bdev_io);
			break;

		case SPDK_BDEV_IO_TYPE_FLUSH:
			ret = spdk_bdev_flush_blocks(raid_bdev->base_bdev_info[disk_idx].desc,
						     raid_ch->base_channel[disk_idx],
						     offset_in_disk, nblocks_in_disk,
						     raid_bdev_base_io_completion, bdev_io);
			break;

		default:
			SPDK_ERRLOG("submit request, invalid io type with null payload %u\n", bdev_io->type);
			assert(false);
			ret = -EIO;
		}

		if (ret == 0) {
			raid_io->base_bdev_io_submitted++;
		} else {
			raid_bdev_queue_io_wait(bdev_io, disk_idx,
						raid0_submit_null_payload_request, ret);
			return;
		}
	}
}

SPDK_LOG_REGISTER_COMPONENT("bdev_raid0", SPDK_LOG_BDEV_RAID0)
+1 −0
Original line number Diff line number Diff line
@@ -37,6 +37,7 @@
#include "spdk_internal/mock.h"
#include "bdev/raid/bdev_raid.c"
#include "bdev/raid/bdev_raid_rpc.c"
#include "bdev/raid/raid0.c"

#define MAX_BASE_DRIVES 32
#define MAX_RAIDS 2