Commit 860a0750, authored by Konrad Sztyber and committed by Tomasz Zawadzki
Browse files

bdev/ocssd: read / write support



This patch adds translation of zone addresses to Open Channel LBAs, as well as
initial support for read and write commands. Each IO command is
currently limited to a single zone (chunk).

Change-Id: I3ee6d58323871f0651ac1d5e8dda28eb6d687a95
Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/467149


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom SPDK FC-NVMe CI <spdk-ci.pdl@broadcom.com>
Reviewed-by: Wojciech Malikowski <wojciech.malikowski@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
parent b9635c2f
Loading
Loading
Loading
Loading
+235 −3
Original line number Diff line number Diff line
@@ -39,15 +39,29 @@
#include "spdk/nvme_ocssd.h"
#include "spdk/nvme_ocssd_spec.h"
#include "spdk_internal/log.h"
#include "spdk/nvme.h"
#include "common.h"
#include "bdev_ocssd.h"

/* Shift amounts (bit positions) of the individual fields inside an Open Channel LBA. */
struct bdev_ocssd_lba_offsets {
	/* Left shift applied to the group index */
	uint32_t grp;
	/* Left shift applied to the parallel unit index */
	uint32_t pu;
	/* Left shift applied to the chunk index */
	uint32_t chk;
	/* Left shift applied to the logical block index within a chunk */
	uint32_t lbk;
};

/* Per-IO driver context: cursor into the bdev_io's iovec array used by the SGL callbacks. */
struct bdev_ocssd_io {
	/* Index of the iovec that will be handed out next */
	size_t iov_pos;
	/* Byte offset into that iovec at which the next element starts */
	size_t iov_off;
};

/* Open Channel bdev; currently only wraps the generic NVMe bdev structure. */
struct ocssd_bdev {
	struct nvme_bdev nvme_bdev;
};

/* Open Channel specific, per-namespace state. */
struct bdev_ocssd_ns {
	/* Device geometry (chunk size, number of chunks / parallel units / groups, LBA format) */
	struct spdk_ocssd_geometry_data	geometry;
	/* Bit positions of the address fields, used when building Open Channel LBAs */
	struct bdev_ocssd_lba_offsets	lba_offsets;
};

static struct bdev_ocssd_ns *
@@ -76,7 +90,7 @@ bdev_ocssd_config_json(struct spdk_json_write_ctx *w)
/*
 * Report how many bytes of per-IO driver context this module needs.
 *
 * The SGL callbacks keep their iovec cursor (struct bdev_ocssd_io) in
 * bdev_io->driver_ctx, so the reported size must cover that structure.
 * Returning 0 here would leave the cursor without any backing storage.
 */
static int
bdev_ocssd_get_ctx_size(void)
{
	return sizeof(struct bdev_ocssd_io);
}

static struct spdk_bdev_module ocssd_if = {
@@ -112,17 +126,218 @@ bdev_ocssd_destruct(void *ctx)
	return 0;
}

/*
 * Translate a bdev LBA into the LBA format understood by the Open Channel device.
 *
 * The bdev LBA is interpreted as [ zone id ][ zone offset ]:
 *  - the zone offset (lba % clba) is the logical block within a chunk,
 *  - the zone id (lba / clba) is split, least significant part first, into
 *    parallel unit, group and chunk.
 *
 * Because the parallel unit and group are taken from the low part of the zone id,
 * neighbouring zones end up on different parallel units / groups, which allows them
 * to be accessed in parallel.
 *
 * Every component is finally shifted to the bit position stored in the namespace's
 * lba_offsets and the results are OR-ed together.
 */
static uint64_t
bdev_ocssd_to_disk_lba(struct ocssd_bdev *ocssd_bdev, uint64_t lba)
{
	struct bdev_ocssd_ns *ns_ctx = bdev_ocssd_get_ns_from_nvme(ocssd_bdev->nvme_bdev.nvme_ns);
	const struct spdk_ocssd_geometry_data *geometry = &ns_ctx->geometry;
	const struct bdev_ocssd_lba_offsets *shift = &ns_ctx->lba_offsets;
	uint64_t zone_id, block, unit, group, chunk;

	/* Split off the offset within the zone; what remains is the zone id */
	block = lba % geometry->clba;
	zone_id = lba / geometry->clba;

	/* Peel the zone id apart, consuming one address level at a time */
	unit = zone_id % geometry->num_pu;
	zone_id /= geometry->num_pu;

	group = zone_id % geometry->num_grp;
	zone_id /= geometry->num_grp;

	chunk = zone_id % geometry->num_chk;

	return (group << shift->grp) |
	       (unit  << shift->pu)  |
	       (chunk << shift->chk) |
	       (block << shift->lbk);
}

/*
 * SGL reset callback: position the iovec cursor stored in the IO's driver context
 * at the given byte offset from the start of the payload.
 *
 * \param cb_arg the spdk_bdev_io the cursor belongs to
 * \param offset byte offset into the concatenated iovecs
 *
 * An offset that does not fall inside any iovec trips the assertion at the end.
 */
static void
bdev_ocssd_reset_sgl(void *cb_arg, uint32_t offset)
{
	struct spdk_bdev_io *io = cb_arg;
	struct bdev_ocssd_io *cursor = (struct bdev_ocssd_io *)io->driver_ctx;
	const size_t iov_count = (size_t)io->u.bdev.iovcnt;
	const struct iovec *entry;
	size_t idx;

	cursor->iov_off = 0;

	/* Skip whole iovecs until the remaining offset lands inside one of them */
	for (idx = 0; idx < iov_count; idx++) {
		entry = &io->u.bdev.iovs[idx];
		if (offset < entry->iov_len) {
			cursor->iov_pos = idx;
			cursor->iov_off = offset;
			return;
		}

		offset -= entry->iov_len;
	}

	/* Ran past the last iovec: leave the cursor at the end of the array */
	cursor->iov_pos = idx;

	assert(false && "Invalid offset length");
}

/*
 * SGL iteration callback: hand out the next scatter-gather element and advance
 * the iovec cursor stored in the IO's driver context.
 *
 * \param cb_arg  the spdk_bdev_io being processed
 * \param address set to the start of the element
 * \param length  set to the element's length in bytes
 *
 * \return always 0
 */
static int
bdev_ocssd_next_sge(void *cb_arg, void **address, uint32_t *length)
{
	struct spdk_bdev_io *io = cb_arg;
	struct bdev_ocssd_io *cursor = (struct bdev_ocssd_io *)io->driver_ctx;
	struct iovec *entry;
	char *start;
	uint32_t remaining;

	assert(cursor->iov_pos < (size_t)io->u.bdev.iovcnt);
	entry = &io->u.bdev.iovs[cursor->iov_pos];

	start = entry->iov_base;
	remaining = entry->iov_len;

	/* A non-zero offset (set by the reset callback) trims the front of this iovec */
	if (cursor->iov_off != 0) {
		assert(cursor->iov_off < entry->iov_len);
		start += cursor->iov_off;
		remaining -= cursor->iov_off;
	}

	assert(cursor->iov_off + remaining == entry->iov_len);

	*address = start;
	*length = remaining;

	/* The rest of this iovec has been consumed; continue with the next one */
	cursor->iov_off = 0;
	cursor->iov_pos++;

	return 0;
}

/*
 * Read completion callback: complete the bdev IO passed as ctx with the status
 * code type and status code taken from the NVMe completion (cdw0 is reported as 0).
 */
static void
bdev_ocssd_read_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_bdev_io *completed_io = ctx;
	const int sct = cpl->status.sct;
	const int sc = cpl->status.sc;

	spdk_bdev_io_complete_nvme_status(completed_io, 0, sct, sc);
}

/*
 * Submit a vectored read for the given bdev IO.
 *
 * The request must stay within a single zone; a request whose block range would
 * cross a zone boundary is rejected with -EINVAL.  Otherwise the starting block is
 * translated to an Open Channel LBA and the read is submitted on the channel's qpair.
 *
 * \return -EINVAL on a zone-crossing request, otherwise the value returned by
 * spdk_nvme_ns_cmd_readv_with_md().
 */
static int
bdev_ocssd_read(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
{
	struct ocssd_bdev *ocdev = bdev_io->bdev->ctxt;
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ioch);
	struct bdev_ocssd_io *io_ctx = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
	struct nvme_bdev *nbdev = &ocdev->nvme_bdev;
	const size_t blocks_per_zone = nbdev->disk.zone_size;

	/* Offset inside the zone plus the request length must not exceed the zone size */
	if (blocks_per_zone <
	    (bdev_io->u.bdev.offset_blocks % blocks_per_zone) + bdev_io->u.bdev.num_blocks) {
		SPDK_ERRLOG("Tried to cross zone boundary during read command\n");
		return -EINVAL;
	}

	/* Start the SGL cursor at the beginning of the first iovec */
	io_ctx->iov_off = 0;
	io_ctx->iov_pos = 0;

	return spdk_nvme_ns_cmd_readv_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair,
					      bdev_ocssd_to_disk_lba(ocdev, bdev_io->u.bdev.offset_blocks),
					      bdev_io->u.bdev.num_blocks, bdev_ocssd_read_cb,
					      bdev_io, 0, bdev_ocssd_reset_sgl,
					      bdev_ocssd_next_sge, bdev_io->u.bdev.md_buf, 0, 0);
}

/*
 * Write completion callback: complete the bdev IO passed as ctx with the status
 * code type and status code taken from the NVMe completion (cdw0 is reported as 0).
 */
static void
bdev_ocssd_write_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
{
	struct spdk_bdev_io *completed_io = ctx;
	const int sct = cpl->status.sct;
	const int sc = cpl->status.sc;

	spdk_bdev_io_complete_nvme_status(completed_io, 0, sct, sc);
}

/*
 * Submit a vectored write for the given bdev IO.
 *
 * The request must stay within a single zone; a request whose block range would
 * cross a zone boundary is rejected with -EINVAL.  Otherwise the starting block is
 * translated to an Open Channel LBA and the write is submitted on the channel's qpair.
 *
 * \return -EINVAL on a zone-crossing request, otherwise the value returned by
 * spdk_nvme_ns_cmd_writev_with_md().
 */
static int
bdev_ocssd_write(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
{
	struct ocssd_bdev *ocdev = bdev_io->bdev->ctxt;
	struct nvme_io_channel *nvme_ch = spdk_io_channel_get_ctx(ioch);
	struct bdev_ocssd_io *io_ctx = (struct bdev_ocssd_io *)bdev_io->driver_ctx;
	struct nvme_bdev *nbdev = &ocdev->nvme_bdev;
	const size_t blocks_per_zone = nbdev->disk.zone_size;

	/* Offset inside the zone plus the request length must not exceed the zone size */
	if (blocks_per_zone <
	    (bdev_io->u.bdev.offset_blocks % blocks_per_zone) + bdev_io->u.bdev.num_blocks) {
		SPDK_ERRLOG("Tried to cross zone boundary during write command\n");
		return -EINVAL;
	}

	/* Start the SGL cursor at the beginning of the first iovec */
	io_ctx->iov_off = 0;
	io_ctx->iov_pos = 0;

	return spdk_nvme_ns_cmd_writev_with_md(nbdev->nvme_ns->ns, nvme_ch->qpair,
					       bdev_ocssd_to_disk_lba(ocdev, bdev_io->u.bdev.offset_blocks),
					       bdev_io->u.bdev.num_blocks, bdev_ocssd_write_cb,
					       bdev_io, 0, bdev_ocssd_reset_sgl,
					       bdev_ocssd_next_sge, bdev_io->u.bdev.md_buf, 0, 0);
}

/*
 * Buffer allocation callback for read requests.
 *
 * \param ioch    IO channel the request was submitted on
 * \param bdev_io the read request
 * \param success false if no data buffer could be obtained
 *
 * When the buffer is available the read is submitted.  If the buffer could not be
 * obtained, or the submission fails, the IO is completed here: with NOMEM status for
 * a missing buffer or an -ENOMEM submission error, and with FAILED status otherwise.
 */
static void
bdev_ocssd_io_get_buf_cb(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io, bool success)
{
	int rc;

	if (!success) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		return;
	}

	rc = bdev_ocssd_read(ioch, bdev_io);
	/* Submission failure is the exceptional path, same as in bdev_ocssd_submit_request() */
	if (spdk_unlikely(rc != 0)) {
		if (rc == -ENOMEM) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_NOMEM);
		} else {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

/*
 * Entry point for IO requests: dispatch the request according to its type.
 *
 * Reads first request a data buffer, passing bdev_ocssd_io_get_buf_cb (which submits
 * the read) as the callback.  Writes are submitted directly.  Any other type is
 * treated as -EINVAL.  A non-zero result completes the IO here, with NOMEM status
 * for -ENOMEM and FAILED status for everything else.
 */
static void
bdev_ocssd_submit_request(struct spdk_io_channel *ioch, struct spdk_bdev_io *bdev_io)
{
	int status;

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		/* Nothing to report at this point; errors are handled in the buffer callback */
		spdk_bdev_io_get_buf(bdev_io, bdev_ocssd_io_get_buf_cb,
				     bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
		return;

	case SPDK_BDEV_IO_TYPE_WRITE:
		status = bdev_ocssd_write(ioch, bdev_io);
		break;

	default:
		status = -EINVAL;
		break;
	}

	if (spdk_unlikely(status != 0)) {
		spdk_bdev_io_complete(bdev_io, status == -ENOMEM ?
				      SPDK_BDEV_IO_STATUS_NOMEM : SPDK_BDEV_IO_STATUS_FAILED);
	}
}

/*
 * Tell the caller whether the given IO type is handled by this module.
 * Only reads and writes are supported; ctx is not used.
 */
static bool
bdev_ocssd_io_type_supported(void *ctx, enum spdk_bdev_io_type type)
{
	return type == SPDK_BDEV_IO_TYPE_READ || type == SPDK_BDEV_IO_TYPE_WRITE;
}

static struct spdk_io_channel *
bdev_ocssd_get_io_channel(void *ctx)
@@ -310,6 +525,7 @@ bdev_ocssd_geometry_cb(void *_ctx, const struct spdk_nvme_cpl *cpl)
{
	struct bdev_ocssd_populate_ns_ctx *ctx = _ctx;
	struct nvme_bdev_ns *nvme_ns = ctx->nvme_ns;
	struct bdev_ocssd_ns *ocssd_ns = bdev_ocssd_get_ns_from_nvme(nvme_ns);
	int rc = 0;

	if (spdk_unlikely(spdk_nvme_cpl_is_error(cpl))) {
@@ -317,6 +533,14 @@ bdev_ocssd_geometry_cb(void *_ctx, const struct spdk_nvme_cpl *cpl)
		free(nvme_ns->type_ctx);
		nvme_ns->type_ctx = NULL;
		rc = -EIO;
	} else {
		ocssd_ns->lba_offsets.lbk = 0;
		ocssd_ns->lba_offsets.chk = ocssd_ns->lba_offsets.lbk +
					    ocssd_ns->geometry.lbaf.lbk_len;
		ocssd_ns->lba_offsets.pu  = ocssd_ns->lba_offsets.chk +
					    ocssd_ns->geometry.lbaf.chk_len;
		ocssd_ns->lba_offsets.grp = ocssd_ns->lba_offsets.pu +
					    ocssd_ns->geometry.lbaf.pu_len;
	}

	nvme_ctrlr_populate_namespace_done(ctx->nvme_ctx, nvme_ns, rc);
@@ -330,8 +554,15 @@ bdev_ocssd_populate_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
{
	struct bdev_ocssd_ns *ocssd_ns;
	struct bdev_ocssd_populate_ns_ctx *ctx;
	struct spdk_nvme_ns *ns;
	int rc;

	ns = spdk_nvme_ctrlr_get_ns(nvme_bdev_ctrlr->ctrlr, nvme_ns->id);
	if (ns == NULL) {
		nvme_ctrlr_populate_namespace_done(nvme_ctx, nvme_ns, -EINVAL);
		return;
	}

	ctx = calloc(1, sizeof(*ctx));
	if (ctx == NULL) {
		nvme_ctrlr_populate_namespace_done(nvme_ctx, nvme_ns, -ENOMEM);
@@ -346,6 +577,7 @@ bdev_ocssd_populate_namespace(struct nvme_bdev_ctrlr *nvme_bdev_ctrlr,
	}

	nvme_ns->type_ctx = ocssd_ns;
	nvme_ns->ns = ns;
	ctx->nvme_ctx = nvme_ctx;
	ctx->nvme_ns = nvme_ns;

+147 −1
Original line number Diff line number Diff line
@@ -50,6 +50,8 @@ DEFINE_STUB(spdk_nvme_ns_get_extended_sector_size, uint32_t, (struct spdk_nvme_n
/*
 * Stubs for external symbols used by the code under test.
 * NOTE(review): DEFINE_STUB presumably takes (name, return type, parameters, value to
 * return) and DEFINE_STUB_V declares a stub returning void - confirm against the
 * mock helper header.
 */
DEFINE_STUB(spdk_nvme_ns_is_active, bool, (struct spdk_nvme_ns *ns), true);
DEFINE_STUB_V(spdk_opal_close, (struct spdk_opal_dev *dev));
DEFINE_STUB(spdk_opal_revert_poll, int, (struct spdk_opal_dev *dev), 0);
DEFINE_STUB_V(spdk_bdev_io_complete_nvme_status, (struct spdk_bdev_io *bdev_io, uint32_t cdw0,
		int sct, int sc));

struct nvme_request {
	spdk_nvme_cmd_cb cb_fn;
@@ -381,6 +383,22 @@ spdk_nvme_ns_cmd_readv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *
	return 0;
}

/*
 * Test replacement for the vectored write submission: a request carrying the
 * completion callback and its argument is allocated and appended to the qpair's
 * request queue.  The remaining arguments are ignored.
 *
 * \return always 0
 */
int
spdk_nvme_ns_cmd_writev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair,
				uint64_t lba, uint32_t lba_count,
				spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags,
				spdk_nvme_req_reset_sgl_cb reset_sgl_fn,
				spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata,
				uint16_t apptag_mask, uint16_t apptag)
{
	struct nvme_request *pending = alloc_request(cb_fn, cb_arg);

	TAILQ_INSERT_TAIL(&qpair->requests, pending, tailq);

	return 0;
}

static void
create_bdev_cb(const char *bdev_name, int status, void *ctx)
{
@@ -528,6 +546,133 @@ test_device_geometry(void)
	free_controller(ctrlr);
}

/*
 * Build the expected Open Channel LBA from its individual components.
 *
 * The fields are packed starting from bit 0 in the order logical block, chunk,
 * parallel unit, group, each occupying the number of bits given by the geometry's
 * LBA format.  Every component is checked against the width of its field, so an
 * out-of-range value is reported as a test failure instead of silently spilling
 * into the neighbouring field.
 *
 * \param geo geometry providing the field widths (lbaf)
 * \param lbk logical block within the chunk
 * \param chk chunk index
 * \param pu  parallel unit index
 * \param grp group index
 *
 * \return the assembled LBA
 */
static uint64_t
generate_lba(const struct spdk_ocssd_geometry_data *geo, uint64_t lbk,
	     uint64_t chk, uint64_t pu, uint64_t grp)
{
	uint64_t lba, len;

	lba = lbk;
	len = geo->lbaf.lbk_len;
	CU_ASSERT(lbk < (1ull << geo->lbaf.lbk_len));

	lba |= chk << len;
	len += geo->lbaf.chk_len;
	CU_ASSERT(chk < (1ull << geo->lbaf.chk_len));

	lba |= pu << len;
	len += geo->lbaf.pu_len;
	CU_ASSERT(pu < (1ull << geo->lbaf.pu_len));

	lba |= grp << len;
	/* The group is range-checked like every other component */
	CU_ASSERT(grp < (1ull << geo->lbaf.grp_len));

	return lba;
}

/*
 * Verify bdev_ocssd_to_disk_lba() against LBAs assembled by generate_lba() for two
 * geometries: one where every dimension exactly fills its bit field, and one where
 * the dimensions are smaller than their bit fields allow and there is a single group.
 */
static void
test_lba_translation(void)
{
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_bdev_ctrlr *nvme_bdev_ctrlr;
	struct spdk_nvme_transport_id trid = { .traddr = "00:00:00" };
	const char *controller_name = "nvme0";
	const char *bdev_name = "nvme0n1";
	struct spdk_ocssd_geometry_data geometry = {};
	struct ocssd_bdev *ocssd_bdev;
	struct spdk_bdev *bdev;
	uint64_t lba;
	int rc;

	/* Every count equals 2^(field width): 512 = 2^9, 64 = 2^6, 8 = 2^3, 4 = 2^2 */
	geometry = (struct spdk_ocssd_geometry_data) {
		.clba = 512,
		.num_chk = 64,
		.num_pu = 8,
		.num_grp = 4,
		.lbaf = {
			.lbk_len = 9,
			.chk_len = 6,
			.pu_len = 3,
			.grp_len = 2,
		}
	};

	ctrlr = create_controller(&trid, 1, &geometry);
	nvme_bdev_ctrlr = create_nvme_bdev_controller(&trid, controller_name);

	rc = create_bdev(controller_name, bdev_name, 1);
	CU_ASSERT_EQUAL(rc, 0);

	bdev = spdk_bdev_get_by_name(bdev_name);
	SPDK_CU_ASSERT_FATAL(bdev != NULL);
	ocssd_bdev = SPDK_CONTAINEROF(bdev, struct ocssd_bdev, nvme_bdev.disk);

	/* First block of the first zone */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, 0);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 0, 0, 0, 0));

	/* Last block of the first zone stays in the same chunk / parallel unit / group */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size - 1);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, bdev->zone_size - 1, 0, 0, 0));

	/* The second zone is expected on the next parallel unit */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 0, 0, 1, 0));

	/* After num_pu zones the parallel unit wraps and the group advances */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size * geometry.num_pu);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 0, 0, 0, 1));

	/* A non-zero offset inside a zone only affects the logical block field */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size * geometry.num_pu + 68);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 68, 0, 0, 1));

	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size + 68);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 68, 0, 1, 0));

	delete_nvme_bdev_controller(nvme_bdev_ctrlr);
	free_controller(ctrlr);

	/* Counts smaller than their bit fields allow (5120 < 2^13, 501 < 2^9, 9 < 2^4), one group */
	geometry = (struct spdk_ocssd_geometry_data) {
		.clba = 5120,
		.num_chk = 501,
		.num_pu = 9,
		.num_grp = 1,
		.lbaf = {
			.lbk_len = 13,
			.chk_len = 9,
			.pu_len = 4,
			.grp_len = 1,
		}
	};

	ctrlr = create_controller(&trid, 1, &geometry);
	nvme_bdev_ctrlr = create_nvme_bdev_controller(&trid, controller_name);

	rc = create_bdev(controller_name, bdev_name, 1);
	CU_ASSERT_EQUAL(rc, 0);

	bdev = spdk_bdev_get_by_name(bdev_name);
	SPDK_CU_ASSERT_FATAL(bdev != NULL);
	ocssd_bdev = SPDK_CONTAINEROF(bdev, struct ocssd_bdev, nvme_bdev.disk);

	/* First and last block of the first zone */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, 0);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 0, 0, 0, 0));

	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size - 1);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, bdev->zone_size - 1, 0, 0, 0));

	/* The second zone is expected on the next parallel unit */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 0, 0, 1, 0));

	/* Zone num_pu - 1 is expected on the last parallel unit */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size * (geometry.num_pu - 1));
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 0, 0, geometry.num_pu - 1, 0));

	/* With a single group, wrapping the parallel units moves on to the next chunk */
	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size * (geometry.num_pu));
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 0, 1, 0, 0));

	lba = bdev_ocssd_to_disk_lba(ocssd_bdev, bdev->zone_size * (geometry.num_pu) + 68);
	CU_ASSERT_EQUAL(lba, generate_lba(&geometry, 68, 1, 0, 0));

	delete_nvme_bdev_controller(nvme_bdev_ctrlr);

	free_controller(ctrlr);
}

int
main(int argc, const char **argv)
{
@@ -546,7 +691,8 @@ main(int argc, const char **argv)

	if (
		CU_add_test(suite, "test_create_controller", test_create_controller) == NULL ||
		CU_add_test(suite, "test_device_geometry", test_device_geometry) == NULL
		CU_add_test(suite, "test_device_geometry", test_device_geometry) == NULL ||
		CU_add_test(suite, "test_lba_translation", test_lba_translation) == NULL
	) {
		CU_cleanup_registry();
		return CU_get_error();