Commit 58c4dac9 authored by Wojciech Malikowski, committed by Jim Harris

lib/ftl: Limit write queue depth to one per chunk



There are no guarantees on the order of completion
within an NVMe IO submission queue, so wait for write
completion on a specific chunk before submitting another
write to it.

To control chunk occupancy, split the IO into child requests
and release the chunk in the IO completion callback.
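
In short, each chunk carries a busy flag: submitting a write marks the chunk busy, the write completion callback clears it, and a write that hits a busy chunk is deferred and retried later. Below is a minimal, self-contained sketch of that pattern; the names (struct chunk, struct write_io, submit_write, write_cmpl_cb) are illustrative only and are not the SPDK API.

/*
 * Minimal sketch of "at most one outstanding write per chunk".
 * Illustrative code only -- not the SPDK implementation.
 */
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>

struct chunk {
	bool busy;		/* set while a write is in flight */
};

struct write_io {
	struct chunk *chunk;
	size_t lbk_cnt;
};

/* Returns 0 on submission, -EAGAIN if the chunk still has a write in flight. */
static int
submit_write(struct write_io *io)
{
	if (io->chunk->busy) {
		/* The device gives no ordering guarantee between queued
		 * commands, so defer until the previous write completes. */
		return -EAGAIN;
	}

	io->chunk->busy = true;
	/* ... hand the request to the device here ... */
	return 0;
}

/* Completion callback: release the chunk so the next write can be submitted. */
static void
write_cmpl_cb(struct write_io *io)
{
	io->chunk->busy = false;
}

int
main(void)
{
	struct chunk c = { .busy = false };
	struct write_io io = { .chunk = &c, .lbk_cnt = 16 };

	if (submit_write(&io) == 0) {
		write_cmpl_cb(&io);	/* the device would invoke this asynchronously */
	}
	return 0;
}

In the diff below, this corresponds to the new busy field in struct ftl_chunk, the current_io pointer used to resume a deferred parent IO, ftl_submit_child_write(), and ftl_io_child_write_cb() clearing the flag when a child write completes.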

Change-Id: I44147a21b528a7f33fb92b9e77d7de8f5b18f8ff
Signed-off-by: Wojciech Malikowski <wojciech.malikowski@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/449239


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Young Tack Jin <youngtack.jin@circuitblvd.com>
Reviewed-by: Claire Jihyun In <claire.in@circuitblvd.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Parent: 04962482
+7 −1
@@ -765,6 +765,7 @@ ftl_band_write_md(struct ftl_band *band, void *data, size_t lbk_cnt,
 {
 	struct spdk_ftl_dev *dev = band->dev;
 	struct ftl_io *io;
+	int rc;
 
 	io = ftl_io_init_md_write(dev, band, data,
 				  spdk_divide_round_up(lbk_cnt, dev->xfer_size), cb);
@@ -774,7 +775,12 @@ ftl_band_write_md(struct ftl_band *band, void *data, size_t lbk_cnt,
 
 	md_fn(dev, &band->md, data);
 
-	return ftl_io_write(io);
+	rc = ftl_io_write(io);
+	if (rc == -EAGAIN) {
+		rc = 0;
+	}
+
+	return rc;
 }
 
 void
+6 −1
@@ -55,6 +55,9 @@ struct ftl_chunk {
 	/* Block state */
 	enum ftl_chunk_state			state;
 
+	/* Indicates that there is inflight write */
+	bool					busy;
+
 	/* First PPA */
 	struct ftl_ppa				start_ppa;
 
@@ -250,7 +253,9 @@ ftl_band_chunk_is_first(struct ftl_band *band, struct ftl_chunk *chunk)
 static inline int
 ftl_chunk_is_writable(const struct ftl_chunk *chunk)
 {
-	return chunk->state == FTL_CHUNK_STATE_OPEN || chunk->state == FTL_CHUNK_STATE_FREE;
+	return (chunk->state == FTL_CHUNK_STATE_OPEN ||
+		chunk->state == FTL_CHUNK_STATE_FREE) &&
+	       !chunk->busy;
 }
 
 #endif /* FTL_BAND_H */
+111 −14
@@ -66,6 +66,9 @@ struct ftl_wptr {
 	/* Current erase block */
 	struct ftl_chunk		*chunk;
 
+	/* IO that is currently processed */
+	struct ftl_io			*current_io;
+
 	/* List link */
 	LIST_ENTRY(ftl_wptr)		list_entry;
 };
@@ -438,6 +441,7 @@ ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size)
 		ftl_band_set_state(band, FTL_BAND_STATE_FULL);
 	}
 
+	wptr->chunk->busy = true;
 	wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size);
 	wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk);
 
@@ -740,8 +744,8 @@ ftl_submit_read(struct ftl_io *io, ftl_next_ppa_fn next_ppa,
 			break;
 		}
 
-		ftl_io_advance(io, lbk_cnt);
 		ftl_io_inc_req(io);
+		ftl_io_advance(io, lbk_cnt);
 	}
 
 	/* If we didn't have to read anything from the device, */
@@ -978,37 +982,123 @@ ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry,
	pthread_spin_unlock(&band->md.lock);
}

static struct ftl_io *
ftl_io_init_child_write(struct ftl_io *parent, struct ftl_ppa ppa,
			void *data, void *md, spdk_ftl_fn cb)
{
	struct ftl_io *io;
	struct spdk_ftl_dev *dev = parent->dev;
	struct ftl_io_init_opts opts = {
		.dev		= dev,
		.io		= NULL,
		.parent		= parent,
		.rwb_batch	= NULL,
		.band		= parent->band,
		.size		= sizeof(struct ftl_io),
		.flags		= 0,
		.type		= FTL_IO_WRITE,
		.iov_cnt	= 1,
		.req_size	= dev->xfer_size,
		.fn		= cb,
		.data		= data,
		.md		= md,
	};

	io = ftl_io_init_internal(&opts);
	if (!io) {
		return NULL;
	}

	io->ppa = ppa;

	return io;
}

static void
ftl_io_child_write_cb(void *ctx, int status)
{
	struct ftl_chunk *chunk;
	struct ftl_io *io = ctx;

	chunk = ftl_band_chunk_from_ppa(io->band, io->ppa);
	chunk->busy = false;
}

static int
ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io, int lbk_cnt)
{
	struct spdk_ftl_dev	*dev = io->dev;
	struct ftl_io		*child;
	struct iovec		*iov = ftl_io_iovec(io);
	int			rc = 0;
	size_t			i, lbk_cnt;
	int			rc;

	for (i = 0; i < io->iov_cnt; ++i) {
		lbk_cnt = iov[i].iov_len / PAGE_SIZE;
		assert(iov[i].iov_len > 0);
		assert(lbk_cnt == dev->xfer_size);
	/* Split IO to child requests and release chunk immediately after child is completed */
	child = ftl_io_init_child_write(io, wptr->ppa, iov[io->iov_pos].iov_base,
					ftl_io_get_md(io), ftl_io_child_write_cb);
	if (!child) {
		return -EAGAIN;
	}

		ftl_trace_submission(dev, io, wptr->ppa, iov[i].iov_len / PAGE_SIZE);
	rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev),
						    iov[i].iov_base, ftl_io_get_md(io),
					    child->iov.iov_base, child->md,
					    ftl_ppa_addr_pack(dev, wptr->ppa),
						    lbk_cnt, ftl_io_cmpl_cb, io, 0, 0, 0);
					    lbk_cnt, ftl_io_cmpl_cb, child, 0, 0, 0);
	if (rc) {
			ftl_io_fail(io, rc);
		ftl_io_fail(child, rc);
		ftl_io_complete(child);
		SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n",
			    rc, wptr->ppa.ppa);

		return -EIO;
	}

	ftl_io_inc_req(child);
	ftl_io_advance(child, lbk_cnt);

	return 0;
}

static int
ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
{
	struct spdk_ftl_dev	*dev = io->dev;
	struct iovec		*iov = ftl_io_iovec(io);
	int			rc = 0;
	size_t			lbk_cnt;

	while (io->iov_pos < io->iov_cnt) {
		lbk_cnt = iov[io->iov_pos].iov_len / PAGE_SIZE;
		assert(iov[io->iov_pos].iov_len > 0);
		assert(lbk_cnt == dev->xfer_size);

		/* There are no guarantees of the order of completion of NVMe IO submission queue */
		/* so wait until chunk is not busy before submitting another write */
		if (wptr->chunk->busy) {
			wptr->current_io = io;
			rc = -EAGAIN;
			break;
		}

		ftl_io_inc_req(io);
		rc = ftl_submit_child_write(wptr, io, lbk_cnt);

		if (rc == -EAGAIN) {
			wptr->current_io = io;
			break;
		} else if (rc) {
			ftl_io_fail(io, rc);
			break;
		}

		ftl_trace_submission(dev, io, wptr->ppa, lbk_cnt);

		/* Update parent iovec */
		ftl_io_advance(io, lbk_cnt);

		ftl_wptr_advance(wptr, lbk_cnt);
	}

	if (ftl_io_done(io)) {
		/* Parent IO will complete after all children are completed */
		ftl_io_complete(io);
	}

@@ -1045,6 +1135,13 @@ ftl_wptr_process_writes(struct ftl_wptr *wptr)
 	struct ftl_io		*io;
 	struct ftl_ppa		ppa, prev_ppa;
 
+	if (wptr->current_io) {
+		if (ftl_submit_write(wptr, wptr->current_io) == -EAGAIN) {
+			return 0;
+		}
+		wptr->current_io = NULL;
+	}
+
 	/* Make sure the band is prepared for writing */
 	if (!ftl_wptr_ready(wptr)) {
 		return 0;