Commit 6c820f84 authored by Jim Harris
Browse files

nvme: add tracker prefetching in completion path



At 10M IO/s, we see a lot of CPU cycles wasted getting
the next tracker into cache.  If we only get one
completion at a time, this is unavoidable, but when
there are multiple completions pending, we can prefetch
the second tracker while processing the completion for
the first.

Signed-off-by: Jim Harris <james.r.harris@intel.com>
Change-Id: I9de702bee3719e4494eec6f05b09be3672f1e0ac

Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/456097


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
parent f2d46446
Loading
Loading
Loading
Loading
+19 −2
Original line number Diff line number Diff line
@@ -2048,9 +2048,12 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
{
	struct nvme_pcie_qpair	*pqpair = nvme_pcie_qpair(qpair);
	struct nvme_tracker	*tr;
	struct spdk_nvme_cpl	*cpl;
	struct spdk_nvme_cpl	*cpl, *next_cpl;
	uint32_t		 num_completions = 0;
	struct spdk_nvme_ctrlr	*ctrlr = qpair->ctrlr;
	uint16_t		 next_cq_head;
	uint8_t			 next_phase;
	bool			 next_is_valid = false;

	if (spdk_unlikely(nvme_qpair_is_admin_queue(qpair))) {
		nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
@@ -2068,9 +2071,23 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
	while (1) {
		cpl = &pqpair->cpl[pqpair->cq_head];

		if (cpl->status.p != pqpair->flags.phase) {
		if (!next_is_valid && cpl->status.p != pqpair->flags.phase) {
			break;
		}

		if (spdk_likely(pqpair->cq_head + 1 != pqpair->num_entries)) {
			next_cq_head = pqpair->cq_head + 1;
			next_phase = pqpair->flags.phase;
		} else {
			next_cq_head = 0;
			next_phase = !pqpair->flags.phase;
		}
		next_cpl = &pqpair->cpl[next_cq_head];
		next_is_valid = (next_cpl->status.p == next_phase);
		if (next_is_valid) {
			__builtin_prefetch(&pqpair->tr[next_cpl->cid]);
		}

#ifdef __PPC64__
		/*
		 * This memory barrier prevents reordering of: