Commit cf0eac8c authored by Ben Walker, committed by Changpeng Liu
Browse files

nvme: Add qpair option to batch command submissions



Avoid ringing the submission queue doorbell until the
call to spdk_nvme_qpair_process_completions().

Change-Id: I7b3cd952e5ec79109eaa1c3a50f6537d7aaea51a
Signed-off-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/447239


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
parent 494dcc81
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -15,6 +15,14 @@ return a context associated with the specified controllers. Users then call
spdk_nvme_probe_poll_async() until it returns 0, indicating that the operation
is completed with success.

A new qpair creation option, delay_pcie_doorbell, was added. It is set in the
spdk_nvme_io_qpair_opts structure passed to spdk_nvme_ctrlr_alloc_io_qpair().
When enabled, the I/O submission functions, such as spdk_nvme_ns_cmd_writev(),
skip ringing the submission queue doorbell. Instead, the doorbell is rung as
necessary inside spdk_nvme_qpair_process_completions(). This can result in
significantly fewer MMIO writes to the doorbell register under heavy load,
greatly improving performance. The option only applies to local PCIe devices.

New API spdk_nvme_ctrlr_get_flags() was added.

### raid
+11 −0
Original line number Diff line number Diff line
@@ -935,6 +935,17 @@ struct spdk_nvme_io_qpair_opts {
	 * compatibility requirements, or driver-assisted striping.
	 */
	uint32_t io_queue_requests;

	/**
	 * When submitting I/O via spdk_nvme_ns_cmd_read(), spdk_nvme_ns_cmd_write() and
	 * similar functions, don't immediately write the submission queue doorbell.
	 * Instead, write to the doorbell as necessary inside
	 * spdk_nvme_qpair_process_completions().
	 *
	 * This results in better batching of I/O submission and consequently fewer
	 * MMIO writes to the doorbell, which may increase performance.
	 *
	 * This only applies to local PCIe devices.
	 */
	bool delay_pcie_doorbell;
};

/**
+4 −0
Original line number Diff line number Diff line
@@ -234,6 +234,10 @@ spdk_nvme_ctrlr_get_default_io_qpair_opts(struct spdk_nvme_ctrlr *ctrlr,
		opts->io_queue_requests = ctrlr->opts.io_queue_requests;
	}

	if (FIELD_OK(delay_pcie_doorbell)) {
		opts->delay_pcie_doorbell = false;
	}

#undef FIELD_OK
}

+16 −2
Original line number Diff line number Diff line
@@ -171,6 +171,7 @@ struct nvme_pcie_qpair {

	uint16_t max_completions_cap;

	uint16_t last_sq_tail;
	uint16_t sq_tail;
	uint16_t cq_head;
	uint16_t sq_head;
@@ -179,6 +180,8 @@ struct nvme_pcie_qpair {

	bool is_enabled;

	bool delay_pcie_doorbell;

	/*
	 * Base qpair structure.
	 * This is located after the hot data in this structure so that the important parts of
@@ -671,6 +674,7 @@ nvme_pcie_ctrlr_construct_admin_qpair(struct spdk_nvme_ctrlr *ctrlr)
	}

	pqpair->num_entries = NVME_ADMIN_ENTRIES;
	pqpair->delay_pcie_doorbell = false;

	ctrlr->adminq = &pqpair->qpair;

@@ -936,7 +940,7 @@ nvme_pcie_qpair_reset(struct spdk_nvme_qpair *qpair)
{
	struct nvme_pcie_qpair *pqpair = nvme_pcie_qpair(qpair);

	pqpair->sq_tail = pqpair->cq_head = 0;
	pqpair->last_sq_tail = pqpair->sq_tail = pqpair->cq_head = 0;

	/*
	 * First time through the completion queue, HW will set phase
@@ -1206,8 +1210,10 @@ nvme_pcie_qpair_submit_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracke
		SPDK_ERRLOG("sq_tail is passing sq_head!\n");
	}

	if (!pqpair->delay_pcie_doorbell) {
		nvme_pcie_qpair_ring_sq_doorbell(qpair);
	}
}

static void
nvme_pcie_qpair_complete_tracker(struct spdk_nvme_qpair *qpair, struct nvme_tracker *tr,
@@ -1583,6 +1589,7 @@ nvme_pcie_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
	}

	pqpair->num_entries = opts->io_queue_size;
	pqpair->delay_pcie_doorbell = opts->delay_pcie_doorbell;

	qpair = &pqpair->qpair;

@@ -2118,6 +2125,13 @@ nvme_pcie_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_
		}
	}

	if (pqpair->delay_pcie_doorbell) {
		if (pqpair->last_sq_tail != pqpair->sq_tail) {
			nvme_pcie_qpair_ring_sq_doorbell(qpair);
			pqpair->last_sq_tail = pqpair->sq_tail;
		}
	}

	if (spdk_unlikely(ctrlr->timeout_enabled)) {
		/*
		 * User registered for timeout callback