Commit dd174597 authored by Rui Chang, committed by Tomasz Zawadzki
Browse files

nvmf/vfio-user: Add adaptive irq feature for vfio-user transport



In the vfio-user transport, every completed I/O triggers an interrupt
to the guest machine, which incurs considerable overhead. This patch
adds an adaptive IRQ feature that reduces the interrupt overhead and
improves performance.

Signed-off-by: Rui Chang <rui.chang@arm.com>
Change-Id: I585be072231a934fa2e4fdf2439405de95151381
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/11840


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent 94494579
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -59,6 +59,9 @@ existing listener. Users should now explicitly add listeners for the discovery s
Host can still connect to the discovery subsystem as before, but a warning message will be
emitted if no listener was configured for the transport ID of the incoming connection.

Added an adaptive interrupt feature for the vfio-user transport. A new parameter,
`disable_adaptive_irq`, has been added to the `nvmf_create_transport` RPC.

### thread

Added `spdk_thread_exec_msg()` API.
+1 −0
Original line number Diff line number Diff line
@@ -6268,6 +6268,7 @@ abort_timeout_sec | Optional | number | Abort execution timeout value
no_wr_batching              | Optional | boolean | Disable work requests batching (RDMA only)
control_msg_num             | Optional | number  | The number of control messages per poll group (TCP only)
disable_mappable_bar0       | Optional | boolean | disable client mmap() of BAR0 (VFIO-USER only)
disable_adaptive_irq        | Optional | boolean | Disable adaptive interrupt feature (VFIO-USER only)
zcopy                       | Optional | boolean | Use zero-copy operations if the underlying bdev supports them

#### Example
+47 −1
Original line number Diff line number Diff line
@@ -311,6 +311,9 @@ struct nvmf_vfio_user_cq {

	uint16_t				iv;
	bool					ien;

	uint32_t				last_head;
	uint32_t				last_trigger_irq_tail;
};

struct nvmf_vfio_user_poll_group {
@@ -382,6 +385,7 @@ struct nvmf_vfio_user_endpoint {

/*
 * Transport-specific options for the vfio-user transport. Each field is
 * filled from the JSON key of the same name by the transport opts decoder.
 */
struct nvmf_vfio_user_transport_opts {
	/* When true, disables client mmap() of BAR0. */
	bool					disable_mappable_bar0;
	/* When true, disables the adaptive interrupt feature. */
	bool					disable_adaptive_irq;
};

struct nvmf_vfio_user_transport {
@@ -857,6 +861,11 @@ static const struct spdk_json_object_decoder vfio_user_transport_opts_decoder[]
		offsetof(struct nvmf_vfio_user_transport, transport_opts.disable_mappable_bar0),
		spdk_json_decode_bool, true
	},
	{
		"disable_adaptive_irq",
		offsetof(struct nvmf_vfio_user_transport, transport_opts.disable_adaptive_irq),
		spdk_json_decode_bool, true
	},
};

static struct spdk_nvmf_transport *
@@ -902,6 +911,8 @@ nvmf_vfio_user_create(struct spdk_nvmf_transport_opts *opts)

	SPDK_DEBUGLOG(nvmf_vfio, "vfio_user transport: disable_mappable_bar0=%d\n",
		      vu_transport->transport_opts.disable_mappable_bar0);
	SPDK_DEBUGLOG(nvmf_vfio, "vfio_user transport: disable_adaptive_irq=%d\n",
		      vu_transport->transport_opts.disable_adaptive_irq);

	/*
	 * To support interrupt mode, the transport must be configured with
@@ -1105,6 +1116,14 @@ static int
handle_cmd_req(struct nvmf_vfio_user_ctrlr *ctrlr, struct spdk_nvme_cmd *cmd,
	       struct nvmf_vfio_user_sq *sq);

/*
 * Tells whether the adaptive interrupt feature applies to a completion queue.
 *
 * Returns 1 only when all of the following hold, and 0 otherwise:
 *   - spdk_interrupt_mode_is_enabled() reports false,
 *   - the queue's qid is non-zero,
 *   - the transport option disable_adaptive_irq is not set.
 */
static inline int
adaptive_irq_enabled(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_cq *cq)
{
	/* Checks are ordered exactly as a short-circuit AND would evaluate them. */
	if (spdk_interrupt_mode_is_enabled()) {
		return 0;
	}

	if (cq->qid == 0) {
		return 0;
	}

	return ctrlr->transport->transport_opts.disable_adaptive_irq ? 0 : 1;
}

/*
 * Posts a CQE in the completion queue.
 *
@@ -1179,7 +1198,8 @@ post_completion(struct nvmf_vfio_user_ctrlr *ctrlr, struct nvmf_vfio_user_cq *cq
	 * might be triggering interrupts from vfio-user thread context so
	 * check for race conditions.
	 */
	if (ctrlr_interrupt_enabled(ctrlr) && cq->ien) {
	if (!adaptive_irq_enabled(ctrlr, cq) &&
	    cq->ien && ctrlr_interrupt_enabled(ctrlr)) {
		err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv);
		if (err != 0) {
			SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n",
@@ -4301,12 +4321,38 @@ static int
nvmf_vfio_user_sq_poll(struct nvmf_vfio_user_sq *sq)
{
	struct nvmf_vfio_user_ctrlr *ctrlr;
	struct nvmf_vfio_user_cq *cq;
	uint32_t new_tail;
	int count = 0;
	uint32_t cq_head;
	uint32_t cq_tail;
	int err;

	assert(sq != NULL);

	ctrlr = sq->ctrlr;
	cq = ctrlr->cqs[sq->cqid];

	if (cq->ien && ctrlr_interrupt_enabled(ctrlr) &&
	    adaptive_irq_enabled(ctrlr, cq)) {
		cq_tail = *cq_tailp(cq);

		if (cq_tail != cq->last_trigger_irq_tail) {
			spdk_ivdt_dcache(cq_dbl_headp(ctrlr, cq));
			cq_head = *cq_dbl_headp(ctrlr, cq);

			if (cq_head != cq_tail && cq_head == cq->last_head) {
				err = vfu_irq_trigger(ctrlr->endpoint->vfu_ctx, cq->iv);
				if (err != 0) {
					SPDK_ERRLOG("%s: failed to trigger interrupt: %m\n",
						    ctrlr_id(ctrlr));
				} else {
					cq->last_trigger_irq_tail = cq_tail;
				}
			}
			cq->last_head = cq_head;
		}
	}

	/* On aarch64 platforms, doorbells update from guest VM may not be seen
	 * on SPDK target side. This is because there is memory type mismatch
+2 −0
Original line number Diff line number Diff line
@@ -2088,6 +2088,8 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
    Relevant only for TCP transport""", type=int)
    p.add_argument('-M', '--disable-mappable-bar0', action='store_true', help="""Disable mmap() of BAR0.
    Relevant only for VFIO-USER transport""")
    p.add_argument('-I', '--disable-adaptive-irq', action='store_true', help="""Disable adaptive interrupt feature.
    Relevant only for VFIO-USER transport""")
    p.add_argument('--acceptor-poll-rate', help='Polling interval of the acceptor for incoming connections (usec)', type=int)
    p.set_defaults(func=nvmf_create_transport)

+1 −0
Original line number Diff line number Diff line
@@ -120,6 +120,7 @@ def nvmf_create_transport(client, **params):
        no_wr_batching: Boolean flag to disable work requests batching - RDMA specific (optional)
        control_msg_num: The number of control messages per poll group - TCP specific (optional)
        disable_mappable_bar0: disable client mmap() of BAR0 - VFIO-USER specific (optional)
        disable_adaptive_irq: Disable adaptive interrupt feature - VFIO-USER specific (optional)
        acceptor_poll_rate: Acceptor poll period in microseconds (optional)
    Returns:
        True or False