Commit 6629202c authored by Or Gerlitz's avatar Or Gerlitz Committed by Darek Stojaczyk
Browse files

nvmf/tcp: Use the success optimization by default



By now (5.1 is released), the Linux kernel initiator supports the
success optimization and further, the version that doesn't support
it (5.0) was EOL-ed. As such, let's enable it in SPDK by default.

Doing so provides a notable performance improvement: running perf with
iodepth of 64, randread, two threads and block size of 512 bytes for 60s
("-q 64 -w randread -o 512 -c 0x5000 -t 60") over the VMA socket acceleration
library and null backing store, we got 730K IOPS with the success
optimization vs 550K without it.

IOPS           MiB/s    Average       min      max
549274.10     268.20     232.99      93.23 3256354.96
728117.57     355.53     175.76      85.93   14632.16

To allow for interop with older kernel initiators, we added
a config knob under which the success optimization can be
enabled or disabled.

Change-Id: Ia4c79f607f82c3563523ae3e07a67eac95b56dbb
Signed-off-by: default avatarOr Gerlitz <ogerlitz@mellanox.com>
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/457644


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarBen Walker <benjamin.walker@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: default avatarZiye Yang <ziye.yang@intel.com>
Reviewed-by: default avatarDarek Stojaczyk <dariusz.stojaczyk@intel.com>
parent 2224554e
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -8,6 +8,9 @@ A new file API `spdk_posix_file_load` was added to load file content into a data

### NVMe-oF Target

The C2H success optimization, under which a command capsule response is not sent
for reads, is now turned on by default. A config knob was added to allow enabling
or disabling it.

Shared receive queue can now be disabled even for NICs that support it using the
`nvmf_create_transport` RPC method parameter `no_srq`. The actual use of a shared
receive queue is predicated on hardware support when this flag is not used.
+3 −0
Original line number Diff line number Diff line
@@ -134,6 +134,9 @@
  # Set the number of shared buffers to be cached per poll group
  #BufCacheSize 32

  # Set whether to use the C2H Success optimization, only used for TCP transport.
  # C2HSuccess true

[Nvme]
  # NVMe Device Whitelist
  # Users may specify which NVMe devices to claim by their transport id.
+1 −0
Original line number Diff line number Diff line
@@ -74,6 +74,7 @@ struct spdk_nvmf_transport_opts {
	uint32_t	buf_cache_size;
	uint32_t	max_srq_depth;
	bool		no_srq;
	bool		c2h_success;
};

/**
+18 −6
Original line number Diff line number Diff line
@@ -483,6 +483,7 @@ spdk_nvmf_parse_transport(struct spdk_nvmf_parse_transport_ctx *ctx)
	struct spdk_nvmf_transport_opts opts = { 0 };
	enum spdk_nvme_transport_type trtype;
	struct spdk_nvmf_transport *transport;
	bool bval;
	int val;

	type = spdk_conf_section_get_val(ctx->sp, "Type");
@@ -552,21 +553,32 @@ spdk_nvmf_parse_transport(struct spdk_nvmf_parse_transport_ctx *ctx)
			opts.max_srq_depth = val;
		} else {
			SPDK_ERRLOG("MaxSRQDepth is relevant only for RDMA transport '%s'\n", type);
			ctx->cb_fn(-1);
			free(ctx);
			return;
			goto error_out;
		}
	}

	bval = spdk_conf_section_get_boolval(ctx->sp, "C2HSuccess", true);
	if (trtype == SPDK_NVME_TRANSPORT_TCP) {
		opts.c2h_success = bval;
	} else {
		SPDK_ERRLOG("C2HSuccess is relevant only for TCP transport '%s'\n", type);
		goto error_out;
	}

	transport = spdk_nvmf_transport_create(trtype, &opts);
	if (transport) {
		spdk_nvmf_tgt_add_transport(g_spdk_nvmf_tgt, transport, spdk_nvmf_tgt_add_transport_done, ctx);
	} else {
		goto error_out;
	}

	return;

error_out:
	ctx->cb_fn(-1);
	free(ctx);
	return;
}
}

static int
spdk_nvmf_parse_transports(spdk_nvmf_parse_conf_done_fn cb_fn)
+6 −0
Original line number Diff line number Diff line
@@ -1458,6 +1458,10 @@ static const struct spdk_json_object_decoder nvmf_rpc_create_transport_decoder[]
		"no_srq", offsetof(struct nvmf_rpc_create_transport_ctx, opts.no_srq),
		spdk_json_decode_bool, true
	},
	{
		"c2h_success", offsetof(struct nvmf_rpc_create_transport_ctx, opts.c2h_success),
		spdk_json_decode_bool, true
	},
};

static void
@@ -1594,6 +1598,8 @@ dump_nvmf_transport(struct spdk_json_write_ctx *w, struct spdk_nvmf_transport *t
	if (type == SPDK_NVME_TRANSPORT_RDMA) {
		spdk_json_write_named_uint32(w, "max_srq_depth", opts->max_srq_depth);
		spdk_json_write_named_bool(w, "no_srq", opts->no_srq);
	} else if (type == SPDK_NVME_TRANSPORT_TCP) {
		spdk_json_write_named_bool(w, "c2h_success", opts->c2h_success);
	}

	spdk_json_write_object_end(w);
Loading