Commit 7a0c901a authored by Michael Haeuptle, committed by Tomasz Zawadzki
Browse files

bdev/nvme: RPCs for adding/removing error injections



Provides RPCs for the qpair error injection APIs to bdev_nvme.
These RPCs are useful in testing NVMeoF/NVMe behavior for various
error scenarios in production.

Signed-off-by: Michael Haeuptle <michael.haeuptle@hpe.com>
Change-Id: I0db7995d7a712d4f8a60e643d564faa6908c3a55
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10992


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Konrad Sztyber <konrad.sztyber@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
parent 2ccaf2ac
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -2,6 +2,11 @@

## v22.05: (Upcoming Release)

### bdev_nvme

Added `bdev_nvme_add_error_injection` and `bdev_nvme_remove_error_injection` RPCs to add and
remove NVMe error injections.

## v22.01

### accel
+99 −0
Original line number Diff line number Diff line
@@ -9364,3 +9364,102 @@ Note that the config space buffer was trimmed.
  }
}
~~

### bdev_nvme_add_error_injection {#rpc_bdev_nvme_add_error_injection}

Add an NVMe command error injection for a bdev nvme controller.

#### Parameters

Name                    | Optional | Type        | Description
----------------------- | -------- | ----------- | -----------
name                    | Required | string      | Name of the operating NVMe controller
cmd_type                | Required | string      | Type of NVMe command. Valid values are: admin, io
opc                     | Required | number      | Opcode for which the error injection is applied
do_not_submit           | Optional | boolean     | Set to true if request should not be submitted to the controller (default false)
timeout_in_us           | Optional | number      | Wait specified microseconds when do_not_submit is true
err_count               | Optional | number      | Number of matching NVMe commands to inject errors (default 1)
sct                     | Optional | number      | Status code type (default 0)
sc                      | Optional | number      | Status code (default 0)

#### Response

true on success

#### Example

Example request:

~~~json
{
  "jsonrpc": "2.0",
  "method": "bdev_nvme_add_error_injection",
  "id": 1,
  "params": {
    "name": "HotInNvme0",
    "opc": 2,
    "cmd_type": "io",
    "err_count": 1111111,
    "sct": 11,
    "sc": 33
  }
}

~~~

Example response:

~~~json
{
  "jsonrpc": "2.0",
  "id": 1,
  "result": true
}

~~~

### bdev_nvme_remove_error_injection {#rpc_bdev_nvme_remove_error_injection}

Remove an NVMe command error injection.

#### Parameters

Name                    | Optional | Type        | Description
----------------------- | -------- | ----------- | -----------
name                    | Required | string      | Name of the operating NVMe controller
cmd_type                | Required | string      | Type of NVMe command. Valid values are: admin, io
opc                     | Required | number      | Opcode for which the error injection is applied

#### Response

true on success

#### Example

Example request:

~~~json
{
  "jsonrpc": "2.0",
  "method": "bdev_nvme_remove_error_injection",
  "id": 1,
  "params": {
    "name": "HotInNvme0",
    "opc": 2,
    "cmd_type": "io"
  }
}


~~~

Example response:

~~~json
{
  "jsonrpc": "2.0",
  "id": 1,
  "result": true
}

~~~
+254 −0
Original line number Diff line number Diff line
@@ -1785,3 +1785,257 @@ cleanup:
}
SPDK_RPC_REGISTER("bdev_nvme_stop_discovery", rpc_bdev_nvme_stop_discovery,
		  SPDK_RPC_RUNTIME)

/*
 * Kind of NVMe command an error injection RPC targets. The "cmd_type" JSON
 * string is translated into one of these values by
 * rpc_error_injection_decode_cmd_type(). Numbering starts at 1, so a
 * zero-initialized field matches neither constant.
 */
enum error_injection_cmd_type {
	/* JSON value "admin" */
	NVME_ADMIN_CMD = 1,
	/* JSON value "io" */
	NVME_IO_CMD = 2,
};

/*
 * Decoded parameters of the bdev_nvme_add_error_injection RPC. Filled in by
 * spdk_json_decode_object() using rpc_add_error_injection_decoders.
 */
struct rpc_add_error_injection {
	/* Name of the NVMe controller; released with free() in free_rpc_add_error_injection() */
	char *name;
	/* NVME_ADMIN_CMD or NVME_IO_CMD, decoded from the "admin" / "io" strings */
	enum error_injection_cmd_type cmd_type;
	/* Opcode the error injection applies to */
	uint8_t opc;
	/* When true the request is not submitted to the controller */
	bool do_not_submit;
	/* Microseconds to wait when do_not_submit is true */
	uint64_t timeout_in_us;
	/* Number of matching commands to inject errors into; the RPC handler presets this to 1 */
	uint32_t err_count;
	/* Status code type to report */
	uint8_t sct;
	/* Status code to report */
	uint8_t sc;
};

/*
 * Release the heap memory referenced by a decoded add-error-injection
 * request. Only the controller name is owned by the request; the structure
 * itself is freed by the caller.
 */
static void
free_rpc_add_error_injection(struct rpc_add_error_injection *req)
{
	char *ctrlr_name = req->name;

	free(ctrlr_name);
}

/*
 * JSON decoder callback for the "cmd_type" parameter.
 *
 * Stores NVME_ADMIN_CMD for "admin" and NVME_IO_CMD for "io" into *out.
 *
 * \return 0 on success, -EINVAL (after logging) for any other value.
 *
 * NOTE(review): the result is stored through an int pointer although the
 * decoder tables point it at an enum error_injection_cmd_type field; this
 * assumes the enum has the size of int - confirm for the target toolchain.
 */
static int
rpc_error_injection_decode_cmd_type(const struct spdk_json_val *val, void *out)
{
	int *type_out = out;

	if (spdk_json_strequal(val, "admin")) {
		*type_out = NVME_ADMIN_CMD;
		return 0;
	}

	if (spdk_json_strequal(val, "io")) {
		*type_out = NVME_IO_CMD;
		return 0;
	}

	SPDK_ERRLOG("Invalid parameter value: cmd_type\n");
	return -EINVAL;
}

/*
 * JSON key -> struct rpc_add_error_injection field mapping for the
 * bdev_nvme_add_error_injection RPC. "name", "cmd_type" and "opc" carry no
 * trailing flag; the remaining entries pass a trailing `true`, which
 * presumably marks the key as optional (it matches the Optional column of
 * the RPC parameter documentation).
 */
static const struct spdk_json_object_decoder rpc_add_error_injection_decoders[] = {
	{ "name", offsetof(struct rpc_add_error_injection, name), spdk_json_decode_string },
	{ "cmd_type", offsetof(struct rpc_add_error_injection, cmd_type), rpc_error_injection_decode_cmd_type },
	{ "opc", offsetof(struct rpc_add_error_injection, opc), spdk_json_decode_uint8 },
	{ "do_not_submit", offsetof(struct rpc_add_error_injection, do_not_submit), spdk_json_decode_bool, true },
	{ "timeout_in_us", offsetof(struct rpc_add_error_injection, timeout_in_us), spdk_json_decode_uint64, true },
	{ "err_count", offsetof(struct rpc_add_error_injection, err_count), spdk_json_decode_uint32, true },
	{ "sct", offsetof(struct rpc_add_error_injection, sct), spdk_json_decode_uint8, true},
	{ "sc", offsetof(struct rpc_add_error_injection, sc), spdk_json_decode_uint8, true},
};

/*
 * Heap-allocated state of one bdev_nvme_add_error_injection call. It is
 * handed to spdk_for_each_channel() as the iteration context for I/O
 * injections and freed once the response has been sent.
 */
struct rpc_add_error_injection_ctx {
	/* JSON-RPC request that the response is sent on */
	struct spdk_jsonrpc_request *request;
	/* Decoded RPC parameters */
	struct rpc_add_error_injection rpc;
};

/*
 * Completion callback of the per-channel iteration started by
 * rpc_bdev_nvme_add_error_injection() for I/O commands.
 *
 * Sends `true` when status is 0, otherwise an error response carrying
 * status as its code, then releases the request context.
 */
static void
rpc_add_error_injection_done(struct spdk_io_channel_iter *i, int status)
{
	struct rpc_add_error_injection_ctx *done_ctx = spdk_io_channel_iter_get_ctx(i);
	struct spdk_jsonrpc_request *req = done_ctx->request;

	if (status == 0) {
		spdk_jsonrpc_send_bool_response(req, true);
	} else {
		spdk_jsonrpc_send_error_response(req, status,
						 "Failed to add the error injection.");
	}

	/* The context is owned by this callback once the iteration has finished. */
	free_rpc_add_error_injection(&done_ctx->rpc);
	free(done_ctx);
}

static void
rpc_add_error_injection_per_channel(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
	struct rpc_add_error_injection_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
	struct nvme_ctrlr_channel *ctrlr_ch = spdk_io_channel_get_ctx(ch);
	struct nvme_ctrlr *nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(ctrlr_ch);
	struct spdk_nvme_qpair *qpair = ctrlr_ch->qpair;
	struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
	int rc = 0;

	if (qpair != NULL) {
		rc = spdk_nvme_qpair_add_cmd_error_injection(ctrlr, qpair, ctx->rpc.opc,
				ctx->rpc.do_not_submit, ctx->rpc.timeout_in_us, ctx->rpc.err_count,
				ctx->rpc.sct, ctx->rpc.sc);
	}

	spdk_for_each_channel_continue(i, rc);
}

static void
rpc_bdev_nvme_add_error_injection(
	struct spdk_jsonrpc_request *request,
	const struct spdk_json_val *params)
{
	struct rpc_add_error_injection_ctx *ctx;
	struct nvme_ctrlr *nvme_ctrlr;
	int rc;

	ctx = calloc(1, sizeof(*ctx));
	if (!ctx) {
		spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
		return;
	}
	ctx->rpc.err_count = 1;
	ctx->request = request;

	if (spdk_json_decode_object(params,
				    rpc_add_error_injection_decoders,
				    SPDK_COUNTOF(rpc_add_error_injection_decoders),
				    &ctx->rpc)) {
		spdk_jsonrpc_send_error_response(request, -EINVAL,
						 "Failed to parse the request");
		goto cleanup;
	}

	nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->rpc.name);
	if (nvme_ctrlr == NULL) {
		SPDK_ERRLOG("No controller with specified name was found.\n");
		spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV));
		goto cleanup;
	}

	if (ctx->rpc.cmd_type == NVME_IO_CMD) {
		spdk_for_each_channel(nvme_ctrlr,
				      rpc_add_error_injection_per_channel,
				      ctx,
				      rpc_add_error_injection_done);

		return;
	} else {
		rc = spdk_nvme_qpair_add_cmd_error_injection(nvme_ctrlr->ctrlr, NULL, ctx->rpc.opc,
				ctx->rpc.do_not_submit, ctx->rpc.timeout_in_us, ctx->rpc.err_count,
				ctx->rpc.sct, ctx->rpc.sc);
		if (rc) {
			spdk_jsonrpc_send_error_response(request, -rc,
							 "Failed to add the error injection");
		} else {
			spdk_jsonrpc_send_bool_response(ctx->request, true);
		}
	}

cleanup:
	free_rpc_add_error_injection(&ctx->rpc);
	free(ctx);
}
SPDK_RPC_REGISTER("bdev_nvme_add_error_injection", rpc_bdev_nvme_add_error_injection,
		  SPDK_RPC_RUNTIME)

/*
 * Decoded parameters of the bdev_nvme_remove_error_injection RPC. Filled in
 * by spdk_json_decode_object() using rpc_remove_error_injection_decoders.
 */
struct rpc_remove_error_injection {
	/* Name of the NVMe controller; released with free() in free_rpc_remove_error_injection() */
	char *name;
	/* NVME_ADMIN_CMD or NVME_IO_CMD, decoded from the "admin" / "io" strings */
	enum error_injection_cmd_type cmd_type;
	/* Opcode whose error injection is removed */
	uint8_t opc;
};

/*
 * Release the heap memory referenced by a decoded remove-error-injection
 * request. Only the controller name is owned by the request; the structure
 * itself is freed by the caller.
 */
static void
free_rpc_remove_error_injection(struct rpc_remove_error_injection *req)
{
	char *ctrlr_name = req->name;

	free(ctrlr_name);
}

/*
 * JSON key -> struct rpc_remove_error_injection field mapping for the
 * bdev_nvme_remove_error_injection RPC. None of the entries carries the
 * trailing optional flag, in line with the RPC documentation listing all
 * three parameters as required.
 */
static const struct spdk_json_object_decoder rpc_remove_error_injection_decoders[] = {
	{ "name", offsetof(struct rpc_remove_error_injection, name), spdk_json_decode_string },
	{ "cmd_type", offsetof(struct rpc_remove_error_injection, cmd_type), rpc_error_injection_decode_cmd_type },
	{ "opc", offsetof(struct rpc_remove_error_injection, opc), spdk_json_decode_uint8 },
};

/*
 * Heap-allocated state of one bdev_nvme_remove_error_injection call. It is
 * handed to spdk_for_each_channel() as the iteration context for I/O
 * injections and freed once the response has been sent.
 */
struct rpc_remove_error_injection_ctx {
	/* JSON-RPC request that the response is sent on */
	struct spdk_jsonrpc_request *request;
	/* Decoded RPC parameters */
	struct rpc_remove_error_injection rpc;
};

/*
 * Completion callback of the per-channel iteration started by
 * rpc_bdev_nvme_remove_error_injection() for I/O commands.
 *
 * Sends `true` when status is 0, otherwise an error response carrying
 * status as its code, then releases the request context.
 */
static void
rpc_remove_error_injection_done(struct spdk_io_channel_iter *i, int status)
{
	struct rpc_remove_error_injection_ctx *done_ctx = spdk_io_channel_iter_get_ctx(i);
	struct spdk_jsonrpc_request *req = done_ctx->request;

	if (status == 0) {
		spdk_jsonrpc_send_bool_response(req, true);
	} else {
		spdk_jsonrpc_send_error_response(req, status,
						 "Failed to remove the error injection.");
	}

	/* The context is owned by this callback once the iteration has finished. */
	free_rpc_remove_error_injection(&done_ctx->rpc);
	free(done_ctx);
}

/*
 * Per-channel step of the I/O error injection removal: drops the injection
 * for the requested opcode from this channel's qpair, if the channel has
 * one. The iteration always continues with status 0.
 */
static void
rpc_remove_error_injection_per_channel(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *io_ch = spdk_io_channel_iter_get_channel(i);
	struct rpc_remove_error_injection_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
	struct nvme_ctrlr_channel *nvme_ch = spdk_io_channel_get_ctx(io_ch);
	struct nvme_ctrlr *nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(nvme_ch);
	struct spdk_nvme_qpair *io_qpair = nvme_ch->qpair;
	struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;

	if (io_qpair != NULL) {
		spdk_nvme_qpair_remove_cmd_error_injection(ctrlr, io_qpair, ctx->rpc.opc);
	}

	spdk_for_each_channel_continue(i, 0);
}

/*
 * JSON-RPC handler for bdev_nvme_remove_error_injection.
 *
 * Decodes the parameters and looks up the named NVMe controller. For
 * cmd_type "io" the removal runs on every controller channel and the
 * response is sent from rpc_remove_error_injection_done(); for any other
 * cmd_type the injection is removed with a NULL qpair and `true` is
 * returned immediately.
 */
static void
rpc_bdev_nvme_remove_error_injection(struct spdk_jsonrpc_request *request,
				     const struct spdk_json_val *params)
{
	struct rpc_remove_error_injection_ctx *ctx = calloc(1, sizeof(*ctx));
	struct nvme_ctrlr *nvme_ctrlr;

	if (ctx == NULL) {
		spdk_jsonrpc_send_error_response(request, -ENOMEM, spdk_strerror(ENOMEM));
		return;
	}
	ctx->request = request;

	if (spdk_json_decode_object(params,
				    rpc_remove_error_injection_decoders,
				    SPDK_COUNTOF(rpc_remove_error_injection_decoders),
				    &ctx->rpc) != 0) {
		spdk_jsonrpc_send_error_response(request, -EINVAL,
						 "Failed to parse the request");
		goto cleanup;
	}

	nvme_ctrlr = nvme_ctrlr_get_by_name(ctx->rpc.name);
	if (nvme_ctrlr == NULL) {
		SPDK_ERRLOG("No controller with specified name was found.\n");
		spdk_jsonrpc_send_error_response(request, -ENODEV, spdk_strerror(ENODEV));
		goto cleanup;
	}

	if (ctx->rpc.cmd_type != NVME_IO_CMD) {
		/* Admin commands: a NULL qpair is passed instead of a channel qpair. */
		spdk_nvme_qpair_remove_cmd_error_injection(nvme_ctrlr->ctrlr, NULL, ctx->rpc.opc);
		spdk_jsonrpc_send_bool_response(ctx->request, true);
		goto cleanup;
	}

	/* I/O commands: ctx now belongs to the iteration and is freed in the done callback. */
	spdk_for_each_channel(nvme_ctrlr,
			      rpc_remove_error_injection_per_channel,
			      ctx,
			      rpc_remove_error_injection_done);
	return;

cleanup:
	free_rpc_remove_error_injection(&ctx->rpc);
	free(ctx);
}
SPDK_RPC_REGISTER("bdev_nvme_remove_error_injection", rpc_bdev_nvme_remove_error_injection,
		  SPDK_RPC_RUNTIME)
+38 −0
Original line number Diff line number Diff line
@@ -2804,6 +2804,44 @@ Format: 'user:u1 secret:s1 muser:mu1 msecret:ms1,user:u2 secret:s2 muser:mu2 mse
                   action='store_true')
    p.set_defaults(func=framework_get_pci_devices)

    # bdev_nvme_add_error_injection
    def bdev_nvme_add_error_injection(args):
        # Forward the parsed command-line options to the RPC helper and print its result.
        result = rpc.nvme.bdev_nvme_add_error_injection(
            args.client,
            name=args.nvme_name,
            cmd_type=args.cmd_type,
            opc=args.opc,
            do_not_submit=args.do_not_submit,
            timeout_in_us=args.timeout_in_us,
            err_count=args.err_count,
            sct=args.sct,
            sc=args.sc)
        print_dict(result)

    p = subparsers.add_parser('bdev_nvme_add_error_injection',
                              help='Add a NVMe command error injection.')
    # Required arguments: target controller, command type and opcode.
    p.add_argument('-n', '--nvme-name', required=True,
                   help='Name of the operating NVMe controller')
    p.add_argument('-t', '--cmd-type', required=True,
                   help='Type of NVMe command. Valid values are: admin, io')
    p.add_argument('-o', '--opc', required=True, type=int,
                   help='Opcode of the NVMe command.')
    # Optional tuning of the injected behaviour.
    p.add_argument('-s', '--do-not-submit', dest='do_not_submit', action='store_true', default=False,
                   help='Set to true if request should not be submitted to the controller (default false)')
    p.add_argument('-w', '--timeout-in-us', type=int,
                   help='Wait specified microseconds when do_not_submit is true')
    p.add_argument('-e', '--err-count', type=int, default=1,
                   help='Number of matching NVMe commands to inject errors (default 1)')
    p.add_argument('-u', '--sct', type=int, help='Status code type')
    p.add_argument('-c', '--sc', type=int, help='Status code')
    p.set_defaults(func=bdev_nvme_add_error_injection)

    # bdev_nvme_remove_error_injection
    def bdev_nvme_remove_error_injection(args):
        # Forward the parsed command-line options to the RPC helper and print its result.
        result = rpc.nvme.bdev_nvme_remove_error_injection(
            args.client,
            name=args.nvme_name,
            cmd_type=args.cmd_type,
            opc=args.opc)
        print_dict(result)

    p = subparsers.add_parser('bdev_nvme_remove_error_injection',
                              help='Removes a NVMe command error injection.')
    # All three arguments are required to identify the injection.
    p.add_argument('-n', '--nvme-name', required=True,
                   help='Name of the operating NVMe controller')
    p.add_argument('-t', '--cmd-type', required=True,
                   help='Type of nvme cmd. Valid values are: admin, io')
    p.add_argument('-o', '--opc', required=True, type=int,
                   help='Opcode of the nvme cmd.')
    p.set_defaults(func=bdev_nvme_remove_error_injection)

    def check_called_name(name):
        if name in deprecated_aliases:
            print("{} is deprecated, use {} instead.".format(name, deprecated_aliases[name]), file=sys.stderr)
+53 −0
Original line number Diff line number Diff line
@@ -85,3 +85,56 @@ def bdev_nvme_opal_revert(client, nvme_ctrlr_name, password):
    }

    return client.call('bdev_nvme_opal_revert', params)


def bdev_nvme_add_error_injection(client, name, opc, cmd_type, do_not_submit=False, timeout_in_us=None,
                                  err_count=None, sct=None, sc=None):
    """Add error injection

    Args:
        name: Name of the operating NVMe controller
        opc: Opcode of the NVMe command
        cmd_type: Type of NVMe command. Valid values are: admin, io
        do_not_submit: Do not submit commands to the controller (optional)
        timeout_in_us: Wait specified microseconds when do_not_submit is true (optional)
        err_count: Number of matching NVMe commands to inject errors (optional)
        sct: NVMe status code type (optional)
        sc: NVMe status code (optional)

    Optional numeric arguments are sent whenever they are not None, so an
    explicit 0 reaches the target instead of being replaced by its default.

    Returns:
        True on success, RPC error otherwise
    """
    params = {'name': name,
              'opc': opc,
              'cmd_type': cmd_type}

    # A false flag equals the target's default, so it is only sent when set.
    if do_not_submit:
        params['do_not_submit'] = do_not_submit

    # Test against None rather than truthiness: 0 is a legitimate value, and
    # dropping err_count=0 would make the target fall back to its own default.
    optional_numbers = (('timeout_in_us', timeout_in_us),
                        ('err_count', err_count),
                        ('sct', sct),
                        ('sc', sc))
    for key, value in optional_numbers:
        if value is not None:
            params[key] = value

    return client.call('bdev_nvme_add_error_injection', params)


def bdev_nvme_remove_error_injection(client, name, opc, cmd_type):
    """Remove a previously added error injection

    Args:
        name: Name of the operating NVMe controller
        opc: Opcode of the NVMe command
        cmd_type: Type of NVMe command. Valid values are: admin, io

    Returns:
        True on success, RPC error otherwise
    """
    # All three parameters are mandatory, so they are always sent.
    request = dict(name=name, opc=opc, cmd_type=cmd_type)
    return client.call('bdev_nvme_remove_error_injection', request)