Commit 953b74b9 authored by Richael Zhuang's avatar Richael Zhuang Committed by Konrad Sztyber
Browse files

bdev_nvme: fix heap-use-after-free when detaching controller



There is heap-use-after-free error when detaching a controller
when "io_path_stat" option set as true.
(if build spdk without asan ubsan, error is free(): corrupted
unsorted chunks)

It's because io_path is accessed in bdev_nvme_io_complete_nvme_status
after the io_path is freed.

io_path is freed when we detach the controller in function
_bdev_nvme_delete_io_path, this function will execute 1 and 2.
And before 4 is executed, 3 may be executed which accesses io_path.

1.spdk_put_io_channel() is called. bdev_nvme_destroy_ctrlr_channel_cb
has not been called.
2.free(io_path->stat); free(io_path);
3.bdev_nvme_poll; nbdev_io1 is success; bdev_nvme_io_complete_nvme_status()
access nbdev_io1->io_path.
4.bdev_nvme_destroy_ctrlr_channel_cb disconnect qpair and abort nbdev_io1.

This patch fixed this by moving 2 down under 4. We don't free io_path in
_bdev_nvme_delete_io_path but just remove from the nbdev_ch->io_path_list.

The processes to reproduce the error:
target: run nvmf_tgt
initiator: (build spdk with asan,ubsan enabled)
sudo ./build/examples/bdevperf --json bdevperf-multipath-rdma-active-active.json  -r tmp.sock -q 128 -o 4096  -w randrw -M 50 -t 120
sudo ./scripts/rpc.py -s tmp.sock  bdev_nvme_detach_controller -t rdma -a 10.10.10.10 -f IPv4 -s 4420 -n nqn.2016-06.io.spdk:cnode1 NVMe0

========
bdevperf-multipath-rdma-active-active.json

{
  "subsystems": [
  {
    "subsystem": "bdev",
    "config": [
       {
         "method":"bdev_nvme_attach_controller",
         "params": {
           "name": "NVMe0",
           "trtype": "tcp",
           "traddr": "10.169.204.201",
           "trsvcid": "4420",
           "subnqn": "nqn.2016-06.io.spdk:cnode1",
           "hostnqn": "nqn.2016-06.io.spdk:init",
           "adrfam": "IPv4"
        }
      },
      {
        "method":"bdev_nvme_attach_controller",
        "params": {
        "name": "NVMe0",
        "trtype": "rdma",
         "traddr": "10.10.10.10",
           "trsvcid": "4420",
           "subnqn": "nqn.2016-06.io.spdk:cnode1",
           "hostnqn": "nqn.2016-06.io.spdk:init",
           "adrfam": "IPv4",
           "multipath": "multipath"
        }
    },
    {
       "method":"bdev_nvme_set_multipath_policy",
       "params": {
         "name": "NVMe0n1",
         "policy": "active_active"
       }
    },
    {
       "method":"bdev_nvme_set_options",
         "params": {
           "io_path_stat": true
         }
    }
    ]
    }
  ]
}
======

Change-Id: I8f4f9dc7195f49992a5ba9798613b64d44266e5e
Signed-off-by: default avatarRichael Zhuang <richael.zhuang@arm.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/17581


Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
parent e351b190
Loading
Loading
Loading
Loading
+46 −14
Original line number Diff line number Diff line
@@ -582,18 +582,15 @@ _bdev_nvme_get_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_
	return io_path;
}

static int
_bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
static struct nvme_io_path *
nvme_io_path_alloc(void)
{
	struct nvme_io_path *io_path;
	struct spdk_io_channel *ch;
	struct nvme_ctrlr_channel *ctrlr_ch;
	struct nvme_qpair *nvme_qpair;

	io_path = calloc(1, sizeof(*io_path));
	if (io_path == NULL) {
		SPDK_ERRLOG("Failed to alloc io_path.\n");
		return -ENOMEM;
		return NULL;
	}

	if (g_opts.io_path_stat) {
@@ -601,17 +598,39 @@ _bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_
		if (io_path->stat == NULL) {
			free(io_path);
			SPDK_ERRLOG("Failed to alloc io_path stat.\n");
			return -ENOMEM;
			return NULL;
		}
		spdk_bdev_reset_io_stat(io_path->stat, SPDK_BDEV_RESET_STAT_MAXMIN);
	}

	return io_path;
}

static void
nvme_io_path_free(struct nvme_io_path *io_path)
{
	free(io_path->stat);
	free(io_path);
}

static int
_bdev_nvme_add_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_ns *nvme_ns)
{
	struct nvme_io_path *io_path;
	struct spdk_io_channel *ch;
	struct nvme_ctrlr_channel *ctrlr_ch;
	struct nvme_qpair *nvme_qpair;

	io_path = nvme_io_path_alloc();
	if (io_path == NULL) {
		return -ENOMEM;
	}

	io_path->nvme_ns = nvme_ns;

	ch = spdk_get_io_channel(nvme_ns->ctrlr);
	if (ch == NULL) {
		free(io_path->stat);
		free(io_path);
		nvme_io_path_free(io_path);
		SPDK_ERRLOG("Failed to alloc io_channel.\n");
		return -ENOMEM;
	}
@@ -652,20 +671,22 @@ _bdev_nvme_delete_io_path(struct nvme_bdev_channel *nbdev_ch, struct nvme_io_pat
	bdev_nvme_clear_current_io_path(nbdev_ch);

	STAILQ_REMOVE(&nbdev_ch->io_path_list, io_path, nvme_io_path, stailq);
	io_path->nbdev_ch = NULL;

	nvme_qpair = io_path->qpair;
	assert(nvme_qpair != NULL);

	TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);

	ctrlr_ch = nvme_qpair->ctrlr_ch;
	assert(ctrlr_ch != NULL);

	ch = spdk_io_channel_from_ctx(ctrlr_ch);
	spdk_put_io_channel(ch);

	free(io_path->stat);
	free(io_path);
	/* After an io_path is removed, I/Os submitted to it may complete and update statistics
	 * of the io_path. To avoid heap-use-after-free error from this case, do not free the
	 * io_path here but free the io_path when the associated qpair is freed. It is ensured
	 * that all I/Os submitted to the io_path are completed when the associated qpair is freed.
	 */
}

static void
@@ -1401,6 +1422,9 @@ _bdev_nvme_clear_io_path_cache(struct nvme_qpair *nvme_qpair)
	struct nvme_io_path *io_path;

	TAILQ_FOREACH(io_path, &nvme_qpair->io_path_list, tailq) {
		if (io_path->nbdev_ch == NULL) {
			continue;
		}
		bdev_nvme_clear_current_io_path(io_path->nbdev_ch);
	}
}
@@ -2624,8 +2648,15 @@ bdev_nvme_create_ctrlr_channel_cb(void *io_device, void *ctx_buf)
static void
nvme_qpair_delete(struct nvme_qpair *nvme_qpair)
{
	struct nvme_io_path *io_path, *next;

	assert(nvme_qpair->group != NULL);

	TAILQ_FOREACH_SAFE(io_path, &nvme_qpair->io_path_list, tailq, next) {
		TAILQ_REMOVE(&nvme_qpair->io_path_list, io_path, tailq);
		nvme_io_path_free(io_path);
	}

	TAILQ_REMOVE(&nvme_qpair->group->qpair_list, nvme_qpair, tailq);

	spdk_put_io_channel(spdk_io_channel_from_ctx(nvme_qpair->group));
@@ -7249,7 +7280,8 @@ nvme_io_path_info_json(struct spdk_json_write_ctx *w, struct nvme_io_path *io_pa
	trid = spdk_nvme_ctrlr_get_transport_id(nvme_ctrlr->ctrlr);

	spdk_json_write_named_uint32(w, "cntlid", cdata->cntlid);
	spdk_json_write_named_bool(w, "current", io_path == io_path->nbdev_ch->current_io_path);
	spdk_json_write_named_bool(w, "current", io_path->nbdev_ch != NULL &&
				   io_path == io_path->nbdev_ch->current_io_path);
	spdk_json_write_named_bool(w, "connected", nvme_io_path_is_connected(io_path));
	spdk_json_write_named_bool(w, "accessible", nvme_ns_is_accessible(nvme_ns));