Commit 549be9ad authored by sijie.sun's avatar sijie.sun Committed by Tomasz Zawadzki
Browse files

nvmf/rdma: Recreate resources and listeners after IB device is hotplugged



An IB device may be unplugged and hotplugged when modifying the slaves of a
bonding IB device. This patch tries to recreate ibv device contexts, pollers,
and listeners after the IB devices come back.

Signed-off-by: default avatarsijie.sun <sijie.sun@smartx.com>
Change-Id: I3288174bad847edc2d9859cb34aa93c6af8c673b
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/15616


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarAleksey Marchuk <alexeymar@nvidia.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
parent 8ddc5cd4
Loading
Loading
Loading
Loading
+288 −35
Original line number Diff line number Diff line
@@ -444,6 +444,7 @@ struct spdk_nvmf_rdma_device {
	int					num_srq;
	bool					need_destroy;
	bool					ready_to_destroy;
	bool					is_ready;

	TAILQ_ENTRY(spdk_nvmf_rdma_device)	link;
};
@@ -482,6 +483,9 @@ struct spdk_nvmf_rdma_transport {
	TAILQ_HEAD(, spdk_nvmf_rdma_device)	devices;
	TAILQ_HEAD(, spdk_nvmf_rdma_port)	ports;
	TAILQ_HEAD(, spdk_nvmf_rdma_poll_group)	poll_groups;

	/* ports that are removed unexpectedly and need retry listen */
	TAILQ_HEAD(, spdk_nvmf_rdma_port)		retry_ports;
};

struct poller_manage_ctx {
@@ -913,7 +917,6 @@ nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
			spdk_rdma_qp_destroy(rqpair->rdma_qp);
			rqpair->rdma_qp = NULL;
		}
		rdma_destroy_id(rqpair->cm_id);

		if (rqpair->poller != NULL && rqpair->srq == NULL) {
			rqpair->poller->required_num_wr -= MAX_WR_PER_QP(rqpair->max_queue_depth);
@@ -934,6 +937,12 @@ nvmf_rdma_qpair_destroy(struct spdk_nvmf_rdma_qpair *rqpair)
	if (rqpair->poller && rqpair->poller->need_destroy && RB_EMPTY(&rqpair->poller->qpairs)) {
		nvmf_rdma_poller_destroy(rqpair->poller);
	}

	/* destroy cm_id last so cma device will not be freed before we destroy the cq. */
	if (rqpair->cm_id) {
		rdma_destroy_id(rqpair->cm_id);
	}

	free(rqpair);
}

@@ -2387,6 +2396,10 @@ nvmf_rdma_is_rxe_device(struct spdk_nvmf_rdma_device *device)
}

static int nvmf_rdma_accept(void *ctx);
static bool nvmf_rdma_retry_listen_port(struct spdk_nvmf_rdma_transport *rtransport);
static void destroy_ib_device(struct spdk_nvmf_rdma_transport *rtransport,
			      struct spdk_nvmf_rdma_device *device);

static int
create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context *context,
		 struct spdk_nvmf_rdma_device **new_device)
@@ -2448,6 +2461,7 @@ create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context

	if (!device->pd) {
		SPDK_ERRLOG("Unable to allocate protection domain.\n");
		destroy_ib_device(rtransport, device);
		return -ENOMEM;
	}

@@ -2456,6 +2470,7 @@ create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context
	device->map = spdk_rdma_create_mem_map(device->pd, &g_nvmf_hooks, SPDK_RDMA_MEMORY_MAP_ROLE_TARGET);
	if (!device->map) {
		SPDK_ERRLOG("Unable to allocate memory map for listen address\n");
		destroy_ib_device(rtransport, device);
		return -ENOMEM;
	}

@@ -2465,6 +2480,9 @@ create_ib_device(struct spdk_nvmf_rdma_transport *rtransport, struct ibv_context
	if (new_device) {
		*new_device = device;
	}
	SPDK_NOTICELOG("Create IB device %s(%p/%p) succeed.\n", ibv_get_device_name(context->device),
		       device, context);

	return 0;
}

@@ -2533,6 +2551,7 @@ nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
	TAILQ_INIT(&rtransport->devices);
	TAILQ_INIT(&rtransport->ports);
	TAILQ_INIT(&rtransport->poll_groups);
	TAILQ_INIT(&rtransport->retry_ports);

	rtransport->transport.ops = &spdk_nvmf_transport_rdma;
	rtransport->rdma_opts.num_cqe = DEFAULT_NVMF_RDMA_CQ_SIZE;
@@ -2660,6 +2679,7 @@ nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
		}
		i++;
		max_device_sge = spdk_min(max_device_sge, device->attr.max_sge);
		device->is_ready = true;
	}
	rdma_free_devices(contexts);

@@ -2707,8 +2727,7 @@ destroy_ib_device(struct spdk_nvmf_rdma_transport *rtransport,
			ibv_dealloc_pd(device->pd);
		}
	}
	SPDK_NOTICELOG("IB device %s[%p] is destroyed.\n", ibv_get_device_name(device->context->device),
		       device);
	SPDK_DEBUGLOG(rdma, "IB device [%p] is destroyed.\n", device);
	free(device);
}

@@ -2738,6 +2757,11 @@ nvmf_rdma_destroy(struct spdk_nvmf_transport *transport,

	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);

	TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, port_tmp) {
		TAILQ_REMOVE(&rtransport->retry_ports, port, link);
		free(port);
	}

	TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, port_tmp) {
		TAILQ_REMOVE(&rtransport->ports, port, link);
		rdma_destroy_id(port->id);
@@ -2778,17 +2802,20 @@ static int nvmf_rdma_trid_from_cm_id(struct rdma_cm_id *id,
				     struct spdk_nvme_transport_id *trid,
				     bool peer);

static bool nvmf_rdma_rescan_devices(struct spdk_nvmf_rdma_transport *rtransport);

static int
nvmf_rdma_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_transport_id *trid,
		 struct spdk_nvmf_listen_opts *listen_opts)
{
	struct spdk_nvmf_rdma_transport	*rtransport;
	struct spdk_nvmf_rdma_device	*device;
	struct spdk_nvmf_rdma_port	*port;
	struct spdk_nvmf_rdma_port	*port, *tmp_port;
	struct addrinfo			*res;
	struct addrinfo			hints;
	int				family;
	int				rc;
	bool				is_retry = false;

	if (!strlen(trid->trsvcid)) {
		SPDK_ERRLOG("Service id is required\n");
@@ -2844,7 +2871,15 @@ nvmf_rdma_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_t
	freeaddrinfo(res);

	if (rc < 0) {
		TAILQ_FOREACH(tmp_port, &rtransport->retry_ports, link) {
			if (spdk_nvme_transport_id_compare(tmp_port->trid, trid) == 0) {
				is_retry = true;
				break;
			}
		}
		if (!is_retry) {
			SPDK_ERRLOG("rdma_bind_addr() failed\n");
		}
		rdma_destroy_id(port->id);
		free(port);
		return rc;
@@ -2866,7 +2901,7 @@ nvmf_rdma_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_t
	}

	TAILQ_FOREACH(device, &rtransport->devices, link) {
		if (device->context == port->id->verbs) {
		if (device->context == port->id->verbs && device->is_ready) {
			port->device = device;
			break;
		}
@@ -2887,34 +2922,59 @@ nvmf_rdma_listen(struct spdk_nvmf_transport *transport, const struct spdk_nvme_t
}

static void
nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport,
		      const struct spdk_nvme_transport_id *trid)
nvmf_rdma_stop_listen_ex(struct spdk_nvmf_transport *transport,
			 const struct spdk_nvme_transport_id *trid, bool need_retry)
{
	struct spdk_nvmf_rdma_transport	*rtransport;
	struct spdk_nvmf_rdma_port	*port, *tmp;

	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);

	if (!need_retry) {
		TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, tmp) {
			if (spdk_nvme_transport_id_compare(port->trid, trid) == 0) {
				TAILQ_REMOVE(&rtransport->retry_ports, port, link);
				free(port);
			}
		}
	}

	TAILQ_FOREACH_SAFE(port, &rtransport->ports, link, tmp) {
		if (spdk_nvme_transport_id_compare(port->trid, trid) == 0) {
			SPDK_DEBUGLOG(rdma, "Port %s:%s removed. need retry: %d\n",
				      port->trid->traddr, port->trid->trsvcid, need_retry);
			TAILQ_REMOVE(&rtransport->ports, port, link);
			rdma_destroy_id(port->id);
			port->id = NULL;
			port->device = NULL;
			if (need_retry) {
				TAILQ_INSERT_TAIL(&rtransport->retry_ports, port, link);
			} else {
				free(port);
			}
			break;
		}
	}
}

/* Public stop-listen entry point: permanently remove the listener (and any
 * pending retry entry for it) rather than queueing it for re-listen. */
static void
nvmf_rdma_stop_listen(struct spdk_nvmf_transport *transport,
		      const struct spdk_nvme_transport_id *trid)
{
	nvmf_rdma_stop_listen_ex(transport, trid, false);
}

static void _nvmf_rdma_register_poller_in_group(void *c);
static void _nvmf_rdma_remove_poller_in_group(void *c);

static bool
nvmf_rdma_all_pollers_are_destroyed(void *c)
nvmf_rdma_all_pollers_management_done(void *c)
{
	struct poller_manage_ctx	*ctx = c;
	int				counter;

	counter = __atomic_sub_fetch(ctx->inflight_op_counter, 1, __ATOMIC_SEQ_CST);
	SPDK_DEBUGLOG(rdma, "nvmf_rdma_all_pollers_are_destroyed called. counter: %d, poller: %p\n",
	SPDK_DEBUGLOG(rdma, "nvmf_rdma_all_pollers_management_done called. counter: %d, poller: %p\n",
		      counter, ctx->rpoller);

	if (counter == 0) {
@@ -2926,9 +2986,8 @@ nvmf_rdma_all_pollers_are_destroyed(void *c)
}

static int
nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport,
				struct spdk_nvmf_rdma_device *device,
				bool *has_inflight)
nvmf_rdma_manage_poller(struct spdk_nvmf_rdma_transport *rtransport,
			struct spdk_nvmf_rdma_device *device, bool *has_inflight, bool is_add)
{
	struct spdk_nvmf_rdma_poll_group	*rgroup;
	struct spdk_nvmf_rdma_poller		*rpoller;
@@ -2939,7 +2998,7 @@ nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport,
	spdk_msg_fn				do_fn;

	*has_inflight = false;
	do_fn = _nvmf_rdma_remove_poller_in_group;
	do_fn = is_add ? _nvmf_rdma_register_poller_in_group : _nvmf_rdma_remove_poller_in_group;
	inflight_counter = calloc(1, sizeof(int));
	if (!inflight_counter) {
		SPDK_ERRLOG("Failed to allocate inflight counter when removing pollers\n");
@@ -2958,7 +3017,7 @@ nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport,
				break;
			}
		}
		if (!found) {
		if (found == is_add) {
			__atomic_fetch_sub(inflight_counter, 1, __ATOMIC_SEQ_CST);
			continue;
		}
@@ -2995,6 +3054,167 @@ nvmf_rdma_remove_pollers_on_dev(struct spdk_nvmf_rdma_transport *rtransport,
	return 0;
}

static void nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport,
		struct spdk_nvmf_rdma_device *device);

/* Look up a live transport device whose kernel device name matches the given
 * verbs context. Devices already flagged for destruction are ignored so a
 * hot-plugged replacement with the same name is reported as new (NULL). */
static struct spdk_nvmf_rdma_device *
nvmf_rdma_find_ib_device(struct spdk_nvmf_rdma_transport *rtransport,
			 struct ibv_context *context)
{
	struct spdk_nvmf_rdma_device	*cur;

	/* Plain FOREACH is sufficient: nothing is removed from the list here. */
	TAILQ_FOREACH(cur, &rtransport->devices, link) {
		if (cur->need_destroy) {
			continue;
		}
		if (strcmp(cur->context->device->dev_name, context->device->dev_name) == 0) {
			return cur;
		}
	}

	return NULL;
}

/* Ensure a usable spdk_nvmf_rdma_device exists for the given verbs context.
 * If a device with the same name exists but holds a stale context, schedule
 * its removal. Returns true only when a brand-new device was created. */
static bool
nvmf_rdma_check_devices_context(struct spdk_nvmf_rdma_transport *rtransport,
				struct ibv_context *context)
{
	struct spdk_nvmf_rdma_device	*old_device, *new_device;
	int				rc = 0;
	bool				has_inflight;

	old_device = nvmf_rdma_find_ib_device(rtransport, context);

	if (old_device) {
		if (old_device->context != context && !old_device->need_destroy && old_device->is_ready) {
			/* context may not have time to be cleaned when rescan. exactly one context
			 * is valid for a device so this context must be invalid and just remove it. */
			SPDK_WARNLOG("Device %p has a invalid context %p\n", old_device, old_device->context);
			old_device->need_destroy = true;
			nvmf_rdma_handle_device_removal(rtransport, old_device);
		}
		return false;
	}

	rc = create_ib_device(rtransport, context, &new_device);
	/* TODO: update transport opts. */
	if (rc < 0) {
		SPDK_ERRLOG("Failed to create ib device for context: %s(%p)\n",
			    ibv_get_device_name(context->device), context);
		return false;
	}

	/* NOTE(review): on failure here the freshly created device stays on
	 * rtransport->devices without pollers — confirm whether it should be
	 * destroyed instead of merely reporting the error. */
	rc = nvmf_rdma_manage_poller(rtransport, new_device, &has_inflight, true);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to add poller for device context: %s(%p)\n",
			    ibv_get_device_name(context->device), context);
		return false;
	}

	if (!has_inflight) {
		/* Bug fix: the original set is_ready when work WAS in flight. When
		 * per-poll-group poller creation is in flight, the completion callback
		 * (_nvmf_rdma_register_poller_in_group) marks the device ready after
		 * the last group finishes. When nothing is in flight (e.g. no poll
		 * groups exist yet), no callback will ever run, so the device must be
		 * marked ready here. */
		new_device->is_ready = true;
	}

	return true;
}

/* Scan for IB devices that appeared after transport creation (e.g. re-plugged
 * bonding slaves) and create transport devices/pollers for them. Returns true
 * when at least one new device was created (poll fds were regenerated). */
static bool
nvmf_rdma_rescan_devices(struct spdk_nvmf_rdma_transport *rtransport)
{
	struct spdk_nvmf_rdma_device	*device;
	struct ibv_device		**ibv_device_list = NULL;
	struct ibv_context		**contexts = NULL;
	int				i = 0;
	int				num_dev = 0;
	bool				new_create = false, has_new_device = false;
	struct ibv_context		*tmp_verbs = NULL;

	/* do not rescan when any device is destroying, or context may be freed when
	 * regenerating the poll fds.
	 */
	TAILQ_FOREACH(device, &rtransport->devices, link) {
		if (device->need_destroy) {
			return false;
		}
	}

	ibv_device_list = ibv_get_device_list(&num_dev);

	/* There is a bug in librdmacm. If verbs init failed in rdma_get_devices, it'll be
	 * marked as dead verbs and never be init again. So we need to make sure the
	 * verbs is available before we call rdma_get_devices. */
	if (ibv_device_list != NULL && num_dev >= 0) {
		/* Bug fix: ibv_get_device_list() returns NULL on failure; the original
		 * guard only checked num_dev and would pass NULL to
		 * ibv_free_device_list(), which dereferences the list. */
		for (i = 0; i < num_dev; i++) {
			tmp_verbs = ibv_open_device(ibv_device_list[i]);
			if (!tmp_verbs) {
				SPDK_WARNLOG("Failed to init ibv device %p, err %d. Skip rescan.\n", ibv_device_list[i], errno);
				break;
			}
			if (nvmf_rdma_find_ib_device(rtransport, tmp_verbs) == NULL) {
				SPDK_DEBUGLOG(rdma, "Find new verbs init ibv device %p(%s).\n", ibv_device_list[i],
					      tmp_verbs->device->dev_name);
				has_new_device = true;
			}
			ibv_close_device(tmp_verbs);
		}
		ibv_free_device_list(ibv_device_list);
		/* Either a device failed to open or nothing new was found: avoid
		 * calling into rdma_get_devices (see librdmacm note above). */
		if (!tmp_verbs || !has_new_device) {
			return false;
		}
	}

	contexts = rdma_get_devices(NULL);

	for (i = 0; contexts && contexts[i] != NULL; i++) {
		new_create |= nvmf_rdma_check_devices_context(rtransport, contexts[i]);
	}

	if (new_create) {
		/* The device set changed; rebuild the accept poller's fd array. */
		free_poll_fds(rtransport);
		generate_poll_fds(rtransport);
	}

	if (contexts) {
		rdma_free_devices(contexts);
	}

	return new_create;
}

/* Attempt to re-listen on ports that were torn down by unexpected device
 * removal. Called from the accept poller. Returns true when there were retry
 * ports to process, so the poller can report itself busy. */
static bool
nvmf_rdma_retry_listen_port(struct spdk_nvmf_rdma_transport *rtransport)
{
	struct spdk_nvmf_rdma_port	*port, *tmp_port;
	int				rc = 0;
	bool				new_create = false;

	if (TAILQ_EMPTY(&rtransport->retry_ports)) {
		return false;
	}

	/* Rescan for hot-plugged IB devices first so the listen calls below can
	 * find a ready device for each port's address. */
	new_create = nvmf_rdma_rescan_devices(rtransport);

	TAILQ_FOREACH_SAFE(port, &rtransport->retry_ports, link, tmp_port) {
		rc = nvmf_rdma_listen(&rtransport->transport, port->trid, NULL);

		TAILQ_REMOVE(&rtransport->retry_ports, port, link);
		if (rc) {
			if (new_create) {
				SPDK_ERRLOG("Found new IB device but port %s:%s is still failed(%d) to listen.\n",
					    port->trid->traddr, port->trid->trsvcid, rc);
			}
			/* Re-queue this port and stop; the remaining ports will be
			 * retried on a later accept-poller iteration. */
			TAILQ_INSERT_TAIL(&rtransport->retry_ports, port, link);
			break;
		} else {
			/* nvmf_rdma_listen() allocated a fresh port entry on success,
			 * so this stale retry entry can be released. */
			SPDK_NOTICELOG("Port %s:%s come back\n", port->trid->traddr, port->trid->trsvcid);
			free(port);
		}
	}

	return true;
}

static void
nvmf_rdma_qpair_process_pending(struct spdk_nvmf_rdma_transport *rtransport,
				struct spdk_nvmf_rdma_qpair *rqpair, bool drain)
@@ -3200,7 +3420,7 @@ nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport,
	int				rc;
	bool				has_inflight;

	rc = nvmf_rdma_remove_pollers_on_dev(rtransport, device, &has_inflight);
	rc = nvmf_rdma_manage_poller(rtransport, device, &has_inflight, false);
	if (rc) {
		SPDK_ERRLOG("Failed to handle device removal, rc %d\n", rc);
		return;
@@ -3223,7 +3443,7 @@ nvmf_rdma_handle_device_removal(struct spdk_nvmf_rdma_transport *rtransport,
			 * RDMA transport. when the device comes back we can retry listening
			 * and the application's workflow will not be interrupted.
			 */
			nvmf_rdma_stop_listen(&rtransport->transport, port->trid);
			nvmf_rdma_stop_listen_ex(&rtransport->transport, port->trid, true);
		}
	}
}
@@ -3232,7 +3452,7 @@ static void
nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport,
				       struct rdma_cm_event *event)
{
	struct spdk_nvmf_rdma_port		*port;
	struct spdk_nvmf_rdma_port		*port, *tmp_port;
	struct spdk_nvmf_rdma_transport		*rtransport;

	port = event->id->context;
@@ -3240,11 +3460,17 @@ nvmf_rdma_handle_cm_event_port_removal(struct spdk_nvmf_transport *transport,

	rdma_ack_cm_event(event);

	if (!port->device->need_destroy) {
	/* if device removal happens during ctrl qpair disconnecting, it's possible that we receive
	 * an DEVICE_REMOVAL event on qpair but the id->qp is just NULL. So we should make sure that
	 * we are handling a port event here.
	 */
	TAILQ_FOREACH(tmp_port, &rtransport->ports, link) {
		if (port == tmp_port && port->device && !port->device->need_destroy) {
			port->device->need_destroy = true;
			nvmf_rdma_handle_device_removal(rtransport, port->device);
		}
	}
}

static void
nvmf_process_cm_event(struct spdk_nvmf_transport *transport)
@@ -3315,11 +3541,15 @@ nvmf_process_cm_event(struct spdk_nvmf_transport *transport)
			 * don't make attempts to call any ibv_query/modify/create functions. We can only call
			 * ibv_destroy* functions to release user space memory allocated by IB. All kernel
			 * resources are already cleaned. */
			if (!event->id->qp) {
			if (event->id->qp) {
				/* If rdma_cm event has a valid `qp` pointer then the event refers to the
				 * corresponding qpair. Otherwise the event refers to a listening device.
				 * Only handle this event on device because we will disconnect all qpairs
				 * when removing device */
				 * corresponding qpair. Otherwise the event refers to a listening device. */
				rc = nvmf_rdma_disconnect(event);
				if (rc < 0) {
					SPDK_ERRLOG("Unable to process disconnect event. rc: %d\n", rc);
					break;
				}
			} else {
				nvmf_rdma_handle_cm_event_port_removal(transport, event);
				event_acked = true;
			}
@@ -3526,12 +3756,15 @@ nvmf_rdma_accept(void *ctx)
	struct spdk_nvmf_rdma_device *device, *tmp;
	uint32_t count;
	short revents;
	bool do_retry;

	rtransport = SPDK_CONTAINEROF(transport, struct spdk_nvmf_rdma_transport, transport);
	do_retry = nvmf_rdma_retry_listen_port(rtransport);

	count = nfds = poll(rtransport->poll_fds, rtransport->npoll_fds, 0);

	if (nfds <= 0) {
		return SPDK_POLLER_IDLE;
		return do_retry ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
	}

	/* The first poll descriptor is RDMA CM event */
@@ -3684,6 +3917,25 @@ nvmf_rdma_poller_create(struct spdk_nvmf_rdma_transport *rtransport,
	return 0;
}

/* Poll-group-thread callback: create a poller on this group for a newly
 * (re)added device. The last group to complete marks the device ready. */
static void
_nvmf_rdma_register_poller_in_group(void *c)
{
	/* Bug fix: initialize to NULL. nvmf_rdma_poller_create() can fail before
	 * assigning the out pointer, and the original then read an uninitialized
	 * pointer in the (rc < 0 && poller) check — undefined behavior. */
	struct spdk_nvmf_rdma_poller	*poller = NULL;
	struct poller_manage_ctx	*ctx = c;
	struct spdk_nvmf_rdma_device	*device;
	int				rc;

	rc = nvmf_rdma_poller_create(ctx->rtransport, ctx->rgroup, ctx->device, &poller);
	if (rc < 0 && poller) {
		nvmf_rdma_poller_destroy(poller);
	}

	/* Save the device first: the management-done helper releases ctx when the
	 * inflight counter reaches zero. */
	device = ctx->device;
	if (nvmf_rdma_all_pollers_management_done(ctx)) {
		device->is_ready = true;
	}
}

static void nvmf_rdma_poll_group_destroy(struct spdk_nvmf_transport_poll_group *group);

static struct spdk_nvmf_transport_poll_group *
@@ -3902,6 +4154,11 @@ nvmf_rdma_poll_group_add(struct spdk_nvmf_transport_poll_group *group,
		return -1;
	}

	if (poller->need_destroy) {
		SPDK_ERRLOG("Poller is destroying.\n");
		return -1;
	}

	rqpair->poller = poller;
	rqpair->srq = rqpair->poller->srq;

@@ -4419,7 +4676,7 @@ _nvmf_rdma_remove_poller_in_group_cb(void *c)
	struct spdk_nvmf_rdma_device	*device = ctx->device;
	struct spdk_thread		*thread = ctx->thread;

	if (nvmf_rdma_all_pollers_are_destroyed(c)) {
	if (nvmf_rdma_all_pollers_management_done(c)) {
		/* destroy device when last poller is destroyed */
		device->ready_to_destroy = true;
		spdk_thread_send_msg(thread, _nvmf_rdma_remove_destroyed_device, rtransport);
@@ -4429,19 +4686,15 @@ _nvmf_rdma_remove_poller_in_group_cb(void *c)
static void
_nvmf_rdma_remove_poller_in_group(void *c)
{
	struct spdk_nvmf_rdma_qpair		*rqpair, *tmp_qpair;
	struct poller_manage_ctx		*ctx = c;

	ctx->rpoller->need_destroy = true;
	ctx->rpoller->destroy_cb_ctx = ctx;
	ctx->rpoller->destroy_cb = _nvmf_rdma_remove_poller_in_group_cb;

	/* qp will be disconnected after receiving a RDMA_CM_EVENT_DEVICE_REMOVAL event. */
	if (RB_EMPTY(&ctx->rpoller->qpairs)) {
		nvmf_rdma_poller_destroy(ctx->rpoller);
	} else {
		RB_FOREACH_SAFE(rqpair, qpairs_tree, &ctx->rpoller->qpairs, tmp_qpair) {
			spdk_nvmf_qpair_disconnect(&rqpair->qpair, NULL, NULL);
		}
	}
}

+40 −9
Original line number Diff line number Diff line
@@ -76,6 +76,17 @@ function get_rdma_device_name() {
	ls $(get_pci_dir $dev_name)/infiniband
}

# Succeed iff the given RDMA device name appears in the target's first poll
# group device list (grep's status is the function's status).
function check_rdma_dev_exists_in_nvmf_tgt() {
	local rdma_dev_name=$1
	$rpc_py nvmf_get_stats | jq -r '.poll_groups[0].transports[].devices[].name' | grep "$rdma_dev_name"
}

# Print the number of RDMA devices registered in the target's first poll group.
# (Removed the unused `rdma_dev_name=$1` local: no call site passes an
# argument and the value was never read.)
function get_rdma_dev_count_in_nvmf_tgt() {
	$rpc_py nvmf_get_stats | jq -r '.poll_groups[0].transports[].devices | length'
}

function test_remove_and_rescan() {
	nvmfappstart -m 0xF

@@ -91,7 +102,7 @@ function test_remove_and_rescan() {
		origin_ip=$(get_ip_address "$net_dev")
		pci_dir=$(get_pci_dir $net_dev)

		if ! $rpc_py nvmf_get_stats | grep "\"name\": \"$rdma_dev_name\""; then
		if ! check_rdma_dev_exists_in_nvmf_tgt "$rdma_dev_name"; then
			echo "Device $rdma_dev_name is not registered in tgt".
			exit 1
		fi
@@ -99,7 +110,7 @@ function test_remove_and_rescan() {
		remove_one_nic $net_dev

		for i in $(seq 1 10); do
			if ! $rpc_py nvmf_get_stats | grep "\"name\": \"$rdma_dev_name\""; then
			if ! check_rdma_dev_exists_in_nvmf_tgt "$rdma_dev_name"; then
				break
			fi
			if [[ $i == 10 ]]; then
@@ -109,6 +120,8 @@ function test_remove_and_rescan() {
			sleep 1
		done

		ib_count_after_remove=$(get_rdma_dev_count_in_nvmf_tgt)

		rescan_pci

		for i in $(seq 1 10); do
@@ -132,9 +145,25 @@ function test_remove_and_rescan() {
		if [[ -z $(get_ip_address "$net_dev") ]]; then
			ip addr add $origin_ip/24 dev $net_dev
		fi

		# if rdma device name is renamed, nvmf_get_stats may return an obsoleted name.
		# so we check ib device count here instead of the device name.
		for i in $(seq 1 10); do
			ib_count=$(get_rdma_dev_count_in_nvmf_tgt)
			if ((ib_count > ib_count_after_remove)); then
				break
			fi

			if [[ $i == 10 ]]; then
				# failed to rescan this device
				exit 1
			fi
			sleep 2
		done
	done

	killprocess $nvmfpid
	# NOTE: rdma-core <= v43.0 has memleak bug (fixed in commit 7720071f).
	killprocess $nvmfpid || true
	nvmfpid=

	return 0
@@ -205,7 +234,7 @@ function test_bonding_slaves_on_nics() {

	create_subsystem_and_connect_on_netdev $BOND_NAME

	ib_count=$($rpc_py nvmf_get_stats | grep devices -A 2 | grep -c name)
	ib_count=$(get_rdma_dev_count_in_nvmf_tgt)
	echo "IB Count: " $ib_count

	$rootdir/scripts/fio-wrapper -p nvmf -i 4096 -d 1 -t randrw -r 10 &
@@ -213,21 +242,23 @@ function test_bonding_slaves_on_nics() {

	sleep 2
	echo -$nic1 | sudo tee /sys/class/net/${BOND_NAME}/bonding/slaves
	sleep 10
	echo +$nic1 | sudo tee /sys/class/net/${BOND_NAME}/bonding/slaves

	ib_count2=$ib_count
	for i in $(seq 1 10); do
		ib_count2=$($rpc_py nvmf_get_stats | grep devices -A 2 | grep -c name)
		if ((ib_count2 < ib_count)); then
		ib_count2=$(get_rdma_dev_count_in_nvmf_tgt)
		if ((ib_count2 == ib_count)); then
			break
		fi
		sleep 2
	done
	if ((ib_count2 == ib_count)); then
	if ((ib_count2 != ib_count)); then
		exit 1
	fi

	# fio will exit when nvmf fin. do not wait here because it may be in D state.
	killprocess $nvmfpid
	# NOTE: rdma-core <= v43.0 has memleak bug (fixed in commit 7720071f).
	killprocess $nvmfpid || true
	nvmfpid=
	return 0
}