Commit 43c35d80 authored by Konrad Sztyber's avatar Konrad Sztyber Committed by Jim Harris
Browse files

util: multi-level fd_group nesting



This patch adds the ability to nest multiple fd_groups into one another.
This builds a tree in which the fds from all fd_groups are registered at
the root fd_group's epfd. For instance, in the following configuration:

          fgrp0
            |
   fgrp1----+----fgrp2
     |
   fgrp3

fds from all fd_groups will be registered to the epfd of fgrp0.  After
unnesting fgrp1, the fds of fgrp1 and fgrp3 will be removed from fgrp0's
epfd and added to fgrp1's epfd.

Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Change-Id: I4f586c21fe3db1739bf2010578b20606c53e5e84
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/25463


Reviewed-by: Ankit Kumar <ankit.kumar@samsung.com>
Reviewed-by: Ben Walker <ben@nvidia.com>
Community-CI: Mellanox Build Bot
Community-CI: Community CI Samsung <spdk.community.ci.samsung@gmail.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Jim Harris <jim.harris@nvidia.com>
parent 6336b7c5
Loading
Loading
Loading
Loading
+89 −65
Original line number Diff line number Diff line
@@ -179,9 +179,75 @@ recover:
	return ret;
}

/*
 * Follow the parent links upwards from fgrp and return the top-most fd_group,
 * i.e. the first group whose parent pointer is NULL.  When fgrp has no parent,
 * fgrp itself is returned.  fgrp is dereferenced unconditionally, so it must
 * not be NULL.
 */
static struct spdk_fd_group *
fd_group_get_root(struct spdk_fd_group *fgrp)
{
	struct spdk_fd_group *root;

	for (root = fgrp; root->parent != NULL; root = root->parent) {
		/* keep climbing until a group without a parent is reached */
	}

	return root;
}

/*
 * Move the fds of fgrp, and recursively of every group nested below it, from
 * the epoll instance of `old` to the epoll instance of `new`, updating the
 * num_fds counters of `old` and `new` accordingly.
 *
 * The children are processed first, then fgrp's own fds.  If any step fails,
 * the function tries to undo what it has already done so that the fds end up
 * back on old->epfd.
 *
 * \param fgrp fd_group whose fds (and descendants' fds) are moved.
 * \param old fd_group whose epfd currently holds the fds.
 * \param new fd_group whose epfd should hold the fds afterwards.
 *
 * \return 0 on success, a negative value on failure.  -ENOTRECOVERABLE is
 * returned when the rollback itself failed.
 *
 * NOTE(review): _fd_group_del_all() and _fd_group_add_all() are defined outside
 * of this view; this function treats their non-negative return value as the
 * number of fds removed/added and a negative value as an error.
 */
static int
fd_group_change_parent(struct spdk_fd_group *fgrp, struct spdk_fd_group *old,
		       struct spdk_fd_group *new)
{
	struct spdk_fd_group *child, *tmp;
	int rc, ret;

	/* Depth-first: move the fds of all nested groups before fgrp's own.  On failure,
	 * `child` is left pointing at the child that failed, which the recovery loop
	 * below uses as its stop marker.
	 */
	TAILQ_FOREACH(child, &fgrp->children, link) {
		ret = fd_group_change_parent(child, old, new);
		if (ret != 0) {
			goto recover_children;
		}
	}

	/* All children were moved; at this point `child` is NULL (end of TAILQ_FOREACH),
	 * so a later jump to recover_children reverts every child.
	 */
	ret = _fd_group_del_all(old->epfd, fgrp);
	if (ret < 0) {
		goto recover_children;
	}

	/* ret is the number of fds taken off old's epfd */
	assert(old->num_fds >= (uint32_t)ret);
	old->num_fds -= ret;

	ret = _fd_group_add_all(new->epfd, fgrp);
	if (ret < 0) {
		goto recover_epfd;
	}

	/* ret is the number of fds now registered at new's epfd */
	new->num_fds += ret;
	return 0;

recover_epfd:
	/* Adding to new's epfd failed.  If the failure was already reported as
	 * unrecoverable, don't try to re-add fgrp's fds to old's epfd and only revert the
	 * children.
	 */
	if (ret == -ENOTRECOVERABLE) {
		goto recover_children;
	}
	/* Put fgrp's own fds back on old's epfd; the original error in ret is kept unless
	 * this rollback fails as well.
	 */
	rc = _fd_group_add_all(old->epfd, fgrp);
	if (rc >= 0) {
		old->num_fds += rc;
	} else {
		SPDK_ERRLOG("Failed to recover epfd\n");
		ret = -ENOTRECOVERABLE;
	}
recover_children:
	/* Move back (new -> old) every child that had been moved before the failure, i.e.
	 * all children preceding `child` in the list, or all of them if `child` is NULL.
	 */
	TAILQ_FOREACH(tmp, &fgrp->children, link) {
		if (tmp == child) {
			break;
		}
		rc = fd_group_change_parent(tmp, new, old);
		if (rc != 0) {
			SPDK_ERRLOG("Failed to recover fd_group_change_parent\n");
			ret = -ENOTRECOVERABLE;
		}
	}
	return ret;
}

int
spdk_fd_group_unnest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
{
	struct spdk_fd_group *root;
	int rc;

	if (parent == NULL || child == NULL) {
@@ -192,30 +258,24 @@ spdk_fd_group_unnest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
		return -EINVAL;
	}

	rc = _fd_group_del_all(parent->epfd, child);
	if (rc < 0) {
	root = fd_group_get_root(parent);
	assert(root == parent || parent->num_fds == 0);

	rc = fd_group_change_parent(child, root, child);
	if (rc != 0) {
		return rc;
	} else {
		assert(parent->num_fds >= (uint32_t)rc);
		parent->num_fds -= rc;
	}

	child->parent = NULL;
	TAILQ_REMOVE(&parent->children, child, link);

	rc = _fd_group_add_all(child->epfd, child);
	if (rc < 0) {
		return rc;
	} else {
		child->num_fds += rc;
	}

	return 0;
}

int
spdk_fd_group_nest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
{
	struct spdk_fd_group *root;
	int rc;

	if (parent == NULL || child == NULL) {
@@ -226,25 +286,15 @@ spdk_fd_group_nest(struct spdk_fd_group *parent, struct spdk_fd_group *child)
		return -EINVAL;
	}

	if (parent->parent) {
		/* More than one layer of nesting is currently not supported */
		assert(false);
		return -ENOTSUP;
	}

	rc = _fd_group_del_all(child->epfd, child);
	if (rc < 0) {
		return rc;
	} else {
		assert(child->num_fds >= (uint32_t)rc);
		child->num_fds -= rc;
	}
	/* The epoll instance at the root holds all fds, so either the parent is the root or it
	 * doesn't hold any fds.
	 */
	root = fd_group_get_root(parent);
	assert(root == parent || parent->num_fds == 0);

	rc =  _fd_group_add_all(parent->epfd, child);
	if (rc < 0) {
	rc = fd_group_change_parent(child, child, root);
	if (rc != 0) {
		return rc;
	} else {
		parent->num_fds += rc;
	}

	child->parent = parent;
@@ -342,8 +392,8 @@ spdk_fd_group_add_ext(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, void *
	struct event_handler *ehdlr = NULL;
	struct epoll_event epevent = {0};
	struct spdk_event_handler_opts eh_opts = {};
	struct spdk_fd_group *root;
	int rc;
	int epfd;

	/* parameter checking */
	if (fgrp == NULL || efd < 0 || fn == NULL) {
@@ -376,15 +426,10 @@ spdk_fd_group_add_ext(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, void *
	ehdlr->fd_type = eh_opts.fd_type;
	snprintf(ehdlr->name, sizeof(ehdlr->name), "%s", name);

	if (fgrp->parent) {
		epfd = fgrp->parent->epfd;
	} else {
		epfd = fgrp->epfd;
	}

	root = fd_group_get_root(fgrp);
	epevent.events = ehdlr->events;
	epevent.data.ptr = ehdlr;
	rc = epoll_ctl(epfd, EPOLL_CTL_ADD, efd, &epevent);
	rc = epoll_ctl(root->epfd, EPOLL_CTL_ADD, efd, &epevent);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to add fd: %d to fd group(%p): %s\n",
			    efd, fgrp, strerror(errno));
@@ -393,11 +438,7 @@ spdk_fd_group_add_ext(struct spdk_fd_group *fgrp, int efd, spdk_fd_fn fn, void *
	}

	TAILQ_INSERT_TAIL(&fgrp->event_handlers, ehdlr, next);
	if (fgrp->parent) {
		fgrp->parent->num_fds++;
	} else {
		fgrp->num_fds++;
	}
	root->num_fds++;

	return 0;
}
@@ -406,8 +447,8 @@ void
spdk_fd_group_remove(struct spdk_fd_group *fgrp, int efd)
{
	struct event_handler *ehdlr;
	struct spdk_fd_group *root;
	int rc;
	int epfd;

	if (fgrp == NULL || efd < 0) {
		SPDK_ERRLOG("Cannot remove fd: %d from fd group(%p)\n", efd, fgrp);
@@ -428,27 +469,17 @@ spdk_fd_group_remove(struct spdk_fd_group *fgrp, int efd)
	}

	assert(ehdlr->state != EVENT_HANDLER_STATE_REMOVED);
	root = fd_group_get_root(fgrp);

	if (fgrp->parent) {
		epfd = fgrp->parent->epfd;
	} else {
		epfd = fgrp->epfd;
	}

	rc = epoll_ctl(epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL);
	rc = epoll_ctl(root->epfd, EPOLL_CTL_DEL, ehdlr->fd, NULL);
	if (rc < 0) {
		SPDK_ERRLOG("Failed to remove fd: %d from fd group(%p): %s\n",
			    ehdlr->fd, fgrp, strerror(errno));
		return;
	}

	if (fgrp->parent) {
		assert(fgrp->parent->num_fds > 0);
		fgrp->parent->num_fds--;
	} else {
		assert(fgrp->num_fds > 0);
		fgrp->num_fds--;
	}
	assert(root->num_fds > 0);
	root->num_fds--;
	TAILQ_REMOVE(&fgrp->event_handlers, ehdlr, next);

	/* Delay ehdlr's free in case it is waiting for execution in fgrp wait loop */
@@ -465,7 +496,6 @@ spdk_fd_group_event_modify(struct spdk_fd_group *fgrp,
{
	struct epoll_event epevent;
	struct event_handler *ehdlr;
	int epfd;

	if (fgrp == NULL || efd < 0) {
		return -EINVAL;
@@ -485,16 +515,10 @@ spdk_fd_group_event_modify(struct spdk_fd_group *fgrp,

	ehdlr->events = event_types;

	if (fgrp->parent) {
		epfd = fgrp->parent->epfd;
	} else {
		epfd = fgrp->epfd;
	}

	epevent.events = ehdlr->events;
	epevent.data.ptr = ehdlr;

	return epoll_ctl(epfd, EPOLL_CTL_MOD, ehdlr->fd, &epevent);
	return epoll_ctl(fd_group_get_root(fgrp)->epfd, EPOLL_CTL_MOD, ehdlr->fd, &epevent);
}

int
+151 −0
Original line number Diff line number Diff line
@@ -139,6 +139,156 @@ test_fd_group_nest_unnest(void)
	spdk_fd_group_destroy(not_parent);
}

/* Capacity of the per-group fd array kept by the tests */
#define UT_MAX_FDS 4

/* Test-side bookkeeping for a single fd_group */
struct ut_fgrp {
	/* The fd_group being exercised */
	struct spdk_fd_group	*fgrp;
	/* Number of entries of fd[] that are in use */
	size_t			num_fds;
	/* fds the test has added to fgrp */
	int			fd[UT_MAX_FDS];
};

/*
 * Exercise multi-level nesting of fd_groups.
 *
 * Four fd_groups are created, each with its own eventfds, and nested into a
 * two-level tree.  After every nest/unnest/add/remove step the test checks the
 * parent pointers and verifies that num_fds is accounted only at the group that
 * currently is the root of its tree, while all nested groups report 0.
 */
static void
test_fd_group_multi_nest(void)
{
	/* num_fds is the number of fds the test expects each group to own */
	struct ut_fgrp fgrp[] = {
		{ .num_fds = 1 },
		{ .num_fds = 2 },
		{ .num_fds = 2 },
		{ .num_fds = 3 },
	};
	size_t i, j;
	int fd, rc;

	/* Create four fd_groups with the following hierarchy:
	 *           fgrp[0]
	 *           (fd:0)
	 *              |
	 *  fgrp[1]-----+-----fgrp[2]
	 * (fd:1,2)          (fd:3,4)
	 *     |
	 *  fgrp[3]
	 * (fd:5,6,7)
	 */
	for (i = 0; i < SPDK_COUNTOF(fgrp); i++) {
		rc = spdk_fd_group_create(&fgrp[i].fgrp);
		SPDK_CU_ASSERT_FATAL(rc == 0);
		for (j = 0; j < fgrp[i].num_fds; j++) {
			fgrp[i].fd[j] = fd = eventfd(0, 0);
			CU_ASSERT(fd >= 0);
			rc = SPDK_FD_GROUP_ADD(fgrp[i].fgrp, fd, fd_group_cb_fn, NULL);
			CU_ASSERT_EQUAL(rc, 0);
		}
	}

	/* Before nesting, every group accounts for its own fds */
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds, fgrp[0].num_fds);
	CU_ASSERT_EQUAL(fgrp[1].fgrp->num_fds, fgrp[1].num_fds);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, fgrp[2].num_fds);
	CU_ASSERT_EQUAL(fgrp[3].fgrp->num_fds, fgrp[3].num_fds);

	/* fgrp[3] is nested into fgrp[1] before fgrp[1] is nested into fgrp[0], so the last
	 * call has to move a whole subtree.
	 */
	rc = spdk_fd_group_nest(fgrp[0].fgrp, fgrp[2].fgrp);
	CU_ASSERT_EQUAL(rc, 0);
	rc = spdk_fd_group_nest(fgrp[1].fgrp, fgrp[3].fgrp);
	CU_ASSERT_EQUAL(rc, 0);
	rc = spdk_fd_group_nest(fgrp[0].fgrp, fgrp[1].fgrp);
	CU_ASSERT_EQUAL(rc, 0);

	CU_ASSERT_PTR_EQUAL(fgrp[0].fgrp->parent, NULL);
	CU_ASSERT_PTR_EQUAL(fgrp[1].fgrp->parent, fgrp[0].fgrp);
	CU_ASSERT_PTR_EQUAL(fgrp[2].fgrp->parent, fgrp[0].fgrp);
	CU_ASSERT_PTR_EQUAL(fgrp[3].fgrp->parent, fgrp[1].fgrp);
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds,
			fgrp[0].num_fds + fgrp[1].num_fds +
			fgrp[2].num_fds + fgrp[3].num_fds);
	CU_ASSERT_EQUAL(fgrp[1].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[3].fgrp->num_fds, 0);

	/* Unnest fgrp[1] and verify that it now owns its own fds along with fgrp[3] fds */
	rc = spdk_fd_group_unnest(fgrp[0].fgrp, fgrp[1].fgrp);
	CU_ASSERT_EQUAL(rc, 0);

	CU_ASSERT_PTR_EQUAL(fgrp[0].fgrp->parent, NULL);
	CU_ASSERT_PTR_EQUAL(fgrp[1].fgrp->parent, NULL);
	CU_ASSERT_PTR_EQUAL(fgrp[2].fgrp->parent, fgrp[0].fgrp);
	CU_ASSERT_PTR_EQUAL(fgrp[3].fgrp->parent, fgrp[1].fgrp);
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds, fgrp[0].num_fds + fgrp[2].num_fds);
	CU_ASSERT_EQUAL(fgrp[1].fgrp->num_fds, fgrp[1].num_fds + fgrp[3].num_fds);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[3].fgrp->num_fds, 0);

	/* Nest it again, keeping the same configuration */
	rc = spdk_fd_group_nest(fgrp[0].fgrp, fgrp[1].fgrp);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT_PTR_EQUAL(fgrp[0].fgrp->parent, NULL);
	CU_ASSERT_PTR_EQUAL(fgrp[1].fgrp->parent, fgrp[0].fgrp);
	CU_ASSERT_PTR_EQUAL(fgrp[2].fgrp->parent, fgrp[0].fgrp);
	CU_ASSERT_PTR_EQUAL(fgrp[3].fgrp->parent, fgrp[1].fgrp);
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds,
			fgrp[0].num_fds + fgrp[1].num_fds +
			fgrp[2].num_fds + fgrp[3].num_fds);
	CU_ASSERT_EQUAL(fgrp[1].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[3].fgrp->num_fds, 0);

	/* Add a new fd to the fgrp at the bottom, fgrp[3] */
	fgrp[3].fd[fgrp[3].num_fds++] = fd = eventfd(0, 0);
	/* Check the descriptor here too, same as for the fds created during setup */
	CU_ASSERT(fd >= 0);
	rc = SPDK_FD_GROUP_ADD(fgrp[3].fgrp, fd, fd_group_cb_fn, NULL);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds,
			fgrp[0].num_fds + fgrp[1].num_fds +
			fgrp[2].num_fds + fgrp[3].num_fds);
	CU_ASSERT_EQUAL(fgrp[1].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[3].fgrp->num_fds, 0);

	/* Remove one of the fds from fgrp[2] */
	fd = fgrp[2].fd[--fgrp[2].num_fds];
	spdk_fd_group_remove(fgrp[2].fgrp, fd);
	close(fd);
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds,
			fgrp[0].num_fds + fgrp[1].num_fds +
			fgrp[2].num_fds + fgrp[3].num_fds);
	CU_ASSERT_EQUAL(fgrp[1].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[3].fgrp->num_fds, 0);

	/* Unnest the fgrp at the bottom, fgrp[3] */
	rc = spdk_fd_group_unnest(fgrp[1].fgrp, fgrp[3].fgrp);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT_PTR_EQUAL(fgrp[0].fgrp->parent, NULL);
	CU_ASSERT_PTR_EQUAL(fgrp[1].fgrp->parent, fgrp[0].fgrp);
	CU_ASSERT_PTR_EQUAL(fgrp[2].fgrp->parent, fgrp[0].fgrp);
	CU_ASSERT_PTR_EQUAL(fgrp[3].fgrp->parent, NULL);
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds, fgrp[0].num_fds + fgrp[1].num_fds + fgrp[2].num_fds);
	CU_ASSERT_EQUAL(fgrp[1].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, 0);
	CU_ASSERT_EQUAL(fgrp[3].fgrp->num_fds, fgrp[3].num_fds);

	/* Unnest the remaining fgrps, fgrp[1] and fgrp[2] */
	rc = spdk_fd_group_unnest(fgrp[0].fgrp, fgrp[1].fgrp);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT_PTR_EQUAL(fgrp[0].fgrp->parent, NULL);
	CU_ASSERT_PTR_EQUAL(fgrp[1].fgrp->parent, NULL);
	CU_ASSERT_PTR_EQUAL(fgrp[2].fgrp->parent, fgrp[0].fgrp);
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds, fgrp[0].num_fds + fgrp[2].num_fds);
	CU_ASSERT_EQUAL(fgrp[1].fgrp->num_fds, fgrp[1].num_fds);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, 0);

	rc = spdk_fd_group_unnest(fgrp[0].fgrp, fgrp[2].fgrp);
	CU_ASSERT_EQUAL(rc, 0);
	CU_ASSERT_PTR_EQUAL(fgrp[0].fgrp->parent, NULL);
	CU_ASSERT_PTR_EQUAL(fgrp[2].fgrp->parent, NULL);
	CU_ASSERT_EQUAL(fgrp[0].fgrp->num_fds, fgrp[0].num_fds);
	CU_ASSERT_EQUAL(fgrp[2].fgrp->num_fds, fgrp[2].num_fds);

	/* Every group is standalone again: drop the remaining fds and destroy the groups */
	for (i = 0; i < SPDK_COUNTOF(fgrp); i++) {
		for (j = 0; j < fgrp[i].num_fds; j++) {
			spdk_fd_group_remove(fgrp[i].fgrp, fgrp[i].fd[j]);
			close(fgrp[i].fd[j]);
		}
		spdk_fd_group_destroy(fgrp[i].fgrp);
	}
}

int
main(int argc, char **argv)
{
@@ -151,6 +301,7 @@ main(int argc, char **argv)

	CU_ADD_TEST(suite, test_fd_group_basic);
	CU_ADD_TEST(suite, test_fd_group_nest_unnest);
	CU_ADD_TEST(suite, test_fd_group_multi_nest);

	num_failures = spdk_ut_run_tests(argc, argv, NULL);