Commit fb12887f authored by Curt Bruns, committed by Tomasz Zawadzki
Browse files

test: add multi-process Async Event Report test



Modified the existing nvme aer test to include a multi-process
option that verifies that two processes will receive an async
event notification. Also added the multi-process aer test to
the CI test suite.

Signed-off-by: Curt Bruns <curt.e.bruns@gmail.com>
Change-Id: I08731fad317d43dcfb1766d22a3f4c6aa1738d2a
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/12293


Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Reviewed-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
parent 0b9100e8
Loading
Loading
Loading
Loading
+227 −24
Original line number Diff line number Diff line
@@ -40,6 +40,13 @@ static char *g_touch_file;
static int g_enable_temp_test = 0;
/* Expected changed NS ID */
static uint32_t g_expected_ns_test = 0;
/* For multi-process test */
static int g_multi_process_test = 0;
static bool g_parent_process;
static const char *g_sem_init_name = "/init";
static const char *g_sem_child_name = "/child";
static sem_t *g_sem_init_id;
static sem_t *g_sem_child_id;

static void
set_temp_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl)
@@ -72,6 +79,9 @@ set_temp_threshold(struct dev *dev, uint32_t temp)
	rc = spdk_nvme_ctrlr_cmd_admin_raw(dev->ctrlr, &cmd, NULL, 0, set_temp_completion, dev);
	if (rc == 0) {
		g_outstanding_commands++;
	} else {
		fprintf(stderr, "Submitting Admin cmd failed with rc: %d (%s)\n", \
			rc, (g_parent_process ? "Parent" : "Child"));
	}

	return rc;
@@ -180,26 +190,48 @@ cleanup(void)
static void
aer_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	uint32_t log_page_id = (cpl->cdw0 & 0xFF0000) >> 16;
	struct dev				*dev = arg;
	uint32_t				log_page_id;
	uint32_t				aen_event_info;
	uint32_t				aen_event_type;
	union spdk_nvme_async_event_completion	aen_cpl;

	aen_cpl.raw = cpl->cdw0;
	aen_event_info = aen_cpl.bits.async_event_info;
	aen_event_type = aen_cpl.bits.async_event_type;
	log_page_id = aen_cpl.bits.log_page_identifier;

	if (spdk_nvme_cpl_is_error(cpl)) {
		printf("%s: AER failed\n", dev->name);
		fprintf(stderr, "%s: AER failed\n", dev->name);
		g_failed = 1;
		return;
	}

	printf("%s: aer_cb for log page %d\n", dev->name, log_page_id);

	if (log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) {
		/* Set the temperature threshold back to the original value
		 * so the AER doesn't trigger again.
	printf("%s: aer_cb for log page %d, aen_event_type: 0x%02x, aen_event_info: 0x%02x (%s)\n", \
	       dev->name, log_page_id, aen_event_type, aen_event_info, \
	       (g_parent_process ? "Parent" : "Child"));
	/* Temp Test: Verify proper Event Type, Event Info and Log Page.
	 * NOTE: QEMU NVMe controllers return the Spare Below Threshold Status event info
	 * instead of the Temperature Threshold event info, which is why both are accepted
	 * in the check below.
	 */
	if ((log_page_id == SPDK_NVME_LOG_HEALTH_INFORMATION) && \
	    (aen_event_type == SPDK_NVME_ASYNC_EVENT_TYPE_SMART) && \
	    ((aen_event_info == SPDK_NVME_ASYNC_EVENT_TEMPERATURE_THRESHOLD) || \
	     (aen_event_info == SPDK_NVME_ASYNC_EVENT_SPARE_BELOW_THRESHOLD))) {
		/* Set the temperature threshold back to the original value to stop triggering  */
		printf("aer_cb - Resetting Temp Threshold for device: %s (%s)\n", \
		       dev->name, (g_parent_process ? "Parent" : "Child"));
		set_temp_threshold(dev, dev->orig_temp_threshold);
		get_health_log_page(dev);
	} else if (log_page_id == SPDK_NVME_LOG_CHANGED_NS_LIST) {
		printf("aer_cb - Changed Namespace (%s)\n", \
		       (g_parent_process ? "Parent" : "Child"));
		get_ns_state_test(dev, g_expected_ns_test);
		g_aer_done++;
	} else {
		printf("aer_cb - Unknown Log Page (%s)\n", \
		       (g_parent_process ? "Parent" : "Child"));
	}
}

@@ -225,7 +257,8 @@ usage(const char *program_name)

	spdk_log_usage(stdout, "-L");

	printf(" -v         verbose (enable warnings)\n");
	printf(" -i <id>    shared memory group ID\n");
	printf(" -m         Multi-Process AER Test (only with Temp Test)\n");
	printf(" -H         show this usage\n");
}

@@ -238,7 +271,7 @@ parse_args(int argc, char **argv, struct spdk_env_opts *env_opts)
	spdk_nvme_trid_populate_transport(&g_trid, SPDK_NVME_TRANSPORT_PCIE);
	snprintf(g_trid.subnqn, sizeof(g_trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);

	while ((op = getopt(argc, argv, "n:gr:t:HL:T")) != -1) {
	while ((op = getopt(argc, argv, "gi:mn:r:t:HL:T")) != -1) {
		switch (op) {
		case 'n':
			val = spdk_strtol(optarg, 10);
@@ -277,6 +310,16 @@ parse_args(int argc, char **argv, struct spdk_env_opts *env_opts)
		case 'H':
			usage(argv[0]);
			exit(EXIT_SUCCESS);
		case 'i':
			env_opts->shm_id = spdk_strtol(optarg, 10);
			if (env_opts->shm_id < 0) {
				fprintf(stderr, "Invalid shared memory ID\n");
				return env_opts->shm_id;
			}
			break;
		case 'm':
			g_multi_process_test = 1;
			break;
		default:
			usage(argv[0]);
			return 1;
@@ -364,7 +407,8 @@ spdk_aer_temperature_test(void)
{
	struct dev *dev;

	printf("Getting temperature thresholds of all controllers...\n");
	printf("Getting orig temperature thresholds of all controllers (%s)\n",
	       (g_parent_process ? "Parent" : "Child"));
	foreach_dev(dev) {
		/* Get the original temperature threshold */
		get_temp_threshold(dev);
@@ -391,16 +435,38 @@ spdk_aer_temperature_test(void)
		return g_failed;
	}

	printf("Waiting for all controllers to trigger AER...\n");
	/* Only single process needs to set and verify lower threshold */
	if (g_parent_process) {
		/* Wait until child has init'd and ready for test to continue */
		if (g_multi_process_test) {
			sem_wait(g_sem_child_id);
		}
		printf("Setting all controllers temperature threshold low to trigger AER\n");
		foreach_dev(dev) {
			/* Set the temperature threshold to a low value */
			set_temp_threshold(dev, 200);
		}

		printf("Waiting for all controllers temperature threshold to be set lower\n");
		while (!g_failed && (g_temperature_done < g_num_devs)) {
			foreach_dev(dev) {
				spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
			}
		}
		g_temperature_done = 0;

		if (g_failed) {
			return g_failed;
		}
	}

	printf("Waiting for all controllers to trigger AER and reset threshold (%s)\n",
	       (g_parent_process ? "Parent" : "Child"));
	/* Let parent know init is done and it's okay to continue */
	if (!g_parent_process) {
		sem_post(g_sem_child_id);
	}
	/* Wait for the AEN to occur here */
	while (!g_failed && (g_aer_done < g_num_devs || g_temperature_done < g_num_devs)) {
		foreach_dev(dev) {
			spdk_nvme_ctrlr_process_admin_completions(dev->ctrlr);
@@ -445,6 +511,50 @@ spdk_aer_changed_ns_test(void)
	return 0;
}

/*
 * Prepare the multi-process AER test.
 *
 * Removes any stale named semaphores (ENOENT is not treated as an error),
 * forks, records in g_parent_process which side of the fork this process is,
 * and then opens both named semaphores (created with an initial value of 0
 * if they do not exist yet) in the current process.
 *
 * Returns 0 on success in both the parent and the child. Returns a negative
 * value if a semaphore could not be unlinked, fork() failed, or a semaphore
 * could not be opened; a message is written to stderr in each case.
 */
static int
setup_multi_process(void)
{
	const char *role;
	pid_t pid;
	int rc;

	/* If AEN test was killed, remove named semaphore to start again */
	rc = sem_unlink(g_sem_init_name);
	if (rc < 0 && errno != ENOENT) {
		fprintf(stderr, "Init semaphore removal failure: %s\n", spdk_strerror(errno));
		return rc;
	}
	rc = sem_unlink(g_sem_child_name);
	if (rc < 0 && errno != ENOENT) {
		fprintf(stderr, "Child semaphore removal failure: %s\n", spdk_strerror(errno));
		return rc;
	}

	pid = fork();
	if (pid == -1) {
		/* perror() appends ": <errno text>\n" itself, so no newline in the prefix */
		perror("Failed to fork");
		return -1;
	}

	/* fork() returns 0 in the child and the child's pid in the parent */
	g_parent_process = (pid != 0);
	role = g_parent_process ? "parent" : "child";
	if (!g_parent_process) {
		printf("Child process pid: %d\n", getpid());
	}

	/*
	 * Both processes open the same two named semaphores. Each sem_open() is
	 * checked on its own so the reported errno belongs to the call that failed.
	 */
	g_sem_init_id = sem_open(g_sem_init_name, O_CREAT, 0600, 0);
	if (g_sem_init_id == SEM_FAILED) {
		fprintf(stderr, "Sem Open failed for %s: %s\n", role, spdk_strerror(errno));
		return -1;
	}
	g_sem_child_id = sem_open(g_sem_child_name, O_CREAT, 0600, 0);
	if (g_sem_child_id == SEM_FAILED) {
		fprintf(stderr, "Sem Open failed for %s: %s\n", role, spdk_strerror(errno));
		return -1;
	}

	return 0;
}

int main(int argc, char **argv)
{
	struct dev		*dev;
@@ -452,30 +562,83 @@ int main(int argc, char **argv)
	int			rc;
	struct spdk_nvme_detach_ctx *detach_ctx = NULL;

	spdk_env_opts_init(&opts);
	rc = parse_args(argc, argv, &opts);
	if (rc != 0) {
		return rc;
	}

	spdk_env_opts_init(&opts);
	if (g_multi_process_test)  {
		/* Multi-Process test only available with Temp Test */
		if (!g_enable_temp_test) {
			fprintf(stderr, "Multi Process test only available with Temp Test (-T)\n");
			return 1;
		}
		if (opts.shm_id < 0) {
			fprintf(stderr, "Multi Process requires shared memory id (-i <id>)\n");
			return 1;
		}
		rc = setup_multi_process();
		if (rc != 0) {
			fprintf(stderr, "Multi Process test failed to setup\n");
			return rc;
		}
	} else {
		/* Only one process in test, set it to the parent process */
		g_parent_process = true;
	}
	opts.name = "aer";
	if (g_parent_process) {
		opts.core_mask = "0x1";
	} else {
		opts.core_mask = "0x2";
	}

	/*
	 * For multi-process test, parent (primary) and child (secondary) processes
	 * will execute all following code but DPDK setup is serialized
	 */
	if (!g_parent_process) {
		if (sem_wait(g_sem_init_id) < 0) {
			fprintf(stderr, "sem_wait failed for child process\n");
			return (-1);
		}
	}
	if (spdk_env_init(&opts) < 0) {
		fprintf(stderr, "Unable to initialize SPDK env\n");
		return 1;
	}

	printf("Asynchronous Event Request test\n");
	printf("Asynchronous Event Request test (%s)\n",
	       (g_parent_process ? "Parent" : "Child"));

	if (spdk_nvme_probe(&g_trid, NULL, probe_cb, attach_cb, NULL) != 0) {
		fprintf(stderr, "spdk_nvme_probe() failed\n");
		return 1;
	}

	if (g_num_devs == 0) {
		fprintf(stderr, "No controllers found - exiting\n");
		g_failed = 1;
	}
	if (g_failed) {
		goto done;
	}

	if (g_parent_process && g_enable_temp_test) {
		printf("Reset controller to setup AER completions for this process\n");
		foreach_dev(dev) {
			if (spdk_nvme_ctrlr_reset(dev->ctrlr) < 0) {
				fprintf(stderr, "nvme reset failed.\n");
				return -1;
			}
		}
	}
	if (g_parent_process && g_multi_process_test) {
		/* Primary can release child/secondary for init now */
		sem_post(g_sem_init_id);
	}

	printf("Registering asynchronous event callbacks...\n");
	foreach_dev(dev) {
		spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, aer_cb, dev);
@@ -507,7 +670,7 @@ int main(int argc, char **argv)
		}
	}

	printf("Cleaning up...\n");
	printf("Cleaning up...(%s)\n", (g_parent_process ? "Parent" : "Child"));

	while (g_outstanding_commands) {
		foreach_dev(dev) {
@@ -515,6 +678,11 @@ int main(int argc, char **argv)
		}
	}

	/* Only one process cleans up at a time - let child go first */
	if (g_multi_process_test && g_parent_process) {
		/* Parent waits for child to clean up before executing clean up process */
		sem_wait(g_sem_child_id);
	}
	/* unregister AER callback so we don't fail on aborted AERs when we close out qpairs. */
	foreach_dev(dev) {
		spdk_nvme_ctrlr_register_aer_callback(dev->ctrlr, NULL, NULL);
@@ -528,8 +696,43 @@ int main(int argc, char **argv)
		spdk_nvme_detach_poll(detach_ctx);
	}

	/* Release semaphore to allow parent to cleanup */
	if (!g_parent_process) {
		sem_post(g_sem_child_id);
		sem_wait(g_sem_init_id);
	}
done:
	cleanup();

	/* Wait for child process to finish and verify it finished correctly before detaching resources */
	if (g_multi_process_test && g_parent_process) {
		int status;
		sem_post(g_sem_init_id);
		wait(&status);
		if (WIFEXITED(status)) {
			/* Child ended normally */
			if (WEXITSTATUS(status) != 0) {
				fprintf(stderr, "Child Failed with status: %d.\n", (int8_t)(WEXITSTATUS(status)));
				g_failed = true;
			}
		}
		if (sem_close(g_sem_init_id) != 0) {
			perror("sem_close Failed for init\n");
			g_failed = true;
		}
		if (sem_close(g_sem_child_id) != 0) {
			perror("sem_close Failed for child\n");
			g_failed = true;
		}

		if (sem_unlink(g_sem_init_name) != 0) {
			perror("sem_unlink Failed for init\n");
			g_failed = true;
		}
		if (sem_unlink(g_sem_child_name) != 0) {
			perror("sem_unlink Failed for child\n");
			g_failed = true;
		}
	}
	return g_failed;
}
+2 −0
Original line number Diff line number Diff line
@@ -134,8 +134,10 @@ run_test "nvme_reserve" $testdir/reserve/reserve
run_test "nvme_err_injection" $testdir/err_injection/err_injection
run_test "nvme_overhead" $testdir/overhead/overhead -s 4096 -t 1 -H -i 0
run_test "nvme_arbitration" $SPDK_EXAMPLE_DIR/arbitration -t 3 -i 0
run_test "nvme_single_aen" $testdir/aer/aer -T -i 0 -L log

if [ $(uname) != "FreeBSD" ]; then
	run_test "nvme_multi_aen" $testdir/aer/aer -m -T -i 0 -L log
	run_test "nvme_startup" $testdir/startup/startup -t 1000000
	run_test "nvme_multi_secondary" nvme_multi_secondary
	trap - SIGINT SIGTERM EXIT