Commit ae89daab authored by Jim Harris's avatar Jim Harris
Browse files

trace: add concept of "owner" to trace files



An "owner" is now represented by a textual string describing the owner of
a trace event.

A trace file can have up to 16K outstanding owners at any given time. We need
this many to be able to track all of the various NVMe-oF qpairs, bdevs,
NVMe driver qpairs, accel channels, etc. A library creating trace events
can call spdk_trace_register_owner() with a description string to allocate a
a 16-bit owner_id. This owner_id is then passed as a parameter to
spdk_trace_record().

Libraries can change the description of a registered owner by calling
spdk_trace_owner_set_description(). This can be useful for cases like NVMe-oF,
where where a newly established qpair may only have a source IP/port, but later
can be changed to also add the subsystem nqn once the CONNECT command arrives.

A library is responsible for calling spdk_trace_unregister_owner() once the
associated object is destroyed (i.e. a qpair disconnects or a bdev is
unregistered).

An owner lifetime is tracked by recording the tsc when the owner was
registered. This allows the trace app to handle the case where an owner_id
gets reused during the span of a trace file.

Signed-off-by: default avatarJim Harris <jim.harris@samsung.com>
Change-Id: I79c9979a0769c32a85ee646c8237fe8329993c45
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/22646


Tested-by: default avatarSPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: default avatarShuhei Matsumoto <smatsumoto@nvidia.com>
Community-CI: Mellanox Build Bot
Reviewed-by: default avatarKonrad Sztyber <konrad.sztyber@intel.com>
parent 57c2d6d3
Loading
Loading
Loading
Loading
+23 −4
Original line number Diff line number Diff line
@@ -442,7 +442,9 @@ trace_files_aggregate(struct aggr_trace_record_ctx *ctx)
	int rc, i;
	ssize_t len = 0;
	uint64_t current_offset;
	uint64_t owner_offset, owner_size;
	uint64_t len_sum;
	uint8_t *owner_buf;

	ctx->out_fd = open(ctx->out_file, flags, 0600);
	if (ctx->out_fd < 0) {
@@ -465,6 +467,10 @@ trace_files_aggregate(struct aggr_trace_record_ctx *ctx)
			lcore_offsets[i] = 0;
		}
	}
	owner_size = (uint64_t)ctx->trace_file->num_owners *
		     (sizeof(struct spdk_trace_owner) + ctx->trace_file->owner_description_size);
	owner_offset = current_offset;
	current_offset += owner_size;

	/* Write size of converged trace file */
	rc = cont_write(ctx->out_fd, &current_offset, sizeof(ctx->trace_file->file_size));
@@ -476,7 +482,7 @@ trace_files_aggregate(struct aggr_trace_record_ctx *ctx)
	/* Write rest of metadata (spdk_trace_file) of converged trace file */
	rc = cont_write(ctx->out_fd, &ctx->trace_file->tsc_rate,
			sizeof(struct spdk_trace_file) - sizeof(lcore_offsets) -
			sizeof(ctx->trace_file->file_size));
			sizeof(owner_offset) - sizeof(ctx->trace_file->file_size));
	if (rc < 0) {
		fprintf(stderr, "Failed to write metadata into trace file\n");
		goto out;
@@ -489,6 +495,13 @@ trace_files_aggregate(struct aggr_trace_record_ctx *ctx)
		goto out;
	}

	/* Write owner_offset of converged trace file */
	rc = cont_write(ctx->out_fd, &owner_offset, sizeof(owner_offset));
	if (rc < 0) {
		fprintf(stderr, "Failed to write owner_description_size into trace file\n");
		goto out;
	}

	/* Append each lcore trace file into converged trace file */
	for (i = 0; i < SPDK_TRACE_MAX_LCORE; i++) {
		lcore_port = &ctx->lcore_ports[i];
@@ -521,15 +534,21 @@ trace_files_aggregate(struct aggr_trace_record_ctx *ctx)
			}
		}

		/* Clear rc so that the last cont_write() doesn't get interpreted as a failure. */
		rc = 0;

		if (len_sum != lcore_port->num_entries * sizeof(struct spdk_trace_entry)) {
			fprintf(stderr, "Len of lcore trace file doesn't match number of entries for lcore\n");
		}
	}

	/* Append owner data into converged trace file */
	owner_buf = (uint8_t *)ctx->trace_file + ctx->trace_file->owner_offset;
	rc = cont_write(ctx->out_fd, owner_buf, owner_size);
	if (rc < 0) {
		fprintf(stderr, "Failed to write owner_data into trace file\n");
		goto out;
	}

	printf("All lcores trace entries are aggregated into trace file %s\n", ctx->out_file);
	return 0;

out:
	close(ctx->out_fd);
+69 −0
Original line number Diff line number Diff line
@@ -47,6 +47,12 @@ struct spdk_trace_owner_type {
	char	id_prefix;
};

struct spdk_trace_owner {
	uint64_t	tsc;
	uint8_t		type;
	char		description[];
} __attribute__((packed));

/* If type changes from a uint8_t, change this value. */
#define SPDK_TRACE_MAX_OBJECT (UCHAR_MAX + 1)

@@ -125,9 +131,16 @@ struct spdk_trace_file {
	struct spdk_trace_object	object[UCHAR_MAX + 1];
	struct spdk_trace_tpoint	tpoint[SPDK_TRACE_MAX_TPOINT_ID];

	uint16_t			num_owners;
	uint16_t			owner_description_size;
	uint8_t				reserved[4];

	/** Offset of each trace_history from the beginning of this data structure. */
	uint64_t			lcore_history_offsets[SPDK_TRACE_MAX_LCORE];

	/** Offset of beginning of struct spdk_trace_owner data. */
	uint64_t			owner_offset;

	/** Variable sized data sections are at the end of this data structure,
	 *  referenced by offsets defined in this structure.
	 */
@@ -164,6 +177,20 @@ spdk_get_per_lcore_history(struct spdk_trace_file *trace_file, unsigned lcore)
	return (struct spdk_trace_history *)(((char *)trace_file) + lcore_history_offset);
}

static inline struct spdk_trace_owner *
spdk_get_trace_owner(const struct spdk_trace_file *trace_file, uint16_t owner_id)
{
	uint64_t owner_size;

	if (owner_id >= trace_file->num_owners) {
		return NULL;
	}

	owner_size = sizeof(struct spdk_trace_owner) + trace_file->owner_description_size;
	return (struct spdk_trace_owner *)
	       (((char *)trace_file) + trace_file->owner_offset + owner_id * owner_size);
}

void _spdk_trace_record(uint64_t tsc, uint16_t tpoint_id, uint16_t poller_id,
			uint32_t size, uint64_t object_id, int num_args, ...);

@@ -309,6 +336,48 @@ void spdk_trace_cleanup(void);
 */
void spdk_trace_register_owner_type(uint8_t type, char id_prefix);

/**
 * Register the trace owner.
 *
 * \param owner_type Type of the owner being registered.
 * \param description Textual string describing the trace owner.
 * \return Allocated owner_id for the newly registered owner. Returns 0 if
 *         no trace_id could be allocated.
 */
uint16_t spdk_trace_register_owner(uint8_t owner_type, const char *description);

/**
 * Change the description for a previously registered owner.
 *
 * \param owner_id ID of previously registered owner.
 * \param description New description for the owner.
 */
void spdk_trace_owner_set_description(uint16_t owner_id, const char *description);

/**
 * Append to the description for a previously registered owner.
 *
 * A space will be inserted before the appended string.
 *
 * This may be useful for modules that are modifying an existing description
 * with additional information.
 *
 * Callers may pass 0 safely, in this case the function will just be a nop.
 *
 * \param owner_id ID of previously registered owner.
 * \param description Additional description for the owner
 */
void spdk_trace_owner_append_description(uint16_t owner_id, const char *description);

/**
 * Unregister a previously registered owner.
 *
 * Callers may pass 0 safely, in this case the function will just be a nop.
 *
 * \param owner_id ID of previously registered owner.
 */
void spdk_trace_unregister_owner(uint16_t owner_id);

/**
 * Register the trace object.
 *
+4 −0
Original line number Diff line number Diff line
@@ -12,6 +12,10 @@
	spdk_trace_init;
	spdk_trace_cleanup;
	spdk_trace_register_owner_type;
	spdk_trace_register_owner;
	spdk_trace_owner_set_description;
	spdk_trace_owner_append_description;
	spdk_trace_unregister_owner;
	spdk_trace_register_object;
	spdk_trace_register_description;
	spdk_trace_register_description_ext;
+13 −0
Original line number Diff line number Diff line
@@ -27,6 +27,12 @@ struct spdk_trace_file *g_trace_file;
struct spdk_bit_array *g_ut_array;
pthread_mutex_t g_ut_array_mutex;

#define TRACE_NUM_OWNERS (16 * 1024)
#define TRACE_OWNER_DESCRIPTION_SIZE (119)
SPDK_STATIC_ASSERT(sizeof(struct spdk_trace_owner) == 9, "incorrect size");
SPDK_STATIC_ASSERT(sizeof(struct spdk_trace_owner) + TRACE_OWNER_DESCRIPTION_SIZE == 128,
		   "incorrect size");

static inline struct spdk_trace_entry *
get_trace_entry(struct spdk_trace_history *history, uint64_t offset)
{
@@ -231,6 +237,7 @@ spdk_trace_init(const char *shm_name, uint64_t num_entries, uint32_t num_threads
	uint32_t i = 0, max_dedicated_cpu = 0;
	uint64_t file_size;
	uint64_t lcore_offsets[SPDK_TRACE_MAX_LCORE] = { 0 };
	uint64_t owner_offset;
	struct spdk_cpuset cpuset = {};

	/* 0 entries requested - skip trace initialization */
@@ -270,6 +277,9 @@ spdk_trace_init(const char *shm_name, uint64_t num_entries, uint32_t num_threads
		lcore_offsets[i] = file_size;
		file_size += spdk_get_trace_history_size(num_entries);
	}
	owner_offset = file_size;
	file_size += TRACE_NUM_OWNERS *
		     (sizeof(struct spdk_trace_owner) + TRACE_OWNER_DESCRIPTION_SIZE);

	snprintf(g_shm_name, sizeof(g_shm_name), "%s", shm_name);

@@ -328,6 +338,9 @@ spdk_trace_init(const char *shm_name, uint64_t num_entries, uint32_t num_threads
		lcore_history->num_entries = num_entries;
	}
	g_trace_file->file_size = file_size;
	g_trace_file->num_owners = TRACE_NUM_OWNERS;
	g_trace_file->owner_description_size = TRACE_OWNER_DESCRIPTION_SIZE;
	g_trace_file->owner_offset = owner_offset;

	if (trace_flags_init()) {
		goto trace_init_err;
+157 −1
Original line number Diff line number Diff line
@@ -10,8 +10,16 @@
#include "spdk/log.h"
#include "spdk/util.h"
#include "trace_internal.h"
#include "spdk/bit_array.h"

static struct spdk_trace_register_fn *g_reg_fn_head = NULL;
static struct {
	uint16_t *ring;
	uint32_t head;
	uint32_t tail;
	uint32_t size;
	pthread_spinlock_t lock;
} g_owner_ids;

SPDK_LOG_REGISTER_COMPONENT(trace)

@@ -262,6 +270,119 @@ spdk_trace_register_owner_type(uint8_t type, char id_prefix)
	owner_type->id_prefix = id_prefix;
}

static void
_owner_set_description(uint16_t owner_id, const char *description, bool append)
{
	struct spdk_trace_owner *owner;
	char old[256] = {};

	assert(sizeof(old) >= g_trace_file->owner_description_size);
	owner = spdk_get_trace_owner(g_trace_file, owner_id);
	assert(owner != NULL);
	if (append) {
		memcpy(old, owner->description, g_trace_file->owner_description_size);
	}

	snprintf(owner->description, g_trace_file->owner_description_size,
		 "%s%s%s", old, append ? " " : "", description);
}

uint16_t
spdk_trace_register_owner(uint8_t owner_type, const char *description)
{
	struct spdk_trace_owner *owner;
	uint32_t owner_id;

	if (g_owner_ids.ring == NULL) {
		/* Help the unit test environment by simply returning instead
		 * of requiring it to initialize the trace library.
		 */
		return 0;
	}

	pthread_spin_lock(&g_owner_ids.lock);

	if (g_owner_ids.head == g_owner_ids.tail) {
		/* No owner ids available. Return 0 which means no owner. */
		pthread_spin_unlock(&g_owner_ids.lock);
		return 0;
	}

	owner_id = g_owner_ids.ring[g_owner_ids.head];
	if (++g_owner_ids.head == g_owner_ids.size) {
		g_owner_ids.head = 0;
	}

	owner = spdk_get_trace_owner(g_trace_file, owner_id);
	owner->tsc = spdk_get_ticks();
	owner->type = owner_type;
	_owner_set_description(owner_id, description, false);
	pthread_spin_unlock(&g_owner_ids.lock);
	return owner_id;
}

void
spdk_trace_unregister_owner(uint16_t owner_id)
{
	if (g_owner_ids.ring == NULL) {
		/* Help the unit test environment by simply returning instead
		 * of requiring it to initialize the trace library.
		 */
		return;
	}

	if (owner_id == 0) {
		/* owner_id 0 means no owner. Allow this to be passed here, it
		 * avoids caller having to do extra checking.
		 */
		return;
	}

	pthread_spin_lock(&g_owner_ids.lock);
	g_owner_ids.ring[g_owner_ids.tail] = owner_id;
	if (++g_owner_ids.tail == g_owner_ids.size) {
		g_owner_ids.tail = 0;
	}
	pthread_spin_unlock(&g_owner_ids.lock);
}

void
spdk_trace_owner_set_description(uint16_t owner_id, const char *description)
{
	if (g_owner_ids.ring == NULL) {
		/* Help the unit test environment by simply returning instead
		 * of requiring it to initialize the trace library.
		 */
		return;
	}

	pthread_spin_lock(&g_owner_ids.lock);
	_owner_set_description(owner_id, description, false);
	pthread_spin_unlock(&g_owner_ids.lock);
}

void
spdk_trace_owner_append_description(uint16_t owner_id, const char *description)
{
	if (g_owner_ids.ring == NULL) {
		/* Help the unit test environment by simply returning instead
		 * of requiring it to initialize the trace library.
		 */
		return;
	}

	if (owner_id == 0) {
		/* owner_id 0 means no owner. Allow this to be passed here, it
		 * avoids caller having to do extra checking.
		 */
		return;
	}

	pthread_spin_lock(&g_owner_ids.lock);
	_owner_set_description(owner_id, description, true);
	pthread_spin_unlock(&g_owner_ids.lock);
}

void
spdk_trace_register_object(uint8_t type, char id_prefix)
{
@@ -461,6 +582,9 @@ int
trace_flags_init(void)
{
	struct spdk_trace_register_fn *reg_fn;
	uint16_t i;
	uint16_t owner_id_start;
	int rc;

	reg_fn = g_reg_fn_head;
	while (reg_fn) {
@@ -468,10 +592,42 @@ trace_flags_init(void)
		reg_fn = reg_fn->next;
	}

	return 0;
	/* We will not use owner_id 0, it will be reserved to mean "no owner".
	 * But for now, we will start with owner_id 256 instead of owner_id 1.
	 * This will account for some libraries and modules which pass a
	 * "poller_id" to spdk_trace_record() which is now an owner_id. Until
	 * all of those libraries and modules are converted, we will start
	 * owner_ids at 256 to avoid collisions.
	 */
	owner_id_start = 256;
	g_owner_ids.ring = calloc(g_trace_file->num_owners, sizeof(uint16_t));
	if (g_owner_ids.ring == NULL) {
		SPDK_ERRLOG("could not allocate g_owner_ids.ring\n");
		return -ENOMEM;
	}
	g_owner_ids.head = 0;
	g_owner_ids.tail = g_trace_file->num_owners - owner_id_start;
	g_owner_ids.size = g_trace_file->num_owners;
	for (i = 0; i < g_owner_ids.tail; i++) {
		g_owner_ids.ring[i] = i + owner_id_start;
	}

	rc = pthread_spin_init(&g_owner_ids.lock, PTHREAD_PROCESS_PRIVATE);
	if (rc != 0) {
		free(g_owner_ids.ring);
		g_owner_ids.ring = NULL;
	}

	return rc;
}

void
trace_flags_fini(void)
{
	if (g_owner_ids.ring == NULL) {
		return;
	}
	pthread_spin_destroy(&g_owner_ids.lock);
	free(g_owner_ids.ring);
	g_owner_ids.ring = NULL;
}