Commit 597688b2 authored by Konrad Sztyber, committed by Tomasz Zawadzki
Browse files

scripts/trace: use ijson to parse the traces

Since the trace files can get very large (several GBs), parsing them
using Python's json module might require an infeasible amount of memory,
as it needs to load the whole file first.  The ijson [1] library
provides interfaces for parsing files iteratively, only loading a small
portion of a file at a time.

This approach requires the input JSON to list the tsc_rate and the
definitions of the tracepoints before the tracepoint entries.  That's not
a big deal, as this is the way `spdk_trace` generates it, but it's worth
noting, because passing the file through something like `jq -S` (which
sorts object keys, moving `entries` ahead of the definitions) might make
it unreadable to the trace script.

[1] https://pypi.org/project/ijson



Signed-off-by: Konrad Sztyber <konrad.sztyber@intel.com>
Change-Id: I03c0c3fb47091da615a3978b8d63edf4d876b811
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8275


Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Tomasz Zawadzki <tomasz.zawadzki@intel.com>
Reviewed-by: Monica Kenguva <monica.kenguva@intel.com>
parent e61fbe91
Loading
Loading
Loading
Loading
+36 −10
Original line number Diff line number Diff line
@@ -4,7 +4,7 @@ from argparse import ArgumentParser
from dataclasses import dataclass, field
from itertools import islice
from typing import Dict, List, TypeVar
import json
import ijson
import os
import re
import subprocess
@@ -168,21 +168,40 @@ class TraceEntry:
class Trace:
    """Stores, parses, and prints out SPDK traces"""
    def __init__(self, file):
        self._json = json.load(file)
        self._parser = ijson.parse(file)
        self._objects = []
        self._argfmt = {TracepointArgument.TYPE_PTR: lambda a: f'0x{a:x}'}
        self.tpoints = {t.id: t for t in self._parse_tpoints()}
        self.tsc_rate = self._json['tsc_rate']
        self.tpoints = {}
        self._parse_defs()

    def _parse_tpoints(self):
        for tpoint in self._json.get('tpoints', []):
            yield Tracepoint(
                name=tpoint['name'], id=tpoint['id'],
    def _parse_tpoints(self, tpoints):
        for tpoint in tpoints:
            tpoint_id = tpoint['id']
            self.tpoints[tpoint_id] = Tracepoint(
                name=tpoint['name'], id=tpoint_id,
                new_object=tpoint['new_object'],
                args=[TracepointArgument(name=a['name'],
                                         argtype=a['type'])
                      for a in tpoint.get('args', [])])

    def _parse_defs(self):
        """Read parser events up to the entries array, collecting the definitions.

        Iterates over the (prefix, event, value) triples produced by
        self._parser.  The value found under the 'tsc_rate' prefix is stored
        in self.tsc_rate, and the complete 'tpoints' array is rebuilt with an
        ijson.ObjectBuilder and handed to _parse_tpoints().  Iteration stops
        at the first event whose prefix is 'entries'.
        """
        # Non-None only while we are inside the 'tpoints' array
        builder = None
        for prefix, event, value in self._parser:
            # If we reach entries array, there are no more tracepoint definitions
            # (the event that triggered the break has already been taken from
            # the parser by the for loop)
            if prefix == 'entries':
                break
            elif prefix == 'tsc_rate':
                self.tsc_rate = value
                continue

            # Create the builder before feeding it, so that it also receives
            # the start_array event of the tpoints array
            if (prefix, event) == ('tpoints', 'start_array'):
                builder = ijson.ObjectBuilder()
            if builder is not None:
                builder.event(event, value)
            # Checked after feeding the builder, so that it has already seen
            # the matching end_array event when its value is read
            if (prefix, event) == ('tpoints', 'end_array'):
                self._parse_tpoints(builder.value)
                builder = None

    def _parse_entry(self, entry):
        tpoint = self.tpoints[entry['tpoint']]
        obj = entry.get('object', {})
@@ -193,8 +212,15 @@ class Trace:
                          args={n.name: v for n, v in zip(tpoint.args, entry.get('args', []))})

    def _entries(self):
        for entry in self._json.get('entries', []):
            yield self._parse_entry(entry)
        builder = None
        for prefix, event, value in self._parser:
            if (prefix, event) == ('entries.item', 'start_map'):
                builder = ijson.ObjectBuilder()
            if builder is not None:
                builder.event(event, value)
            if (prefix, event) == ('entries.item', 'end_map'):
                yield self._parse_entry(builder.value)
                builder = None

    def _annotate_args(self, entry):
        annotations = {}
+1 −0
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ pacman -Sy --needed --noconfirm gcc make cmake cunit libaio openssl \
pacman -Sy --needed --noconfirm python-pexpect python-pip libffi
pip install configshell_fb
pip install pyelftools
pip install ijson
# Additional dependencies for DPDK
pacman -Sy --needed --noconfirm numactl nasm
# Additional dependencies for ISA-L used in compression
+1 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ if ! pip3 install meson; then
	apt-get install -y meson
fi
pip3 install pyelftools
pip3 install ijson
# Additional dependencies for SPDK CLI - not available on older Ubuntus
apt-get install -y python3-configshell-fb python3-pexpect || echo \
	"Note: Some SPDK CLI dependencies could not be installed."
+1 −0
Original line number Diff line number Diff line
@@ -95,6 +95,7 @@ yum install -y python3-pip
pip3 install ninja
pip3 install meson
pip3 install pyelftools
pip3 install ijson

# Additional dependencies for SPDK CLI - not available in rhel and centos
if ! echo "$ID $VERSION_ID" | grep -E -q 'rhel 7|centos 7'; then