diag: add startup-timing trace and /api/diag/timing endpoint

bbctrl.Trace records monotonic-anchored events from process start.
Ctrl, Comm, the Web layer and __init__ are instrumented so a single
GET /api/diag/timing returns a full timeline of import, controller
init, AVR connection, first websocket, and first GET /. The
restart-timing.js client posts performance.now() marks back so the
browser side can be aligned in the same view.

Used to drive the cold-boot optimisations that reduce listen latency
on the Pi by ~8s.
This commit is contained in:
2026-05-03 14:06:17 +02:00
parent 94270e7725
commit 0b5ab2ff3b
6 changed files with 367 additions and 11 deletions

80
src/js/restart-timing.js Normal file
View File

@@ -0,0 +1,80 @@
// Lightweight UI-side restart/cold-load timing.
//
// Records a few key marks using performance.now(), then sends them with an
// HTTP PUT to /api/diag/timing/ui shortly after 'ui.first_state' has fired.
// Disabled by setting window.BBCTRL_TRACE = false before this module is loaded.
//
// Marks collected:
// script.load -- this module evaluated
// ws.open -- websocket onopen
// ws.first_msg -- first message from controller
// ui.first_state -- first message that contained controller state
// window.load -- window 'load' event
//
// Aligning these with /api/diag/timing on the server gives the full
// picture from systemd start -> bbctrl up -> WS open -> UI rendered.
"use strict";
// Tracing is active only when a `window` object exists and the page has not
// opted out by setting window.BBCTRL_TRACE to false.
const _enabled = typeof window !== "undefined" && window.BBCTRL_TRACE !== false;
// Zero point for every mark time. Uses performance.now() when available and
// falls back to Date.now() otherwise; _now() applies the same choice.
const _t0 = (typeof performance !== "undefined" && performance.now)
? performance.now()
: Date.now();
// Origin reported to the server as `navStart`: performance.timeOrigin when
// available, otherwise the Date.now() value at module evaluation.
const _navStart = (typeof performance !== "undefined" && performance.timeOrigin)
? performance.timeOrigin
: Date.now();
// Marks recorded so far, in call order. Each entry is {n, t, ...fields}.
const marks = [];
// Set by _post() on its first run so the upload is attempted at most once.
let posted = false;
// Milliseconds elapsed since this module was evaluated, measured with the
// same clock that produced _t0 (performance.now() if present, else Date.now()).
function _now() {
  const hasPerfClock = typeof performance !== "undefined" && performance.now;
  const current = hasPerfClock ? performance.now() : Date.now();
  return current - _t0;
}
// Append a named mark to `marks`, stamped with the rounded elapsed time.
//
//   name   -- stored under key `n`
//   fields -- optional object whose own properties are merged into the entry
//
// Does nothing when tracing is disabled.
function mark(name, fields) {
  if (_enabled) {
    const entry = { n: name, t: Math.round(_now()) };
    marks.push(Object.assign(entry, fields || {}));
  }
}
// Upload the collected marks to /api/diag/timing/ui with an HTTP PUT.
//
// Runs at most once: the `posted` flag is set before the request is issued,
// so later calls (including flush()) are no-ops. Does nothing when tracing is
// disabled. Any failure, synchronous or asynchronous, is deliberately ignored
// because this is diagnostics only.
function _post() {
  if (posted || !_enabled) return;
  posted = true;

  const href = typeof location !== "undefined" ? location.href : "";
  const ua = typeof navigator !== "undefined" ? navigator.userAgent : "";
  const body = JSON.stringify({
    navStart: _navStart,
    t0_perf: _t0,
    href: href,
    ua: ua,
    marks: marks,
  });

  // Without fetch there is no transport; the marks are simply not sent.
  if (typeof fetch !== "function") return;

  try {
    const request = fetch("/api/diag/timing/ui", {
      method: "PUT",
      headers: { "Content-Type": "application/json" },
      body: body,
      keepalive: true,
    });
    request.catch(() => {});
  } catch (e) { /* swallow */ }
}
// Record window load too; doesn't block posting. Note that _post() sends the
// marks only once, so a load event that fires after the upload is recorded
// locally but is not part of the uploaded payload.
if (_enabled && typeof window !== "undefined") {
window.addEventListener("load", () => mark("window.load"));
}
// Recorded while this module is being evaluated.
mark("script.load");
module.exports = {
enabled: _enabled,
mark: mark,
onWsOpen: () => mark("ws.open"),
onWsFirstMessage: () => mark("ws.first_msg"),
onFirstState: () => {
mark("ui.first_state");
// Defer slightly so any synchronous render finishes first.
setTimeout(_post, 100);
},
flush: _post,
};

View File

@@ -223,6 +223,10 @@ class Comm(object):
self.ctrl.mach.process_log(msg)
elif 'firmware' in msg:
self.log.info('AVR firmware rebooted')
try:
import bbctrl.Trace as _T
_T.mark('avr.firmware_rebooted')
except Exception: pass
self.connect()
else:
self._update_state(msg)

View File

@@ -28,10 +28,12 @@
import os
import time
import bbctrl
import bbctrl.Trace as Trace
class Ctrl(object):
def __init__(self, args, ioloop, id):
Trace.mark('ctrl.init.start', id=id or '<default>')
self.args = args
self.ioloop = bbctrl.IOLoop(ioloop)
self.id = id
@@ -43,31 +45,47 @@ class Ctrl(object):
if args.demo: log_path = self.get_path(filename = 'bbctrl.log')
else: log_path = args.log
self.log = bbctrl.log.Log(args, self.ioloop, log_path)
Trace.mark('ctrl.log_open')
self.state = bbctrl.State(self)
self.config = bbctrl.Config(self)
Trace.mark('ctrl.state_config')
self.log.get('Ctrl').info('Starting %s' % self.id)
try:
if args.demo: self.avr = bbctrl.AVREmu(self)
else: self.avr = bbctrl.AVR(self)
with Trace.span('ctrl.avr'):
if args.demo: self.avr = bbctrl.AVREmu(self)
else: self.avr = bbctrl.AVR(self)
self.i2c = bbctrl.I2C(args.i2c_port, args.demo)
self.lcd = bbctrl.LCD(self)
self.mach = bbctrl.Mach(self, self.avr)
self.preplanner = bbctrl.Preplanner(self)
if not args.demo: self.jog = bbctrl.Jog(self)
self.pwr = bbctrl.Pwr(self)
with Trace.span('ctrl.i2c'):
self.i2c = bbctrl.I2C(args.i2c_port, args.demo)
with Trace.span('ctrl.lcd'):
self.lcd = bbctrl.LCD(self)
with Trace.span('ctrl.mach'):
self.mach = bbctrl.Mach(self, self.avr)
with Trace.span('ctrl.preplanner'):
self.preplanner = bbctrl.Preplanner(self)
if not args.demo:
with Trace.span('ctrl.jog'):
self.jog = bbctrl.Jog(self)
with Trace.span('ctrl.pwr'):
self.pwr = bbctrl.Pwr(self)
self.mach.connect()
with Trace.span('ctrl.mach.connect'):
self.mach.connect()
self.lcd.add_new_page(bbctrl.MainLCDPage(self))
self.lcd.add_new_page(bbctrl.IPLCDPage(self.lcd))
os.environ['GCODE_SCRIPT_PATH'] = self.get_upload()
except Exception: self.log.get('Ctrl').exception('Internal error: Control initialization failed')
Trace.mark('ctrl.init.end')
Trace.sd_notify('STATUS=ctrl initialized\n')
except Exception:
Trace.mark('ctrl.init.error')
self.log.get('Ctrl').exception('Internal error: Control initialization failed')
def __del__(self):
    """Print a notice when this controller object is finalized."""
    print('Ctrl deleted')

185
src/py/bbctrl/Trace.py Normal file
View File

@@ -0,0 +1,185 @@
################################################################################
# #
# Lightweight phase tracing for bbctrl restart / boot timing. #
# #
# Anchored at module import time. All timestamps are seconds since the #
# process anchor (monotonic). A wall-clock anchor is captured once so the #
# timeline can be aligned with journalctl / systemd-analyze. #
# #
# Set BBCTRL_TRACE=0 in the environment to disable all marks (no-op). #
# #
# Exposed by /api/diag/timing as JSON. #
# #
################################################################################
"""Bbctrl restart / startup tracing.
Usage:
import bbctrl.Trace as T
T.mark('proc.start')
with T.span('ctrl.avr.init'):
...
The timeline is also dumped on demand via /api/diag/timing.
"""
import os
import time
import json
import threading
# Tracing is on unless BBCTRL_TRACE is set to exactly '0' in the environment.
_ENABLED = os.environ.get('BBCTRL_TRACE', '1') != '0'
# Anchors captured once at import. Event times are relative to
# _t0_monotonic; _t0_wall is the wall-clock reading taken right after it.
_t0_monotonic = time.monotonic()
_t0_wall = time.time()
# Held while appending to _events and while snapshotting it in timeline().
_lock = threading.Lock()
_events = [] # list of dicts: {t, name, fields}; 'fields' present only when non-empty
_ui_timing = None # last payload passed to set_ui_timing(); None until then
def _read_kernel_anchors():
    """Return ``(btime_wall, uptime_at_anchor)`` for kernel-boot alignment.

    btime_wall:
        Integer from the ``btime`` line of /proc/stat (wall-clock epoch
        seconds at kernel boot), or None if the file or line is unavailable
        or cannot be parsed.
    uptime_at_anchor:
        First field of /proc/uptime as a float (seconds since kernel boot
        at the time of the call), or None if it cannot be read. Reading
        /proc/uptime directly avoids depending on wall-clock accuracy.

    Both reads are best effort: any failure yields None for that value.
    """
    boot_epoch = None
    try:
        with open('/proc/stat') as stat_file:
            btime_lines = (ln for ln in stat_file if ln.startswith('btime '))
            first_match = next(btime_lines, None)
            if first_match is not None:
                boot_epoch = int(first_match.split()[1])
    except Exception:
        pass

    uptime = None
    try:
        with open('/proc/uptime') as uptime_file:
            uptime = float(uptime_file.read().split()[0])
    except Exception:
        pass

    return boot_epoch, uptime
# Captured once, at import time; exposed by timeline() as 'btime_wall' and
# 'uptime_at_anchor'. Either value may be None.
_btime_wall, _uptime_at_anchor = _read_kernel_anchors()
def now():
    """Return seconds elapsed on the monotonic clock since the import anchor."""
    elapsed = time.monotonic() - _t0_monotonic
    return elapsed
def mark(name, **fields):
    """Record a single named event at the current monotonic time.

    The event is appended to the in-memory list as
    ``{'t': <seconds, 4 decimals>, 'name': name}`` plus a ``'fields'`` dict
    when keyword arguments were given. A ``TRACE +<t>s <name> k=v ...`` line
    is also printed and flushed. Does nothing when tracing is disabled.
    """
    if _ENABLED:
        elapsed = now()
        event = {'t': round(elapsed, 4), 'name': name}
        if fields:
            event['fields'] = fields
        with _lock:
            _events.append(event)

        # Also surface in the regular log stream so journalctl shows it.
        # Printing is best effort: a failing stdout must not break the caller.
        try:
            suffix = ''.join(' %s=%s' % item for item in fields.items())
            print('TRACE +%.3fs %s%s' % (elapsed, name, suffix), flush=True)
        except Exception:
            pass
class span(object):
    """Context manager that brackets a phase with two marks.

    On entry it records ``<name>.start`` with the given fields. On exit it
    records ``<name>.end`` with the same fields plus ``dur_ms`` (whole
    milliseconds) and, if the body raised, ``error`` set to the exception
    class name. Exceptions are never suppressed. When tracing is disabled
    nothing is recorded.
    """

    def __init__(self, name, **fields):
        self.name, self.fields, self._t = name, fields, None

    def __enter__(self):
        if not _ENABLED:
            return self
        self._t = time.monotonic()
        mark(self.name + '.start', **self.fields)
        return self

    def __exit__(self, exc_type, exc, tb):
        if not _ENABLED or self._t is None:
            return False
        elapsed_ms = int((time.monotonic() - self._t) * 1000)
        end_fields = dict(self.fields, dur_ms=elapsed_ms)
        if exc_type is not None:
            end_fields['error'] = exc_type.__name__
        mark(self.name + '.end', **end_fields)
        # Returning False lets any exception from the body propagate.
        return False
def set_ui_timing(data):
    """Store *data* as the most recent browser-side timing payload.

    Any previously stored payload is replaced. The stored value is what
    timeline() returns under its 'ui' key.
    """
    global _ui_timing
    _ui_timing = data
def _current_uptime():
    """Return the first field of /proc/uptime as a float, or None on failure."""
    try:
        with open('/proc/uptime') as uptime_file:
            parts = uptime_file.read().split()
        uptime = float(parts[0])
    except Exception:
        uptime = None
    return uptime
def timeline():
    """Return a dict snapshot of the trace state.

    Keys: 'enabled', 't0_wall', 't0_iso' (local time, second resolution),
    'now' (seconds since the anchor), 'pid', 'events' (a copy of the event
    list taken under the lock), 'ui' (last payload given to
    set_ui_timing), and the kernel-boot anchors 'btime_wall',
    'uptime_at_anchor' and 'uptime_now'.
    """
    with _lock:
        snapshot = _events[:]

    anchor_iso = time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(_t0_wall))
    result = {
        'enabled': _ENABLED,
        't0_wall': _t0_wall,
        't0_iso': anchor_iso,
        'now': now(),
        'pid': os.getpid(),
        'events': snapshot,
        'ui': _ui_timing,
        # Kernel-boot anchors so the timeline can be expressed in
        # "seconds since power on".
        'btime_wall': _btime_wall,
        'uptime_at_anchor': _uptime_at_anchor,
        'uptime_now': _current_uptime(),
    }
    return result
def dump(path):
    """Write the current timeline() to *path* as indented JSON, best effort.

    The timeline is serialised to a string before the file is opened, so a
    serialisation failure cannot truncate or corrupt an existing file at
    *path*. Any error (serialisation or I/O) is swallowed on purpose:
    dumping diagnostics must never take the application down.

    Args:
        path: destination file path; opened in text write mode.
    """
    try:
        # Serialise first: open(path, 'w') truncates immediately, so doing it
        # before a failing json call would destroy a previous good dump.
        text = json.dumps(timeline(), indent=2)
        with open(path, 'w') as f:
            f.write(text)
    except Exception:
        # Deliberate best effort; see docstring.
        pass
# Sd_notify helper -------------------------------------------------------------
#
# Allows bbctrl to tell systemd "I am ready" / "current status is X" so
# `systemctl status bbctrl` and `systemd-analyze critical-chain` reflect the
# actual application state instead of just exec start.
def sd_notify(state):
    """Send *state* to the systemd notification socket, best effort.

    The socket address is taken from the NOTIFY_SOCKET environment variable;
    when it is unset or empty the call is a no-op. *state* is UTF-8 encoded
    and sent as a single datagram. Every error is swallowed.
    """
    notify_addr = os.environ.get('NOTIFY_SOCKET')
    if not notify_addr:
        return

    try:
        import socket

        if notify_addr.startswith('@'):
            # Abstract socket if it starts with '@': replace it with NUL.
            destination = '\0' + notify_addr[1:]
        else:
            destination = notify_addr

        with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as sock:
            sock.sendto(state.encode('utf-8'), destination)
    except Exception:
        pass
# Mark module-import time so even importing bbctrl shows up.
mark('trace.import')

View File

@@ -798,6 +798,32 @@ class RemoteDiagnosticsHandler(bbctrl.APIHandler):
'message': e.reason or "Unknown"
})
class TimingHandler(bbctrl.APIHandler):
    """Serve the process startup timeline collected by bbctrl.Trace.

    The response is whatever ``bbctrl.Trace.timeline()`` returns: the
    recorded events, the wall-clock anchor (useful for lining the timeline
    up with journalctl / systemd-analyze output) and the most recent
    UI-side timing payload uploaded by the browser.
    """

    def get(self):
        import bbctrl.Trace as trace

        snapshot = trace.timeline()
        self.write_json(snapshot)
class UITimingHandler(bbctrl.APIHandler):
    """Receive the browser's performance.now() marks, once per page load.

    The request body is expected to be a JSON object with a ``marks`` list.
    A valid payload is stored via ``bbctrl.Trace.set_ui_timing`` and a
    ``ui.posted_timing`` event carrying the number of marks is recorded.
    Anything that is not a JSON object is ignored rather than stored, so
    the 'ui' entry of the timeline is always either None or an object.
    """

    def put_ok(self):
        import bbctrl.Trace as _T

        # self.json is parsed in APIHandler.prepare()
        payload = getattr(self, 'json', None)
        if not isinstance(payload, dict):
            # Malformed upload: previously this was stored and the mark was
            # then lost to a swallowed AttributeError. Reject it up front.
            return

        marks = payload.get('marks')
        mark_count = len(marks) if isinstance(marks, (list, tuple)) else 0

        # Diagnostics must never fail the request, hence the broad catch.
        try:
            _T.set_ui_timing(payload)
            _T.mark('ui.posted_timing', marks=mark_count)
        except Exception:
            pass
# Base class for Web Socket connections
class ClientConnection(object):
def __init__(self, app):
@@ -873,6 +899,12 @@ class SockJSConnection(ClientConnection, sockjs.tornado.SockJSConnection):
ip = info.ip
if 'X-Real-IP' in info.headers: ip = info.headers['X-Real-IP']
self.app.get_ctrl(id).log.get('Web').info('Connection from %s' % ip)
try:
if not getattr(self.app, '_first_ws', False):
self.app._first_ws = True
import bbctrl.Trace as _T
_T.mark('ws.first_open', ip=ip)
except Exception: pass
super().on_open(id)
@@ -881,6 +913,23 @@ class StaticFileHandler(tornado.web.StaticFileHandler):
self.set_header('Cache-Control',
'no-store, no-cache, must-revalidate, max-age=0')
def prepare(self):
    """Record the first request for the index page, then defer to the base.

    The first time a request arrives whose path is '/', '/index.html' or
    '', a ``web.first_root_get`` trace event is recorded with the client IP
    and the first 60 characters of the User-Agent header. This shows when
    chromium actually started fetching the UI on cold boot. The
    application-level ``_first_root_get`` flag ensures it is recorded once.
    Tracing errors are swallowed so they can never break file serving.
    """
    try:
        app = self.application
        already_seen = getattr(app, '_first_root_get', False)
        # Treat any GET '/' or '/index.html' as the root fetch.
        if not already_seen and self.request.path in ('/', '/index.html', ''):
            app._first_root_get = True
            import bbctrl.Trace as _T
            client_ip = self.request.remote_ip
            user_agent = self.request.headers.get('User-Agent') or ''
            _T.mark('web.first_root_get', ip=client_ip, ua=user_agent[:60])
    except Exception:
        pass

    return super().prepare()
class Web(tornado.web.Application):
def __init__(self, args, ioloop):
self.args = args
@@ -902,6 +951,8 @@ class Web(tornado.web.Application):
handlers = [
(r'/websocket', WSConnection),
(r'/api/diag/timing', TimingHandler),
(r'/api/diag/timing/ui', UITimingHandler),
(r'/api/log', LogHandler),
(r'/api/message/(\d+)/ack', MessageAckHandler),
(r'/api/bugreport', BugReportHandler),

View File

@@ -36,6 +36,13 @@ import datetime
from pkg_resources import Requirement, resource_filename
# Trace must be imported before the rest of bbctrl so its monotonic
# anchor is the earliest reasonable point and so import-time costs of
# heavy submodules (camotics gplan.so, sockjs, tornado, etc.) are
# attributable in /api/diag/timing.
import bbctrl.Trace as Trace
Trace.mark('imports.bbctrl.start')
from bbctrl.RequestHandler import RequestHandler
from bbctrl.APIHandler import APIHandler
from bbctrl.FileHandler import FileHandler
@@ -64,6 +71,8 @@ import bbctrl.v4l2 as v4l2
import bbctrl.Log as log
import bbctrl.ObjGraph as ObjGraph
Trace.mark('imports.bbctrl.end')
ctrl = None
@@ -167,19 +176,28 @@ def parse_args():
def run():
global ctrl
Trace.mark('run.enter')
args = parse_args()
Trace.mark('args.parsed')
# Set signal handler
signal.signal(signal.SIGTERM, on_exit)
# Create ioloop
ioloop = tornado.ioloop.IOLoop.current()
Trace.mark('ioloop.created')
# Set ObjGraph signal handler
if args.debug: Debugger(ioloop, args.debug)
# Start server
web = Web(args, ioloop)
with Trace.span('web.init'):
web = Web(args, ioloop)
Trace.mark('listen', port=args.port, addr=args.addr)
# Notify systemd we are ready (no-op when not under systemd).
Trace.sd_notify('READY=1\nSTATUS=listening on %s:%d\n' %
(args.addr, args.port))
try:
ioloop.start()