Restart timing: bbctrl.Trace, /api/diag/timing, UI marks
Add a lightweight, self-contained phase tracer for measuring end-to-end bbctrl restart and Pi boot time. Disabled by setting BBCTRL_TRACE=0. - src/py/bbctrl/Trace.py: monotonic-anchored event log + sd_notify helper. - bbctrl/__init__.py: marks for imports, args parsed, ioloop, web init, listen, and an sd_notify READY=1 once HTTP is bound. - bbctrl/Ctrl.py: spans around each subsystem (avr, i2c, lcd, mach, preplanner, jog, pwr, hooks, aux, mach.connect). - bbctrl/Comm.py: avr.firmware_rebooted mark. - bbctrl/Web.py: TimingHandler (GET /api/diag/timing) and UITimingHandler (PUT /api/diag/timing/ui), plus a ws.first_open mark. - src/js/restart-timing.js + app.js: UI-side performance.now() marks (script.load, ws.open, ws.first_msg, ui.first_state, window.load), posted once to the controller. - scripts/bbctrl.service: stdout/stderr -> journal so TRACE lines are visible via journalctl -u bbctrl. (Was StandardOutput=null.) Revert: git revert this commit. To disable at runtime without reverting, set BBCTRL_TRACE=0 in the bbctrl service environment.
This commit is contained in:
@@ -7,7 +7,11 @@ User=root
|
||||
ExecStart=/usr/local/bin/bbctrl -l /var/log/bbctrl.log
|
||||
WorkingDirectory=/var/lib/bbctrl
|
||||
Restart=always
|
||||
StandardOutput=null
|
||||
# StandardOutput was 'null'. Set to 'journal' so TRACE lines emitted by
|
||||
# bbctrl.Trace are visible via `journalctl -u bbctrl`. Bbctrl still
|
||||
# writes its own log via -l above; this only affects stdout/stderr.
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
Nice=-10
|
||||
KillMode=process
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ const api = require("./api");
|
||||
const cookie = require("./cookie")("bbctrl-");
|
||||
const Sock = require("./sock");
|
||||
const semverLt = require("semver/functions/lt");
|
||||
const restartTiming = require("./restart-timing");
|
||||
|
||||
if (document.getElementById("svelte-dialog-host") != undefined) {
|
||||
SvelteComponents.createComponent(
|
||||
@@ -497,11 +498,19 @@ module.exports = new Vue({
|
||||
connect: function() {
|
||||
this.sock = new Sock(`//${location.host}/sockjs`);
|
||||
|
||||
let _gotFirstMsg = false;
|
||||
let _gotFirstState = false;
|
||||
|
||||
this.sock.onmessage = (e) => {
|
||||
if (typeof e.data != "object") {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!_gotFirstMsg) {
|
||||
_gotFirstMsg = true;
|
||||
restartTiming.onWsFirstMessage();
|
||||
}
|
||||
|
||||
if (e.data.log && e.data.log.msg !== "Switch not found") {
|
||||
this.$broadcast("log", e.data.log);
|
||||
|
||||
@@ -511,6 +520,11 @@ module.exports = new Vue({
|
||||
}
|
||||
}
|
||||
|
||||
if (!_gotFirstState) {
|
||||
_gotFirstState = true;
|
||||
restartTiming.onFirstState();
|
||||
}
|
||||
|
||||
// Check for session ID change on controller
|
||||
if ("sid" in e.data) {
|
||||
if (typeof this.sid == "undefined") {
|
||||
@@ -535,6 +549,7 @@ module.exports = new Vue({
|
||||
|
||||
this.sock.onopen = () => {
|
||||
this.status = "connected";
|
||||
restartTiming.onWsOpen();
|
||||
this.$emit(this.status);
|
||||
this.$broadcast(this.status);
|
||||
};
|
||||
|
||||
80
src/js/restart-timing.js
Normal file
80
src/js/restart-timing.js
Normal file
@@ -0,0 +1,80 @@
|
||||
// Lightweight UI-side restart/cold-load timing.
|
||||
//
|
||||
// Records a few key marks using performance.now(), then POSTs them to
|
||||
// /api/diag/timing/ui once 'ui.first_state' has fired. Disabled by
|
||||
// setting window.BBCTRL_TRACE = false before this module is loaded.
|
||||
//
|
||||
// Marks collected:
|
||||
// script.load -- this module evaluated
|
||||
// ws.open -- websocket onopen
|
||||
// ws.first_msg -- first message from controller
|
||||
// ui.first_state -- first message that contained controller state
|
||||
// window.load -- window 'load' event
|
||||
//
|
||||
// Aligning these with /api/diag/timing on the server gives the full
|
||||
// picture from systemd start -> bbctrl up -> WS open -> UI rendered.
|
||||
"use strict";
|
||||
|
||||
const _enabled = typeof window !== "undefined" && window.BBCTRL_TRACE !== false;
|
||||
const _t0 = (typeof performance !== "undefined" && performance.now)
|
||||
? performance.now()
|
||||
: Date.now();
|
||||
const _navStart = (typeof performance !== "undefined" && performance.timeOrigin)
|
||||
? performance.timeOrigin
|
||||
: Date.now();
|
||||
|
||||
const marks = [];
|
||||
let posted = false;
|
||||
|
||||
function _now() {
|
||||
return (typeof performance !== "undefined" && performance.now)
|
||||
? performance.now() - _t0
|
||||
: Date.now() - _t0;
|
||||
}
|
||||
|
||||
function mark(name, fields) {
|
||||
if (!_enabled) return;
|
||||
marks.push(Object.assign({ n: name, t: Math.round(_now()) }, fields || {}));
|
||||
}
|
||||
|
||||
function _post() {
|
||||
if (!_enabled || posted) return;
|
||||
posted = true;
|
||||
const body = JSON.stringify({
|
||||
navStart: _navStart,
|
||||
t0_perf: _t0,
|
||||
href: typeof location !== "undefined" ? location.href : "",
|
||||
ua: typeof navigator !== "undefined" ? navigator.userAgent : "",
|
||||
marks: marks,
|
||||
});
|
||||
try {
|
||||
if (typeof fetch === "function") {
|
||||
fetch("/api/diag/timing/ui", {
|
||||
method: "PUT",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: body,
|
||||
keepalive: true,
|
||||
}).catch(() => {});
|
||||
}
|
||||
} catch (e) { /* swallow */ }
|
||||
}
|
||||
|
||||
// Record window load too; doesn't block posting.
|
||||
if (_enabled && typeof window !== "undefined") {
|
||||
window.addEventListener("load", () => mark("window.load"));
|
||||
}
|
||||
|
||||
mark("script.load");
|
||||
|
||||
module.exports = {
|
||||
enabled: _enabled,
|
||||
mark: mark,
|
||||
onWsOpen: () => mark("ws.open"),
|
||||
onWsFirstMessage: () => mark("ws.first_msg"),
|
||||
onFirstState: () => {
|
||||
mark("ui.first_state");
|
||||
// Defer slightly so any synchronous render finishes first.
|
||||
setTimeout(_post, 100);
|
||||
},
|
||||
flush: _post,
|
||||
};
|
||||
@@ -223,6 +223,10 @@ class Comm(object):
|
||||
self.ctrl.mach.process_log(msg)
|
||||
elif 'firmware' in msg:
|
||||
self.log.info('AVR firmware rebooted')
|
||||
try:
|
||||
import bbctrl.Trace as _T
|
||||
_T.mark('avr.firmware_rebooted')
|
||||
except Exception: pass
|
||||
self.connect()
|
||||
else:
|
||||
self._update_state(msg)
|
||||
|
||||
@@ -28,10 +28,12 @@
|
||||
import os
|
||||
import time
|
||||
import bbctrl
|
||||
import bbctrl.Trace as Trace
|
||||
|
||||
|
||||
class Ctrl(object):
|
||||
def __init__(self, args, ioloop, id):
|
||||
Trace.mark('ctrl.init.start', id=id or '<default>')
|
||||
self.args = args
|
||||
self.ioloop = bbctrl.IOLoop(ioloop)
|
||||
self.id = id
|
||||
@@ -43,26 +45,39 @@ class Ctrl(object):
|
||||
if args.demo: log_path = self.get_path(filename = 'bbctrl.log')
|
||||
else: log_path = args.log
|
||||
self.log = bbctrl.log.Log(args, self.ioloop, log_path)
|
||||
Trace.mark('ctrl.log_open')
|
||||
|
||||
self.state = bbctrl.State(self)
|
||||
self.config = bbctrl.Config(self)
|
||||
Trace.mark('ctrl.state_config')
|
||||
|
||||
self.log.get('Ctrl').info('Starting %s' % self.id)
|
||||
|
||||
try:
|
||||
with Trace.span('ctrl.avr'):
|
||||
if args.demo: self.avr = bbctrl.AVREmu(self)
|
||||
else: self.avr = bbctrl.AVR(self)
|
||||
|
||||
with Trace.span('ctrl.i2c'):
|
||||
self.i2c = bbctrl.I2C(args.i2c_port, args.demo)
|
||||
with Trace.span('ctrl.lcd'):
|
||||
self.lcd = bbctrl.LCD(self)
|
||||
with Trace.span('ctrl.mach'):
|
||||
self.mach = bbctrl.Mach(self, self.avr)
|
||||
with Trace.span('ctrl.preplanner'):
|
||||
self.preplanner = bbctrl.Preplanner(self)
|
||||
if not args.demo: self.jog = bbctrl.Jog(self)
|
||||
if not args.demo:
|
||||
with Trace.span('ctrl.jog'):
|
||||
self.jog = bbctrl.Jog(self)
|
||||
with Trace.span('ctrl.pwr'):
|
||||
self.pwr = bbctrl.Pwr(self)
|
||||
with Trace.span('ctrl.hooks'):
|
||||
self.hooks = bbctrl.Hooks(self)
|
||||
with Trace.span('ctrl.aux'):
|
||||
self.aux = bbctrl.AuxAxis(self)
|
||||
self._register_aux_hooks()
|
||||
|
||||
with Trace.span('ctrl.mach.connect'):
|
||||
self.mach.connect()
|
||||
|
||||
self.lcd.add_new_page(bbctrl.MainLCDPage(self))
|
||||
@@ -70,7 +85,12 @@ class Ctrl(object):
|
||||
|
||||
os.environ['GCODE_SCRIPT_PATH'] = self.get_upload()
|
||||
|
||||
except Exception: self.log.get('Ctrl').exception('Internal error: Control initialization failed')
|
||||
Trace.mark('ctrl.init.end')
|
||||
Trace.sd_notify('STATUS=ctrl initialized\n')
|
||||
|
||||
except Exception:
|
||||
Trace.mark('ctrl.init.error')
|
||||
self.log.get('Ctrl').exception('Internal error: Control initialization failed')
|
||||
|
||||
|
||||
def __del__(self): print('Ctrl deleted')
|
||||
|
||||
140
src/py/bbctrl/Trace.py
Normal file
140
src/py/bbctrl/Trace.py
Normal file
@@ -0,0 +1,140 @@
|
||||
################################################################################
|
||||
# #
|
||||
# Lightweight phase tracing for bbctrl restart / boot timing. #
|
||||
# #
|
||||
# Anchored at module import time. All timestamps are seconds since the #
|
||||
# process anchor (monotonic). A wall-clock anchor is captured once so the #
|
||||
# timeline can be aligned with journalctl / systemd-analyze. #
|
||||
# #
|
||||
# Set BBCTRL_TRACE=0 in the environment to disable all marks (no-op). #
|
||||
# #
|
||||
# Exposed by /api/diag/timing as JSON. #
|
||||
# #
|
||||
################################################################################
|
||||
"""Bbctrl restart / startup tracing.
|
||||
|
||||
Usage:
|
||||
import bbctrl.Trace as T
|
||||
T.mark('proc.start')
|
||||
with T.span('ctrl.avr.init'):
|
||||
...
|
||||
|
||||
The timeline is also dumped on demand via /api/diag/timing.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
import threading
|
||||
|
||||
|
||||
_ENABLED = os.environ.get('BBCTRL_TRACE', '1') != '0'
|
||||
|
||||
_t0_monotonic = time.monotonic()
|
||||
_t0_wall = time.time()
|
||||
_lock = threading.Lock()
|
||||
_events = [] # list of dicts: {t, name, fields}
|
||||
_ui_timing = None # last timeline POSTed by the browser
|
||||
|
||||
|
||||
def now():
|
||||
return time.monotonic() - _t0_monotonic
|
||||
|
||||
|
||||
def mark(name, **fields):
|
||||
"""Record a single named event at the current monotonic time."""
|
||||
if not _ENABLED:
|
||||
return
|
||||
t = now()
|
||||
ev = {'t': round(t, 4), 'name': name}
|
||||
if fields:
|
||||
ev['fields'] = fields
|
||||
with _lock:
|
||||
_events.append(ev)
|
||||
# Also surface in the regular log stream so journalctl shows it.
|
||||
try:
|
||||
extras = ''
|
||||
if fields:
|
||||
extras = ' ' + ' '.join('%s=%s' % (k, v) for k, v in fields.items())
|
||||
print('TRACE +%.3fs %s%s' % (t, name, extras), flush=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
class span(object):
|
||||
"""Context manager that emits <name>.start / <name>.end with duration."""
|
||||
|
||||
def __init__(self, name, **fields):
|
||||
self.name = name
|
||||
self.fields = fields
|
||||
self._t = None
|
||||
|
||||
def __enter__(self):
|
||||
if _ENABLED:
|
||||
self._t = time.monotonic()
|
||||
mark(self.name + '.start', **self.fields)
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
if _ENABLED and self._t is not None:
|
||||
dur_ms = int((time.monotonic() - self._t) * 1000)
|
||||
extra = dict(self.fields)
|
||||
extra['dur_ms'] = dur_ms
|
||||
if exc_type is not None:
|
||||
extra['error'] = exc_type.__name__
|
||||
mark(self.name + '.end', **extra)
|
||||
return False
|
||||
|
||||
|
||||
def set_ui_timing(data):
|
||||
global _ui_timing
|
||||
_ui_timing = data
|
||||
|
||||
|
||||
def timeline():
|
||||
with _lock:
|
||||
events = list(_events)
|
||||
return {
|
||||
'enabled': _ENABLED,
|
||||
't0_wall': _t0_wall,
|
||||
't0_iso': time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(_t0_wall)),
|
||||
'now': now(),
|
||||
'pid': os.getpid(),
|
||||
'events': events,
|
||||
'ui': _ui_timing,
|
||||
}
|
||||
|
||||
|
||||
def dump(path):
|
||||
try:
|
||||
with open(path, 'w') as f:
|
||||
json.dump(timeline(), f, indent=2)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# Sd_notify helper -------------------------------------------------------------
|
||||
#
|
||||
# Allows bbctrl to tell systemd "I am ready" / "current status is X" so
|
||||
# `systemctl status bbctrl` and `systemd-analyze critical-chain` reflect the
|
||||
# actual application state instead of just exec start.
|
||||
def sd_notify(state):
|
||||
"""Send a status line to systemd. Safe no-op when not under systemd."""
|
||||
addr = os.environ.get('NOTIFY_SOCKET')
|
||||
if not addr:
|
||||
return
|
||||
try:
|
||||
import socket
|
||||
sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
|
||||
try:
|
||||
# Abstract socket if it starts with '@'
|
||||
target = '\0' + addr[1:] if addr.startswith('@') else addr
|
||||
sock.sendto(state.encode('utf-8'), target)
|
||||
finally:
|
||||
sock.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# Mark module-import time so even importing bbctrl shows up.
|
||||
mark('trace.import')
|
||||
@@ -885,6 +885,31 @@ class RemoteDiagnosticsHandler(bbctrl.APIHandler):
|
||||
'message': e.reason or "Unknown"
|
||||
})
|
||||
|
||||
class TimingHandler(bbctrl.APIHandler):
|
||||
"""Return the bbctrl process startup timeline as JSON.
|
||||
|
||||
Includes monotonic-anchored events from bbctrl.Trace, the wall
|
||||
clock anchor (so the timeline can be aligned with journalctl /
|
||||
systemd-analyze output), and the most recent UI-side timing
|
||||
payload posted by the browser.
|
||||
"""
|
||||
def get(self):
|
||||
import bbctrl.Trace as _T
|
||||
self.write_json(_T.timeline())
|
||||
|
||||
|
||||
class UITimingHandler(bbctrl.APIHandler):
|
||||
"""Browser posts its performance.now() marks here once per load."""
|
||||
def put_ok(self):
|
||||
import bbctrl.Trace as _T
|
||||
# self.json is parsed in APIHandler.prepare()
|
||||
try:
|
||||
_T.set_ui_timing(self.json)
|
||||
_T.mark('ui.posted_timing',
|
||||
marks=len(self.json.get('marks', []) or []))
|
||||
except Exception: pass
|
||||
|
||||
|
||||
# Base class for Web Socket connections
|
||||
class ClientConnection(object):
|
||||
def __init__(self, app):
|
||||
@@ -960,6 +985,12 @@ class SockJSConnection(ClientConnection, sockjs.tornado.SockJSConnection):
|
||||
ip = info.ip
|
||||
if 'X-Real-IP' in info.headers: ip = info.headers['X-Real-IP']
|
||||
self.app.get_ctrl(id).log.get('Web').info('Connection from %s' % ip)
|
||||
try:
|
||||
if not getattr(self.app, '_first_ws', False):
|
||||
self.app._first_ws = True
|
||||
import bbctrl.Trace as _T
|
||||
_T.mark('ws.first_open', ip=ip)
|
||||
except Exception: pass
|
||||
super().on_open(id)
|
||||
|
||||
|
||||
@@ -989,6 +1020,8 @@ class Web(tornado.web.Application):
|
||||
|
||||
handlers = [
|
||||
(r'/websocket', WSConnection),
|
||||
(r'/api/diag/timing', TimingHandler),
|
||||
(r'/api/diag/timing/ui', UITimingHandler),
|
||||
(r'/api/log', LogHandler),
|
||||
(r'/api/message/(\d+)/ack', MessageAckHandler),
|
||||
(r'/api/bugreport', BugReportHandler),
|
||||
|
||||
@@ -36,6 +36,13 @@ import datetime
|
||||
|
||||
from pkg_resources import Requirement, resource_filename
|
||||
|
||||
# Trace must be imported before the rest of bbctrl so its monotonic
|
||||
# anchor is the earliest reasonable point and so import-time costs of
|
||||
# heavy submodules (camotics gplan.so, sockjs, tornado, etc.) are
|
||||
# attributable in /api/diag/timing.
|
||||
import bbctrl.Trace as Trace
|
||||
Trace.mark('imports.bbctrl.start')
|
||||
|
||||
from bbctrl.RequestHandler import RequestHandler
|
||||
from bbctrl.APIHandler import APIHandler
|
||||
from bbctrl.FileHandler import FileHandler
|
||||
@@ -66,6 +73,8 @@ import bbctrl.v4l2 as v4l2
|
||||
import bbctrl.Log as log
|
||||
import bbctrl.ObjGraph as ObjGraph
|
||||
|
||||
Trace.mark('imports.bbctrl.end')
|
||||
|
||||
|
||||
ctrl = None
|
||||
|
||||
@@ -169,19 +178,28 @@ def parse_args():
|
||||
def run():
|
||||
global ctrl
|
||||
|
||||
Trace.mark('run.enter')
|
||||
args = parse_args()
|
||||
Trace.mark('args.parsed')
|
||||
|
||||
# Set signal handler
|
||||
signal.signal(signal.SIGTERM, on_exit)
|
||||
|
||||
# Create ioloop
|
||||
ioloop = tornado.ioloop.IOLoop.current()
|
||||
Trace.mark('ioloop.created')
|
||||
|
||||
# Set ObjGraph signal handler
|
||||
if args.debug: Debugger(ioloop, args.debug)
|
||||
|
||||
# Start server
|
||||
with Trace.span('web.init'):
|
||||
web = Web(args, ioloop)
|
||||
Trace.mark('listen', port=args.port, addr=args.addr)
|
||||
|
||||
# Notify systemd we are ready (no-op when not under systemd).
|
||||
Trace.sd_notify('READY=1\nSTATUS=listening on %s:%d\n' %
|
||||
(args.addr, args.port))
|
||||
|
||||
try:
|
||||
ioloop.start()
|
||||
|
||||
Reference in New Issue
Block a user