diff --git a/scripts/bbctrl.service b/scripts/bbctrl.service index 8b22cc7..d38b194 100644 --- a/scripts/bbctrl.service +++ b/scripts/bbctrl.service @@ -7,7 +7,11 @@ User=root ExecStart=/usr/local/bin/bbctrl -l /var/log/bbctrl.log WorkingDirectory=/var/lib/bbctrl Restart=always -StandardOutput=null +# StandardOutput was 'null'. Set to 'journal' so TRACE lines emitted by +# bbctrl.Trace are visible via `journalctl -u bbctrl`. Bbctrl still +# writes its own log via -l above; this only affects stdout/stderr. +StandardOutput=journal +StandardError=journal Nice=-10 KillMode=process diff --git a/src/js/app.js b/src/js/app.js index 85dbcd7..4af7117 100644 --- a/src/js/app.js +++ b/src/js/app.js @@ -4,6 +4,7 @@ const api = require("./api"); const cookie = require("./cookie")("bbctrl-"); const Sock = require("./sock"); const semverLt = require("semver/functions/lt"); +const restartTiming = require("./restart-timing"); if (document.getElementById("svelte-dialog-host") != undefined) { SvelteComponents.createComponent( @@ -497,11 +498,19 @@ module.exports = new Vue({ connect: function() { this.sock = new Sock(`//${location.host}/sockjs`); + let _gotFirstMsg = false; + let _gotFirstState = false; + this.sock.onmessage = (e) => { if (typeof e.data != "object") { return; } + if (!_gotFirstMsg) { + _gotFirstMsg = true; + restartTiming.onWsFirstMessage(); + } + if (e.data.log && e.data.log.msg !== "Switch not found") { this.$broadcast("log", e.data.log); @@ -511,6 +520,11 @@ module.exports = new Vue({ } } + if (!_gotFirstState) { + _gotFirstState = true; + restartTiming.onFirstState(); + } + // Check for session ID change on controller if ("sid" in e.data) { if (typeof this.sid == "undefined") { @@ -535,6 +549,7 @@ module.exports = new Vue({ this.sock.onopen = () => { this.status = "connected"; + restartTiming.onWsOpen(); this.$emit(this.status); this.$broadcast(this.status); }; diff --git a/src/js/restart-timing.js b/src/js/restart-timing.js new file mode 100644 index 0000000..f9cbeab --- /dev/null +++ b/src/js/restart-timing.js @@ -0,0 +1,80 @@ +// Lightweight UI-side restart/cold-load timing. +// +// Records a few key marks using performance.now(), then POSTs them to +// /api/diag/timing/ui once 'ui.first_state' has fired. Disabled by +// setting window.BBCTRL_TRACE = false before this module is loaded. +// +// Marks collected: +// script.load -- this module evaluated +// ws.open -- websocket onopen +// ws.first_msg -- first message from controller +// ui.first_state -- first message that contained controller state +// window.load -- window 'load' event +// +// Aligning these with /api/diag/timing on the server gives the full +// picture from systemd start -> bbctrl up -> WS open -> UI rendered. +"use strict"; + +const _enabled = typeof window !== "undefined" && window.BBCTRL_TRACE !== false; +const _t0 = (typeof performance !== "undefined" && performance.now) + ? performance.now() + : Date.now(); +const _navStart = (typeof performance !== "undefined" && performance.timeOrigin) + ? performance.timeOrigin + : Date.now(); + +const marks = []; +let posted = false; + +function _now() { + return (typeof performance !== "undefined" && performance.now) + ? performance.now() - _t0 + : Date.now() - _t0; +} + +function mark(name, fields) { + if (!_enabled) return; + marks.push(Object.assign({ n: name, t: Math.round(_now()) }, fields || {})); +} + +function _post() { + if (!_enabled || posted) return; + posted = true; + const body = JSON.stringify({ + navStart: _navStart, + t0_perf: _t0, + href: typeof location !== "undefined" ? location.href : "", + ua: typeof navigator !== "undefined" ? navigator.userAgent : "", + marks: marks, + }); + try { + if (typeof fetch === "function") { + fetch("/api/diag/timing/ui", { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: body, + keepalive: true, + }).catch(() => {}); + } + } catch (e) { /* swallow */ } +} + +// Record window load too; doesn't block posting. +if (_enabled && typeof window !== "undefined") { + window.addEventListener("load", () => mark("window.load")); +} + +mark("script.load"); + +module.exports = { + enabled: _enabled, + mark: mark, + onWsOpen: () => mark("ws.open"), + onWsFirstMessage: () => mark("ws.first_msg"), + onFirstState: () => { + mark("ui.first_state"); + // Defer slightly so any synchronous render finishes first. + setTimeout(_post, 100); + }, + flush: _post, +}; diff --git a/src/py/bbctrl/Comm.py b/src/py/bbctrl/Comm.py index b7dc27e..f723f84 100644 --- a/src/py/bbctrl/Comm.py +++ b/src/py/bbctrl/Comm.py @@ -223,6 +223,10 @@ class Comm(object): self.ctrl.mach.process_log(msg) elif 'firmware' in msg: self.log.info('AVR firmware rebooted') + try: + import bbctrl.Trace as _T + _T.mark('avr.firmware_rebooted') + except Exception: pass self.connect() else: self._update_state(msg) diff --git a/src/py/bbctrl/Ctrl.py b/src/py/bbctrl/Ctrl.py index f1e0e18..ede7c52 100644 --- a/src/py/bbctrl/Ctrl.py +++ b/src/py/bbctrl/Ctrl.py @@ -28,10 +28,12 @@ import os import time import bbctrl +import bbctrl.Trace as Trace class Ctrl(object): def __init__(self, args, ioloop, id): + Trace.mark('ctrl.init.start', id=id or '') self.args = args self.ioloop = bbctrl.IOLoop(ioloop) self.id = id @@ -43,34 +45,52 @@ class Ctrl(object): if args.demo: log_path = self.get_path(filename = 'bbctrl.log') else: log_path = args.log self.log = bbctrl.log.Log(args, self.ioloop, log_path) + Trace.mark('ctrl.log_open') self.state = bbctrl.State(self) self.config = bbctrl.Config(self) + Trace.mark('ctrl.state_config') self.log.get('Ctrl').info('Starting %s' % self.id) try: - if args.demo: self.avr = bbctrl.AVREmu(self) - else: self.avr = bbctrl.AVR(self) + with Trace.span('ctrl.avr'): + if args.demo: self.avr = bbctrl.AVREmu(self) + else: self.avr = bbctrl.AVR(self) - self.i2c = bbctrl.I2C(args.i2c_port, args.demo) - self.lcd = bbctrl.LCD(self) - self.mach = bbctrl.Mach(self, self.avr) - self.preplanner = bbctrl.Preplanner(self) - if not args.demo: self.jog = bbctrl.Jog(self) - self.pwr = bbctrl.Pwr(self) - self.hooks = bbctrl.Hooks(self) - self.aux = bbctrl.AuxAxis(self) + with Trace.span('ctrl.i2c'): + self.i2c = bbctrl.I2C(args.i2c_port, args.demo) + with Trace.span('ctrl.lcd'): + self.lcd = bbctrl.LCD(self) + with Trace.span('ctrl.mach'): + self.mach = bbctrl.Mach(self, self.avr) + with Trace.span('ctrl.preplanner'): + self.preplanner = bbctrl.Preplanner(self) + if not args.demo: + with Trace.span('ctrl.jog'): + self.jog = bbctrl.Jog(self) + with Trace.span('ctrl.pwr'): + self.pwr = bbctrl.Pwr(self) + with Trace.span('ctrl.hooks'): + self.hooks = bbctrl.Hooks(self) + with Trace.span('ctrl.aux'): + self.aux = bbctrl.AuxAxis(self) self._register_aux_hooks() - self.mach.connect() + with Trace.span('ctrl.mach.connect'): + self.mach.connect() self.lcd.add_new_page(bbctrl.MainLCDPage(self)) self.lcd.add_new_page(bbctrl.IPLCDPage(self.lcd)) os.environ['GCODE_SCRIPT_PATH'] = self.get_upload() - except Exception: self.log.get('Ctrl').exception('Internal error: Control initialization failed') + Trace.mark('ctrl.init.end') + Trace.sd_notify('STATUS=ctrl initialized\n') + + except Exception: + Trace.mark('ctrl.init.error') + self.log.get('Ctrl').exception('Internal error: Control initialization failed') def __del__(self): print('Ctrl deleted') diff --git a/src/py/bbctrl/Trace.py b/src/py/bbctrl/Trace.py new file mode 100644 index 0000000..56093a8 --- /dev/null +++ b/src/py/bbctrl/Trace.py @@ -0,0 +1,140 @@ +################################################################################ +# # +# Lightweight phase tracing for bbctrl restart / boot timing. # +# # +# Anchored at module import time. All timestamps are seconds since the # +# process anchor (monotonic). A wall-clock anchor is captured once so the # +# timeline can be aligned with journalctl / systemd-analyze. # +# # +# Set BBCTRL_TRACE=0 in the environment to disable all marks (no-op). # +# # +# Exposed by /api/diag/timing as JSON. # +# # +################################################################################ +"""Bbctrl restart / startup tracing. + +Usage: + import bbctrl.Trace as T + T.mark('proc.start') + with T.span('ctrl.avr.init'): + ... + +The timeline is also dumped on demand via /api/diag/timing. +""" + +import os +import time +import json +import threading + + +_ENABLED = os.environ.get('BBCTRL_TRACE', '1') != '0' + +_t0_monotonic = time.monotonic() +_t0_wall = time.time() +_lock = threading.Lock() +_events = [] # list of dicts: {t, name, fields} +_ui_timing = None # last timeline POSTed by the browser + + +def now(): + return time.monotonic() - _t0_monotonic + + +def mark(name, **fields): + """Record a single named event at the current monotonic time.""" + if not _ENABLED: + return + t = now() + ev = {'t': round(t, 4), 'name': name} + if fields: + ev['fields'] = fields + with _lock: + _events.append(ev) + # Also surface in the regular log stream so journalctl shows it. + try: + extras = '' + if fields: + extras = ' ' + ' '.join('%s=%s' % (k, v) for k, v in fields.items()) + print('TRACE +%.3fs %s%s' % (t, name, extras), flush=True) + except Exception: + pass + + +class span(object): + """Context manager that emits .start / .end with duration.""" + + def __init__(self, name, **fields): + self.name = name + self.fields = fields + self._t = None + + def __enter__(self): + if _ENABLED: + self._t = time.monotonic() + mark(self.name + '.start', **self.fields) + return self + + def __exit__(self, exc_type, exc, tb): + if _ENABLED and self._t is not None: + dur_ms = int((time.monotonic() - self._t) * 1000) + extra = dict(self.fields) + extra['dur_ms'] = dur_ms + if exc_type is not None: + extra['error'] = exc_type.__name__ + mark(self.name + '.end', **extra) + return False + + +def set_ui_timing(data): + global _ui_timing + _ui_timing = data + + +def timeline(): + with _lock: + events = list(_events) + return { + 'enabled': _ENABLED, + 't0_wall': _t0_wall, + 't0_iso': time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(_t0_wall)), + 'now': now(), + 'pid': os.getpid(), + 'events': events, + 'ui': _ui_timing, + } + + +def dump(path): + try: + with open(path, 'w') as f: + json.dump(timeline(), f, indent=2) + except Exception: + pass + + +# Sd_notify helper ------------------------------------------------------------- +# +# Allows bbctrl to tell systemd "I am ready" / "current status is X" so +# `systemctl status bbctrl` and `systemd-analyze critical-chain` reflect the +# actual application state instead of just exec start. +def sd_notify(state): + """Send a status line to systemd. Safe no-op when not under systemd.""" + addr = os.environ.get('NOTIFY_SOCKET') + if not addr: + return + try: + import socket + sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) + try: + # Abstract socket if it starts with '@' + target = '\0' + addr[1:] if addr.startswith('@') else addr + sock.sendto(state.encode('utf-8'), target) + finally: + sock.close() + except Exception: + pass + + +# Mark module-import time so even importing bbctrl shows up. +mark('trace.import') diff --git a/src/py/bbctrl/Web.py b/src/py/bbctrl/Web.py index e4c0557..d303055 100644 --- a/src/py/bbctrl/Web.py +++ b/src/py/bbctrl/Web.py @@ -885,6 +885,31 @@ class RemoteDiagnosticsHandler(bbctrl.APIHandler): 'message': e.reason or "Unknown" }) +class TimingHandler(bbctrl.APIHandler): + """Return the bbctrl process startup timeline as JSON. + + Includes monotonic-anchored events from bbctrl.Trace, the wall + clock anchor (so the timeline can be aligned with journalctl / + systemd-analyze output), and the most recent UI-side timing + payload posted by the browser. + """ + def get(self): + import bbctrl.Trace as _T + self.write_json(_T.timeline()) + + +class UITimingHandler(bbctrl.APIHandler): + """Browser posts its performance.now() marks here once per load.""" + def put_ok(self): + import bbctrl.Trace as _T + # self.json is parsed in APIHandler.prepare() + try: + _T.set_ui_timing(self.json) + _T.mark('ui.posted_timing', + marks=len(self.json.get('marks', []) or [])) + except Exception: pass + + # Base class for Web Socket connections class ClientConnection(object): def __init__(self, app): @@ -960,6 +985,12 @@ class SockJSConnection(ClientConnection, sockjs.tornado.SockJSConnection): ip = info.ip if 'X-Real-IP' in info.headers: ip = info.headers['X-Real-IP'] self.app.get_ctrl(id).log.get('Web').info('Connection from %s' % ip) + try: + if not getattr(self.app, '_first_ws', False): + self.app._first_ws = True + import bbctrl.Trace as _T + _T.mark('ws.first_open', ip=ip) + except Exception: pass super().on_open(id) @@ -989,6 +1020,8 @@ class Web(tornado.web.Application): handlers = [ (r'/websocket', WSConnection), + (r'/api/diag/timing', TimingHandler), + (r'/api/diag/timing/ui', UITimingHandler), (r'/api/log', LogHandler), (r'/api/message/(\d+)/ack', MessageAckHandler), (r'/api/bugreport', BugReportHandler), diff --git a/src/py/bbctrl/__init__.py b/src/py/bbctrl/__init__.py index 1c1c4d6..3d235cb 100644 --- a/src/py/bbctrl/__init__.py +++ b/src/py/bbctrl/__init__.py @@ -36,6 +36,13 @@ import datetime from pkg_resources import Requirement, resource_filename +# Trace must be imported before the rest of bbctrl so its monotonic +# anchor is the earliest reasonable point and so import-time costs of +# heavy submodules (camotics gplan.so, sockjs, tornado, etc.) are +# attributable in /api/diag/timing. +import bbctrl.Trace as Trace +Trace.mark('imports.bbctrl.start') + from bbctrl.RequestHandler import RequestHandler from bbctrl.APIHandler import APIHandler from bbctrl.FileHandler import FileHandler @@ -66,6 +73,8 @@ import bbctrl.v4l2 as v4l2 import bbctrl.Log as log import bbctrl.ObjGraph as ObjGraph +Trace.mark('imports.bbctrl.end') + ctrl = None @@ -169,19 +178,28 @@ def parse_args(): def run(): global ctrl + Trace.mark('run.enter') args = parse_args() + Trace.mark('args.parsed') # Set signal handler signal.signal(signal.SIGTERM, on_exit) # Create ioloop ioloop = tornado.ioloop.IOLoop.current() + Trace.mark('ioloop.created') # Set ObjGraph signal handler if args.debug: Debugger(ioloop, args.debug) # Start server - web = Web(args, ioloop) + with Trace.span('web.init'): + web = Web(args, ioloop) + Trace.mark('listen', port=args.port, addr=args.addr) + + # Notify systemd we are ready (no-op when not under systemd). + Trace.sd_notify('READY=1\nSTATUS=listening on %s:%d\n' % + (args.addr, args.port)) try: ioloop.start()