""" `xontrib_tractor_diag`: pytest/tractor diagnostic aliases. All aliases live under the `acli.` namespace so xonsh's prefix-completion treats them as a sub-cmd group — type `acli.` to see the full set. Provides: - `acli.ptree ` psutil-backed proc tree, live + zombies split. - `acli.hung_dump [...]` kernel `wchan`/`stack` + `py-spy dump` (incl `--locals`) for each pid in tree. - `acli.bindspace_scan [|]` find orphaned tractor UDS sock files (no live owner pid). bare name -> `$XDG_RUNTIME_DIR/` (e.g. `piker`, `tractor`); path -> use as-is. default: `$XDG_RUNTIME_DIR/tractor`. - `acli.dump_all [--out-dir] full snapshot bundle — [--label]` ptree + hung_dump + bindspace written to a timestamped dir for sharing / AI introspection. - `acli.reap [opts]` SC-polite zombie-subactor reaper + optional `/dev/shm/` + UDS sock-file sweeps. alias for `scripts/tractor-reap`. - `acli.watch [-n SEC] run a callable alias in [alias-args]` an alt-screen loop with flicker-free repaint (cursor-home + per-line EL + post-draw erase-down). Loading from repo root: xontrib load -p ./xontrib tractor_diag Or source directly: source ./xontrib/tractor_diag.xsh Pipe-to-paste idiom (xonsh): acli.hung_dump pytest |t /tmp/hung.log The diagnostic core lives in `tractor._testing.trace` so it can also be invoked from inside pytest tests (e.g. via `fail_after_w_trace` / `afk_alarm_w_trace` capture-on-hang helpers) — these aliases are just thin terminal wrappers. Requires `psutil` for full functionality (`ptree` and the `hung_dump` tree-walk). Falls back to `pgrep -P` recursion if missing. """ import os import sys import signal import time from typing import ( Callable, ) from pathlib import Path from tractor._testing.trace import ( dump_all as _dump_all, dump_hung_state, dump_proc_tree, resolve_pids, scan_bindspace, ) @aliases.unthreadable def watch( args: list[str], ) -> int: ''' A per-term optimized `watch`-like alias for xonsh that runs an arbitrary callable alias in a loop inside the alt-screen buffer. Ctrl-C returns to a pristine shell, SIGWINCH triggers a full redraw, and the per-frame draw uses cursor-home + per-line EL + post-draw erase-down so the loop is flicker- free even when individual lines shrink or grow between frames. usage: acli.watch [-n SEC] [alias-args]... Examples: acli.watch acli.ptree pytest acli.watch -n 1.0 acli.bindspace_scan piker acli.watch acli.hung_dump pytest Only callable aliases (Python functions registered in `aliases`) are supported. Subprocess-style aliases raise an error — wrap them in a thin callable if you need watching. Output capture: the watched alias's stdout is redirected into a `StringIO` per frame so we can post-process it (insert `\033[K` before each `\n`). Aliases that write directly to `sys.stdout.buffer` or `os.write(1, ...)` bypass capture; for those the EL-fix won't apply but the loop still functions. ''' import argparse, io from contextlib import redirect_stdout parser = argparse.ArgumentParser( prog='acli.watch', description=watch.__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, ) parser.add_argument( '-n', '--interval', type=float, default=0.3, help='poll interval in seconds (default: 0.3)', ) parser.add_argument( 'alias', help='name of a registered xonsh callable alias', ) parser.add_argument( 'alias_args', nargs=argparse.REMAINDER, help='args forwarded to the watched alias', ) try: ns = parser.parse_args(args) except SystemExit as se: return int(se.code) if se.code is not None else 0 raw = aliases.get(ns.alias) if raw is None: print( f'[acli.watch] no such alias: {ns.alias!r}' ) return 1 # xonsh stores callable aliases as a bare callable # OR wraps them in `[fn, *preset_args]` (depending # on registration path / version). Unwrap both. fn: Callable|None = None preset_args: list = [] if callable(raw): fn = raw elif ( isinstance(raw, list) and raw and callable(raw[0]) ): fn = raw[0] preset_args = list(raw[1:]) if fn is None: kind: str = type(raw).__name__ print( f'[acli.watch] alias {ns.alias!r} is not a ' f'callable alias (got {kind}); ' f'subprocess-style aliases not supported' ) return 1 _FD: int = sys.stdout.fileno() need_full_clear: bool = False def _on_winch(signum, frame): nonlocal need_full_clear need_full_clear = True prev_winch = signal.signal( signal.SIGWINCH, _on_winch, ) prev_sigint = signal.signal( signal.SIGINT, signal.default_int_handler, ) os.write(_FD, b'\033[?1049h\033[?25l') try: while True: buf = io.StringIO() with redirect_stdout(buf): fn(preset_args + ns.alias_args) if need_full_clear: os.write(_FD, b'\033[H\033[2J') need_full_clear = False else: os.write(_FD, b'\033[H') # `\033[K` (EL) before each newline erases # any stale tail chars left by a longer # prior-frame version of the same line. text: str = buf.getvalue() painted: bytes = ( text.replace('\n', '\033[K\n').encode() ) os.write(_FD, painted) os.write(_FD, b'\033[J') time.sleep(ns.interval) except KeyboardInterrupt: pass finally: os.write(_FD, b'\033[?25h\033[?1049l') signal.signal(signal.SIGWINCH, prev_winch) signal.signal(signal.SIGINT, prev_sigint) return 0 # --- ptree ---------------------------------------------------- def _ptree( args: list[str], ): ''' psutil-backed proc tree; per-proc classification into severity-ordered buckets so leaked / defunct procs don't hide in the noise of normal `live` rows. usage: acli.ptree [--tree|-t] [...] See `tractor._testing.trace.dump_proc_tree()` for the bucket semantics + classification details. To watch this live with flicker-free repaint (alt-screen, per-line EL, SIGWINCH-aware): .. code-block:: xonsh acli.watch acli.ptree pytest ''' flag_tree: bool = False pos_args: list = [] for a in args: if a in ('--tree', '-t'): flag_tree = True else: pos_args.append(a) if not pos_args: print('usage: acli.ptree [--tree|-t] [...]') return 1 roots: list = [] for a in pos_args: roots.extend(resolve_pids(a)) roots = sorted(set(roots)) if not roots: print(f'(no procs match: {pos_args})') return 1 print(dump_proc_tree(roots, flag_tree=flag_tree), end='') # --- hung-dump ----------------------------------------------- def _hung_dump(args): ''' kernel + python state for a hung pytest/tractor tree. walks all descendants of each `` arg. usage: acli.hung_dump [...] note: `/proc//stack` and `py-spy dump` typically require CAP_SYS_PTRACE — invoked via `sudo -n`. If sudo isn't cached this alias prompts (via `sudo -v`); for the non-interactive equivalent see `tractor._testing.trace.dump_hung_state(allow_sudo_prompt=False)`. ''' if not args: print('usage: acli.hung_dump [...]') return 1 roots: list = [] for a in args: roots.extend(resolve_pids(a)) roots = sorted(set(roots)) if not roots: print(f'(no procs match: {args})') return 1 print( dump_hung_state(roots, allow_sudo_prompt=True), end='', ) # --- bindspace-scan ------------------------------------------ def _bindspace_scan(args): ''' Scan a tractor UDS bindspace dir for orphan sock files. usage: acli.bindspace_scan [|] See `tractor._testing.trace.scan_bindspace()` for full arg semantics + output-bucket details. ''' arg: str | None = args[0] if args else None print(scan_bindspace(arg), end='') # --- dump-all (snapshot bundle) ------------------------------ def _dump_all_alias(args): ''' Capture a full diag snapshot bundle for a hung proc-tree into a timestamped directory for offline / AI inspection. usage: acli.dump_all [--label