Add `acli.reap`, namespace `tractor_diag` cmds
Group all xontrib aliases under an `acli.` prefix
so xonsh prefix-completion treats them as a sub-cmd
group — `acli.<TAB>` lists the full set. No parent
`acli` cmd exists; the dot is purely naming.
Renames (incl `-` -> `_` in suffixes for shell-
identifier-friendliness):
- `pytree` -> `acli.pytree`
- `hung-dump` -> `acli.hung_dump`
- `bindspace-scan` -> `acli.bindspace_scan`
Add new `acli.reap` wrapping `scripts/tractor-reap`:
Details:
- 3 opt-in phases via flags:
1. process reap — `find_orphans()` (default,
PPid=1 + cwd=repo + cmdline `python`) or
`find_descendants(--parent PID)`. SIGINT
first, SIGKILL after `--grace` (def 3.0s).
2. `/dev/shm` sweep (`--shm`/`--shm-only`) —
`find_orphaned_shm()` + `reap_shm()`. Needed
because `tractor` disables `mp.resource_tracker`.
3. UDS sock-file sweep (`--uds`/`--uds-only`) —
`find_orphaned_uds()` + `reap_uds()` for stale
`${XDG_RUNTIME_DIR}/tractor/<name>@<pid>.sock`
entries. See #452.
- `--dry-run` lists matches without signalling/
unlinking; survivor pids or sweep errors flip
the alias rc to `1`.
- lazy-imports `tractor._testing._reap` after
`git rev-parse --show-toplevel` (with
`Path(__file__).parent.parent` fallback) so the
contrib is loadable before the venv is on
`sys.path`.
- `SystemExit` raised by `argparse` on `-h`/bad-args is
caught + returned as the alias rc instead of
killing xonsh.
(this patch was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
subint_forkserver_backend
parent
34f333a026
commit
cec6cc2a56
|
|
@ -1,15 +1,23 @@
|
|||
"""
|
||||
`xontrib_tractor_diag`: pytest/tractor diagnostic aliases.
|
||||
|
||||
All aliases live under the `acli.` namespace so xonsh's
|
||||
prefix-completion treats them as a sub-cmd group — type
|
||||
`acli.<TAB>` to see the full set.
|
||||
|
||||
Provides:
|
||||
- `pytree <pid|pgrep-pat>` psutil-backed proc tree,
|
||||
- `acli.pytree <pid|pgrep-pat>` psutil-backed proc tree,
|
||||
live + zombies split.
|
||||
- `hung-dump <pid|pat> [...]` kernel `wchan`/`stack` +
|
||||
- `acli.hung_dump <pid|pat> [...]` kernel `wchan`/`stack` +
|
||||
`py-spy dump` (incl `--locals`)
|
||||
for each pid in tree.
|
||||
- `bindspace-scan [<dir>]` find orphaned tractor UDS
|
||||
- `acli.bindspace_scan [<dir>]` find orphaned tractor UDS
|
||||
sock files (no live owner pid).
|
||||
default: `$XDG_RUNTIME_DIR/tractor`.
|
||||
- `acli.reap [opts]` SC-polite zombie-subactor
|
||||
reaper + optional `/dev/shm/`
|
||||
+ UDS sock-file sweeps.
|
||||
alias for `scripts/tractor-reap`.
|
||||
|
||||
Loading from repo root:
|
||||
xontrib load -p ./xontrib tractor_diag
|
||||
|
|
@ -36,7 +44,7 @@ except ImportError:
|
|||
psutil = None
|
||||
print(
|
||||
'[tractor-diag] `psutil` missing — '
|
||||
'pytree disabled, hung-dump uses pgrep fallback. '
|
||||
'acli.pytree disabled, acli.hung_dump uses pgrep fallback. '
|
||||
'`uv pip install psutil` for full functionality.'
|
||||
)
|
||||
|
||||
|
|
@ -157,7 +165,7 @@ def _pytree(args):
|
|||
severity-ordered buckets so leaked / defunct procs
|
||||
don't hide in the noise of normal `live` rows.
|
||||
|
||||
usage: pytree [--tree|-t] <pid|pgrep-pattern> [...]
|
||||
usage: acli.pytree [--tree|-t] <pid|pgrep-pattern> [...]
|
||||
|
||||
classification (per-proc, not per-tree):
|
||||
|
||||
|
|
@ -199,7 +207,7 @@ def _pytree(args):
|
|||
pos_args.append(a)
|
||||
|
||||
if not pos_args:
|
||||
print('usage: pytree [--tree|-t] <pid|pgrep-pattern> [...]')
|
||||
print('usage: acli.pytree [--tree|-t] <pid|pgrep-pattern> [...]')
|
||||
return 1
|
||||
if psutil is None:
|
||||
print('pytree requires psutil; install via `uv pip install psutil`')
|
||||
|
|
@ -378,14 +386,14 @@ def _hung_dump(args):
|
|||
kernel + python state for a hung pytest/tractor tree.
|
||||
walks all descendants of each `<pid|pgrep-pat>` arg.
|
||||
|
||||
usage: hung-dump <pid|pgrep-pattern> [...]
|
||||
usage: acli.hung_dump <pid|pgrep-pattern> [...]
|
||||
|
||||
note: `/proc/<pid>/stack` and `py-spy dump` typically
|
||||
require CAP_SYS_PTRACE — invoked via `sudo -n`. run
|
||||
`sudo true` first to cache creds.
|
||||
'''
|
||||
if not args:
|
||||
print('usage: hung-dump <pid|pgrep-pattern> [...]')
|
||||
print('usage: acli.hung_dump <pid|pgrep-pattern> [...]')
|
||||
return 1
|
||||
|
||||
# cache sudo creds upfront so per-pid `sudo -n` calls
|
||||
|
|
@ -465,7 +473,7 @@ def _bindspace_scan(args):
|
|||
(those whose embedded `<pid>` no longer corresponds to
|
||||
a live process).
|
||||
|
||||
usage: bindspace-scan [<dir>]
|
||||
usage: acli.bindspace_scan [<dir>]
|
||||
default: `$XDG_RUNTIME_DIR/tractor`
|
||||
(or `/run/user/<uid>/tractor`)
|
||||
'''
|
||||
|
|
@ -533,11 +541,203 @@ def _bindspace_scan(args):
|
|||
print(f'\nto unlink orphans:\n rm {unlink_cmd}')
|
||||
|
||||
|
||||
# --- acli.reap ------------------------------------------------
|
||||
|
||||
def _reap_grace_arg(raw: str) -> float:
    '''
    `argparse` type-converter for `--grace`: accept only a
    finite, non-negative number of seconds.

    '''
    import argparse
    import math

    try:
        secs: float = float(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f'invalid float value: {raw!r}'
        ) from None

    # a negative/NaN/inf window makes no sense for a
    # "SIGINT first, then wait" policy; reject it at the CLI
    # boundary instead of handing it to `reap()`.
    if not math.isfinite(secs) or secs < 0:
        raise argparse.ArgumentTypeError(
            f'grace must be a finite number >= 0, got {raw!r}'
        )
    return secs


def _reap_pid_arg(raw: str) -> int:
    '''
    `argparse` type-converter for `--parent`: accept only a
    positive integer pid.

    '''
    import argparse

    try:
        pid: int = int(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f'invalid int value: {raw!r}'
        ) from None

    if pid <= 0:
        raise argparse.ArgumentTypeError(
            f'pid must be a positive integer, got {raw!r}'
        )
    return pid


def _tractor_reap(args):
    '''
    SC-polite zombie-subactor reaper + optional `/dev/shm/`
    orphan-segment sweep + optional UDS sock-file sweep.

    usage: acli.reap [-h] [--parent PID] [--grace SEC]
                     [--dry-run] [--shm | --shm-only]
                     [--uds | --uds-only]

    phases (run in order when enabled):

    1. process reap — finds tractor subactor procs left
       alive after a `pytest`/app run that failed to fully
       cancel its tree. Default = orphan-mode (PPid==1
       init-reparented procs whose cwd matches repo root
       AND cmdline contains `python`). With `--parent`,
       scopes to descendants of a specific live PID.
       SIGINT first, then SIGKILL after `--grace` (default
       3.0s).
    2. shm sweep (`--shm`/`--shm-only`) — unlinks
       `/dev/shm/<file>` entries owned by the current uid
       that no live process has open. Needed because
       `tractor` disables `mp.resource_tracker`.
    3. UDS sweep (`--uds`/`--uds-only`) — unlinks
       `${XDG_RUNTIME_DIR}/tractor/<name>@<pid>.sock`
       files whose binder pid is dead (or the `1616`
       registry sentinel). See issue #452.

    Mirrors `scripts/tractor-reap` (use `-n`/`--dry-run`
    first to see what would be touched).

    rc: `0` on success; `1` if any proc survived the reap,
    a sweep reported errors, or the reap helpers could not
    be imported; `argparse`'s exit code on `-h`/bad-args.

    '''
    import argparse

    parser = argparse.ArgumentParser(
        prog='acli.reap',
        description=_tractor_reap.__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--parent', '-p',
        type=_reap_pid_arg,
        default=None,
        help='descendant-mode: reap procs with PPid==<pid>',
    )
    parser.add_argument(
        '--grace', '-g',
        type=_reap_grace_arg,
        default=3.0,
        help='SIGINT grace window in seconds (default 3.0)',
    )
    parser.add_argument(
        '--dry-run', '-n',
        action='store_true',
        help='list matched pids/paths but do not signal/unlink',
    )
    parser.add_argument(
        '--shm',
        action='store_true',
        help='also unlink orphaned /dev/shm segments',
    )
    parser.add_argument(
        '--shm-only',
        action='store_true',
        help='skip process reap; only do the shm sweep',
    )
    parser.add_argument(
        '--uds',
        action='store_true',
        help='also unlink orphaned UDS sock-files',
    )
    parser.add_argument(
        '--uds-only',
        action='store_true',
        help='skip process reap + shm; only do the UDS sweep',
    )

    try:
        ns = parser.parse_args(args)
    except SystemExit as se:
        # `argparse` raises SystemExit on `-h`/bad-args; let
        # xonsh treat it as a normal alias return code.
        # `SystemExit.code` may be `None` (clean exit) or a
        # non-int payload, so never blindly `int()` it.
        if se.code is None:
            return 0
        if isinstance(se.code, int):
            return se.code
        return 1

    skip_proc_reap: bool = (
        ns.shm_only
        or
        ns.uds_only
    )

    # repo-root resolution: `git rev-parse --show-toplevel`
    # first, falling back to the xontrib file's parent of
    # parent. mirrors `scripts/tractor-reap._repo_root()`.
    try:
        repo_str: str = sp.check_output(
            ['git', 'rev-parse', '--show-toplevel'],
            stderr=sp.DEVNULL,
            text=True,
        ).strip()
        repo: Path = Path(repo_str)
    except (sp.CalledProcessError, FileNotFoundError):
        repo: Path = Path(__file__).resolve().parent.parent

    # lazy-import the reap helpers since the package may not
    # have been on `sys.path` at xontrib-load time (e.g. the
    # contrib was sourced before activating the venv).
    import sys

    repo_entry: str = str(repo)
    added_entry: bool = repo_entry not in sys.path
    if added_entry:
        sys.path.insert(0, repo_entry)

    try:
        from tractor._testing._reap import (
            find_descendants,
            find_orphans,
            find_orphaned_shm,
            find_orphaned_uds,
            reap,
            reap_shm,
            reap_uds,
        )
    except ImportError as err:
        # don't leave a useless entry at the front of the
        # interactive shell's `sys.path` when the guess at
        # the repo root did not make the package importable.
        if added_entry:
            sys.path.remove(repo_entry)
        print(
            f'[acli.reap] cannot import `tractor._testing._reap` '
            f'(repo={repo}): {err}'
        )
        return 1

    rc: int = 0

    # phase 1: process reap (skipped under `--*-only`)
    if not skip_proc_reap:
        if ns.parent is not None:
            pids: list = find_descendants(ns.parent)
            mode: str = f'descendants of PPid={ns.parent}'
        else:
            pids = find_orphans(repo)
            mode = f'orphans (PPid=1, cwd={repo})'

        if not pids:
            print(f'[acli.reap] no {mode} to reap')
        elif ns.dry_run:
            print(
                f'[acli.reap] dry-run — {mode}:\n  {pids}'
            )
        else:
            # any pid still alive after the grace window +
            # SIGKILL marks the whole run as failed.
            _, survivors = reap(pids, grace=ns.grace)
            if survivors:
                rc = 1

    # phase 2: shm sweep (opt-in)
    if ns.shm or ns.shm_only:
        leaked: list = find_orphaned_shm()
        if not leaked:
            print(
                '[acli.reap] no orphaned /dev/shm '
                'segments to sweep'
            )
        elif ns.dry_run:
            print(
                f'[acli.reap] dry-run — {len(leaked)} '
                f'orphaned shm segment(s):\n  {leaked}'
            )
        else:
            _, errors = reap_shm(leaked)
            if errors:
                rc = 1

    # phase 3: UDS sweep (opt-in)
    if ns.uds or ns.uds_only:
        leaked_uds: list = find_orphaned_uds()
        if not leaked_uds:
            print(
                '[acli.reap] no orphaned UDS sock-files '
                'to sweep'
            )
        elif ns.dry_run:
            print(
                f'[acli.reap] dry-run — {len(leaked_uds)} '
                f'orphaned UDS sock-file(s):\n  {leaked_uds}'
            )
        else:
            _, errors = reap_uds(leaked_uds)
            if errors:
                rc = 1

    return rc
|
||||
|
||||
|
||||
# --- registration ---------------------------------------------
|
||||
|
||||
aliases['pytree'] = _pytree
|
||||
aliases['hung-dump'] = _hung_dump
|
||||
aliases['bindspace-scan'] = _bindspace_scan
|
||||
# all aliases under the `acli.` namespace so xonsh's prefix-
|
||||
# completion makes them feel like a sub-cmd group: type
|
||||
# `acli.<TAB>` and the full set is suggested. no parent
|
||||
# `acli` cmd exists — the dot is purely a naming convention.
|
||||
_TCLI_ALIASES: dict = {
|
||||
'acli.pytree': _pytree,
|
||||
'acli.hung_dump': _hung_dump,
|
||||
'acli.bindspace_scan': _bindspace_scan,
|
||||
'acli.reap': _tractor_reap,
|
||||
}
|
||||
|
||||
for _name, _fn in _TCLI_ALIASES.items():
|
||||
aliases[_name] = _fn
|
||||
|
||||
|
||||
# xontrib protocol hooks (for `xontrib load tractor_diag`).
|
||||
|
|
@ -547,6 +747,6 @@ def _load_xontrib_(xsh, **_):
|
|||
|
||||
|
||||
def _unload_xontrib_(xsh, **_):
|
||||
for name in ('pytree', 'hung-dump', 'bindspace-scan'):
|
||||
for name in _TCLI_ALIASES:
|
||||
aliases.pop(name, None)
|
||||
return {}
|
||||
|
|
|
|||
Loading…
Reference in New Issue