tractor/scripts/tractor-reap

238 lines
7.0 KiB
Python
Executable File

#!/usr/bin/env python3
# tractor: structured concurrent "actors".
# Copyright 2018-eternity Tyler Goodlet.
#
# SPDX-License-Identifier: AGPL-3.0-or-later
'''
`tractor-reap` — SC-polite zombie-subactor reaper +
optional `/dev/shm/` orphan-segment sweep.
Three cleanup phases (run in this order; phases 2 and 3 are opt-in):
1. **process reap** — finds `tractor` subactor processes
left alive after a `pytest` (or any tractor-app) run
that failed to fully cancel its actor tree, then sends
SIGINT with a bounded grace window before escalating
to SIGKILL.
2. **shm sweep** (`--shm` / `--shm-only`) — unlinks
`/dev/shm/<file>` entries owned by the current uid
that no live process has open (mmap'd or fd-held).
Needed because `tractor` disables
`mp.resource_tracker` (see `tractor.ipc._mp_bs`), so a
hard-crashing actor leaves leaked segments that
nothing else GCs.
3. **UDS sweep** (`--uds` / `--uds-only`) — unlinks
`${XDG_RUNTIME_DIR}/tractor/<name>@<pid>.sock` files
whose binder pid is dead (or the `1616` registry
sentinel). Needed because the IPC server's
`os.unlink()` cleanup lives in a `finally:` block
that doesn't always run on hard exits (SIGKILL,
escaped `KeyboardInterrupt`, etc.) — see issue #452.
Process-reap detection modes (auto-selected):
--parent <pid> : descendant-mode — kill procs whose
PPid == <pid>. Use when a parent
is still alive and you want to
scope the sweep precisely (e.g.
CI wrapper calling in from outside
pytest).
(default) : orphan-mode — kill procs with
PPid==1 (init-reparented) whose
cwd matches the repo root AND
whose cmdline contains `python`.
The cwd filter is what prevents
sweeping unrelated init-children.
Usage:
# process reap only (default)
scripts/tractor-reap
# process reap + shm sweep
scripts/tractor-reap --shm
# only the shm sweep, skip process reap
scripts/tractor-reap --shm-only
# process reap + shm + UDS sweep (the works)
scripts/tractor-reap --shm --uds
# only UDS sweep
scripts/tractor-reap --uds-only
# from inside a still-live supervisor
scripts/tractor-reap --parent 12345
# dry-run: list what would be reaped, don't act
scripts/tractor-reap -n
scripts/tractor-reap --shm --uds -n
'''
import argparse
import pathlib
import subprocess
import sys
def _repo_root() -> pathlib.Path:
    '''
    Locate the repository root directory.

    Asks `git rev-parse --show-toplevel` first (run in the
    current working directory). Falls back to the grandparent
    directory of this script file when `git`:

    - can not be launched at all (missing, not executable, ..),
    - exits non-zero (e.g. cwd is not inside a work tree),
    - prints nothing.

    '''
    try:
        out: str = subprocess.check_output(
            ['git', 'rev-parse', '--show-toplevel'],
            stderr=subprocess.DEVNULL,
            text=True,
        ).strip()
    except (
        subprocess.CalledProcessError,
        # any launch failure, not just a missing binary:
        # `FileNotFoundError`, `PermissionError`, etc. are
        # all `OSError` subtypes.
        OSError,
    ):
        out = ''

    # an empty answer would silently turn into `Path('.')`,
    # so treat it the same as a failed lookup.
    if out:
        return pathlib.Path(out)

    return pathlib.Path(__file__).resolve().parent.parent
def _build_parser() -> argparse.ArgumentParser:
    '''
    Construct the `tractor-reap` command line parser.

    The module docstring doubles as the `--help` description,
    hence the raw formatter which keeps its layout intact.

    '''
    parser = argparse.ArgumentParser(
        prog='tractor-reap',
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '--parent', '-p',
        type=int,
        default=None,
        help='descendant-mode: reap procs with PPid==<pid>',
    )
    parser.add_argument(
        '--grace', '-g',
        type=float,
        default=3.0,
        help='SIGINT grace window in seconds (default 3.0)',
    )
    parser.add_argument(
        '--dry-run', '-n',
        action='store_true',
        help='list matched pids/paths but do not signal/unlink',
    )
    parser.add_argument(
        '--shm',
        action='store_true',
        help=(
            'after process reap, also unlink orphaned '
            '/dev/shm segments owned by the current user '
            'that no live process is mapping or holding open'
        ),
    )
    parser.add_argument(
        '--shm-only',
        action='store_true',
        help='skip process reap; only do the shm sweep',
    )
    parser.add_argument(
        '--uds',
        action='store_true',
        help=(
            'after process reap, also unlink orphaned '
            '${XDG_RUNTIME_DIR}/tractor/*.sock files '
            'whose binder pid is dead (or the 1616 '
            'registry sentinel). See issue #452.'
        ),
    )
    parser.add_argument(
        '--uds-only',
        action='store_true',
        help='skip process reap + shm; only do the UDS sweep',
    )
    return parser


def _run_phase(
    found: list,
    *,
    dry_run: bool,
    none_msg: str,
    dry_msg: str,
    do_reap,
) -> bool:
    '''
    Shared "report or act" step used by every cleanup phase.

    - nothing in `found`: print `none_msg`.
    - `dry_run` set: print `dry_msg`, touch nothing.
    - otherwise: call `do_reap(found)`, which must return a
      2-sequence whose second item holds whatever could NOT
      be cleaned up.

    Returns `True` only when `do_reap()` reported leftovers.

    '''
    if not found:
        print(none_msg)
        return False

    if dry_run:
        print(dry_msg)
        return False

    _, leftovers = do_reap(found)
    return bool(leftovers)


def main(argv: 'list[str] | None' = None) -> int:
    '''
    CLI entry point: run the enabled cleanup phases in order.

    `argv` is the argument list to parse (without the program
    name); the default `None` makes `argparse` read
    `sys.argv[1:]`, which is what a plain `main()` call did
    before this parameter existed.

    Returns `0` when every phase that ran cleaned up fully (or
    had nothing to do / was a dry-run), else `1`. A failing
    phase never prevents the later ones from running.

    '''
    args = _build_parser().parse_args(argv)

    # any *-only flag also skips the process reap phase.
    # NOTE: the shm/UDS sweeps below are each gated on their
    # own flag pair, so e.g. `--shm --uds-only` still runs
    # both sweeps.
    skip_proc_reap: bool = (
        args.shm_only
        or
        args.uds_only
    )

    # import lazily so `--help` doesn't require the tractor
    # package to be importable (e.g. when running from a
    # shell not inside a venv).
    repo = _repo_root()
    sys.path.insert(0, str(repo))
    from tractor._testing._reap import (
        find_descendants,
        find_orphans,
        find_orphaned_shm,
        find_orphaned_uds,
        reap,
        reap_shm,
        reap_uds,
    )

    failed: bool = False

    # --- phase 1: process reap (skipped under --*-only) ---
    if not skip_proc_reap:
        if args.parent is not None:
            pids: list[int] = find_descendants(args.parent)
            mode: str = f'descendants of PPid={args.parent}'
        else:
            pids = find_orphans(repo)
            mode = f'orphans (PPid=1, cwd={repo})'

        failed |= _run_phase(
            pids,
            dry_run=args.dry_run,
            none_msg=f'[tractor-reap] no {mode} to reap',
            dry_msg=(
                f'[tractor-reap] dry-run — {mode}:\n {pids}'
            ),
            # 2nd item of the result: pids that survived.
            do_reap=lambda found: reap(found, grace=args.grace),
        )

    # --- phase 2: shm sweep (opt-in) ---
    if args.shm or args.shm_only:
        leaked: list[str] = find_orphaned_shm()
        failed |= _run_phase(
            leaked,
            dry_run=args.dry_run,
            none_msg=(
                '[tractor-reap] no orphaned /dev/shm '
                'segments to sweep'
            ),
            dry_msg=(
                f'[tractor-reap] dry-run — {len(leaked)} '
                f'orphaned shm segment(s):\n {leaked}'
            ),
            do_reap=reap_shm,
        )

    # --- phase 3: UDS sweep (opt-in) ---
    if args.uds or args.uds_only:
        leaked_uds: list[str] = find_orphaned_uds()
        failed |= _run_phase(
            leaked_uds,
            dry_run=args.dry_run,
            none_msg=(
                '[tractor-reap] no orphaned UDS sock-files '
                'to sweep'
            ),
            dry_msg=(
                f'[tractor-reap] dry-run — {len(leaked_uds)} '
                f'orphaned UDS sock-file(s):\n {leaked_uds}'
            ),
            do_reap=reap_uds,
        )

    # exit 0 if everything cleaned cleanly, else 1 — useful
    # for CI health-check chaining.
    return 1 if failed else 0
# script entry point: hand whatever `main()` returns to the
# interpreter as the process exit status.
if __name__ == '__main__':
    sys.exit(main())