From eae478f3d55509c9bea4f3449e988efc8f8eb47e Mon Sep 17 00:00:00 2001 From: goodboy Date: Sat, 25 Apr 2026 00:05:58 -0400 Subject: [PATCH] Add `_testing._reap` + auto-reap fixture MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zombie-subactor cleanup for the test suite, SC-polite discipline (`SIGINT` first, bounded grace, `SIGKILL` only on survivors). Two parts: a shared reaper module + an autouse session-end fixture that runs it. Deats, - new `tractor/_testing/_reap.py` (+230 LOC) — Linux-only reaper using `/proc/<pid>/{status,cwd,cmdline}` inspection. Two detection modes: - `find_descendants(parent_pid)` for the in-session case (PPid-direct-match while pytest is still alive). - `find_orphans(repo_root)` for the CLI / post-mortem case (`PPid==1` reparented to init + `cwd` filter to repo root + `python` cmdline filter). - `reap(pids, *, grace=3.0, poll=0.25)` does the signal ladder: SIGINT all, poll up to `grace` for exit, SIGKILL any survivors. Returns `(signalled, killed)` for caller-side reporting. - new `_reap_orphaned_subactors` session-scoped autouse fixture in `tractor/_testing/pytest.py` — after `yield`, runs `find_descendants(os.getpid())` + `reap(...)` so each pytest session leaves no surviving forks. - companion CLI scaffolding lives at `scripts/tractor-reap` (separate commit) for the pytest-died-mid-session case where the in-session fixture didn't get to run. Also, - promote `from tractor.spawn._spawn import SpawnMethodKey` to module-top in `pytest.py` (was inline-imported inside `pytest_generate_tests`), and reuse it in `pytest_collection_modifyitems` to assert each `skipon_spawn_backend` mark arg is a valid spawn-method literal — catches typos at collection time. - inline `# ?TODO` flags running these through the `try_set_backend` checker for stronger validation. Cross-refs `feedback_sc_graceful_cancel_first.md` for the SIGINT-before-SIGKILL discipline rationale.
(this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- tractor/_testing/_reap.py | 230 +++++++++++++++++++++++++++++++++++++ tractor/_testing/pytest.py | 45 +++++++- 2 files changed, 273 insertions(+), 2 deletions(-) create mode 100644 tractor/_testing/_reap.py diff --git a/tractor/_testing/_reap.py b/tractor/_testing/_reap.py new file mode 100644 index 00000000..3e2309ff --- /dev/null +++ b/tractor/_testing/_reap.py @@ -0,0 +1,230 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +''' +Zombie-subactor reaper — SC-polite (SIGINT first, SIGKILL +as last resort with a bounded grace window). + +Shared implementation between the `tractor-reap` CLI +(`scripts/tractor-reap`) and the pytest session-scoped +auto-fixture that guards the test suite against leftover +subactor processes. + +Design notes +------------ + +- Linux-only: reads `/proc/<pid>/{status,cwd,cmdline}`. +- Two detection modes: + + 1. **descendant-mode** — when invoked from a still-live + parent (e.g. a pytest session-end fixture), match by + `PPid == parent_pid`. Direct + precise; the target + PIDs are still parented to the live pytest process + at teardown time, before pytest exits. + + 2. **orphan-mode** — when invoked after the parent died + (e.g.
the `tractor-reap` CLI run post-Ctrl+C), match + by `PPid == 1` (reparented to init) AND `cwd == + <repo_root>` AND cmdline contains `python`. The cwd + filter is what keeps the heuristic from sweeping up + unrelated init-children on the box. + +- Escalation: for every matched PID, SIGINT, poll for up + to `grace` seconds, then SIGKILL any survivors. The + two-phase pattern is the SC-graceful-cancel discipline + documented in `feedback_sc_graceful_cancel_first.md` — + we want the subactor runtime to run its trio cancel + shield + IPC teardown paths where it can. + +''' +from __future__ import annotations + +import os +import pathlib +import signal +import time + + +def _read_status_ppid(pid: int) -> int | None: + ''' + Return the parent-pid from `/proc/<pid>/status` or + `None` if the proc went away / is unreadable. + + ''' + try: + with open(f'/proc/{pid}/status') as f: + for line in f: + if line.startswith('PPid:'): + return int(line.split()[1]) + except (FileNotFoundError, PermissionError, ProcessLookupError): + return None + return None + + +def _read_cwd(pid: int) -> str | None: + try: + return os.readlink(f'/proc/{pid}/cwd') + except (FileNotFoundError, PermissionError, ProcessLookupError): + return None + + +def _read_cmdline(pid: int) -> str: + try: + with open(f'/proc/{pid}/cmdline', 'rb') as f: + return f.read().replace(b'\0', b' ').decode(errors='replace') + except (FileNotFoundError, PermissionError, ProcessLookupError): + return '' + + +def _iter_live_pids() -> list[int]: + ''' + Enumerate currently-alive pids from `/proc`. + + ''' + try: + entries: list[str] = os.listdir('/proc') + except OSError: + return [] + return [int(e) for e in entries if e.isdigit()] + + +def find_descendants( + parent_pid: int, +) -> list[int]: + ''' + PIDs whose `PPid == parent_pid` — i.e. direct + children of the given pid. Used by the pytest + session-end fixture where `parent_pid` is still + alive as the pytest-python process.
+ + ''' + return [ + pid + for pid in _iter_live_pids() + if _read_status_ppid(pid) == parent_pid + ] + + +def find_orphans( + repo_root: pathlib.Path, +) -> list[int]: + ''' + PIDs that are: + + - reparented to init (`PPid == 1`), + - have `cwd == <repo_root>`, + - and have a `python` in their cmdline. + + This is the "pytest-died-mid-session" case where the + subactor forks got reparented. The cwd filter is the + critical bit that keeps us from sweeping up unrelated + init-children on the box. + + ''' + repo: str = str(repo_root) + hits: list[int] = [] + for pid in _iter_live_pids(): + if _read_status_ppid(pid) != 1: + continue + cwd: str | None = _read_cwd(pid) + if cwd != repo: + continue + cmd: str = _read_cmdline(pid) + if 'python' not in cmd: + continue + hits.append(pid) + return hits + + +def reap( + pids: list[int], + *, + grace: float = 3.0, + poll: float = 0.25, + log=print, +) -> tuple[list[int], list[int]]: + ''' + Deliver SIGINT to each pid, wait up to `grace` + seconds for them to exit, then SIGKILL any that + survive. + + Returns `(signalled, survivors_killed)` so callers + can report / assert. + + `log` is the logger function for user-visible + progress lines — default `print`; a pytest fixture + can swap it for a `pytest`-friendly writer.
+ + ''' + if not pids: + return ([], []) + + signalled: list[int] = [] + for pid in pids: + try: + os.kill(pid, signal.SIGINT) + signalled.append(pid) + except ProcessLookupError: + # raced — already gone + pass + + if signalled: + log( + f'[tractor-reap] SIGINT → {len(signalled)} ' + f'proc(s): {signalled}' + ) + + deadline: float = time.monotonic() + grace + while time.monotonic() < deadline: + time.sleep(poll) + alive: list[int] = [ + pid for pid in signalled if _is_alive(pid) + ] + if not alive: + return (signalled, []) + + survivors: list[int] = [ + pid for pid in signalled if _is_alive(pid) + ] + if survivors: + log( + f'[tractor-reap] SIGKILL (after {grace}s ' + f'grace) → {survivors}' + ) + for pid in survivors: + try: + os.kill(pid, signal.SIGKILL) + except ProcessLookupError: + pass + + return (signalled, survivors) + + +def _is_alive(pid: int) -> bool: + ''' + True iff `/proc/<pid>` still exists AND the proc + isn't already a zombie (Z state). + + ''' + try: + with open(f'/proc/{pid}/status') as f: + for line in f: + if line.startswith('State:'): + # e.g. 'State:\tZ (zombie)' + return 'Z' not in line.split()[1] + except (FileNotFoundError, ProcessLookupError): + return False + return True diff --git a/tractor/_testing/pytest.py b/tractor/_testing/pytest.py index ef3cc9a7..c707e0db 100644 --- a/tractor/_testing/pytest.py +++ b/tractor/_testing/pytest.py @@ -32,6 +32,7 @@ from typing import ( import pytest import tractor +from tractor.spawn._spawn import SpawnMethodKey import trio @@ -274,7 +275,12 @@ def pytest_collection_modifyitems( default_reason: str = f'Borked on --spawn-backend={backend!r}' for item in items: for mark in item.iter_markers(name='skipon_spawn_backend'): - if backend in mark.args: + skip_backends: tuple[str] = mark.args + for skip_backend in skip_backends: + assert skip_backend in get_args(SpawnMethodKey) + # ?TODO, run these through the try-set-backend checker to + # avoid typos?
+ if backend in skip_backends: reason: str = mark.kwargs.get( 'reason', default_reason, @@ -285,6 +291,42 @@ def pytest_collection_modifyitems( break +@pytest.fixture( + scope='session', + autouse=True, +) +def _reap_orphaned_subactors(): + ''' + Session-scoped autouse fixture: after the whole test + session finishes, SIGINT any subactor processes still + parented to this `pytest` process, wait a bounded + grace window, then SIGKILL survivors. + + Rationale: under fork-based spawn backends (notably + `subint_forkserver`), a test that times out or bails + mid-teardown can leave subactor forks alive. Without + this reap, they linger across sessions and compete + for ports / inherit pytest's capture-pipe fds — which + flakifies later tests. SC-polite discipline: SIGINT + first to let the subactor's trio cancel shield + IPC + teardown paths run before we escalate. + + Matching companion CLI: `scripts/tractor-reap` for + the pytest-died-mid-session case. + + ''' + import os + parent_pid: int = os.getpid() + yield + from tractor._testing._reap import ( + find_descendants, + reap, + ) + pids: list[int] = find_descendants(parent_pid) + if pids: + reap(pids, grace=3.0) + + @pytest.fixture(scope='session') def debug_mode( request: pytest.FixtureRequest, @@ -398,7 +440,6 @@ def pytest_generate_tests( # drive the valid-backend set from the canonical `Literal` so # adding a new spawn backend (e.g. `'subint'`) doesn't require # touching the harness. - from tractor.spawn._spawn import SpawnMethodKey assert spawn_backend in get_args(SpawnMethodKey) # NOTE: used-to-be-used-to dyanmically parametrize tests for when