Add `_testing._reap` + auto-reap fixture
Zombie-subactor cleanup for the test suite, SC-polite discipline
(`SIGINT` first, bounded grace, `SIGKILL` only on survivors). Two parts:
a shared reaper module + an autouse session-end fixture that runs it.
Details,
- new `tractor/_testing/_reap.py` (+230 LOC) — Linux-only reaper using
`/proc/<pid>/{status,cwd,cmdline}` inspection. Two detection modes:
- `find_descendants(parent_pid)` for the in-session case
(PPid-direct-match while pytest is still alive).
- `find_orphans(repo_root)` for the CLI / post-mortem case (`PPid==1`
reparented to init + `cwd` filter to repo root + `python` cmdline
filter).
- `reap(pids, *, grace=3.0, poll=0.25)` does the signal ladder: SIGINT
all, poll up to `grace` for exit, SIGKILL any survivors. Returns
`(signalled, killed)` for caller-side reporting.
- new `_reap_orphaned_subactors` session-scoped autouse fixture in
`tractor/_testing/pytest.py` — after `yield`, runs
`find_descendants(os.getpid())` + `reap(...)` so each pytest session
leaves no surviving forks.
- companion CLI scaffolding lives at `scripts/tractor-reap` (separate
commit) for the pytest-died-mid-session case where the in-session
fixture didn't get to run.
Also,
- promote `from tractor.spawn._spawn import SpawnMethodKey` to
module-top in `pytest.py` (was inline-imported inside
`pytest_generate_tests`), and reuse it in
`pytest_collection_modifyitems` to assert each `skipon_spawn_backend`
mark arg is a valid spawn-method literal — catches typos at collection
time.
- inline `# ?TODO` flags running these through the `try_set_backend`
checker for stronger validation.
Cross-refs `feedback_sc_graceful_cancel_first.md` for the
SIGINT-before-SIGKILL discipline rationale.
(this patch was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
subint_forkserver_backend
parent
44bdb1697c
commit
eae478f3d5
|
|
@ -0,0 +1,230 @@
|
||||||
|
# tractor: structured concurrent "actors".
|
||||||
|
# Copyright 2018-eternity Tyler Goodlet.
|
||||||
|
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
'''
|
||||||
|
Zombie-subactor reaper — SC-polite (SIGINT first, SIGKILL
|
||||||
|
as last resort with a bounded grace window).
|
||||||
|
|
||||||
|
Shared implementation between the `tractor-reap` CLI
|
||||||
|
(`scripts/tractor-reap`) and the pytest session-scoped
|
||||||
|
auto-fixture that guards the test suite against leftover
|
||||||
|
subactor processes.
|
||||||
|
|
||||||
|
Design notes
|
||||||
|
------------
|
||||||
|
|
||||||
|
- Linux-only: reads `/proc/<pid>/{status,cwd,cmdline}`.
|
||||||
|
- Two detection modes:
|
||||||
|
|
||||||
|
1. **descendant-mode** — when invoked from a still-live
|
||||||
|
parent (e.g. a pytest session-end fixture), match by
|
||||||
|
`PPid == parent_pid`. Direct + precise; the target
|
||||||
|
PIDs are still parented to the live pytest process
|
||||||
|
at teardown time, before pytest exits.
|
||||||
|
|
||||||
|
2. **orphan-mode** — when invoked after the parent died
|
||||||
|
(e.g. the `tractor-reap` CLI run post-Ctrl+C), match
|
||||||
|
by `PPid == 1` (reparented to init) AND `cwd ==
|
||||||
|
<repo-root>` AND cmdline contains `python`. The cwd
|
||||||
|
filter is what keeps the heuristic from sweeping up
|
||||||
|
unrelated init-children on the box.
|
||||||
|
|
||||||
|
- Escalation: for every matched PID, SIGINT, poll for up
|
||||||
|
to `grace` seconds, then SIGKILL any survivors. The
|
||||||
|
two-phase pattern is the SC-graceful-cancel discipline
|
||||||
|
documented in `feedback_sc_graceful_cancel_first.md` —
|
||||||
|
we want the subactor runtime to run its trio cancel
|
||||||
|
shield + IPC teardown paths where it can.
|
||||||
|
|
||||||
|
'''
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import signal
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
def _read_status_ppid(pid: int) -> int | None:
    '''
    Look up the parent pid of `pid` from the `PPid:` field
    of `/proc/<pid>/status`.

    Gives back `None` when the status file can't be opened
    or read (proc already gone, access denied) or when it
    carries no `PPid:` line at all.

    '''
    status_path: str = f'/proc/{pid}/status'
    try:
        with open(status_path) as status:
            # stop scanning at the first matching field
            ppid_line: str | None = next(
                (ln for ln in status if ln.startswith('PPid:')),
                None,
            )
    except (FileNotFoundError, PermissionError, ProcessLookupError):
        return None

    if ppid_line is None:
        return None

    # field layout is `PPid:<whitespace><int>`
    return int(ppid_line.split()[1])
|
||||||
|
|
||||||
|
|
||||||
|
def _read_cwd(pid: int) -> str | None:
    '''
    Return the target of the `/proc/<pid>/cwd` symlink, or
    `None` if the link can't be read (proc gone or access
    denied).

    '''
    link: str = f'/proc/{pid}/cwd'
    try:
        target: str = os.readlink(link)
    except (FileNotFoundError, PermissionError, ProcessLookupError):
        return None
    else:
        return target
|
||||||
|
|
||||||
|
|
||||||
|
def _read_cmdline(pid: int) -> str:
    '''
    Return the contents of `/proc/<pid>/cmdline` as text with
    every NUL byte swapped for a space; undecodable bytes are
    replaced rather than raising.

    Gives back `''` when the file can't be read (proc gone or
    access denied).

    '''
    try:
        with open(f'/proc/{pid}/cmdline', 'rb') as cmdline_file:
            raw: bytes = cmdline_file.read()
    except (FileNotFoundError, PermissionError, ProcessLookupError):
        return ''

    spaced: bytes = raw.replace(b'\0', b' ')
    return spaced.decode(errors='replace')
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_live_pids() -> list[int]:
    '''
    List every all-digit entry name under `/proc`, converted
    to `int`.

    An unreadable / missing `/proc` yields an empty list.

    '''
    try:
        names: list[str] = os.listdir('/proc')
    except OSError:
        return []

    # only the purely numeric entries are converted
    return list(map(int, filter(str.isdigit, names)))
|
||||||
|
|
||||||
|
|
||||||
|
def find_descendants(
    parent_pid: int,
) -> list[int]:
    '''
    Collect the pids whose `PPid` equals `parent_pid`, i.e.
    only the *direct* children of that process (no
    grandchildren are followed).

    Intended for the pytest session-end fixture, where
    `parent_pid` is the still-running pytest-python process.

    '''
    children: list[int] = []
    for candidate in _iter_live_pids():
        if _read_status_ppid(candidate) == parent_pid:
            children.append(candidate)
    return children
|
||||||
|
|
||||||
|
|
||||||
|
def find_orphans(
    repo_root: pathlib.Path,
) -> list[int]:
    '''
    PIDs that are:

    - reparented to init (`PPid == 1`),
    - have `cwd == <repo_root>`,
    - and have a `python` in their cmdline.

    This is the "pytest-died-mid-session" case where the
    subactor forks got reparented. The cwd filter is the
    critical bit that keeps us from sweeping up unrelated
    init-children on the box.

    `repo_root` may be relative or contain symlinks; it is
    canonicalised before comparison since the `/proc/<pid>/cwd`
    link target is an absolute, already-resolved path.

    '''
    # A raw `str(repo_root)` (e.g. `'.'` or a symlinked checkout
    # path) would never equal the kernel-reported cwd, silently
    # matching nothing.
    repo: str = os.path.realpath(repo_root)

    # `and` short-circuits, so the cheaper `status` read gates
    # the `cwd` / `cmdline` reads exactly as a chain of
    # `continue` guards would.
    return [
        pid
        for pid in _iter_live_pids()
        if _read_status_ppid(pid) == 1
        and _read_cwd(pid) == repo
        and 'python' in _read_cmdline(pid)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def reap(
    pids: list[int],
    *,
    grace: float = 3.0,
    poll: float = 0.25,
    log=print,
) -> tuple[list[int], list[int]]:
    '''
    Deliver SIGINT to each pid, wait up to `grace` seconds
    for them to exit, then SIGKILL any that survive.

    Returns `(signalled, survivors_killed)` so callers can
    report / assert:

    - `signalled`: pids that actually accepted the SIGINT,
    - `survivors_killed`: the subset still alive once the
      grace window closed, each of which was sent SIGKILL.

    Pids that are already gone, or that we are not permitted
    to signal, are skipped and appear in neither list.

    `poll` is the max sleep between liveness checks; `log`
    is the callable used for user-visible progress lines
    (default `print`).

    '''
    signalled: list[int] = []
    for pid in pids:
        try:
            os.kill(pid, signal.SIGINT)
        except ProcessLookupError:
            # raced — already gone
            continue
        except PermissionError:
            # not ours to signal; skip it instead of aborting
            # the sweep for every remaining pid.
            log(
                f'[tractor-reap] skipping pid {pid}: '
                f'not permitted to signal'
            )
            continue
        signalled.append(pid)

    if not signalled:
        # nothing to wait on, so don't burn a poll interval.
        return ([], [])

    log(
        f'[tractor-reap] SIGINT → {len(signalled)} '
        f'proc(s): {signalled}'
    )

    deadline: float = time.monotonic() + grace
    while True:
        # check *before* sleeping so fast exits return at once.
        survivors: list[int] = [
            pid for pid in signalled if _is_alive(pid)
        ]
        if not survivors:
            return (signalled, [])

        remaining: float = deadline - time.monotonic()
        if remaining <= 0:
            break

        # never sleep past the end of the grace window.
        time.sleep(max(0.0, min(poll, remaining)))

    log(
        f'[tractor-reap] SIGKILL (after {grace}s '
        f'grace) → {survivors}'
    )
    for pid in survivors:
        try:
            os.kill(pid, signal.SIGKILL)
        except ProcessLookupError:
            # exited between the last check and the kill
            pass

    return (signalled, survivors)
|
||||||
|
|
||||||
|
|
||||||
|
def _is_alive(pid: int) -> bool:
    '''
    Report whether `pid` is still a running process: its
    `/proc/<pid>/status` must be readable and its `State:`
    field must not be `Z` (zombie).

    A status file without any `State:` line counts as alive.

    '''
    try:
        with open(f'/proc/{pid}/status') as status:
            state_line: str | None = next(
                (ln for ln in status if ln.startswith('State:')),
                None,
            )
    except (FileNotFoundError, ProcessLookupError):
        return False

    if state_line is None:
        return True

    # e.g. 'State:\tZ (zombie)' -> state code is the 2nd token
    state_code: str = state_line.split()[1]
    return 'Z' not in state_code
|
||||||
|
|
@ -32,6 +32,7 @@ from typing import (
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import tractor
|
import tractor
|
||||||
|
from tractor.spawn._spawn import SpawnMethodKey
|
||||||
import trio
|
import trio
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -274,7 +275,12 @@ def pytest_collection_modifyitems(
|
||||||
default_reason: str = f'Borked on --spawn-backend={backend!r}'
|
default_reason: str = f'Borked on --spawn-backend={backend!r}'
|
||||||
for item in items:
|
for item in items:
|
||||||
for mark in item.iter_markers(name='skipon_spawn_backend'):
|
for mark in item.iter_markers(name='skipon_spawn_backend'):
|
||||||
if backend in mark.args:
|
skip_backends: tuple[str] = mark.args
|
||||||
|
for skip_backend in skip_backends:
|
||||||
|
assert skip_backend in get_args(SpawnMethodKey)
|
||||||
|
# ?TODO, run these through the try-set-backend checker to
|
||||||
|
# avoid typos?
|
||||||
|
if backend in skip_backends:
|
||||||
reason: str = mark.kwargs.get(
|
reason: str = mark.kwargs.get(
|
||||||
'reason',
|
'reason',
|
||||||
default_reason,
|
default_reason,
|
||||||
|
|
@ -285,6 +291,42 @@ def pytest_collection_modifyitems(
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(
    scope='session',
    autouse=True,
)
def _reap_orphaned_subactors():
    '''
    Autouse, session-scoped teardown guard: once the whole
    test session is done, any process still listing this
    `pytest` process as its parent gets a SIGINT, a bounded
    grace window, and finally a SIGKILL if it survives.

    Why: with fork-based spawn backends (notably
    `subint_forkserver`) a test that times out or bails
    mid-teardown can leave subactor forks behind. Left alone
    they linger across sessions, compete for ports and hold
    on to pytest's capture-pipe fds, which makes later tests
    flaky. SIGINT goes first (SC-polite) so the subactor's
    trio cancel shield + IPC teardown paths get a chance to
    run before escalation.

    See also the companion CLI `scripts/tractor-reap` for
    the pytest-died-mid-session case.

    '''
    import os

    # capture our own pid up-front, before any test runs
    session_pid: int = os.getpid()

    yield

    # deferred import: only needed at session teardown
    from tractor._testing._reap import (
        find_descendants,
        reap,
    )
    leftovers: list[int] = find_descendants(session_pid)
    if not leftovers:
        return

    reap(leftovers, grace=3.0)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session')
|
@pytest.fixture(scope='session')
|
||||||
def debug_mode(
|
def debug_mode(
|
||||||
request: pytest.FixtureRequest,
|
request: pytest.FixtureRequest,
|
||||||
|
|
@ -398,7 +440,6 @@ def pytest_generate_tests(
|
||||||
# drive the valid-backend set from the canonical `Literal` so
|
# drive the valid-backend set from the canonical `Literal` so
|
||||||
# adding a new spawn backend (e.g. `'subint'`) doesn't require
|
# adding a new spawn backend (e.g. `'subint'`) doesn't require
|
||||||
# touching the harness.
|
# touching the harness.
|
||||||
from tractor.spawn._spawn import SpawnMethodKey
|
|
||||||
assert spawn_backend in get_args(SpawnMethodKey)
|
assert spawn_backend in get_args(SpawnMethodKey)
|
||||||
|
|
||||||
# NOTE: used-to-be-used-to dyanmically parametrize tests for when
|
# NOTE: used-to-be-used-to dyanmically parametrize tests for when
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue