diff --git a/tractor/runtime/_runtime.py b/tractor/runtime/_runtime.py
index 9dcca501..38889229 100644
--- a/tractor/runtime/_runtime.py
+++ b/tractor/runtime/_runtime.py
@@ -873,9 +873,12 @@ class Actor:
if self._spawn_method in (
'trio',
'subint',
- # `subint_forkserver` parent-side sends a
- # `SpawnSpec` over IPC just like the other two
- # — fork child-side runtime is trio-native.
+ # `main_thread_forkserver` (and the future
+ # variant-2 `subint_forkserver`) parent-side
+ # sends a `SpawnSpec` over IPC just like the
+ # other two — fork child-side runtime is
+ # trio-native.
+ 'main_thread_forkserver',
'subint_forkserver',
):
diff --git a/tractor/spawn/_main_thread_forkserver.py b/tractor/spawn/_main_thread_forkserver.py
index b04bcb69..35c5efba 100644
--- a/tractor/spawn/_main_thread_forkserver.py
+++ b/tractor/spawn/_main_thread_forkserver.py
@@ -15,16 +15,19 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
'''
-Fork-from-main-interp-worker-thread primitives.
+Variant-1 "main-thread forkserver" spawn backend (today's
+working impl) + the generic fork-from-main-interp-worker-thread
+primitives it's built on.
-Generic, tractor-spawn-backend-agnostic primitives for forking a
-child OS process via `os.fork()` from a regular `threading.Thread`
-attached to the main CPython interpreter. Builds the lowest layer
-that any "subint forkserver"-style spawn backend wants to compose
-on top of.
+Spawn-method key: `'main_thread_forkserver'`. The legacy
+`'subint_forkserver'` key currently aliases here too — see
+`tractor.spawn._subint_forkserver` for the future variant-2
+(subint-isolated-child runtime, gated on
+[jcrist/msgspec#1026](https://github.com/jcrist/msgspec/issues/1026))
+that key is reserved for.
-Why this module exists
-----------------------
+Background
+----------
Two empirical CPython properties drive the design:
@@ -42,27 +45,227 @@ Two empirical CPython properties drive the design:
`ai/conc-anal/subint_fork_from_main_thread_smoketest.py` on
py3.14.
-This module provides the working primitive set: spawn a worker
-thread, fork in it, retrieve the child pid back to the caller
-trio task, and offer a `trio.Process`-shaped shim around the raw
-pid so the existing `soft_kill`/`hard_reap` patterns from
-`_spawn.py` keep working unchanged.
+The fork-from-main-thread primitives below codify property (2)
+into a reusable surface: spawn a worker thread, fork in it,
+retrieve the child pid back to the caller trio task, and offer a
+`trio.Process`-shaped shim around the raw pid so the existing
+`soft_kill`/`hard_reap` patterns from `_spawn.py` keep working
+unchanged.
-Companion module
-----------------
+Design rationale — why a forkserver, and why in-process
+-------------------------------------------------------
-`tractor.spawn._subint_forkserver` builds tractor's
-`subint_forkserver` spawn backend on top of these primitives —
-the spawn-backend coroutine, the subint-specific `proc_kwargs`
-handling, the `_actor_child_main` invocation in the fork-child,
-and the eventual subint-hosted-trio-runtime arch (gated on
-[jcrist/msgspec#1026](https://github.com/jcrist/msgspec/issues/1026)).
-That module imports only the pieces it needs from here.
+Two design questions worth pinning down up front, since the
+naming intentionally evokes the stdlib `multiprocessing.forkserver`
+for comparison:
-What lives here vs. there
--------------------------
+**(1) Why a forkserver pattern at all, vs. forking directly
+from the trio task?**
-Here (truly generic, no tractor or subint dep):
+`os.fork()` is fundamentally hostile to trio: trio owns
+file descriptors, signal-wakeup-fds, threadpools, and an
+event loop with non-trivial post-fork lifecycle invariants
+(see python-trio/trio#1614 et al.). Forking a trio-running
+thread duplicates all that state into the child, which then
+either needs surgical reset (fragile) or has to immediately
+`exec()` (defeats the point of fork-without-exec). The
+*forkserver* sidesteps this by isolating the `os.fork()`
+call in a worker that has provably never entered trio — so
+the child inherits a clean, trio-free image.
+
+**(2) Why an in-process forkserver, vs. stdlib
+`multiprocessing.forkserver`?**
+
+The stdlib design solves the same "fork from clean state"
+problem by spinning up a **separate sidecar process** at
+first use of `mp.set_start_method('forkserver')`. The parent
+then IPC's each spawn request to that sidecar over a unix
+socket; the sidecar is the process that actually calls
+`os.fork()`. This works but pays for cleanliness with three
+costs:
+
+- **Sidecar lifecycle**: a second long-lived process per
+ parent, with its own start/stop/health-check semantics.
+- **IPC overhead per spawn**: every actor-spawn round-trips
+ an `mp` request message through a unix socket before any
+ child code runs.
+- **State isolation by process boundary**: the sidecar can't
+ share parent state at all — every spawn is a "cold" child
+ re-importing modules from disk.
+
+Once the variant-2 (subint-isolated child runtime) lands, the
+in-process forkserver collapses all three costs:
+
+- no sidecar — the forkserver is just another thread,
+- spawn signal is a thread-local event/condition, not IPC,
+- child inherits the warm parent state (loaded modules,
+ populated caches, etc.) for free.
+
+For the full variant-2 picture see
+`tractor.spawn._subint_forkserver`'s docstring. Today (variant
+1) we already get costs 1 + 2 collapsed; cost 3 will land
+when msgspec#1026 unblocks isolated-mode subints.
+
+What survives the fork? — POSIX semantics
+-----------------------------------------
+
+A natural worry when forking from a parent that's running
+`trio.run()` on another thread: does that trio thread (and
+any other threads in the parent) keep running in the child?
+
+**No.** POSIX `fork()` only preserves the *calling* thread
+in the child. Every other thread in the parent — trio's
+runner thread, any `to_thread` cache threads, anything else
+— is gone the instant `fork()` returns in the child.
+
+Concretely, after the forkserver worker calls `os.fork()`:
+
+| thread | parent | child |
+|-----------------------|-----------|---------------|
+| forkserver worker | continues | sole survivor |
+| `trio.run()` thread | continues | gone |
+| any other thread | continues | gone |
+
+The forkserver worker becomes the new "main" execution
+context in the child; `trio.run()` and every other parent
+thread never executes a single instruction post-fork in the
+child.
+
+This is exactly *why* `os.fork()` is delegated to a
+dedicated worker thread that has provably never entered
+trio: we want that trio-free thread to be the surviving
+one in the child.
+
+That said, dead-thread *artifacts* still cross the fork
+boundary (canonical "fork in a multithreaded program is
+dangerous" — see `man pthread_atfork`). What persists, and
+how we handle each:
+
+- **Inherited file descriptors** — the dead trio thread's
+ epoll fd, signal-wakeup-fd, eventfds, sockets, IPC
+ pipes, pytest's capture-fds, etc. are all still in the
+ child's fd table (kernel-level inheritance). Handled by
+ `_close_inherited_fds()` in the child prelude — walks
+ `/proc/self/fd` and closes everything except stdio +
+ the channel pipe to the forkserver.
+- **Memory image** — trio's internal data structures
+ (scheduler, task queues, runner state) sit in COW
+ memory but nobody's executing them. Get GC'd /
+ overwritten when the child's fresh `trio.run()` boots.
+- **Python thread state** — handled automatically by
+ CPython. `PyOS_AfterFork_Child()` calls
+ `_PyThreadState_DeleteExceptCurrent()`, so dead
+ `PyThreadState` objects are cleaned and
+ `threading.enumerate()` returns just the surviving
+ thread.
+- **User-level locks (`threading.Lock`)** —
+ held-by-dead-thread state is the canonical fork hazard.
+ Not an issue in practice for tractor: trio doesn't hold
+ cross-thread locks across fork (its synchronization is
+ within the trio task system, which doesn't survive in
+ either direction). CPython's GIL is auto-reset by the
+ fork callback.
+
+FYI: how this dodges the `trio.run()` × `fork()` hazards
+--------------------------------------------------------
+
+`os.fork()` is famously hostile to `trio` (see
+python-trio/trio#1614 et al.) because trio owns several
+classes of process-global state that all break across the
+fork boundary in different ways. The forkserver-thread
+design dodges each class explicitly:
+
+- **Signal-wakeup-fd**: trio installs a wakeup-fd via
+ `signal.set_wakeup_fd()` on `trio.run()` startup so
+ signals can interrupt `epoll_wait`. The child inherits
+ this fd, but trio's runner that owns it is gone — so
+ any signal delivery in the child writes to a dead
+ reader. *Dodge*: the inherited wakeup-fd is closed by
+ `_close_inherited_fds()`, then the child's own
+ `trio.run()` installs a fresh one.
+- **`epoll`/`kqueue` instance**: trio's I/O backend holds
+ one. Inherited as a dead fd; same fix as above.
+- **Threadpool cache threads** (`trio.to_thread`): worker
+ threads with cached tstate. Don't exist in the child
+ (POSIX); cache state is meaningless garbage that gets
+ reset when the child's trio.run() initializes its own
+ thread cache.
+- **Cancel scopes / nurseries / open `trio.Process` /
+ open sockets**: these are trio-runtime objects, not
+ kernel objects. The runtime that owns them is gone in
+ the child, so the Python objects exist as zombie data
+ in COW memory and get overwritten as the child runs.
+ Inherited *kernel* fds those objects wrapped (sockets,
+ proc pipes) are caught by `_close_inherited_fds()`.
+- **`atexit` handlers**: trio doesn't register any that
+ would mis-fire post-fork; trio's lifetime-stack is
+ all `with`-block-scoped and dies with the runner.
+- **Foreign-language I/O state** (libcurl, OpenSSL session
+ caches, etc.): out of scope — same hazard as any
+ fork-without-exec; users layering those on top of
+ tractor need their own pthread_atfork handlers.
+
+Net effect: for the runtime surface tractor controls
+(trio + IPC layer + msgspec), the forkserver-thread
+isolation + `_close_inherited_fds()` cleanup gives the
+forked child a clean trio environment. Everything else
+falls under the standard fork-without-exec disclaimer.
+
+Implementation status
+---------------------
+
+- A dedicated main-interp worker thread owns all `os.fork()`
+ calls (never enters a subint). ✓ landed.
+- Parent actor's `trio.run()` lives **on the main interp**
+ for now (not a subint yet). The subint-hosted root
+ runtime is the variant-2 step gated on jcrist/msgspec#1026.
+- Spawn-request signal: trio task `→ to_thread.run_sync` to
+ the forkserver-worker thread. ✓ landed.
+- Forked child: runs `_actor_child_main` against a normal
+ trio runtime. ✓ landed.
+
+Validated by `tests/spawn/test_subint_forkserver.py` (file
+will be renamed to `test_main_thread_forkserver.py` in a
+follow-up) including the
+`test_subint_forkserver_spawn_basic` backend-tier check.
+
+Still-open work (tracked on tractor #379):
+
+- no cancellation / hard-kill stress coverage yet
+ (counterpart to `tests/test_subint_cancellation.py` for
+ the plain `subint` backend),
+- `child_sigint='trio'` mode (flag scaffolded below; default
+ is `'ipc'`). Originally intended as a manual SIGINT →
+ trio-cancel bridge, but investigation showed trio's
+ handler IS already correctly installed in the fork-child
+ subactor — the orphan-SIGINT hang is actually a separate
+ bug where trio's event loop stays wedged in `epoll_wait`
+ despite delivery. See
+ `ai/conc-anal/subint_forkserver_orphan_sigint_hang_issue.md`
+ for the full trace + fix directions. Once that root cause
+ is fixed, this flag may end up a no-op / doc-only mode.
+
+TODO — cleanup gated on msgspec PEP 684 support
+-----------------------------------------------
+
+Both worker-thread primitives below allocate a dedicated
+`threading.Thread` rather than using
+`trio.to_thread.run_sync()`. That's a cautious design
+rooted in three distinct-but-entangled issues (GIL
+starvation from legacy-config subints, tstate-recycling
+destroy race on trio cache threads, fork-from-main-tstate
+invariant). Some of those dissolve under PEP 684
+isolated-mode subints; one requires empirical re-testing
+to know.
+
+Full analysis + audit plan in
+`ai/conc-anal/subint_forkserver_thread_constraints_on_pep684_issue.md`,
+tracked at #450; gated on jcrist/msgspec#1026.
+
+What lives here
+---------------
+
+Truly generic primitives (tractor-spawn-backend-agnostic):
- `_close_inherited_fds()` — fd hygiene primitive
- `_format_child_exit()` — `waitpid()` status renderer
@@ -70,17 +273,20 @@ Here (truly generic, no tractor or subint dep):
- `fork_from_worker_thread()` — the core fork primitive
- `_ForkedProc` — trio-cancellable child-wait shim
-There (tractor-specific):
+The variant-1 spawn-backend coroutine on top:
-- `run_subint_in_worker_thread()` — subint primitive (companion
- to `fork_from_worker_thread` for the future-arch where the
- parent's trio runs in a subint)
-- `subint_forkserver_proc()` — the spawn-backend coroutine
- itself (SpawnSpec handshake, IPC wiring, lifecycle)
+- `main_thread_forkserver_proc()` — SpawnSpec handshake, IPC
+ wiring, lifecycle. Registered as the
+ `'main_thread_forkserver'` (and currently the legacy
+ `'subint_forkserver'`-aliased) entry in
+ `tractor.spawn._spawn._methods`.
See also
--------
+- `tractor.spawn._subint_forkserver` — variant-2 placeholder
+ module; reserved for the future subint-isolated-child
+ runtime once jcrist/msgspec#1026 unblocks.
- `tractor.spawn._subint_fork` — the stub for the
fork-from-non-main-subint strategy that DIDN'T work (kept
in-tree as documentation of the attempt + the CPython-level
@@ -95,17 +301,69 @@ See also
from __future__ import annotations
import os
import signal
+import sys
import threading
-from typing import Callable
+from functools import partial
+from typing import (
+ Any,
+ Callable,
+ Literal,
+ TYPE_CHECKING,
+)
import trio
+from trio import TaskStatus
from tractor.log import get_logger
+from tractor.msg import (
+ types as msgtypes,
+ pretty_struct,
+)
+from tractor.runtime._state import current_actor
+from tractor.runtime._portal import Portal
+from ._spawn import (
+ cancel_on_completion,
+ soft_kill,
+)
+from ._subint import _has_subints
+
+if TYPE_CHECKING:
+ from tractor.discovery._addr import UnwrappedAddress
+ from tractor.ipc import (
+ _server,
+ )
+ from tractor.runtime._runtime import Actor
+ from tractor.runtime._supervise import ActorNursery
log = get_logger('tractor')
+# Configurable child-side SIGINT handling for forkserver-spawned
+# subactors. Threaded through `main_thread_forkserver_proc`'s
+# `proc_kwargs` under the `'child_sigint'` key.
+#
+# - `'ipc'` (default, currently the only implemented mode):
+# child has NO trio-level SIGINT handler — trio.run() is on
+# the fork-inherited non-main thread, `signal.set_wakeup_fd()`
+# is main-thread-only. Cancellation flows exclusively via
+# the parent's `Portal.cancel_actor()` IPC path. Safe +
+# deterministic for nursery-structured apps where the parent
+# is always the cancel authority. Known gap: orphan
+# (post-parent-SIGKILL) children don't respond to SIGINT
+# — see `test_orphaned_subactor_sigint_cleanup_DRAFT`.
+#
+# - `'trio'` (**not yet implemented**): install a manual
+# SIGINT → trio-cancel bridge in the child's fork prelude
+# (pre-`trio.run()`) so external Ctrl-C reaches stuck
+# grandchildren even with a dead parent. Adds signal-
+# handling surface the `'ipc'` default cleanly avoids; only
+# pay for it when externally-interruptible children actually
+# matter (e.g. CLI tool grandchildren).
+ChildSigintMode = Literal['ipc', 'trio']
+_DEFAULT_CHILD_SIGINT: ChildSigintMode = 'ipc'
+
+
def _close_inherited_fds(
keep: frozenset[int] = frozenset({0, 1, 2}),
) -> int:
@@ -478,3 +736,238 @@ class _ForkedProc:
f'<_ForkedProc pid={self.pid} '
f'returncode={self._returncode}>'
)
+
+
+async def main_thread_forkserver_proc(
+ name: str,
+ actor_nursery: ActorNursery,
+ subactor: Actor,
+ errors: dict[tuple[str, str], Exception],
+
+ # passed through to actor main
+ bind_addrs: list[UnwrappedAddress],
+ parent_addr: UnwrappedAddress,
+ _runtime_vars: dict[str, Any],
+ *,
+ infect_asyncio: bool = False,
+ task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED,
+    proc_kwargs: dict[str, Any] = {},
+
+) -> None:
+ '''
+ Spawn a subactor via `os.fork()` from a non-trio worker
+ thread (see `fork_from_worker_thread()`), with the forked
+ child running `tractor._child._actor_child_main()` and
+ connecting back via tractor's normal IPC handshake.
+
+ Supervision model mirrors `trio_proc()` — we manage a
+ real OS subprocess, so `Portal.cancel_actor()` +
+ `soft_kill()` on graceful teardown and `os.kill(SIGKILL)`
+ on hard-reap both apply directly (no
+ `_interpreters.destroy()` voodoo needed since the child
+ is in its own process).
+
+ The only real difference from `trio_proc` is the spawn
+ mechanism: fork from a known-clean main-interp worker
+ thread instead of `trio.lowlevel.open_process()`.
+
+ '''
+ if not _has_subints:
+ raise RuntimeError(
+ f'The {"main_thread_forkserver"!r} spawn backend '
+ f'requires Python 3.14+.\n'
+ f'Current runtime: {sys.version}'
+ )
+
+ # Backend-scoped config pulled from `proc_kwargs`. Using
+ # `proc_kwargs` (vs a first-class kwarg on this function)
+ # matches how other backends expose per-spawn tuning
+ # (`trio_proc` threads it to `trio.lowlevel.open_process`,
+ # etc.) and keeps `ActorNursery.start_actor(proc_kwargs=...)`
+ # as the single ergonomic entry point.
+ child_sigint: ChildSigintMode = proc_kwargs.get(
+ 'child_sigint',
+ _DEFAULT_CHILD_SIGINT,
+ )
+ if child_sigint not in ('ipc', 'trio'):
+ raise ValueError(
+ f'Invalid `child_sigint={child_sigint!r}` for '
+ f'`main_thread_forkserver` backend.\n'
+ f'Expected one of: {ChildSigintMode}.'
+ )
+ if child_sigint == 'trio':
+ raise NotImplementedError(
+ "`child_sigint='trio'` mode — trio-native SIGINT "
+ "plumbing in the fork-child — is scaffolded but "
+ "not yet implemented. See the xfail'd "
+ "`test_orphaned_subactor_sigint_cleanup_DRAFT` "
+ "and the TODO in this module's docstring."
+ )
+
+ uid: tuple[str, str] = subactor.aid.uid
+ loglevel: str | None = subactor.loglevel
+
+ # Closure captured into the fork-child's memory image.
+ # In the child this is the first post-fork Python code to
+ # run, on what was the fork-worker thread in the parent.
+ # `child_sigint` is captured here so the impl lands inside
+ # this function once the `'trio'` mode is wired up —
+ # nothing above this comment needs to change.
+ def _child_target() -> int:
+ # Dispatch on the captured SIGINT-mode closure var.
+ # Today only `'ipc'` is reachable (the `'trio'` branch
+ # is fenced off at the backend-entry guard above); the
+ # match is in place so the future `'trio'` impl slots
+ # in as a plain case arm without restructuring.
+ match child_sigint:
+ case 'ipc':
+ pass # <- current behavior: no child-side
+ # SIGINT plumbing; rely on parent
+ # `Portal.cancel_actor()` IPC path.
+ case 'trio':
+ # Unreachable today (see entry-guard above);
+ # this stub exists so that lifting the guard
+ # is the only change required to enable
+ # `'trio'` mode once the SIGINT wakeup-fd
+ # bridge is implemented.
+ raise NotImplementedError(
+ "`child_sigint='trio'` fork-prelude "
+ "plumbing not yet wired."
+ )
+ # Lazy import so the parent doesn't pay for it on
+ # every spawn — it's module-level in `_child` but
+ # cheap enough to re-resolve here.
+ from tractor._child import _actor_child_main
+ # XXX, `os.fork()` inherits the parent's entire memory
+ # image, including `tractor.runtime._state._runtime_vars`
+ # (which in the parent encodes "this process IS the root
+ # actor"). A fresh `exec`-based child starts cold; we
+ # replicate that here by explicitly resetting runtime
+ # vars to their fresh-process defaults — otherwise
+ # `Actor.__init__` takes the `is_root_process() == True`
+ # branch, pre-populates `self.enable_modules`, and trips
+ # the `assert not self.enable_modules` gate at the top
+ # of `Actor._from_parent()` on the subsequent parent→
+ # child `SpawnSpec` handshake. (`_state._current_actor`
+ # is unconditionally overwritten by `_trio_main` → no
+ # reset needed for it.)
+ from tractor.runtime._state import (
+ get_runtime_vars,
+ set_runtime_vars,
+ )
+ set_runtime_vars(get_runtime_vars(clear_values=True))
+ _actor_child_main(
+ uid=uid,
+ loglevel=loglevel,
+ parent_addr=parent_addr,
+ infect_asyncio=infect_asyncio,
+ # The child's runtime is trio-native (uses
+ # `_trio_main` + receives `SpawnSpec` over IPC),
+ # but label it with the actual parent-side spawn
+ # mechanism so `Actor.pformat()` / log lines
+ # reflect reality. Downstream runtime gates that
+ # key on `_spawn_method` group `main_thread_forkserver`
+ # alongside `trio`/`subint` where the SpawnSpec
+ # IPC handshake is concerned — see
+ # `runtime._runtime.Actor._from_parent()`.
+ spawn_method='main_thread_forkserver',
+ )
+ return 0
+
+ cancelled_during_spawn: bool = False
+ proc: _ForkedProc | None = None
+ ipc_server: _server.Server = actor_nursery._actor.ipc_server
+
+ try:
+ try:
+ pid: int = await trio.to_thread.run_sync(
+ partial(
+ fork_from_worker_thread,
+ _child_target,
+ thread_name=(
+ f'main-thread-forkserver[{name}]'
+ ),
+ ),
+ abandon_on_cancel=False,
+ )
+ proc = _ForkedProc(pid)
+ log.runtime(
+ f'Forked subactor via main-thread-forkserver\n'
+ f'(>\n'
+ f' |_{proc}\n'
+ )
+
+ event, chan = await ipc_server.wait_for_peer(uid)
+
+ except trio.Cancelled:
+ cancelled_during_spawn = True
+ raise
+
+ assert proc is not None
+
+ portal = Portal(chan)
+ actor_nursery._children[uid] = (
+ subactor,
+ proc,
+ portal,
+ )
+
+ sspec = msgtypes.SpawnSpec(
+ _parent_main_data=subactor._parent_main_data,
+ enable_modules=subactor.enable_modules,
+ reg_addrs=subactor.reg_addrs,
+ bind_addrs=bind_addrs,
+ _runtime_vars=_runtime_vars,
+ )
+ log.runtime(
+ f'Sending spawn spec to forkserver child\n'
+ f'{{}}=> {chan.aid.reprol()!r}\n'
+ f'\n'
+ f'{pretty_struct.pformat(sspec)}\n'
+ )
+ await chan.send(sspec)
+
+ curr_actor: Actor = current_actor()
+ curr_actor._actoruid2nursery[uid] = actor_nursery
+
+ task_status.started(portal)
+
+ with trio.CancelScope(shield=True):
+ await actor_nursery._join_procs.wait()
+
+ async with trio.open_nursery() as nursery:
+ if portal in actor_nursery._cancel_after_result_on_exit:
+ nursery.start_soon(
+ cancel_on_completion,
+ portal,
+ subactor,
+ errors,
+ )
+
+ # reuse `trio_proc`'s soft-kill dance — `proc`
+ # is our `_ForkedProc` shim which implements the
+ # same `.poll()` / `.wait()` / `.kill()` surface
+ # `soft_kill` expects.
+ await soft_kill(
+ proc,
+ _ForkedProc.wait,
+ portal,
+ )
+ nursery.cancel_scope.cancel()
+
+ finally:
+ # Hard reap: SIGKILL + waitpid. Cheap since we have
+ # the real OS pid, unlike `subint_proc` which has to
+ # fuss with `_interpreters.destroy()` races.
+ if proc is not None and proc.poll() is None:
+ log.cancel(
+ f'Hard killing main-thread-forkserver subactor\n'
+ f'>x)\n'
+ f' |_{proc}\n'
+ )
+ with trio.CancelScope(shield=True):
+ proc.kill()
+ await proc.wait()
+
+ if not cancelled_during_spawn:
+ actor_nursery._children.pop(uid, None)
diff --git a/tractor/spawn/_spawn.py b/tractor/spawn/_spawn.py
index 14b1aafb..fbdb6e5c 100644
--- a/tractor/spawn/_spawn.py
+++ b/tractor/spawn/_spawn.py
@@ -77,7 +77,14 @@ SpawnMethodKey = Literal[
# is CPython-legal and works cleanly; forked child runs
# `tractor._child._actor_child_main()` against a trio
# runtime, exactly like `trio_proc` but via fork instead
- # of subproc-exec. See `tractor.spawn._subint_forkserver`.
+ # of subproc-exec. See `tractor.spawn._main_thread_forkserver`.
+ 'main_thread_forkserver',
+ # RESERVED for the future variant-2 subint-isolated-child
+ # runtime — gated on jcrist/msgspec#1026 + PEP 684. Today
+ # this key aliases to `main_thread_forkserver_proc`; once
+ # the upstream unblocks land it'll dispatch to the
+ # subint-hosted-trio impl. See
+ # `tractor.spawn._subint_forkserver`.
'subint_forkserver',
]
_spawn_method: SpawnMethodKey = 'trio'
@@ -131,14 +138,23 @@ def try_set_start_method(
case 'trio':
_ctx = None
- case 'subint' | 'subint_fork' | 'subint_forkserver':
+ case (
+ 'subint'
+ | 'subint_fork'
+ | 'main_thread_forkserver'
+ | 'subint_forkserver'
+ ):
# All subint-family backends need no `mp.context`;
- # all three feature-gate on the py3.14 public
+ # all four feature-gate on the py3.14 public
# `concurrent.interpreters` wrapper (PEP 734). See
# `tractor.spawn._subint` for the detailed
# reasoning. `subint_fork` is blocked at the
# CPython level (raises `NotImplementedError`);
- # `subint_forkserver` is the working workaround.
+ # `main_thread_forkserver` is the working
+ # variant-1 backend; `subint_forkserver` aliases
+ # to it today, reserved for the future variant-2
+ # subint-isolated-child runtime once upstream
+ # msgspec#1026 unblocks.
from ._subint import _has_subints
if not _has_subints:
raise RuntimeError(
@@ -477,7 +493,7 @@ from ._trio import trio_proc
from ._mp import mp_proc
from ._subint import subint_proc
from ._subint_fork import subint_fork_proc
-from ._subint_forkserver import subint_forkserver_proc
+from ._main_thread_forkserver import main_thread_forkserver_proc
# proc spawning backend target map
@@ -492,8 +508,19 @@ _methods: dict[SpawnMethodKey, Callable] = {
# clean `NotImplementedError` with pointer to the analysis,
# rather than an "invalid backend" error.
'subint_fork': subint_fork_proc,
- # WIP — fork-from-non-trio-worker-thread, works on py3.14+
- # (validated via `ai/conc-anal/subint_fork_from_main_thread_smoketest.py`).
- # See `tractor.spawn._subint_forkserver`.
- 'subint_forkserver': subint_forkserver_proc,
+ # Variant-1 (working today): fork from a regular main-interp
+ # worker thread, child runs trio on its own main interp.
+ # Validated by
+ # `ai/conc-anal/subint_fork_from_main_thread_smoketest.py`.
+ # See `tractor.spawn._main_thread_forkserver`.
+ 'main_thread_forkserver': main_thread_forkserver_proc,
+ # Variant-2 (future, reserved): same fork machinery but
+ # child enters a sub-interpreter to host its `trio.run()`
+ # — gated on jcrist/msgspec#1026 unblocking PEP 684
+ # isolated-mode subints. Today aliases to the variant-1
+ # impl so `--spawn-backend=subint_forkserver` keeps
+ # working; flipped to a `NotImplementedError` stub in a
+ # follow-up commit. See
+ # `tractor.spawn._subint_forkserver`.
+ 'subint_forkserver': main_thread_forkserver_proc,
}
diff --git a/tractor/spawn/_subint_forkserver.py b/tractor/spawn/_subint_forkserver.py
index 0d5e234c..113a41bd 100644
--- a/tractor/spawn/_subint_forkserver.py
+++ b/tractor/spawn/_subint_forkserver.py
@@ -15,364 +15,130 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
'''
-Forkserver-style `os.fork()` primitives for the `subint`-hosted
-actor model.
+Variant-2 (future) "subint forkserver" placeholder — reserved
+for the eventual subint-isolated-child runtime variant.
-Background
-----------
-CPython refuses `os.fork()` from a non-main sub-interpreter:
-`PyOS_AfterFork_Child()` →
-`_PyInterpreterState_DeleteExceptMain()` gates on the calling
-thread's tstate belonging to the main interpreter and aborts
-the forked child otherwise. The full walkthrough (with source
-refs) lives in
-`ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md`.
-
-However `os.fork()` from a regular `threading.Thread` attached
-to the *main* interpreter — i.e. a worker thread that has
-never entered a subint — works cleanly. Empirically validated
-across four scenarios by
-`ai/conc-anal/subint_fork_from_main_thread_smoketest.py` on
-py3.14.
-
-This submodule lifts the validated primitives out of the
-smoke-test and into tractor proper as the
-`subint_forkserver` spawn backend.
-
-Design rationale — why a forkserver, and why in-process
--------------------------------------------------------
-
-There are two design questions worth pinning down up front,
-since the name "subint_forkserver" intentionally evokes the
-stdlib `multiprocessing.forkserver` for comparison:
-
-**(1) Why a forkserver pattern at all, vs. forking directly
-from the trio task?**
-
-`os.fork()` is fundamentally hostile to trio: trio owns
-file descriptors, signal-wakeup-fds, threadpools, and an
-event loop with non-trivial post-fork lifecycle invariants
-(see python-trio/trio#1614 et al.). Forking a trio-running
-thread duplicates all that state into the child, which then
-either needs surgical reset (fragile) or has to immediately
-`exec()` (defeats the point of fork-without-exec). The
-*forkserver* sidesteps this by isolating the `os.fork()`
-call in a worker that has provably never entered trio — so
-the child inherits a clean, trio-free image.
-
-**(2) Why an in-process forkserver, vs. stdlib
-`multiprocessing.forkserver`?**
-
-The stdlib design solves the same "fork from clean state"
-problem by spinning up a **separate sidecar process** at
-first use of `mp.set_start_method('forkserver')`. The parent
-then IPC's each spawn request to that sidecar over a unix
-socket; the sidecar is the process that actually calls
-`os.fork()`. This works but pays for cleanliness with three
-costs:
-
-- **Sidecar lifecycle**: a second long-lived process per
- parent, with its own start/stop/health-check semantics.
-- **IPC overhead per spawn**: every actor-spawn round-trips
- an `mp` request message through a unix socket before any
- child code runs.
-- **State isolation by process boundary**: the sidecar can't
- share parent state at all — every spawn is a "cold" child
- re-importing modules from disk.
-
-The subint architecture lets us keep the forkserver
-**in-process** because subints already provide the
-state-isolation guarantee that `mp.forkserver`'s sidecar
-buys via the process boundary. Concretely: in the envisioned
-arch (currently partially landed — see "Status" below),
-
-- the **main interpreter** stays trio-free and hosts the
- forkserver worker thread that owns `os.fork()`,
-- the parent actor's **`trio.run()`** lives in a separate
- *sub-interpreter* (a different worker thread) — fully
- isolated `sys.modules` / `__main__` / globals from main,
-- when a spawn is requested, the trio task signals the
- forkserver thread (intra-process, ~free) and the
- forkserver forks; the child inherits the parent's full
- in-memory state cheaply.
-
-That collapses the three costs above:
-
-- no sidecar — the forkserver is just another thread,
-- spawn signal is a thread-local event/condition, not IPC,
-- child inherits the warm parent state (loaded modules,
- populated caches, etc.) for free.
-
-The tradeoff we accept in exchange: this design is
-3.14-only (legacy-config subints still share the GIL, so
-the parent's trio loop and the forkserver worker contend
-on it; once PEP 684 isolated-mode + msgspec
-[jcrist/msgspec#1026](https://github.com/jcrist/msgspec/issues/1026)
-land, this constraint relaxes). And the dedicated worker
-threads here are heavier than `trio.to_thread.run_sync`
-calls — see the "TODO" section further down for the audit
-plan once those upstream pieces land.
+> **Status:** placeholder. Today
+> `--spawn-backend=subint_forkserver` aliases to
+> `main_thread_forkserver_proc` (variant 1, see
+> `tractor.spawn._main_thread_forkserver`). A follow-up commit
+> in this PR series flips the alias to a `NotImplementedError`
+> stub reserving the `'subint_forkserver'` key for the literal
+> subint-hosted-child variant once
+> [jcrist/msgspec#1026](https://github.com/jcrist/msgspec/issues/1026)
+> unblocks PEP 684 isolated-mode subints upstream.
Future arch — what subints would buy us
---------------------------------------
-The `subint` in this module's name is **family-naming
-today** — currently the implementation only uses a regular
-worker thread on the main interp; no subinterpreter is
-created anywhere in the parent or child. The naming becomes
-*literal* once jcrist/msgspec#1026 unblocks isolated-mode
-subints (PEP 684 per-interp GIL). Three concrete wins land
-at that point:
+When msgspec#1026 unblocks isolated-mode subints (PEP 684
+per-interp GIL), three concrete wins land — these are the
+reason the `'subint_forkserver'` key is reserved as a
+distinct backend rather than just folded into
+`'main_thread_forkserver'`:
**(1) Cheaper forks (smaller main-interp COW image)**
-Today the parent's main interp carries the full tractor
-stack: trio runtime, msgspec codecs, IPC layer, every
-user module the actor imported. When the forkserver
-worker calls `os.fork()` the child inherits ALL of that
-as COW memory — even though most gets overwritten when
-the child boots its own `trio.run()`.
+Today (variant 1) the parent's main interp carries the full
+tractor stack: trio runtime, msgspec codecs, IPC layer,
+every user module the actor imported. When the forkserver
+worker calls `os.fork()` the child inherits ALL of that as
+COW memory — even though most gets overwritten when the
+child boots its own `trio.run()`.
-Move the parent's `trio.run()` into a subint (its own
-`sys.modules` / `__main__` / globals) and the main
-interp **stays minimal** — just the forkserver-thread
-plumbing + bare CPython. The main interp becomes the
-*literal* forkserver: an intentionally-empty execution
-context whose only job is to call `os.fork()` cleanly.
-Inherited COW image shrinks proportionally.
+Variant 2 moves the parent's `trio.run()` into a subint (its
+own `sys.modules` / `__main__` / globals). The main interp
+**stays minimal** — just the forkserver-thread plumbing +
+bare CPython. The main interp becomes the *literal*
+forkserver: an intentionally-empty execution context whose
+only job is to call `os.fork()` cleanly. Inherited COW image
+shrinks proportionally.
**(2) True parallelism between forkserver and trio
(per-interp GIL)**
-Today the forkserver worker and the trio.run() thread
-share the main GIL — when one runs the other waits.
-Spawn requests briefly stall trio while the worker
-takes the GIL to call `os.fork()`. PEP 684 isolated-
-mode gives each subint its own GIL: forkserver thread
-on main + trio on subint actually run in parallel.
-Spawn latency drops, trio loop doesn't notice the
-fork happening.
+Variant-1 today: the forkserver worker and the trio.run()
+thread share the main GIL — when one runs the other waits.
+Spawn requests briefly stall trio while the worker takes
+the GIL to call `os.fork()`. PEP 684 isolated-mode gives
+each subint its own GIL: forkserver thread on main + trio
+on subint actually run in parallel. Spawn latency drops,
+trio loop doesn't notice the fork happening.
**(3) Multi-actor-per-process (the architectural prize)**
-The bigger payoff and the reason `_subint.py` (the
-in-thread `subint` backend) exists in parallel with
-this module. With per-interp-GIL subints, one process
-can host:
+The bigger payoff and the reason `_subint.py` (the in-thread
+`subint` backend) exists in parallel with this module. With
+per-interp-GIL subints, one process can host:
- main interp: forkserver thread + bookkeeping
- subint A: actor 1's `trio.run()`
- subint B: actor 2's `trio.run()`
- subint C: ...
-`os.fork()` becomes the **last-resort** spawn — used
-only when a new OS process is actually required
-(cgroups, namespaces, security boundary, multi-host
-distribution). Within a single process, subint-per-
-actor is radically cheaper: no fork, no COW, no
-inherited-fd cleanup — just `_interpreters.create()`
-+ `_interpreters.exec()`.
+`os.fork()` becomes the **last-resort** spawn — used only
+when a new OS process is actually required (cgroups,
+namespaces, security boundary, multi-host distribution).
+Within a single process, subint-per-actor is radically
+cheaper: no fork, no COW, no inherited-fd cleanup — just
+`_interpreters.create()` + `_interpreters.exec()`.
-The two backends converge on a coherent story:
-`subint` → in-process spawn (cheap, GIL-isolated),
-`subint_forkserver` → cross-process spawn (when you
-truly need OS-level isolation). The forkserver isn't
-the default mechanism; it's the bridge to a new
-process when subint isolation isn't enough.
+The three backends converge on a coherent story:
-Implementation status — what's wired today
------------------------------------------
+- `subint` → in-process spawn (cheap, GIL-isolated),
+- `main_thread_forkserver` → cross-process spawn today
+ (variant 1, working),
+- `subint_forkserver` → cross-process spawn with
+ isolated-subint child (variant 2, this module, future).
-The "envisioned arch" above is the eventual target; the
-**currently-landed** flow is a partial step toward it:
+What lives here today
+---------------------
-- A dedicated main-interp worker thread owns all `os.fork()`
- calls (never enters a subint). ✓ landed.
-- Parent actor's `trio.run()` lives **on the main interp**
- for now (not a subint yet). The subint-hosted root
- runtime is gated on jcrist/msgspec#1026 (see
- `_subint.py` docstring).
-- Spawn-request signal: trio task `→ to_thread.run_sync`
- to the forkserver-worker thread. ✓ landed.
-- Forked child: runs `_actor_child_main` against a normal
- trio runtime. ✓ landed.
+- `run_subint_in_worker_thread()` — companion primitive to
+ `_main_thread_forkserver.fork_from_worker_thread()`. Creates
+ a fresh `legacy`-config sub-interpreter and drives a given
+ bootstrap code string through `_interpreters.exec()` on a
+ dedicated worker thread; destroys the subint after the
+ thread joins. Used today by the
+ `subint_fork_from_main_thread_smoketest.py` feasibility
+ check; will be wired into the variant-2
+ `subint_forkserver_proc` spawn-coroutine when it lands.
+- (legacy re-exports of fork primitives kept for backward-
+ compatible imports until external consumers migrate to
+ `_main_thread_forkserver`)
-The "subint" in the backend name refers to the *family* —
-this backend ships in the same PR series as `_subint.py`
-(in-thread subint backend) and `_subint_fork.py` (the RFC
-stub for fork-from-non-main-subint, blocked upstream).
-Once the parent's trio also lives in a subint we'll have
-the full envisioned arch; until then the forkserver
-half is independently useful and ship-able.
+What will live here when variant 2 ships
+----------------------------------------
-What survives the fork? — POSIX semantics
------------------------------------------
-
-A natural worry when forking from a parent that's running
-`trio.run()` on another thread: does that trio thread (and
-any other threads in the parent) keep running in the child?
-
-**No.** POSIX `fork()` only preserves the *calling* thread
-in the child. Every other thread in the parent — trio's
-runner thread, any `to_thread` cache threads, anything else
-— is gone the instant `fork()` returns in the child.
-
-Concretely, after the forkserver worker calls `os.fork()`:
-
-| thread | parent | child |
-|-----------------------|-----------|---------------|
-| forkserver worker | continues | sole survivor |
-| `trio.run()` thread | continues | gone |
-| any other thread | continues | gone |
-
-The forkserver worker becomes the new "main" execution
-context in the child; `trio.run()` and every other
-parent thread never executes a single instruction
-post-fork in the child.
-
-This is exactly *why* `os.fork()` is delegated to a
-dedicated worker thread that has provably never entered
-trio: we want that trio-free thread to be the surviving
-one in the child.
-
-That said, dead-thread *artifacts* still cross the fork
-boundary (canonical "fork in a multithreaded program is
-dangerous" — see `man pthread_atfork`). What persists, and
-how we handle each:
-
-- **Inherited file descriptors** — the dead trio thread's
- epoll fd, signal-wakeup-fd, eventfds, sockets, IPC
- pipes, pytest's capture-fds, etc. are all still in the
- child's fd table (kernel-level inheritance). Handled by
- `_close_inherited_fds()` in the child prelude — walks
- `/proc/self/fd` and closes everything except stdio +
- the channel pipe to the forkserver.
-- **Memory image** — trio's internal data structures
- (scheduler, task queues, runner state) sit in COW
- memory but nobody's executing them. Get GC'd /
- overwritten when the child's fresh `trio.run()` boots.
-- **Python thread state** — handled automatically by
- CPython. `PyOS_AfterFork_Child()` calls
- `_PyThreadState_DeleteExceptCurrent()`, so dead
- `PyThreadState` objects are cleaned and
- `threading.enumerate()` returns just the surviving
- thread.
-- **User-level locks (`threading.Lock`)** —
- held-by-dead-thread state is the canonical fork hazard.
- Not an issue in practice for tractor: trio doesn't hold
- cross-thread locks across fork (its synchronization is
- within the trio task system, which doesn't survive in
- either direction). CPython's GIL is auto-reset by the
- fork callback.
-
-FYI: how this dodges the `trio.run()` × `fork()` hazards
---------------------------------------------------------
-
-`os.fork()` is famously hostile to `trio` (see
-python-trio/trio#1614 et al.) because trio owns several
-classes of process-global state that all break across the
-fork boundary in different ways. The forkserver-thread
-design dodges each class explicitly:
-
-- **Signal-wakeup-fd**: trio installs a wakeup-fd via
- `signal.set_wakeup_fd()` on `trio.run()` startup so
- signals can interrupt `epoll_wait`. The child inherits
- this fd, but trio's runner that owns it is gone — so
- any signal delivery in the child writes to a dead
- reader. *Dodge*: the inherited wakeup-fd is closed by
- `_close_inherited_fds()`, then the child's own
- `trio.run()` installs a fresh one.
-- **`epoll`/`kqueue` instance**: trio's I/O backend holds
- one. Inherited as a dead fd; same fix as above.
-- **Threadpool cache threads** (`trio.to_thread`): worker
- threads with cached tstate. Don't exist in the child
- (POSIX); cache state is meaningless garbage that gets
- reset when the child's trio.run() initializes its own
- thread cache.
-- **Cancel scopes / nurseries / open `trio.Process` /
- open sockets**: these are trio-runtime objects, not
- kernel objects. The runtime that owns them is gone in
- the child, so the Python objects exist as zombie data
- in COW memory and get overwritten as the child runs.
- Inherited *kernel* fds those objects wrapped (sockets,
- proc pipes) are caught by `_close_inherited_fds()`.
-- **`atexit` handlers**: trio doesn't register any that
- would mis-fire post-fork; trio's lifetime-stack is
- all `with`-block-scoped and dies with the runner.
-- **Foreign-language I/O state** (libcurl, OpenSSL session
- caches, etc.): out of scope — same hazard as any
- fork-without-exec; users layering those on top of
- tractor need their own pthread_atfork handlers.
-
-Net effect: for the runtime surface tractor controls
-(trio + IPC layer + msgspec), the forkserver-thread
-isolation + `_close_inherited_fds()` cleanup gives the
-forked child a clean trio environment. Everything else
-falls under the standard fork-without-exec disclaimer.
-
-Status
-------
-**EXPERIMENTAL** — wired as the `'subint_forkserver'` entry
-in `tractor.spawn._spawn._methods` and selectable via
-`try_set_start_method('subint_forkserver')` / `--spawn-backend
-=subint_forkserver`. Parent-side spawn, child-side runtime
-bring-up and normal portal-RPC teardown are validated by the
-backend-tier test in
-`tests/spawn/test_subint_forkserver.py::test_subint_forkserver_spawn_basic`.
-
-Still-open work (tracked on tractor #379):
-
-- no cancellation / hard-kill stress coverage yet (counterpart
- to `tests/test_subint_cancellation.py` for the plain
- `subint` backend),
-- `child_sigint='trio'` mode (flag scaffolded below; default
- is `'ipc'`). Originally intended as a manual SIGINT →
- trio-cancel bridge, but investigation showed trio's handler
- IS already correctly installed in the fork-child subactor
- — the orphan-SIGINT hang is actually a separate bug where
- trio's event loop stays wedged in `epoll_wait` despite
- delivery. See
- `ai/conc-anal/subint_forkserver_orphan_sigint_hang_issue.md`
- for the full trace + fix directions. Once that root cause
- is fixed, this flag may end up a no-op / doc-only mode.
-- child-side "subint-hosted root runtime" mode (the second
- half of the envisioned arch — currently the forked child
- runs plain `_trio_main` via `spawn_method='trio'`; the
- subint-hosted variant is still the future step gated on
- msgspec PEP 684 support),
-- thread-hygiene audit of the two `threading.Thread`
- primitives below, gated on the same msgspec unblock
- (see TODO section further down).
-
-TODO — cleanup gated on msgspec PEP 684 support
------------------------------------------------
-Both primitives below allocate a dedicated
-`threading.Thread` rather than using
-`trio.to_thread.run_sync()`. That's a cautious design
-rooted in three distinct-but-entangled issues (GIL
-starvation from legacy-config subints, tstate-recycling
-destroy race on trio cache threads, fork-from-main-tstate
-invariant). Some of those dissolve under PEP 684
-isolated-mode subints; one requires empirical re-testing
-to know.
-
-Full analysis + audit plan for when we can revisit is in
-`ai/conc-anal/subint_forkserver_thread_constraints_on_pep684_issue.md`.
-Intent: file a follow-up GH issue linked to #379 once
-[jcrist/msgspec#1026](https://github.com/jcrist/msgspec/issues/1026)
-unblocks isolated-mode subints in tractor.
+- `subint_forkserver_proc()` — the variant-2 spawn-backend
+ coroutine. Same fork machinery as variant 1, but the
+ fork-child enters a fresh subint (via
+ `run_subint_in_worker_thread`) before booting its
+ `trio.run()`. Net effect: child runtime is GIL-isolated
+ from the parent + any sibling actors in the same process.
+- A stub `subint_forkserver_proc` will be added in a follow-up
+ commit that raises `NotImplementedError(...)` pointing at
+ this docstring + jcrist/msgspec#1026 + tractor #379, so
+ `--spawn-backend=subint_forkserver` errors cleanly today
+ rather than silently aliasing variant 1.
See also
--------
-- `tractor.spawn._subint_fork` — the stub for the
- fork-from-subint strategy that DIDN'T work (kept as
- in-tree documentation of the attempt + CPython-level
- block).
-- `ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md`
- — the CPython source walkthrough.
-- `ai/conc-anal/subint_fork_from_main_thread_smoketest.py`
- — the standalone feasibility check (now delegates to
- this module for the primitives it exercises).
+
+- `tractor.spawn._main_thread_forkserver` — variant 1,
+ working today; for the full design rationale, fork-
+ semantics analysis, and trio×fork hazard breakdown.
+- `tractor.spawn._subint` — the in-thread `subint` backend
+ (one process, one actor per subint, no fork).
+- `tractor.spawn._subint_fork` — RFC stub for the
+ fork-from-non-main-subint strategy that is blocked at the
+ CPython level.
+- [#379](https://github.com/goodboy/tractor/issues/379)
+ — subint backend umbrella tracking issue.
+- [jcrist/msgspec#1026](https://github.com/jcrist/msgspec/issues/1026)
+ — upstream blocker for PEP 684 isolated-mode subints.
+- [#450](https://github.com/goodboy/tractor/issues/450) —
+ thread-constraints audit follow-up tied to msgspec#1026.
'''
from __future__ import annotations
@@ -542,254 +308,3 @@ def run_subint_in_worker_thread(
)
if err is not None:
raise err
-
-
-async def subint_forkserver_proc(
- name: str,
- actor_nursery: ActorNursery,
- subactor: Actor,
- errors: dict[tuple[str, str], Exception],
-
- # passed through to actor main
- bind_addrs: list[UnwrappedAddress],
- parent_addr: UnwrappedAddress,
- _runtime_vars: dict[str, Any],
- *,
- infect_asyncio: bool = False,
- task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED,
- proc_kwargs: dict[str, any] = {},
-
-) -> None:
- '''
- Spawn a subactor via `os.fork()` from a non-trio worker
- thread (see `fork_from_worker_thread()`), with the forked
- child running `tractor._child._actor_child_main()` and
- connecting back via tractor's normal IPC handshake.
-
- Supervision model mirrors `trio_proc()` — we manage a
- real OS subprocess, so `Portal.cancel_actor()` +
- `soft_kill()` on graceful teardown and `os.kill(SIGKILL)`
- on hard-reap both apply directly (no
- `_interpreters.destroy()` voodoo needed since the child
- is in its own process).
-
- The only real difference from `trio_proc` is the spawn
- mechanism: fork from a known-clean main-interp worker
- thread instead of `trio.lowlevel.open_process()`.
-
- '''
- if not _has_subints:
- raise RuntimeError(
- f'The {"subint_forkserver"!r} spawn backend '
- f'requires Python 3.14+.\n'
- f'Current runtime: {sys.version}'
- )
-
- # Backend-scoped config pulled from `proc_kwargs`. Using
- # `proc_kwargs` (vs a first-class kwarg on this function)
- # matches how other backends expose per-spawn tuning
- # (`trio_proc` threads it to `trio.lowlevel.open_process`,
- # etc.) and keeps `ActorNursery.start_actor(proc_kwargs=...)`
- # as the single ergonomic entry point.
- child_sigint: ChildSigintMode = proc_kwargs.get(
- 'child_sigint',
- _DEFAULT_CHILD_SIGINT,
- )
- if child_sigint not in ('ipc', 'trio'):
- raise ValueError(
- f'Invalid `child_sigint={child_sigint!r}` for '
- f'`subint_forkserver` backend.\n'
- f'Expected one of: {ChildSigintMode}.'
- )
- if child_sigint == 'trio':
- raise NotImplementedError(
- "`child_sigint='trio'` mode — trio-native SIGINT "
- "plumbing in the fork-child — is scaffolded but "
- "not yet implemented. See the xfail'd "
- "`test_orphaned_subactor_sigint_cleanup_DRAFT` "
- "and the TODO in this module's docstring."
- )
-
- uid: tuple[str, str] = subactor.aid.uid
- loglevel: str | None = subactor.loglevel
-
- # Closure captured into the fork-child's memory image.
- # In the child this is the first post-fork Python code to
- # run, on what was the fork-worker thread in the parent.
- # `child_sigint` is captured here so the impl lands inside
- # this function once the `'trio'` mode is wired up —
- # nothing above this comment needs to change.
- def _child_target() -> int:
- # Dispatch on the captured SIGINT-mode closure var.
- # Today only `'ipc'` is reachable (the `'trio'` branch
- # is fenced off at the backend-entry guard above); the
- # match is in place so the future `'trio'` impl slots
- # in as a plain case arm without restructuring.
- match child_sigint:
- case 'ipc':
- pass # <- current behavior: no child-side
- # SIGINT plumbing; rely on parent
- # `Portal.cancel_actor()` IPC path.
- case 'trio':
- # Unreachable today (see entry-guard above);
- # this stub exists so that lifting the guard
- # is the only change required to enable
- # `'trio'` mode once the SIGINT wakeup-fd
- # bridge is implemented.
- raise NotImplementedError(
- "`child_sigint='trio'` fork-prelude "
- "plumbing not yet wired."
- )
- # Lazy import so the parent doesn't pay for it on
- # every spawn — it's module-level in `_child` but
- # cheap enough to re-resolve here.
- from tractor._child import _actor_child_main
- # XXX, `os.fork()` inherits the parent's entire memory
- # image, including `tractor.runtime._state._runtime_vars`
- # (which in the parent encodes "this process IS the root
- # actor"). A fresh `exec`-based child starts cold; we
- # replicate that here by explicitly resetting runtime
- # vars to their fresh-process defaults — otherwise
- # `Actor.__init__` takes the `is_root_process() == True`
- # branch, pre-populates `self.enable_modules`, and trips
- # the `assert not self.enable_modules` gate at the top
- # of `Actor._from_parent()` on the subsequent parent→
- # child `SpawnSpec` handshake. (`_state._current_actor`
- # is unconditionally overwritten by `_trio_main` → no
- # reset needed for it.)
- from tractor.runtime._state import (
- get_runtime_vars,
- set_runtime_vars,
- )
- set_runtime_vars(get_runtime_vars(clear_values=True))
- # If stdout/stderr point at a PIPE (not a TTY or
- # regular file), we're almost certainly running under
- # pytest's default `--capture=fd` or some other
- # capturing harness. Under high-volume subactor error-
- # log output (e.g. the cancel cascade spew in nested
- # `run_in_actor` failures) the Linux 64KB pipe buffer
- # fills faster than the reader drains → child `write()`
- # blocks → child can't finish teardown → parent's
- # `_ForkedProc.wait` blocks → cascade deadlock.
- # Sever inheritance by redirecting fds 1,2 to
- # `/dev/null` in that specific case. TTY/file stdio
- # is preserved so interactive runs still see subactor
- # output. See `.claude/skills/run-tests/SKILL.md`
- # section 9 and
- # `ai/conc-anal/subint_forkserver_test_cancellation_leak_issue.md`
- # for the post-mortem.
- _actor_child_main(
- uid=uid,
- loglevel=loglevel,
- parent_addr=parent_addr,
- infect_asyncio=infect_asyncio,
- # The child's runtime is trio-native (uses
- # `_trio_main` + receives `SpawnSpec` over IPC),
- # but label it with the actual parent-side spawn
- # mechanism so `Actor.pformat()` / log lines
- # reflect reality. Downstream runtime gates that
- # key on `_spawn_method` group `subint_forkserver`
- # alongside `trio`/`subint` where the SpawnSpec
- # IPC handshake is concerned — see
- # `runtime._runtime.Actor._from_parent()`.
- spawn_method='subint_forkserver',
- )
- return 0
-
- cancelled_during_spawn: bool = False
- proc: _ForkedProc | None = None
- ipc_server: _server.Server = actor_nursery._actor.ipc_server
-
- try:
- try:
- pid: int = await trio.to_thread.run_sync(
- partial(
- fork_from_worker_thread,
- _child_target,
- thread_name=(
- f'subint-forkserver[{name}]'
- ),
- ),
- abandon_on_cancel=False,
- )
- proc = _ForkedProc(pid)
- log.runtime(
- f'Forked subactor via forkserver\n'
- f'(>\n'
- f' |_{proc}\n'
- )
-
- event, chan = await ipc_server.wait_for_peer(uid)
-
- except trio.Cancelled:
- cancelled_during_spawn = True
- raise
-
- assert proc is not None
-
- portal = Portal(chan)
- actor_nursery._children[uid] = (
- subactor,
- proc,
- portal,
- )
-
- sspec = msgtypes.SpawnSpec(
- _parent_main_data=subactor._parent_main_data,
- enable_modules=subactor.enable_modules,
- reg_addrs=subactor.reg_addrs,
- bind_addrs=bind_addrs,
- _runtime_vars=_runtime_vars,
- )
- log.runtime(
- f'Sending spawn spec to forkserver child\n'
- f'{{}}=> {chan.aid.reprol()!r}\n'
- f'\n'
- f'{pretty_struct.pformat(sspec)}\n'
- )
- await chan.send(sspec)
-
- curr_actor: Actor = current_actor()
- curr_actor._actoruid2nursery[uid] = actor_nursery
-
- task_status.started(portal)
-
- with trio.CancelScope(shield=True):
- await actor_nursery._join_procs.wait()
-
- async with trio.open_nursery() as nursery:
- if portal in actor_nursery._cancel_after_result_on_exit:
- nursery.start_soon(
- cancel_on_completion,
- portal,
- subactor,
- errors,
- )
-
- # reuse `trio_proc`'s soft-kill dance — `proc`
- # is our `_ForkedProc` shim which implements the
- # same `.poll()` / `.wait()` / `.kill()` surface
- # `soft_kill` expects.
- await soft_kill(
- proc,
- _ForkedProc.wait,
- portal,
- )
- nursery.cancel_scope.cancel()
-
- finally:
- # Hard reap: SIGKILL + waitpid. Cheap since we have
- # the real OS pid, unlike `subint_proc` which has to
- # fuss with `_interpreters.destroy()` races.
- if proc is not None and proc.poll() is None:
- log.cancel(
- f'Hard killing forkserver subactor\n'
- f'>x)\n'
- f' |_{proc}\n'
- )
- with trio.CancelScope(shield=True):
- proc.kill()
- await proc.wait()
-
- if not cancelled_during_spawn:
- actor_nursery._children.pop(uid, None)