Fix `SIGUSR1` tree-dump ordering in `_stackscope`
Factor the sub-actor relay loop out of `dump_tree_on_sig()` into `_relay_sig_to_subactors()` and chain both dump + relay in a single `run_sync_soon` callback (`_dump_then_relay`) so the parent's task-tree flushes BEFORE any sub receives the signal — fixes a hierarchical-ordering race where subs could dump ahead of the parent in the muxed pty stream. Also, - gate file/tty sink writes behind `write_file` + `write_tty` params on `dump_task_tree()`. - use `actor.aid.uid` instead of deprecated `.uid`. - update `test_shield_pause` expects to match the new sequential parent -> relay-log -> sub ordering. (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-codesubint_forkserver_backend
parent
61d4525137
commit
e2b790a70d
|
|
@ -49,9 +49,11 @@ async def main(
|
||||||
tractor.open_nursery(
|
tractor.open_nursery(
|
||||||
debug_mode=True,
|
debug_mode=True,
|
||||||
enable_stack_on_sig=True,
|
enable_stack_on_sig=True,
|
||||||
# maybe_enable_greenback=False,
|
loglevel='devx', # XXX REQUIRED log level!
|
||||||
loglevel='devx',
|
|
||||||
enable_transports=[tpt],
|
enable_transports=[tpt],
|
||||||
|
# maybe_enable_greenback=True,
|
||||||
|
# ^TODO? maybe a "smarter" way todo all this is how
|
||||||
|
# `modden` does with a rtv serialized through the osenv?
|
||||||
) as an,
|
) as an,
|
||||||
):
|
):
|
||||||
ptl: tractor.Portal = await an.start_actor(
|
ptl: tractor.Portal = await an.start_actor(
|
||||||
|
|
@ -63,7 +65,9 @@ async def main(
|
||||||
start_n_shield_hang,
|
start_n_shield_hang,
|
||||||
) as (ctx, cpid):
|
) as (ctx, cpid):
|
||||||
|
|
||||||
_, proc, _ = an._children[ptl.chan.uid]
|
_, proc, _ = an._children[
|
||||||
|
ptl.chan.aid.uid
|
||||||
|
]
|
||||||
assert cpid == proc.pid
|
assert cpid == proc.pid
|
||||||
|
|
||||||
print(
|
print(
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ import os
|
||||||
import signal
|
import signal
|
||||||
import time
|
import time
|
||||||
from typing import (
|
from typing import (
|
||||||
|
Callable,
|
||||||
TYPE_CHECKING,
|
TYPE_CHECKING,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -47,7 +48,10 @@ if TYPE_CHECKING:
|
||||||
|
|
||||||
@no_macos
|
@no_macos
|
||||||
def test_shield_pause(
|
def test_shield_pause(
|
||||||
spawn: PexpectSpawner,
|
spawn: Callable[
|
||||||
|
...,
|
||||||
|
PexpectSpawner,
|
||||||
|
],
|
||||||
):
|
):
|
||||||
'''
|
'''
|
||||||
Verify the `tractor.pause()/.post_mortem()` API works inside an
|
Verify the `tractor.pause()/.post_mortem()` API works inside an
|
||||||
|
|
@ -55,8 +59,10 @@ def test_shield_pause(
|
||||||
next checkpoint wherein the cancelled will get raised.
|
next checkpoint wherein the cancelled will get raised.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
child = spawn(
|
child: PexpectSpawner = spawn(
|
||||||
'shield_hang_in_sub'
|
'shield_hang_in_sub',
|
||||||
|
loglevel='devx',
|
||||||
|
# ^XXX REQUIRED for below patt matching!
|
||||||
)
|
)
|
||||||
expect(
|
expect(
|
||||||
child,
|
child,
|
||||||
|
|
@ -86,38 +92,62 @@ def test_shield_pause(
|
||||||
# end-of-tree delimiter
|
# end-of-tree delimiter
|
||||||
"end-of-\('root'",
|
"end-of-\('root'",
|
||||||
)
|
)
|
||||||
assert_before(
|
_before: str = assert_before(
|
||||||
child,
|
child,
|
||||||
[
|
[
|
||||||
# 'Srying to dump `stackscope` tree..',
|
# 'Srying to dump `stackscope` tree..',
|
||||||
# 'Dumping `stackscope` tree for actor',
|
# 'Dumping `stackscope` tree for actor',
|
||||||
"('root'", # uid line
|
"('root'", # uid line
|
||||||
|
|
||||||
# TODO!? this used to show?
|
# TODO!? this in-task-code used to show??
|
||||||
# -[ ] mk reproducable for @oremanj?
|
# -[ ] mk reproducable for @oremanj?
|
||||||
|
# => SOLVED? by our `trio_token.run_sync_soon()`
|
||||||
|
# approach?
|
||||||
#
|
#
|
||||||
# parent block point (non-shielded)
|
# parent block point (non-shielded)
|
||||||
# 'await trio.sleep_forever() # in root',
|
# 'await trio.sleep_forever() # in root',
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
expect(
|
|
||||||
child,
|
|
||||||
# end-of-tree delimiter
|
|
||||||
"end-of-\('hanger'",
|
|
||||||
)
|
|
||||||
assert_before(
|
|
||||||
child,
|
|
||||||
[
|
|
||||||
# relay to the sub should be reported
|
|
||||||
'Relaying `SIGUSR1`[10] to sub-actor',
|
|
||||||
|
|
||||||
"('hanger'", # uid line
|
# NOTE, hierarchical-ordering invariant restored by
|
||||||
|
# `_dump_then_relay` (co-scheduled dump+relay on the
|
||||||
|
# trio loop, see `tractor.devx._stackscope`): the
|
||||||
|
# parent's full task-tree prints BEFORE the 'Relaying
|
||||||
|
# `SIGUSR1`' log msg, which prints BEFORE any sub-
|
||||||
|
# actor receives the signal and dumps its own tree.
|
||||||
|
# So the relay log appears BETWEEN `end-of-('root'`
|
||||||
|
# (above) and `end-of-('hanger'` (below).
|
||||||
|
handle_out_of_order: bool = False
|
||||||
|
|
||||||
|
if (
|
||||||
|
handle_out_of_order
|
||||||
|
and
|
||||||
|
"end-of-('hanger'" in _before
|
||||||
|
):
|
||||||
|
assert "('hanger'" in _before
|
||||||
|
assert 'Relaying `SIGUSR1`[10] to sub-actor' in _before
|
||||||
|
|
||||||
|
else:
|
||||||
|
expect(
|
||||||
|
child,
|
||||||
|
'Relaying `SIGUSR1`\\[10\\] to sub-actor',
|
||||||
|
)
|
||||||
|
expect(
|
||||||
|
child,
|
||||||
|
# end-of-subactor's-tree delimiter
|
||||||
|
"end-of-\('hanger'",
|
||||||
|
)
|
||||||
|
_before: str = assert_before(
|
||||||
|
child,
|
||||||
|
[
|
||||||
|
"('hanger'", # uid line
|
||||||
|
|
||||||
|
# TODO!? SEE ABOVE
|
||||||
|
# hanger LOC where it's shield-halted
|
||||||
|
# 'await trio.sleep_forever() # in subactor',
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
# TODO!? SEE ABOVE
|
|
||||||
# hanger LOC where it's shield-halted
|
|
||||||
# 'await trio.sleep_forever() # in subactor',
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# simulate the user sending a ctl-c to the hanging program.
|
# simulate the user sending a ctl-c to the hanging program.
|
||||||
# this should result in the terminator kicking in since
|
# this should result in the terminator kicking in since
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,7 @@ disjoint, parallel executing tasks in separate actors.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
# from functools import partial
|
from functools import partial
|
||||||
from threading import (
|
from threading import (
|
||||||
current_thread,
|
current_thread,
|
||||||
Thread,
|
Thread,
|
||||||
|
|
@ -63,7 +63,10 @@ if TYPE_CHECKING:
|
||||||
|
|
||||||
|
|
||||||
@trio.lowlevel.disable_ki_protection
|
@trio.lowlevel.disable_ki_protection
|
||||||
def dump_task_tree() -> None:
|
def dump_task_tree(
|
||||||
|
write_file: bool = False,
|
||||||
|
write_tty: bool = False,
|
||||||
|
) -> None:
|
||||||
'''
|
'''
|
||||||
Do a classic `stackscope.extract()` task-tree dump to console at
|
Do a classic `stackscope.extract()` task-tree dump to console at
|
||||||
`.devx()` level.
|
`.devx()` level.
|
||||||
|
|
@ -121,7 +124,7 @@ def dump_task_tree() -> None:
|
||||||
)
|
)
|
||||||
full_dump: str = (
|
full_dump: str = (
|
||||||
f'Dumping `stackscope` tree for actor\n'
|
f'Dumping `stackscope` tree for actor\n'
|
||||||
f'(>: {actor.uid!r}\n'
|
f'(>: {actor.aid.uid!r}\n'
|
||||||
f' |_{mp.current_process()}\n'
|
f' |_{mp.current_process()}\n'
|
||||||
f' |_{thr}\n'
|
f' |_{thr}\n'
|
||||||
# TODO, use the nest_from_op
|
# TODO, use the nest_from_op
|
||||||
|
|
@ -134,11 +137,11 @@ def dump_task_tree() -> None:
|
||||||
f'capture-bypass tee: {fpath}\n'
|
f'capture-bypass tee: {fpath}\n'
|
||||||
f'(`tail -f {fpath}` to follow across signals)\n'
|
f'(`tail -f {fpath}` to follow across signals)\n'
|
||||||
f'\n'
|
f'\n'
|
||||||
f'------ start-of-{actor.uid!r} ------\n'
|
f'------ start-of-{actor.aid.uid!r} ------\n'
|
||||||
f'|\n'
|
f'|\n'
|
||||||
f'{tree_str}'
|
f'{tree_str}'
|
||||||
f'|\n'
|
f'|\n'
|
||||||
f'|_____ end-of-{actor.uid!r} ______\n'
|
f'|_____ end-of-{actor.aid.uid!r} ______\n'
|
||||||
)
|
)
|
||||||
log.devx(full_dump)
|
log.devx(full_dump)
|
||||||
|
|
||||||
|
|
@ -146,28 +149,30 @@ def dump_task_tree() -> None:
|
||||||
# `--capture=fd` swallows `log.devx()` above; the
|
# `--capture=fd` swallows `log.devx()` above; the
|
||||||
# following two writes guarantee the dump reaches the
|
# following two writes guarantee the dump reaches the
|
||||||
# human even when stdio is captured.
|
# human even when stdio is captured.
|
||||||
try:
|
if write_file:
|
||||||
with open(fpath, 'a') as f:
|
try:
|
||||||
f.write(full_dump + '\n')
|
with open(fpath, 'a') as f:
|
||||||
except OSError:
|
f.write(full_dump + '\n')
|
||||||
log.exception(
|
except OSError:
|
||||||
f'Failed to tee stackscope dump to {fpath!r}'
|
log.exception(
|
||||||
)
|
f'Failed to tee stackscope dump to {fpath!r}'
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
if write_tty:
|
||||||
with open('/dev/tty', 'w') as tty:
|
try:
|
||||||
tty.write(full_dump + '\n')
|
with open('/dev/tty', 'w') as tty:
|
||||||
except OSError:
|
tty.write(full_dump + '\n')
|
||||||
# no controlling tty (CI / nohup / detached) —
|
except OSError:
|
||||||
# silently fall through; the file sink covers it.
|
# no controlling tty (CI / nohup / detached) —
|
||||||
pass
|
# silently fall through; the file sink covers it.
|
||||||
|
pass
|
||||||
|
|
||||||
_handler_lock = RLock()
|
_handler_lock = RLock()
|
||||||
_tree_dumped: bool = False
|
_tree_dumped: bool = False
|
||||||
|
|
||||||
# Captured at `enable_stack_on_sig()` time when running
|
# Captured at `enable_stack_on_sig()` time when running
|
||||||
# inside a trio task. `dump_tree_on_sig` uses this to
|
# inside a trio task. `dump_tree_on_sig` uses this to
|
||||||
# schedule `dump_task_tree` ON the trio loop via
|
# schedule `dump_task_tree()` ON the trio loop via
|
||||||
# `token.run_sync_soon` so stackscope sees a real current
|
# `token.run_sync_soon` so stackscope sees a real current
|
||||||
# task and can recurse into nursery children. Without
|
# task and can recurse into nursery children. Without
|
||||||
# it (signal handler running in a non-trio stack frame),
|
# it (signal handler running in a non-trio stack frame),
|
||||||
|
|
@ -176,13 +181,70 @@ _tree_dumped: bool = False
|
||||||
_trio_token: trio.lowlevel.TrioToken|None = None
|
_trio_token: trio.lowlevel.TrioToken|None = None
|
||||||
|
|
||||||
|
|
||||||
def _safe_dump_task_tree() -> None:
|
def _relay_sig_to_subactors(sig: int) -> None:
|
||||||
'''
|
'''
|
||||||
`run_sync_soon`-friendly wrapper that swallows any
|
Forward `sig` to every live sub-actor's underlying
|
||||||
exception from `dump_task_tree`. Trio prints
|
process so each runs its own `dump_tree_on_sig`
|
||||||
+ crashes on uncaught exceptions in scheduled
|
handler.
|
||||||
callbacks; we'd rather log + keep the test running so
|
|
||||||
the user can re-trigger the dump.
|
Factored out of `dump_tree_on_sig` so the
|
||||||
|
`run_sync_soon`-deferred path can call it AFTER
|
||||||
|
the parent's `dump_task_tree()` completes — see
|
||||||
|
`_dump_then_relay` below for why ordering matters.
|
||||||
|
|
||||||
|
'''
|
||||||
|
an: ActorNursery
|
||||||
|
for an in _state.current_actor()._actoruid2nursery.values():
|
||||||
|
subproc: ProcessType
|
||||||
|
subactor: Actor
|
||||||
|
for (
|
||||||
|
subactor,
|
||||||
|
subproc,
|
||||||
|
_,
|
||||||
|
) in an._children.values():
|
||||||
|
log.warning(
|
||||||
|
f'Relaying `SIGUSR1`[{sig}] to sub-actor\n'
|
||||||
|
f'{subactor}\n'
|
||||||
|
f' |_{subproc}\n'
|
||||||
|
)
|
||||||
|
# bc of course stdlib can't have a std API.. XD
|
||||||
|
match subproc:
|
||||||
|
case trio.Process():
|
||||||
|
subproc.send_signal(sig)
|
||||||
|
|
||||||
|
case mp.Process():
|
||||||
|
subproc._send_signal(sig)
|
||||||
|
|
||||||
|
|
||||||
|
def _dump_then_relay(
|
||||||
|
sig: int|None,
|
||||||
|
) -> None:
|
||||||
|
'''
|
||||||
|
`run_sync_soon`-friendly callback: dump THIS actor's
|
||||||
|
task tree first, THEN relay `sig` to subactors so
|
||||||
|
their dumps can't race ahead of ours.
|
||||||
|
|
||||||
|
Hierarchical-ordering preservation: the legacy
|
||||||
|
direct-call path (pre-`run_sync_soon`) ran the dump
|
||||||
|
synchronously inside the signal handler, then
|
||||||
|
relayed — guaranteeing parent-output-before-child
|
||||||
|
in the multiplexed pty stream. The pure-deferred
|
||||||
|
path (schedule dump only, relay sync from handler)
|
||||||
|
inverts that: relay fires while the parent's
|
||||||
|
dump is still queued, subs receive SIGUSR1 and
|
||||||
|
schedule their own dumps, all dumps then race in
|
||||||
|
arbitrary order through stdio.
|
||||||
|
|
||||||
|
Co-scheduling fixes that: by chaining relay AFTER
|
||||||
|
`dump_task_tree()` inside the same trio-loop
|
||||||
|
callback, parent output flushes before any sub
|
||||||
|
receives the signal, restoring the
|
||||||
|
parent → relay-log → sub-dump ordering humans
|
||||||
|
expect when reading hang-investigation traces.
|
||||||
|
|
||||||
|
Trio prints + crashes on uncaught exceptions in
|
||||||
|
scheduled callbacks; we swallow + log so the test
|
||||||
|
keeps running and the user can re-trigger.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
try:
|
try:
|
||||||
|
|
@ -193,6 +255,17 @@ def _safe_dump_task_tree() -> None:
|
||||||
'`run_sync_soon`); continuing.\n'
|
'`run_sync_soon`); continuing.\n'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if sig is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
try:
|
||||||
|
_relay_sig_to_subactors(sig)
|
||||||
|
except BaseException:
|
||||||
|
log.exception(
|
||||||
|
f'`_relay_sig_to_subactors({sig})` raised '
|
||||||
|
f'(scheduled via `run_sync_soon`); continuing.\n'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def dump_tree_on_sig(
|
def dump_tree_on_sig(
|
||||||
sig: int,
|
sig: int,
|
||||||
|
|
@ -223,8 +296,23 @@ def dump_tree_on_sig(
|
||||||
# only the `<init>` task. Falls back to a direct
|
# only the `<init>` task. Falls back to a direct
|
||||||
# call when no token was captured (e.g. signal
|
# call when no token was captured (e.g. signal
|
||||||
# delivered outside a trio.run).
|
# delivered outside a trio.run).
|
||||||
|
#
|
||||||
|
# Co-schedule the relay-to-subs in the SAME
|
||||||
|
# callback so parent's dump prints BEFORE any
|
||||||
|
# sub receives SIGUSR1 — see `_dump_then_relay`
|
||||||
|
# for the full hierarchical-ordering rationale.
|
||||||
if _trio_token is not None:
|
if _trio_token is not None:
|
||||||
_trio_token.run_sync_soon(_safe_dump_task_tree)
|
_trio_token.run_sync_soon(
|
||||||
|
partial(
|
||||||
|
_dump_then_relay,
|
||||||
|
sig=sig if relay_to_subs else None,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
# NOTE, `_dump_then_relay` handles the relay
|
||||||
|
# internally; bail out before the
|
||||||
|
# direct-path relay below.
|
||||||
|
return
|
||||||
|
|
||||||
else:
|
else:
|
||||||
dump_task_tree()
|
dump_task_tree()
|
||||||
|
|
||||||
|
|
@ -246,27 +334,15 @@ def dump_tree_on_sig(
|
||||||
# 'Supposedly we dumped just fine..?'
|
# 'Supposedly we dumped just fine..?'
|
||||||
# )
|
# )
|
||||||
|
|
||||||
|
# Direct-path relay (only reached when `_trio_token`
|
||||||
|
# was None — the run_sync_soon path returned above
|
||||||
|
# to let `_dump_then_relay` handle the relay
|
||||||
|
# in-callback).
|
||||||
if not relay_to_subs:
|
if not relay_to_subs:
|
||||||
|
log.devx(f'Skipping {sig!r} relay to subactors..')
|
||||||
return
|
return
|
||||||
|
|
||||||
an: ActorNursery
|
_relay_sig_to_subactors(sig)
|
||||||
for an in _state.current_actor()._actoruid2nursery.values():
|
|
||||||
subproc: ProcessType
|
|
||||||
subactor: Actor
|
|
||||||
for subactor, subproc, _ in an._children.values():
|
|
||||||
log.warning(
|
|
||||||
f'Relaying `SIGUSR1`[{sig}] to sub-actor\n'
|
|
||||||
f'{subactor}\n'
|
|
||||||
f' |_{subproc}\n'
|
|
||||||
)
|
|
||||||
|
|
||||||
# bc of course stdlib can't have a std API.. XD
|
|
||||||
match subproc:
|
|
||||||
case trio.Process():
|
|
||||||
subproc.send_signal(sig)
|
|
||||||
|
|
||||||
case mp.Process():
|
|
||||||
subproc._send_signal(sig)
|
|
||||||
|
|
||||||
|
|
||||||
def enable_stack_on_sig(
|
def enable_stack_on_sig(
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue