Officially test proto-ed `stackscope` integration

By re-purposing our `pexpect`-based console matching with a new
`debugging/shield_hang_in_sub.py` example, this tests a few "hanging
actor" conditions more formally:

- that despite a hanging actor's task we can dump
  a `stackscope.extract()` tree on relay of `SIGUSR1`.
- the actor tree will terminate despite a shielded forever-sleep by our
  "T-800" zombie reaper machinery activating and hard killing the
  underlying subprocess.

Some test deats:
- simulates the expect actions of a real user by manually using
  `os.kill()` to send both signals to the actor-tree program.
- `pexpect`-matches against `log.devx()` emissions under normal
  `debug_mode == True` usage.
- ensure we get the actual "T-800 deployed" `log.error()` msg and
  that the actor tree eventually terminates!

Surrounding (re-org/impl/test-suite) changes:
- allow disabling usage via a `maybe_enable_greenback: bool` to
  `open_root_actor()` but enable by def.
- pretty up the actual `.devx()` content from `.devx._stackscope`
  including be extra pedantic about the conc-primitives for each signal
  event.
- try to avoid double handles of `SIGUSR1` even though it seems the
  original (what i thought was a) problem was actually just double
  logging in the handler..
  |_ avoid double applying the handler func via `signal.signal()`,
  |_ use a global to avoid double handle func calls and,
  |_ a `threading.RLock` around handling.
- move common fixtures and helper routines from `test_debugger` to
  `tests/devx/conftest.py` and import them for use in both test mods.
aio_abandons
Tyler Goodlet 2024-07-10 18:17:42 -04:00
parent d216068713
commit a628eabb30
7 changed files with 523 additions and 203 deletions

View File

@ -0,0 +1,81 @@
'''
Verify we can dump a `stackscope` tree on a hang.
'''
import os
import signal
import trio
import tractor
@tractor.context
async def start_n_shield_hang(
ctx: tractor.Context,
):
# actor: tractor.Actor = tractor.current_actor()
# sync to parent-side task
await ctx.started(os.getpid())
print('Entering shield sleep..')
with trio.CancelScope(shield=True):
await trio.sleep_forever() # in subactor
# XXX NOTE ^^^ since this shields, we expect
# the zombie reaper (aka T800) to engage on
# SIGINT from the user and eventually hard-kill
# this subprocess!
async def main(
from_test: bool = False,
) -> None:
async with (
tractor.open_nursery(
debug_mode=True,
enable_stack_on_sig=True,
# maybe_enable_greenback=False,
loglevel='devx',
) as an,
):
ptl: tractor.Portal = await an.start_actor(
'hanger',
enable_modules=[__name__],
debug_mode=True,
)
async with ptl.open_context(
start_n_shield_hang,
) as (ctx, cpid):
_, proc, _ = an._children[ptl.chan.uid]
assert cpid == proc.pid
print(
'Yo my child hanging..?\n'
'Sending SIGUSR1 to see a tree-trace!\n'
)
# XXX simulate the wrapping test's "user actions"
# (i.e. if a human didn't run this manually but wants to
# know what they should do to reproduce test behaviour)
if from_test:
os.kill(
cpid,
signal.SIGUSR1,
)
# simulate user cancelling program
await trio.sleep(0.5)
os.kill(
os.getpid(),
signal.SIGINT,
)
else:
# actually let user send the ctl-c
await trio.sleep_forever() # in root
if __name__ == '__main__':
trio.run(main)

View File

View File

@ -0,0 +1,167 @@
'''
`tractor.devx.*` tooling sub-pkg test space.
'''
from typing import (
Callable,
)
import pytest
from pexpect.exceptions import (
TIMEOUT,
)
from tractor._testing import (
mk_cmd,
)
@pytest.fixture
def spawn(
start_method,
testdir: pytest.Testdir,
reg_addr: tuple[str, int],
) -> Callable[[str], None]:
'''
Use the `pexpect` module shipped via `testdir.spawn()` to
run an `./examples/..` script by name.
'''
if start_method != 'trio':
pytest.skip(
'`pexpect` based tests only supported on `trio` backend'
)
def _spawn(
cmd: str,
**mkcmd_kwargs,
):
return testdir.spawn(
cmd=mk_cmd(
cmd,
**mkcmd_kwargs,
),
expect_timeout=3,
)
# such that test-dep can pass input script name.
return _spawn
@pytest.fixture(
params=[False, True],
ids='ctl-c={}'.format,
)
def ctlc(
request,
ci_env: bool,
) -> bool:
use_ctlc = request.param
node = request.node
markers = node.own_markers
for mark in markers:
if mark.name == 'has_nested_actors':
pytest.skip(
f'Test {node} has nested actors and fails with Ctrl-C.\n'
f'The test can sometimes run fine locally but until'
' we solve' 'this issue this CI test will be xfail:\n'
'https://github.com/goodboy/tractor/issues/320'
)
if use_ctlc:
# XXX: disable pygments highlighting for auto-tests
# since some envs (like actions CI) will struggle
# the the added color-char encoding..
from tractor.devx._debug import TractorConfig
TractorConfig.use_pygements = False
yield use_ctlc
def expect(
child,
# normally a `pdb` prompt by default
patt: str,
**kwargs,
) -> None:
'''
Expect wrapper that prints last seen console
data before failing.
'''
try:
child.expect(
patt,
**kwargs,
)
except TIMEOUT:
before = str(child.before.decode())
print(before)
raise
def in_prompt_msg(
prompt: str,
parts: list[str],
pause_on_false: bool = False,
err_on_false: bool = False,
print_prompt_on_false: bool = True,
) -> bool:
'''
Predicate check if (the prompt's) std-streams output has all
`str`-parts in it.
Can be used in test asserts for bulk matching expected
log/REPL output for a given `pdb` interact point.
'''
__tracebackhide__: bool = False
for part in parts:
if part not in prompt:
if pause_on_false:
import pdbp
pdbp.set_trace()
if print_prompt_on_false:
print(prompt)
if err_on_false:
raise ValueError(
f'Could not find pattern: {part!r} in `before` output?'
)
return False
return True
# TODO: todo support terminal color-chars stripping so we can match
# against call stack frame output from the the 'll' command the like!
# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789
def assert_before(
child,
patts: list[str],
**kwargs,
) -> None:
__tracebackhide__: bool = False
# as in before the prompt end
before: str = str(child.before.decode())
assert in_prompt_msg(
prompt=before,
parts=patts,
# since this is an "assert" helper ;)
err_on_false=True,
**kwargs
)

View File

@ -16,7 +16,6 @@ import platform
import time
import pytest
import pexpect
from pexpect.exceptions import (
TIMEOUT,
EOF,
@ -27,12 +26,14 @@ from tractor.devx._debug import (
_crash_msg,
_repl_fail_msg,
)
from tractor._testing import (
mk_cmd,
)
from conftest import (
_ci_env,
)
from .conftest import (
expect,
in_prompt_msg,
assert_before,
)
# TODO: The next great debugger audit could be done by you!
# - recurrent entry to breakpoint() from single actor *after* and an
@ -69,154 +70,9 @@ has_nested_actors = pytest.mark.has_nested_actors
# )
@pytest.fixture
def spawn(
start_method,
testdir,
reg_addr,
) -> 'pexpect.spawn':
'''
Use the `pexpect` module shipped via `testdir.spawn()` to
run an `./examples/..` script by name.
'''
if start_method != 'trio':
pytest.skip(
'`pexpect` based tests only supported on `trio` backend'
)
def _spawn(
cmd: str,
**mkcmd_kwargs,
):
return testdir.spawn(
cmd=mk_cmd(
cmd,
**mkcmd_kwargs,
),
expect_timeout=3,
)
# such that test-dep can pass input script name.
return _spawn
PROMPT = r"\(Pdb\+\)"
def expect(
child,
# prompt by default
patt: str = PROMPT,
**kwargs,
) -> None:
'''
Expect wrapper that prints last seen console
data before failing.
'''
try:
child.expect(
patt,
**kwargs,
)
except TIMEOUT:
before = str(child.before.decode())
print(before)
raise
def in_prompt_msg(
prompt: str,
parts: list[str],
pause_on_false: bool = False,
print_prompt_on_false: bool = True,
) -> bool:
'''
Predicate check if (the prompt's) std-streams output has all
`str`-parts in it.
Can be used in test asserts for bulk matching expected
log/REPL output for a given `pdb` interact point.
'''
__tracebackhide__: bool = False
for part in parts:
if part not in prompt:
if pause_on_false:
import pdbp
pdbp.set_trace()
if print_prompt_on_false:
print(prompt)
return False
return True
# TODO: todo support terminal color-chars stripping so we can match
# against call stack frame output from the the 'll' command the like!
# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789
def assert_before(
child,
patts: list[str],
**kwargs,
) -> None:
__tracebackhide__: bool = False
# as in before the prompt end
before: str = str(child.before.decode())
assert in_prompt_msg(
prompt=before,
parts=patts,
**kwargs
)
@pytest.fixture(
params=[False, True],
ids='ctl-c={}'.format,
)
def ctlc(
request,
ci_env: bool,
) -> bool:
use_ctlc = request.param
node = request.node
markers = node.own_markers
for mark in markers:
if mark.name == 'has_nested_actors':
pytest.skip(
f'Test {node} has nested actors and fails with Ctrl-C.\n'
f'The test can sometimes run fine locally but until'
' we solve' 'this issue this CI test will be xfail:\n'
'https://github.com/goodboy/tractor/issues/320'
)
if use_ctlc:
# XXX: disable pygments highlighting for auto-tests
# since some envs (like actions CI) will struggle
# the the added color-char encoding..
from tractor.devx._debug import TractorConfig
TractorConfig.use_pygements = False
yield use_ctlc
@pytest.mark.parametrize(
'user_in_out',
[
@ -281,7 +137,7 @@ def test_root_actor_bp(spawn, user_in_out):
child.expect('\r\n')
# process should exit
child.expect(pexpect.EOF)
child.expect(EOF)
if expect_err_str is None:
assert 'Error' not in str(child.before)
@ -365,7 +221,7 @@ def test_root_actor_bp_forever(
# quit out of the loop
child.sendline('q')
child.expect(pexpect.EOF)
child.expect(EOF)
@pytest.mark.parametrize(
@ -430,7 +286,7 @@ def test_subactor_error(
child.expect('\r\n')
# process should exit
child.expect(pexpect.EOF)
child.expect(EOF)
def test_subactor_breakpoint(
@ -493,7 +349,7 @@ def test_subactor_breakpoint(
child.sendline('c')
# process should exit
child.expect(pexpect.EOF)
child.expect(EOF)
before = str(child.before.decode())
assert in_prompt_msg(
@ -636,7 +492,7 @@ def test_multi_subactors(
# process should exit
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
# repeat of previous multierror for final output
assert_before(child, [
@ -776,7 +632,7 @@ def test_multi_daemon_subactors(
)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
@has_nested_actors
@ -852,7 +708,7 @@ def test_multi_subactors_root_errors(
])
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
assert_before(child, [
# "Attaching to pdb in crashed actor: ('root'",
@ -982,7 +838,7 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
for i in range(3):
try:
child.expect(pexpect.EOF, timeout=0.5)
child.expect(EOF, timeout=0.5)
break
except TIMEOUT:
child.sendline('c')
@ -1024,7 +880,7 @@ def test_root_cancels_child_context_during_startup(
do_ctlc(child)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
def test_different_debug_mode_per_actor(
@ -1045,7 +901,7 @@ def test_different_debug_mode_per_actor(
do_ctlc(child)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
before = str(child.before.decode())
@ -1196,7 +1052,7 @@ def test_pause_from_sync(
)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
def test_post_mortem_api(
@ -1301,7 +1157,7 @@ def test_post_mortem_api(
# )
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
def test_shield_pause(
@ -1376,7 +1232,7 @@ def test_shield_pause(
]
)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
# TODO: better error for "non-ideal" usage from the root actor.

View File

@ -0,0 +1,120 @@
'''
That "native" runtime-hackin toolset better be dang useful!
Verify the funtion of a variety of "developer-experience" tools we
offer from the `.devx` sub-pkg:
- use of the lovely `stackscope` for dumping actor `trio`-task trees
during operation and hangs.
TODO:
- demonstration of `CallerInfo` call stack frame filtering such that
for logging and REPL purposes a user sees exactly the layers needed
when debugging a problem inside the stack vs. in their app.
'''
import os
import signal
from .conftest import (
expect,
assert_before,
# in_prompt_msg,
)
def test_shield_pause(
spawn,
):
'''
Verify the `tractor.pause()/.post_mortem()` API works inside an
already cancelled `trio.CancelScope` and that you can step to the
next checkpoint wherein the cancelled will get raised.
'''
child = spawn(
'shield_hang_in_sub'
)
expect(
child,
'Yo my child hanging..?',
)
assert_before(
child,
[
'Entering shield sleep..',
'Enabling trace-trees on `SIGUSR1` since `stackscope` is installed @',
]
)
print(
'Sending SIGUSR1 to see a tree-trace!',
)
os.kill(
child.pid,
signal.SIGUSR1,
)
expect(
child,
# end-of-tree delimiter
"------ \('root', ",
)
assert_before(
child,
[
'Trying to dump `stackscope` tree..',
'Dumping `stackscope` tree for actor',
"('root'", # uid line
# parent block point (non-shielded)
'await trio.sleep_forever() # in root',
]
)
# expect(
# child,
# # relay to the sub should be reported
# 'Relaying `SIGUSR1`[10] to sub-actor',
# )
expect(
child,
# end-of-tree delimiter
"------ \('hanger', ",
)
assert_before(
child,
[
# relay to the sub should be reported
'Relaying `SIGUSR1`[10] to sub-actor',
"('hanger'", # uid line
# hanger LOC where it's shield-halted
'await trio.sleep_forever() # in subactor',
]
)
# breakpoint()
# simulate the user sending a ctl-c to the hanging program.
# this should result in the terminator kicking in since
# the sub is shield blocking and can't respond to SIGINT.
os.kill(
child.pid,
signal.SIGINT,
)
expect(
child,
'Shutting down actor runtime',
timeout=6,
)
assert_before(
child,
[
'raise KeyboardInterrupt',
# 'Shutting down actor runtime',
'#T-800 deployed to collect zombie B0',
"'--uid', \"('hanger',",
]
)

View File

@ -80,7 +80,7 @@ async def open_root_actor(
# enables the multi-process debugger support
debug_mode: bool = False,
maybe_enable_greenback: bool = False, # `.pause_from_sync()/breakpoint()` support
maybe_enable_greenback: bool = True, # `.pause_from_sync()/breakpoint()` support
enable_stack_on_sig: bool = False,
# internal logging
@ -233,14 +233,8 @@ async def open_root_actor(
and
enable_stack_on_sig
):
try:
logger.info('Enabling `stackscope` traces on SIGUSR1')
from .devx import enable_stack_on_sig
enable_stack_on_sig()
except ImportError:
logger.warning(
'`stackscope` not installed for use in debug mode!'
)
from .devx._stackscope import enable_stack_on_sig
enable_stack_on_sig()
# closed into below ping task-func
ponged_addrs: list[tuple[str, int]] = []

View File

@ -24,13 +24,24 @@ disjoint, parallel executing tasks in separate actors.
'''
from __future__ import annotations
# from functools import partial
from threading import (
current_thread,
Thread,
RLock,
)
import multiprocessing as mp
from signal import (
signal,
getsignal,
SIGUSR1,
)
import traceback
from typing import TYPE_CHECKING
# import traceback
from types import ModuleType
from typing import (
Callable,
TYPE_CHECKING,
)
import trio
from tractor import (
@ -51,26 +62,45 @@ if TYPE_CHECKING:
@trio.lowlevel.disable_ki_protection
def dump_task_tree() -> None:
import stackscope
from tractor.log import get_console_log
'''
Do a classic `stackscope.extract()` task-tree dump to console at
`.devx()` level.
'''
import stackscope
tree_str: str = str(
stackscope.extract(
trio.lowlevel.current_root_task(),
recurse_child_tasks=True
)
)
log = get_console_log(
name=__name__,
level='cancel',
)
actor: Actor = _state.current_actor()
thr: Thread = current_thread()
log.devx(
f'Dumping `stackscope` tree for actor\n'
f'{actor.name}: {actor}\n'
f' |_{mp.current_process()}\n\n'
f'{actor.uid}:\n'
f'|_{mp.current_process()}\n'
f' |_{thr}\n'
f' |_{actor}\n\n'
# start-of-trace-tree delimiter (mostly for testing)
'------ - ------\n'
'\n'
+
f'{tree_str}\n'
+
# end-of-trace-tree delimiter (mostly for testing)
f'\n'
f'------ {actor.uid!r} ------\n'
)
# TODO: can remove this right?
# -[ ] was original code from author
#
# print(
# 'DUMPING FROM PRINT\n'
# +
# content
# )
# import logging
# try:
# with open("/dev/tty", "w") as tty:
@ -80,58 +110,130 @@ def dump_task_tree() -> None:
# "task_tree"
# ).exception("Error printing task tree")
_handler_lock = RLock()
_tree_dumped: bool = False
def signal_handler(
def dump_tree_on_sig(
sig: int,
frame: object,
relay_to_subs: bool = True,
) -> None:
try:
trio.lowlevel.current_trio_token(
).run_sync_soon(dump_task_tree)
except RuntimeError:
# not in async context -- print a normal traceback
traceback.print_stack()
global _tree_dumped, _handler_lock
with _handler_lock:
if _tree_dumped:
log.warning(
'Already dumped for this actor...??'
)
return
_tree_dumped = True
# actor: Actor = _state.current_actor()
log.devx(
'Trying to dump `stackscope` tree..\n'
)
try:
dump_task_tree()
# await actor._service_n.start_soon(
# partial(
# trio.to_thread.run_sync,
# dump_task_tree,
# )
# )
# trio.lowlevel.current_trio_token().run_sync_soon(
# dump_task_tree
# )
except RuntimeError:
log.exception(
'Failed to dump `stackscope` tree..\n'
)
# not in async context -- print a normal traceback
# traceback.print_stack()
raise
except BaseException:
log.exception(
'Failed to dump `stackscope` tree..\n'
)
raise
log.devx(
'Supposedly we dumped just fine..?'
)
if not relay_to_subs:
return
an: ActorNursery
for an in _state.current_actor()._actoruid2nursery.values():
subproc: ProcessType
subactor: Actor
for subactor, subproc, _ in an._children.values():
log.devx(
log.warning(
f'Relaying `SIGUSR1`[{sig}] to sub-actor\n'
f'{subactor}\n'
f' |_{subproc}\n'
)
if isinstance(subproc, trio.Process):
subproc.send_signal(sig)
# bc of course stdlib can't have a std API.. XD
match subproc:
case trio.Process():
subproc.send_signal(sig)
elif isinstance(subproc, mp.Process):
subproc._send_signal(sig)
case mp.Process():
subproc._send_signal(sig)
def enable_stack_on_sig(
sig: int = SIGUSR1
) -> None:
sig: int = SIGUSR1,
) -> ModuleType:
'''
Enable `stackscope` tracing on reception of a signal; by
default this is SIGUSR1.
HOT TIP: a task/ctx-tree dump can be triggered from a shell with
fancy cmds.
For ex. from `bash` using `pgrep` and cmd-sustitution
(https://www.gnu.org/software/bash/manual/bash.html#Command-Substitution)
you could use:
>> kill -SIGUSR1 $(pgrep -f '<cmd>')
Or with with `xonsh` (which has diff capture-from-subproc syntax)
>> kill -SIGUSR1 @$(pgrep -f '<cmd>')
'''
try:
import stackscope
except ImportError:
log.warning(
'`stackscope` not installed for use in debug mode!'
)
return None
handler: Callable|int = getsignal(sig)
if handler is dump_tree_on_sig:
log.devx(
'A `SIGUSR1` handler already exists?\n'
f'|_ {handler!r}\n'
)
return
signal(
sig,
signal_handler,
dump_tree_on_sig,
)
# NOTE: not the above can be triggered from
# a (xonsh) shell using:
# kill -SIGUSR1 @$(pgrep -f '<cmd>')
#
# for example if you were looking to trace a `pytest` run
# kill -SIGUSR1 @$(pgrep -f 'pytest')
log.devx(
'Enabling trace-trees on `SIGUSR1` '
'since `stackscope` is installed @ \n'
f'{stackscope!r}\n\n'
f'With `SIGUSR1` handler\n'
f'|_{dump_tree_on_sig}\n'
)
return stackscope