From 547b957bbf4b3346442352f54ed51a20ff29f79a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 10 Jul 2024 18:17:42 -0400 Subject: [PATCH] Officially test proto-ed `stackscope` integration By re-purposing our `pexpect`-based console matching with a new `debugging/shield_hang_in_sub.py` example, this tests a few "hanging actor" conditions more formally: - that despite a hanging actor's task we can dump a `stackscope.extract()` tree on relay of `SIGUSR1`. - the actor tree will terminate despite a shielded forever-sleep by our "T-800" zombie reaper machinery activating and hard killing the underlying subprocess. Some test deats: - simulates the expect actions of a real user by manually using `os.kill()` to send both signals to the actor-tree program. - `pexpect`-matches against `log.devx()` emissions under normal `debug_mode == True` usage. - ensure we get the actual "T-800 deployed" `log.error()` msg and that the actor tree eventually terminates! Surrounding (re-org/impl/test-suite) changes: - allow disabling usage via a `maybe_enable_greenback: bool` to `open_root_actor()` but enable by def. - pretty up the actual `.devx()` content from `.devx._stackscope` including be extra pedantic about the conc-primitives for each signal event. - try to avoid double handles of `SIGUSR1` even though it seems the original (what i thought was a) problem was actually just double logging in the handler.. |_ avoid double applying the handler func via `signal.signal()`, |_ use a global to avoid double handle func calls and, |_ a `threading.RLock` around handling. - move common fixtures and helper routines from `test_debugger` to `tests/devx/conftest.py` and import them for use in both test mods. --- tests/devx/__init__.py | 0 tests/devx/conftest.py | 167 +++++++++++++++++++++++++++++++++ tests/devx/test_debugger.py | 180 ++++-------------------------------- tests/devx/test_tooling.py | 120 ++++++++++++++++++++++++ tractor/_root.py | 12 +-- tractor/devx/_stackscope.py | 166 ++++++++++++++++++++++++++------- 6 files changed, 442 insertions(+), 203 deletions(-) create mode 100644 tests/devx/__init__.py create mode 100644 tests/devx/conftest.py create mode 100644 tests/devx/test_tooling.py diff --git a/tests/devx/__init__.py b/tests/devx/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/devx/conftest.py b/tests/devx/conftest.py new file mode 100644 index 0000000..b739569 --- /dev/null +++ b/tests/devx/conftest.py @@ -0,0 +1,167 @@ +''' +`tractor.devx.*` tooling sub-pkg test space. + +''' +from typing import ( + Callable, +) + +import pytest +from pexpect.exceptions import ( + TIMEOUT, +) +from tractor._testing import ( + mk_cmd, +) + + +@pytest.fixture +def spawn( + start_method, + testdir: pytest.Testdir, + reg_addr: tuple[str, int], + +) -> Callable[[str], None]: + ''' + Use the `pexpect` module shipped via `testdir.spawn()` to + run an `./examples/..` script by name. + + ''' + if start_method != 'trio': + pytest.skip( + '`pexpect` based tests only supported on `trio` backend' + ) + + def _spawn( + cmd: str, + **mkcmd_kwargs, + ): + return testdir.spawn( + cmd=mk_cmd( + cmd, + **mkcmd_kwargs, + ), + expect_timeout=3, + ) + + # such that test-dep can pass input script name. + return _spawn + + +@pytest.fixture( + params=[False, True], + ids='ctl-c={}'.format, +) +def ctlc( + request, + ci_env: bool, + +) -> bool: + + use_ctlc = request.param + + node = request.node + markers = node.own_markers + for mark in markers: + if mark.name == 'has_nested_actors': + pytest.skip( + f'Test {node} has nested actors and fails with Ctrl-C.\n' + f'The test can sometimes run fine locally but until' + ' we solve' 'this issue this CI test will be xfail:\n' + 'https://github.com/goodboy/tractor/issues/320' + ) + + if use_ctlc: + # XXX: disable pygments highlighting for auto-tests + # since some envs (like actions CI) will struggle + # the the added color-char encoding.. + from tractor.devx._debug import TractorConfig + TractorConfig.use_pygements = False + + yield use_ctlc + + +def expect( + child, + + # normally a `pdb` prompt by default + patt: str, + + **kwargs, + +) -> None: + ''' + Expect wrapper that prints last seen console + data before failing. + + ''' + try: + child.expect( + patt, + **kwargs, + ) + except TIMEOUT: + before = str(child.before.decode()) + print(before) + raise + + +def in_prompt_msg( + prompt: str, + parts: list[str], + + pause_on_false: bool = False, + err_on_false: bool = False, + print_prompt_on_false: bool = True, + +) -> bool: + ''' + Predicate check if (the prompt's) std-streams output has all + `str`-parts in it. + + Can be used in test asserts for bulk matching expected + log/REPL output for a given `pdb` interact point. + + ''' + __tracebackhide__: bool = False + + for part in parts: + if part not in prompt: + if pause_on_false: + import pdbp + pdbp.set_trace() + + if print_prompt_on_false: + print(prompt) + + if err_on_false: + raise ValueError( + f'Could not find pattern: {part!r} in `before` output?' + ) + return False + + return True + + +# TODO: todo support terminal color-chars stripping so we can match +# against call stack frame output from the the 'll' command the like! +# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789 +def assert_before( + child, + patts: list[str], + + **kwargs, + +) -> None: + __tracebackhide__: bool = False + + # as in before the prompt end + before: str = str(child.before.decode()) + assert in_prompt_msg( + prompt=before, + parts=patts, + + # since this is an "assert" helper ;) + err_on_false=True, + **kwargs + ) diff --git a/tests/devx/test_debugger.py b/tests/devx/test_debugger.py index 273451b..bb59045 100644 --- a/tests/devx/test_debugger.py +++ b/tests/devx/test_debugger.py @@ -16,7 +16,6 @@ import platform import time import pytest -import pexpect from pexpect.exceptions import ( TIMEOUT, EOF, @@ -27,12 +26,14 @@ from tractor.devx._debug import ( _crash_msg, _repl_fail_msg, ) -from tractor._testing import ( - mk_cmd, -) from conftest import ( _ci_env, ) +from .conftest import ( + expect, + in_prompt_msg, + assert_before, +) # TODO: The next great debugger audit could be done by you! # - recurrent entry to breakpoint() from single actor *after* and an @@ -69,154 +70,9 @@ has_nested_actors = pytest.mark.has_nested_actors # ) -@pytest.fixture -def spawn( - start_method, - testdir, - reg_addr, - -) -> 'pexpect.spawn': - ''' - Use the `pexpect` module shipped via `testdir.spawn()` to - run an `./examples/..` script by name. - - ''' - if start_method != 'trio': - pytest.skip( - '`pexpect` based tests only supported on `trio` backend' - ) - - def _spawn( - cmd: str, - **mkcmd_kwargs, - ): - return testdir.spawn( - cmd=mk_cmd( - cmd, - **mkcmd_kwargs, - ), - expect_timeout=3, - ) - - # such that test-dep can pass input script name. - return _spawn - - PROMPT = r"\(Pdb\+\)" -def expect( - child, - - # prompt by default - patt: str = PROMPT, - - **kwargs, - -) -> None: - ''' - Expect wrapper that prints last seen console - data before failing. - - ''' - try: - child.expect( - patt, - **kwargs, - ) - except TIMEOUT: - before = str(child.before.decode()) - print(before) - raise - - -def in_prompt_msg( - prompt: str, - parts: list[str], - - pause_on_false: bool = False, - print_prompt_on_false: bool = True, - -) -> bool: - ''' - Predicate check if (the prompt's) std-streams output has all - `str`-parts in it. - - Can be used in test asserts for bulk matching expected - log/REPL output for a given `pdb` interact point. - - ''' - __tracebackhide__: bool = False - - for part in parts: - if part not in prompt: - if pause_on_false: - import pdbp - pdbp.set_trace() - - if print_prompt_on_false: - print(prompt) - - return False - - return True - - -# TODO: todo support terminal color-chars stripping so we can match -# against call stack frame output from the the 'll' command the like! -# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789 -def assert_before( - child, - patts: list[str], - - **kwargs, - -) -> None: - __tracebackhide__: bool = False - - # as in before the prompt end - before: str = str(child.before.decode()) - assert in_prompt_msg( - prompt=before, - parts=patts, - - **kwargs - ) - - -@pytest.fixture( - params=[False, True], - ids='ctl-c={}'.format, -) -def ctlc( - request, - ci_env: bool, - -) -> bool: - - use_ctlc = request.param - - node = request.node - markers = node.own_markers - for mark in markers: - if mark.name == 'has_nested_actors': - pytest.skip( - f'Test {node} has nested actors and fails with Ctrl-C.\n' - f'The test can sometimes run fine locally but until' - ' we solve' 'this issue this CI test will be xfail:\n' - 'https://github.com/goodboy/tractor/issues/320' - ) - - if use_ctlc: - # XXX: disable pygments highlighting for auto-tests - # since some envs (like actions CI) will struggle - # the the added color-char encoding.. - from tractor.devx._debug import TractorConfig - TractorConfig.use_pygements = False - - yield use_ctlc - - @pytest.mark.parametrize( 'user_in_out', [ @@ -281,7 +137,7 @@ def test_root_actor_bp(spawn, user_in_out): child.expect('\r\n') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) if expect_err_str is None: assert 'Error' not in str(child.before) @@ -365,7 +221,7 @@ def test_root_actor_bp_forever( # quit out of the loop child.sendline('q') - child.expect(pexpect.EOF) + child.expect(EOF) @pytest.mark.parametrize( @@ -430,7 +286,7 @@ def test_subactor_error( child.expect('\r\n') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) def test_subactor_breakpoint( @@ -493,7 +349,7 @@ def test_subactor_breakpoint( child.sendline('c') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) before = str(child.before.decode()) assert in_prompt_msg( @@ -636,7 +492,7 @@ def test_multi_subactors( # process should exit child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) # repeat of previous multierror for final output assert_before(child, [ @@ -776,7 +632,7 @@ def test_multi_daemon_subactors( ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) @has_nested_actors @@ -852,7 +708,7 @@ def test_multi_subactors_root_errors( ]) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) assert_before(child, [ # "Attaching to pdb in crashed actor: ('root'", @@ -982,7 +838,7 @@ def test_root_nursery_cancels_before_child_releases_tty_lock( for i in range(3): try: - child.expect(pexpect.EOF, timeout=0.5) + child.expect(EOF, timeout=0.5) break except TIMEOUT: child.sendline('c') @@ -1024,7 +880,7 @@ def test_root_cancels_child_context_during_startup( do_ctlc(child) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) def test_different_debug_mode_per_actor( @@ -1045,7 +901,7 @@ def test_different_debug_mode_per_actor( do_ctlc(child) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) before = str(child.before.decode()) @@ -1196,7 +1052,7 @@ def test_pause_from_sync( ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) def test_post_mortem_api( @@ -1301,7 +1157,7 @@ def test_post_mortem_api( # ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) def test_shield_pause( @@ -1376,7 +1232,7 @@ def test_shield_pause( ] ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) # TODO: better error for "non-ideal" usage from the root actor. diff --git a/tests/devx/test_tooling.py b/tests/devx/test_tooling.py new file mode 100644 index 0000000..3e48844 --- /dev/null +++ b/tests/devx/test_tooling.py @@ -0,0 +1,120 @@ +''' +That "native" runtime-hackin toolset better be dang useful! + +Verify the funtion of a variety of "developer-experience" tools we +offer from the `.devx` sub-pkg: + +- use of the lovely `stackscope` for dumping actor `trio`-task trees + during operation and hangs. + +TODO: +- demonstration of `CallerInfo` call stack frame filtering such that + for logging and REPL purposes a user sees exactly the layers needed + when debugging a problem inside the stack vs. in their app. + +''' +import os +import signal + +from .conftest import ( + expect, + assert_before, + # in_prompt_msg, +) + + +def test_shield_pause( + spawn, +): + ''' + Verify the `tractor.pause()/.post_mortem()` API works inside an + already cancelled `trio.CancelScope` and that you can step to the + next checkpoint wherein the cancelled will get raised. + + ''' + child = spawn( + 'shield_hang_in_sub' + ) + expect( + child, + 'Yo my child hanging..?', + ) + assert_before( + child, + [ + 'Entering shield sleep..', + 'Enabling trace-trees on `SIGUSR1` since `stackscope` is installed @', + ] + ) + + print( + 'Sending SIGUSR1 to see a tree-trace!', + ) + os.kill( + child.pid, + signal.SIGUSR1, + ) + expect( + child, + # end-of-tree delimiter + "------ \('root', ", + ) + + assert_before( + child, + [ + 'Trying to dump `stackscope` tree..', + 'Dumping `stackscope` tree for actor', + "('root'", # uid line + + # parent block point (non-shielded) + 'await trio.sleep_forever() # in root', + ] + ) + + # expect( + # child, + # # relay to the sub should be reported + # 'Relaying `SIGUSR1`[10] to sub-actor', + # ) + + expect( + child, + # end-of-tree delimiter + "------ \('hanger', ", + ) + assert_before( + child, + [ + # relay to the sub should be reported + 'Relaying `SIGUSR1`[10] to sub-actor', + + "('hanger'", # uid line + + # hanger LOC where it's shield-halted + 'await trio.sleep_forever() # in subactor', + ] + ) + # breakpoint() + + # simulate the user sending a ctl-c to the hanging program. + # this should result in the terminator kicking in since + # the sub is shield blocking and can't respond to SIGINT. + os.kill( + child.pid, + signal.SIGINT, + ) + expect( + child, + 'Shutting down actor runtime', + timeout=6, + ) + assert_before( + child, + [ + 'raise KeyboardInterrupt', + # 'Shutting down actor runtime', + '#T-800 deployed to collect zombie B0', + "'--uid', \"('hanger',", + ] + ) diff --git a/tractor/_root.py b/tractor/_root.py index 882285a..51dbe5e 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -80,7 +80,7 @@ async def open_root_actor( # enables the multi-process debugger support debug_mode: bool = False, - maybe_enable_greenback: bool = False, # `.pause_from_sync()/breakpoint()` support + maybe_enable_greenback: bool = True, # `.pause_from_sync()/breakpoint()` support enable_stack_on_sig: bool = False, # internal logging @@ -233,14 +233,8 @@ async def open_root_actor( and enable_stack_on_sig ): - try: - logger.info('Enabling `stackscope` traces on SIGUSR1') - from .devx import enable_stack_on_sig - enable_stack_on_sig() - except ImportError: - logger.warning( - '`stackscope` not installed for use in debug mode!' - ) + from .devx._stackscope import enable_stack_on_sig + enable_stack_on_sig() # closed into below ping task-func ponged_addrs: list[tuple[str, int]] = [] diff --git a/tractor/devx/_stackscope.py b/tractor/devx/_stackscope.py index e8e97d1..944ae49 100644 --- a/tractor/devx/_stackscope.py +++ b/tractor/devx/_stackscope.py @@ -24,13 +24,24 @@ disjoint, parallel executing tasks in separate actors. ''' from __future__ import annotations +# from functools import partial +from threading import ( + current_thread, + Thread, + RLock, +) import multiprocessing as mp from signal import ( signal, + getsignal, SIGUSR1, ) -import traceback -from typing import TYPE_CHECKING +# import traceback +from types import ModuleType +from typing import ( + Callable, + TYPE_CHECKING, +) import trio from tractor import ( @@ -51,26 +62,45 @@ if TYPE_CHECKING: @trio.lowlevel.disable_ki_protection def dump_task_tree() -> None: - import stackscope - from tractor.log import get_console_log + ''' + Do a classic `stackscope.extract()` task-tree dump to console at + `.devx()` level. + ''' + import stackscope tree_str: str = str( stackscope.extract( trio.lowlevel.current_root_task(), recurse_child_tasks=True ) ) - log = get_console_log( - name=__name__, - level='cancel', - ) actor: Actor = _state.current_actor() + thr: Thread = current_thread() log.devx( f'Dumping `stackscope` tree for actor\n' - f'{actor.name}: {actor}\n' - f' |_{mp.current_process()}\n\n' + f'{actor.uid}:\n' + f'|_{mp.current_process()}\n' + f' |_{thr}\n' + f' |_{actor}\n\n' + + # start-of-trace-tree delimiter (mostly for testing) + '------ - ------\n' + '\n' + + f'{tree_str}\n' + + + # end-of-trace-tree delimiter (mostly for testing) + f'\n' + f'------ {actor.uid!r} ------\n' ) + # TODO: can remove this right? + # -[ ] was original code from author + # + # print( + # 'DUMPING FROM PRINT\n' + # + + # content + # ) # import logging # try: # with open("/dev/tty", "w") as tty: @@ -80,58 +110,130 @@ def dump_task_tree() -> None: # "task_tree" # ).exception("Error printing task tree") +_handler_lock = RLock() +_tree_dumped: bool = False -def signal_handler( + +def dump_tree_on_sig( sig: int, frame: object, relay_to_subs: bool = True, ) -> None: - try: - trio.lowlevel.current_trio_token( - ).run_sync_soon(dump_task_tree) - except RuntimeError: - # not in async context -- print a normal traceback - traceback.print_stack() + global _tree_dumped, _handler_lock + with _handler_lock: + if _tree_dumped: + log.warning( + 'Already dumped for this actor...??' + ) + return + + _tree_dumped = True + + # actor: Actor = _state.current_actor() + log.devx( + 'Trying to dump `stackscope` tree..\n' + ) + try: + dump_task_tree() + # await actor._service_n.start_soon( + # partial( + # trio.to_thread.run_sync, + # dump_task_tree, + # ) + # ) + # trio.lowlevel.current_trio_token().run_sync_soon( + # dump_task_tree + # ) + + except RuntimeError: + log.exception( + 'Failed to dump `stackscope` tree..\n' + ) + # not in async context -- print a normal traceback + # traceback.print_stack() + raise + + except BaseException: + log.exception( + 'Failed to dump `stackscope` tree..\n' + ) + raise + + log.devx( + 'Supposedly we dumped just fine..?' + ) if not relay_to_subs: return an: ActorNursery for an in _state.current_actor()._actoruid2nursery.values(): - subproc: ProcessType subactor: Actor for subactor, subproc, _ in an._children.values(): - log.devx( + log.warning( f'Relaying `SIGUSR1`[{sig}] to sub-actor\n' f'{subactor}\n' f' |_{subproc}\n' ) - if isinstance(subproc, trio.Process): - subproc.send_signal(sig) + # bc of course stdlib can't have a std API.. XD + match subproc: + case trio.Process(): + subproc.send_signal(sig) - elif isinstance(subproc, mp.Process): - subproc._send_signal(sig) + case mp.Process(): + subproc._send_signal(sig) def enable_stack_on_sig( - sig: int = SIGUSR1 -) -> None: + sig: int = SIGUSR1, +) -> ModuleType: ''' Enable `stackscope` tracing on reception of a signal; by default this is SIGUSR1. + HOT TIP: a task/ctx-tree dump can be triggered from a shell with + fancy cmds. + + For ex. from `bash` using `pgrep` and cmd-sustitution + (https://www.gnu.org/software/bash/manual/bash.html#Command-Substitution) + you could use: + + >> kill -SIGUSR1 $(pgrep -f '') + + Or with with `xonsh` (which has diff capture-from-subproc syntax) + + >> kill -SIGUSR1 @$(pgrep -f '') + ''' + try: + import stackscope + except ImportError: + log.warning( + '`stackscope` not installed for use in debug mode!' + ) + return None + + handler: Callable|int = getsignal(sig) + if handler is dump_tree_on_sig: + log.devx( + 'A `SIGUSR1` handler already exists?\n' + f'|_ {handler!r}\n' + ) + return + signal( sig, - signal_handler, + dump_tree_on_sig, ) - # NOTE: not the above can be triggered from - # a (xonsh) shell using: - # kill -SIGUSR1 @$(pgrep -f '') - # - # for example if you were looking to trace a `pytest` run - # kill -SIGUSR1 @$(pgrep -f 'pytest') + log.devx( + 'Enabling trace-trees on `SIGUSR1` ' + 'since `stackscope` is installed @ \n' + f'{stackscope!r}\n\n' + f'With `SIGUSR1` handler\n' + f'|_{dump_tree_on_sig}\n' + ) + return stackscope