diff --git a/examples/debugging/asyncio_bp.py b/examples/debugging/asyncio_bp.py index baddfe03..296dbccb 100644 --- a/examples/debugging/asyncio_bp.py +++ b/examples/debugging/asyncio_bp.py @@ -1,8 +1,16 @@ +''' +Examples of using the builtin `breakpoint()` from an `asyncio.Task` +running in a subactor spawned with `infect_asyncio=True`. + +''' import asyncio import trio import tractor -from tractor import to_asyncio +from tractor import ( + to_asyncio, + Portal, +) async def aio_sleep_forever(): @@ -17,21 +25,21 @@ async def bp_then_error( ) -> None: - # sync with ``trio``-side (caller) task + # sync with `trio`-side (caller) task to_trio.send_nowait('start') # NOTE: what happens here inside the hook needs some refinement.. # => seems like it's still `._debug._set_trace()` but # we set `Lock.local_task_in_debug = 'sync'`, we probably want - # some further, at least, meta-data about the task/actoq in debug - # in terms of making it clear it's asyncio mucking about. - breakpoint() + # some further, at least, meta-data about the task/actor in debug + # in terms of making it clear it's `asyncio` mucking about. + breakpoint() # asyncio-side # short checkpoint / delay - await asyncio.sleep(0.5) + await asyncio.sleep(0.5) # asyncio-side if raise_after_bp: - raise ValueError('blah') + raise ValueError('asyncio side error!') # TODO: test case with this so that it gets cancelled? else: @@ -49,23 +57,21 @@ async def trio_ctx( # this will block until the ``asyncio`` task sends a "first" # message, see first line in above func. async with ( - to_asyncio.open_channel_from( bp_then_error, - raise_after_bp=not bp_before_started, + # raise_after_bp=not bp_before_started, ) as (first, chan), - trio.open_nursery() as n, + trio.open_nursery() as tn, ): - assert first == 'start' if bp_before_started: - await tractor.breakpoint() + await tractor.pause() # trio-side - await ctx.started(first) + await ctx.started(first) # trio-side - n.start_soon( + tn.start_soon( to_asyncio.run_task, aio_sleep_forever, ) @@ -73,39 +79,50 @@ async def trio_ctx( async def main( - bps_all_over: bool = False, + bps_all_over: bool = True, + + # TODO, WHICH OF THESE HAZ BUGZ? + cancel_from_root: bool = False, + err_from_root: bool = False, ) -> None: async with tractor.open_nursery( - # debug_mode=True, - ) as n: - - p = await n.start_actor( + debug_mode=True, + maybe_enable_greenback=True, + # loglevel='devx', + ) as an: + ptl: Portal = await an.start_actor( 'aio_daemon', enable_modules=[__name__], infect_asyncio=True, debug_mode=True, - loglevel='cancel', + # loglevel='cancel', ) - async with p.open_context( + async with ptl.open_context( trio_ctx, bp_before_started=bps_all_over, ) as (ctx, first): assert first == 'start' - if bps_all_over: - await tractor.breakpoint() + # pause in parent to ensure no cross-actor + # locking problems exist! + await tractor.pause() # trio-root + + if cancel_from_root: + await ctx.cancel() + + if err_from_root: + assert 0 + else: + await trio.sleep_forever() - # await trio.sleep_forever() - await ctx.cancel() - assert 0 # TODO: case where we cancel from trio-side while asyncio task # has debugger lock? 
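+        # (a hedged sketch of that TODO case; untested, and the
+        #  `get_lock()` helper is assumed from `sync_bp.py`'s
+        #  commented imports: while the aio task above sits in
+        #  `breakpoint()`, request cancellation from this trio
+        #  side and expect the debug/TTY lock to release instead
+        #  of deadlocking the tree:)
+        #
+        # with trio.move_on_after(1):
+        #     await ctx.cancel()
+        # assert get_lock().ctx_in_debug is None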
-        # await ptl.cancel_actor()


 if __name__ == '__main__':
diff --git a/examples/debugging/fast_error_in_root_after_spawn.py b/examples/debugging/fast_error_in_root_after_spawn.py
index 570cf7ef..86710788 100644
--- a/examples/debugging/fast_error_in_root_after_spawn.py
+++ b/examples/debugging/fast_error_in_root_after_spawn.py
@@ -1,5 +1,5 @@
 '''
-Fast fail test with a context.
+Fast fail test with a `Context`.

 Ensure the partially initialized sub-actor process
 doesn't cause a hang on error/cancel of the parent
diff --git a/examples/debugging/multi_daemon_subactors.py b/examples/debugging/multi_daemon_subactors.py
index 80ef933c..7844ccef 100644
--- a/examples/debugging/multi_daemon_subactors.py
+++ b/examples/debugging/multi_daemon_subactors.py
@@ -7,7 +7,7 @@ async def breakpoint_forever():
     try:
         while True:
             yield 'yo'
-            await tractor.breakpoint()
+            await tractor.pause()
     except BaseException:
         tractor.log.get_console_log().exception(
             'Cancelled while trying to enter pause point!'
@@ -25,7 +25,8 @@ async def main():
     """
     async with tractor.open_nursery(
         debug_mode=True,
-        loglevel='cancel',
+        # loglevel='cancel',
+        # loglevel='devx',
     ) as n:

         p0 = await n.start_actor('bp_forever', enable_modules=[__name__])
diff --git a/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py b/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py
index 8df52e3b..b63f1945 100644
--- a/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py
+++ b/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py
@@ -10,7 +10,7 @@ async def name_error():
 async def breakpoint_forever():
     "Indefinitely re-enter debugger in child actor."
     while True:
-        await tractor.breakpoint()
+        await tractor.pause()

     # NOTE: if the test never sent 'q'/'quit' commands
     # on the pdb repl, without this checkpoint line the
diff --git a/examples/debugging/multi_subactors.py b/examples/debugging/multi_subactors.py
index 22b13ac8..57634cc3 100644
--- a/examples/debugging/multi_subactors.py
+++ b/examples/debugging/multi_subactors.py
@@ -6,7 +6,7 @@ async def breakpoint_forever():
     "Indefinitely re-enter debugger in child actor."
     while True:
         await trio.sleep(0.1)
-        await tractor.breakpoint()
+        await tractor.pause()


 async def name_error():
diff --git a/examples/debugging/restore_builtin_breakpoint.py b/examples/debugging/restore_builtin_breakpoint.py
index 6e141dfc..89605075 100644
--- a/examples/debugging/restore_builtin_breakpoint.py
+++ b/examples/debugging/restore_builtin_breakpoint.py
@@ -6,19 +6,46 @@ import tractor

 async def main() -> None:
-    async with tractor.open_nursery(debug_mode=True) as an:
-        assert os.environ['PYTHONBREAKPOINT'] == 'tractor._debug._set_trace'
+    # initially unset, no entry.
+    orig_pybp_var: str|None = os.environ.get('PYTHONBREAKPOINT')
+    assert orig_pybp_var in {None, "0"}
+
+    async with tractor.open_nursery(
+        debug_mode=True,
+    ) as an:
+        assert an
+        assert (
+            (pybp_var := os.environ['PYTHONBREAKPOINT'])
+            ==
+            'tractor.devx._debug._sync_pause_from_builtin'
+        )

         # TODO: an assert that verifies the hook has indeed been, hooked
         # XD
-        assert sys.breakpointhook is not tractor._debug._set_trace
+        assert (
+            (pybp_hook := sys.breakpointhook)
+            is not tractor.devx._debug._set_trace
+        )

+        print(
+            f'$PYTHONBREAKPOINT: {pybp_var!r}\n'
+            f'`sys.breakpointhook`: {pybp_hook!r}\n'
+        )
         breakpoint()
+        pass  # first bp, tractor hook set.

-        # TODO: an assert that verifies the hook is unhooked..
+ # XXX AFTER EXIT (of actor-runtime) verify the hook is unset.. + # + # YES, this is weird but it's how stdlib docs say to do it.. + # https://docs.python.org/3/library/sys.html#sys.breakpointhook + assert os.environ.get('PYTHONBREAKPOINT') is orig_pybp_var assert sys.breakpointhook + + # now ensure a regular builtin pause still works breakpoint() + pass # last bp, stdlib hook restored + if __name__ == '__main__': trio.run(main) diff --git a/examples/debugging/root_actor_breakpoint.py b/examples/debugging/root_actor_breakpoint.py index 5c858d4c..55b4ca56 100644 --- a/examples/debugging/root_actor_breakpoint.py +++ b/examples/debugging/root_actor_breakpoint.py @@ -10,7 +10,7 @@ async def main(): await trio.sleep(0.1) - await tractor.breakpoint() + await tractor.pause() await trio.sleep(0.1) diff --git a/examples/debugging/root_actor_breakpoint_forever.py b/examples/debugging/root_actor_breakpoint_forever.py index 88a6e0e9..04cd7e7e 100644 --- a/examples/debugging/root_actor_breakpoint_forever.py +++ b/examples/debugging/root_actor_breakpoint_forever.py @@ -11,7 +11,7 @@ async def main( # loglevel='runtime', ): while True: - await tractor.breakpoint() + await tractor.pause() if __name__ == '__main__': diff --git a/examples/debugging/shield_hang_in_sub.py b/examples/debugging/shield_hang_in_sub.py new file mode 100644 index 00000000..5387353f --- /dev/null +++ b/examples/debugging/shield_hang_in_sub.py @@ -0,0 +1,83 @@ +''' +Verify we can dump a `stackscope` tree on a hang. + +''' +import os +import signal + +import trio +import tractor + +@tractor.context +async def start_n_shield_hang( + ctx: tractor.Context, +): + # actor: tractor.Actor = tractor.current_actor() + + # sync to parent-side task + await ctx.started(os.getpid()) + + print('Entering shield sleep..') + with trio.CancelScope(shield=True): + await trio.sleep_forever() # in subactor + + # XXX NOTE ^^^ since this shields, we expect + # the zombie reaper (aka T800) to engage on + # SIGINT from the user and eventually hard-kill + # this subprocess! + + +async def main( + from_test: bool = False, +) -> None: + + async with ( + tractor.open_nursery( + debug_mode=True, + enable_stack_on_sig=True, + # maybe_enable_greenback=False, + loglevel='devx', + ) as an, + ): + ptl: tractor.Portal = await an.start_actor( + 'hanger', + enable_modules=[__name__], + debug_mode=True, + ) + async with ptl.open_context( + start_n_shield_hang, + ) as (ctx, cpid): + + _, proc, _ = an._children[ptl.chan.uid] + assert cpid == proc.pid + + print( + 'Yo my child hanging..?\n' + # "i'm a user who wants to see a `stackscope` tree!\n" + ) + + # XXX simulate the wrapping test's "user actions" + # (i.e. 
if a human didn't run this manually but wants to
+    #      know what they should do to reproduce test behaviour)
+    if from_test:
+        print(
+            f'Sending SIGUSR1 to {cpid!r}!\n'
+        )
+        os.kill(
+            cpid,
+            signal.SIGUSR1,
+        )
+
+        # simulate user cancelling program
+        await trio.sleep(0.5)
+        os.kill(
+            os.getpid(),
+            signal.SIGINT,
+        )
+    else:
+        # actually let user send the ctl-c
+        await trio.sleep_forever()  # in root
+
+
+if __name__ == '__main__':
+    trio.run(main)
diff --git a/examples/debugging/subactor_bp_in_ctx.py b/examples/debugging/subactor_bp_in_ctx.py
index a47dbd92..2c5fee8c 100644
--- a/examples/debugging/subactor_bp_in_ctx.py
+++ b/examples/debugging/subactor_bp_in_ctx.py
@@ -4,9 +4,9 @@ import trio

 async def gen():
     yield 'yo'
-    await tractor.breakpoint()
+    await tractor.pause()
     yield 'yo'
-    await tractor.breakpoint()
+    await tractor.pause()


 @tractor.context
@@ -15,7 +15,7 @@ async def just_bp(
 ) -> None:
     await ctx.started()
-    await tractor.breakpoint()
+    await tractor.pause()

     # TODO: bps and errors in this call..
     async for val in gen():
diff --git a/examples/debugging/sync_bp.py b/examples/debugging/sync_bp.py
index 137710fc..95472c93 100644
--- a/examples/debugging/sync_bp.py
+++ b/examples/debugging/sync_bp.py
@@ -4,6 +4,13 @@ import time
 import trio
 import tractor

+# TODO: only import these when not running from test harness?
+# can we detect `pexpect` usage maybe?
+# from tractor.devx._debug import (
+#     get_lock,
+#     get_debug_req,
+# )
+

 def sync_pause(
     use_builtin: bool = False,
@@ -18,7 +25,13 @@ def sync_pause(
         breakpoint(hide_tb=hide_tb)

     else:
+        # TODO: maybe for testing some kind of cm style interface
+        # where the `._set_trace()` call doesn't happen until block
+        # exit?
+        # assert get_lock().ctx_in_debug is None
+        # assert get_debug_req().repl is None
         tractor.pause_from_sync()
+        # assert get_debug_req().repl is None

     if error:
         raise RuntimeError('yoyo sync code error')
@@ -41,10 +54,11 @@ async def start_n_sync_pause(
 async def main() -> None:
     async with (
         tractor.open_nursery(
-            # NOTE: required for pausing from sync funcs
-            maybe_enable_greenback=True,
             debug_mode=True,
-            # loglevel='cancel',
+            maybe_enable_greenback=True,
+            enable_stack_on_sig=True,
+            # loglevel='warning',
+            # loglevel='devx',
         ) as an,
         trio.open_nursery() as tn,
     ):
@@ -138,7 +152,9 @@ async def main() -> None:
                 # the case 2. from above still exists!
                 use_builtin=True,
             ),
-            abandon_on_cancel=False,
+            # TODO: with this `False` we can hang!??!
+            # abandon_on_cancel=False,
+            abandon_on_cancel=True,
             thread_name='inline_root_bg_thread',
         )
diff --git a/notes_to_self/howtorelease.md b/notes_to_self/howtorelease.md
new file mode 100644
index 00000000..a1b52d7a
--- /dev/null
+++ b/notes_to_self/howtorelease.md
@@ -0,0 +1,18 @@
+First generate a built distribution:
+
+```
+python -m pip install --upgrade build
+python -m build --sdist --outdir dist/alpha5/
+```
+
+Then try a test ``pypi`` upload:
+
+```
+python -m twine upload --repository testpypi dist/alpha5/*
+```
+
+Then push to `pypi` for realz:
+
+```
+python -m twine upload dist/alpha5/*
+```
diff --git a/tests/conftest.py b/tests/conftest.py
index 5ce84425..810b642a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -150,6 +150,18 @@ def pytest_generate_tests(metafunc):
     metafunc.parametrize("start_method", [spawn_backend], scope='module')


+# TODO: a way to let test scripts (like from `examples/`)
+# guarantee they won't registry addr collide!
+# @pytest.fixture +# def open_test_runtime( +# reg_addr: tuple, +# ) -> AsyncContextManager: +# return partial( +# tractor.open_nursery, +# registry_addrs=[reg_addr], +# ) + + def sig_prog(proc, sig): "Kill the actor-process with ``sig``." proc.send_signal(sig) diff --git a/tests/devx/__init__.py b/tests/devx/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/devx/conftest.py b/tests/devx/conftest.py new file mode 100644 index 00000000..c45265dc --- /dev/null +++ b/tests/devx/conftest.py @@ -0,0 +1,243 @@ +''' +`tractor.devx.*` tooling sub-pkg test space. + +''' +import time +from typing import ( + Callable, +) + +import pytest +from pexpect.exceptions import ( + TIMEOUT, +) +from pexpect.spawnbase import SpawnBase + +from tractor._testing import ( + mk_cmd, +) +from tractor.devx._debug import ( + _pause_msg as _pause_msg, + _crash_msg as _crash_msg, + _repl_fail_msg as _repl_fail_msg, + _ctlc_ignore_header as _ctlc_ignore_header, +) +from ..conftest import ( + _ci_env, +) + + +@pytest.fixture +def spawn( + start_method, + testdir: pytest.Pytester, + reg_addr: tuple[str, int], + +) -> Callable[[str], None]: + ''' + Use the `pexpect` module shipped via `testdir.spawn()` to + run an `./examples/..` script by name. + + ''' + if start_method != 'trio': + pytest.skip( + '`pexpect` based tests only supported on `trio` backend' + ) + + def unset_colors(): + ''' + Python 3.13 introduced colored tracebacks that break patt + matching, + + https://docs.python.org/3/using/cmdline.html#envvar-PYTHON_COLORS + https://docs.python.org/3/using/cmdline.html#using-on-controlling-color + + ''' + import os + os.environ['PYTHON_COLORS'] = '0' + + def _spawn( + cmd: str, + **mkcmd_kwargs, + ): + unset_colors() + return testdir.spawn( + cmd=mk_cmd( + cmd, + **mkcmd_kwargs, + ), + expect_timeout=3, + # preexec_fn=unset_colors, + # ^TODO? get `pytest` core to expose underlying + # `pexpect.spawn()` stuff? + ) + + # such that test-dep can pass input script name. + return _spawn + + +@pytest.fixture( + params=[False, True], + ids='ctl-c={}'.format, +) +def ctlc( + request, + ci_env: bool, + +) -> bool: + + use_ctlc = request.param + + node = request.node + markers = node.own_markers + for mark in markers: + if mark.name == 'has_nested_actors': + pytest.skip( + f'Test {node} has nested actors and fails with Ctrl-C.\n' + f'The test can sometimes run fine locally but until' + ' we solve' 'this issue this CI test will be xfail:\n' + 'https://github.com/goodboy/tractor/issues/320' + ) + + if mark.name == 'ctlcs_bish': + pytest.skip( + f'Test {node} prolly uses something from the stdlib (namely `asyncio`..)\n' + f'The test and/or underlying example script can *sometimes* run fine ' + f'locally but more then likely until the cpython peeps get their sh#$ together, ' + f'this test will definitely not behave like `trio` under SIGINT..\n' + ) + + if use_ctlc: + # XXX: disable pygments highlighting for auto-tests + # since some envs (like actions CI) will struggle + # the the added color-char encoding.. + from tractor.devx._debug import TractorConfig + TractorConfig.use_pygements = False + + yield use_ctlc + + +def expect( + child, + + # normally a `pdb` prompt by default + patt: str, + + **kwargs, + +) -> None: + ''' + Expect wrapper that prints last seen console + data before failing. 
+ + ''' + try: + child.expect( + patt, + **kwargs, + ) + except TIMEOUT: + before = str(child.before.decode()) + print(before) + raise + + +PROMPT = r"\(Pdb\+\)" + + +def in_prompt_msg( + child: SpawnBase, + parts: list[str], + + pause_on_false: bool = False, + err_on_false: bool = False, + print_prompt_on_false: bool = True, + +) -> bool: + ''' + Predicate check if (the prompt's) std-streams output has all + `str`-parts in it. + + Can be used in test asserts for bulk matching expected + log/REPL output for a given `pdb` interact point. + + ''' + __tracebackhide__: bool = False + + before: str = str(child.before.decode()) + for part in parts: + if part not in before: + if pause_on_false: + import pdbp + pdbp.set_trace() + + if print_prompt_on_false: + print(before) + + if err_on_false: + raise ValueError( + f'Could not find pattern in `before` output?\n' + f'part: {part!r}\n' + ) + return False + + return True + + +# TODO: todo support terminal color-chars stripping so we can match +# against call stack frame output from the the 'll' command the like! +# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789 +def assert_before( + child: SpawnBase, + patts: list[str], + + **kwargs, + +) -> None: + __tracebackhide__: bool = False + + assert in_prompt_msg( + child=child, + parts=patts, + + # since this is an "assert" helper ;) + err_on_false=True, + **kwargs + ) + + +def do_ctlc( + child, + count: int = 3, + delay: float = 0.1, + patt: str|None = None, + + # expect repl UX to reprint the prompt after every + # ctrl-c send. + # XXX: no idea but, in CI this never seems to work even on 3.10 so + # needs some further investigation potentially... + expect_prompt: bool = not _ci_env, + +) -> str|None: + + before: str|None = None + + # make sure ctl-c sends don't do anything but repeat output + for _ in range(count): + time.sleep(delay) + child.sendcontrol('c') + + # TODO: figure out why this makes CI fail.. + # if you run this test manually it works just fine.. + if expect_prompt: + time.sleep(delay) + child.expect(PROMPT) + before = str(child.before.decode()) + time.sleep(delay) + + if patt: + # should see the last line on console + assert patt in before + + # return the console content up to the final prompt + return before diff --git a/tests/test_debugger.py b/tests/devx/test_debugger.py similarity index 74% rename from tests/test_debugger.py rename to tests/devx/test_debugger.py index e4f28548..8b723c6f 100644 --- a/tests/test_debugger.py +++ b/tests/devx/test_debugger.py @@ -13,26 +13,25 @@ TODO: from functools import partial import itertools import platform -import pathlib import time import pytest -import pexpect from pexpect.exceptions import ( TIMEOUT, EOF, ) -from tractor._testing import ( - examples_dir, -) -from tractor.devx._debug import ( +from .conftest import ( + do_ctlc, + PROMPT, _pause_msg, _crash_msg, _repl_fail_msg, ) from .conftest import ( - _ci_env, + expect, + in_prompt_msg, + assert_before, ) # TODO: The next great debugger audit could be done by you! @@ -52,15 +51,6 @@ if platform.system() == 'Windows': ) -def mk_cmd(ex_name: str) -> str: - ''' - Generate a command suitable to pass to ``pexpect.spawn()``. - - ''' - script_path: pathlib.Path = examples_dir() / 'debugging' / f'{ex_name}.py' - return ' '.join(['python', str(script_path)]) - - # TODO: was trying to this xfail style but some weird bug i see in CI # that's happening at collect time.. pretty soon gonna dump actions i'm # thinkin... 
@@ -79,142 +69,6 @@ has_nested_actors = pytest.mark.has_nested_actors # ) -@pytest.fixture -def spawn( - start_method, - testdir, - reg_addr, -) -> 'pexpect.spawn': - - if start_method != 'trio': - pytest.skip( - "Debugger tests are only supported on the trio backend" - ) - - def _spawn(cmd): - return testdir.spawn( - cmd=mk_cmd(cmd), - expect_timeout=3, - ) - - return _spawn - - -PROMPT = r"\(Pdb\+\)" - - -def expect( - child, - - # prompt by default - patt: str = PROMPT, - - **kwargs, - -) -> None: - ''' - Expect wrapper that prints last seen console - data before failing. - - ''' - try: - child.expect( - patt, - **kwargs, - ) - except TIMEOUT: - before = str(child.before.decode()) - print(before) - raise - - -def in_prompt_msg( - prompt: str, - parts: list[str], - - pause_on_false: bool = False, - print_prompt_on_false: bool = True, - -) -> bool: - ''' - Predicate check if (the prompt's) std-streams output has all - `str`-parts in it. - - Can be used in test asserts for bulk matching expected - log/REPL output for a given `pdb` interact point. - - ''' - __tracebackhide__: bool = False - - for part in parts: - if part not in prompt: - if pause_on_false: - import pdbp - pdbp.set_trace() - - if print_prompt_on_false: - print(prompt) - - return False - - return True - - -# TODO: todo support terminal color-chars stripping so we can match -# against call stack frame output from the the 'll' command the like! -# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789 -def assert_before( - child, - patts: list[str], - - **kwargs, - -) -> None: - __tracebackhide__: bool = False - - # as in before the prompt end - before: str = str(child.before.decode()) - assert in_prompt_msg( - prompt=before, - parts=patts, - - **kwargs - ) - - -@pytest.fixture( - params=[False, True], - ids='ctl-c={}'.format, -) -def ctlc( - request, - ci_env: bool, - -) -> bool: - - use_ctlc = request.param - - node = request.node - markers = node.own_markers - for mark in markers: - if mark.name == 'has_nested_actors': - pytest.skip( - f'Test {node} has nested actors and fails with Ctrl-C.\n' - f'The test can sometimes run fine locally but until' - ' we solve' 'this issue this CI test will be xfail:\n' - 'https://github.com/goodboy/tractor/issues/320' - ) - - if use_ctlc: - # XXX: disable pygments highlighting for auto-tests - # since some envs (like actions CI) will struggle - # the the added color-char encoding.. - from tractor.devx._debug import TractorConfig - TractorConfig.use_pygements = False - - yield use_ctlc - - @pytest.mark.parametrize( 'user_in_out', [ @@ -238,14 +92,15 @@ def test_root_actor_error( # scan for the prompt expect(child, PROMPT) - before = str(child.before.decode()) - # make sure expected logging and error arrives assert in_prompt_msg( - before, - [_crash_msg, "('root'"] + child, + [ + _crash_msg, + "('root'", + 'AssertionError', + ] ) - assert 'AssertionError' in before # send user command child.sendline(user_input) @@ -264,8 +119,10 @@ def test_root_actor_error( ids=lambda item: f'{item[0]} -> {item[1]}', ) def test_root_actor_bp(spawn, user_in_out): - """Demonstrate breakpoint from in root actor. - """ + ''' + Demonstrate breakpoint from in root actor. 
+ + ''' user_input, expect_err_str = user_in_out child = spawn('root_actor_breakpoint') @@ -279,7 +136,7 @@ def test_root_actor_bp(spawn, user_in_out): child.expect('\r\n') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) if expect_err_str is None: assert 'Error' not in str(child.before) @@ -287,38 +144,6 @@ def test_root_actor_bp(spawn, user_in_out): assert expect_err_str in str(child.before) -def do_ctlc( - child, - count: int = 3, - delay: float = 0.1, - patt: str|None = None, - - # expect repl UX to reprint the prompt after every - # ctrl-c send. - # XXX: no idea but, in CI this never seems to work even on 3.10 so - # needs some further investigation potentially... - expect_prompt: bool = not _ci_env, - -) -> None: - - # make sure ctl-c sends don't do anything but repeat output - for _ in range(count): - time.sleep(delay) - child.sendcontrol('c') - - # TODO: figure out why this makes CI fail.. - # if you run this test manually it works just fine.. - if expect_prompt: - before = str(child.before.decode()) - time.sleep(delay) - child.expect(PROMPT) - time.sleep(delay) - - if patt: - # should see the last line on console - assert patt in before - - def test_root_actor_bp_forever( spawn, ctlc: bool, @@ -358,7 +183,7 @@ def test_root_actor_bp_forever( # quit out of the loop child.sendline('q') - child.expect(pexpect.EOF) + child.expect(EOF) @pytest.mark.parametrize( @@ -380,10 +205,12 @@ def test_subactor_error( # scan for the prompt child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, - [_crash_msg, "('name_error'"] + child, + [ + _crash_msg, + "('name_error'", + ] ) if do_next: @@ -402,17 +229,15 @@ def test_subactor_error( child.sendline('continue') child.expect(PROMPT) - before = str(child.before.decode()) - - # root actor gets debugger engaged assert in_prompt_msg( - before, - [_crash_msg, "('root'"] - ) - # error is a remote error propagated from the subactor - assert in_prompt_msg( - before, - [_crash_msg, "('name_error'"] + child, + [ + _crash_msg, + # root actor gets debugger engaged + "('root'", + # error is a remote error propagated from the subactor + "('name_error'", + ] ) # another round @@ -423,7 +248,7 @@ def test_subactor_error( child.expect('\r\n') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) def test_subactor_breakpoint( @@ -433,14 +258,11 @@ def test_subactor_breakpoint( "Single subactor with an infinite breakpoint loop" child = spawn('subactor_breakpoint') - - # scan for the prompt child.expect(PROMPT) - - before = str(child.before.decode()) assert in_prompt_msg( - before, - [_pause_msg, "('breakpoint_forever'"] + child, + [_pause_msg, + "('breakpoint_forever'",] ) # do some "next" commands to demonstrate recurrent breakpoint @@ -456,9 +278,8 @@ def test_subactor_breakpoint( for _ in range(5): child.sendline('continue') child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, [_pause_msg, "('breakpoint_forever'"] ) @@ -471,9 +292,8 @@ def test_subactor_breakpoint( # child process should exit but parent will capture pdb.BdbQuit child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, ['RemoteActorError:', "('breakpoint_forever'", 'bdb.BdbQuit',] @@ -486,11 +306,10 @@ def test_subactor_breakpoint( child.sendline('c') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, ['RemoteActorError:', 
"('breakpoint_forever'", 'bdb.BdbQuit',] @@ -514,7 +333,7 @@ def test_multi_subactors( before = str(child.before.decode()) assert in_prompt_msg( - before, + child, [_pause_msg, "('breakpoint_forever'"] ) @@ -535,12 +354,14 @@ def test_multi_subactors( # first name_error failure child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, - [_crash_msg, "('name_error'"] + child, + [ + _crash_msg, + "('name_error'", + "NameError", + ] ) - assert "NameError" in before if ctlc: do_ctlc(child) @@ -564,9 +385,8 @@ def test_multi_subactors( # breakpoint loop should re-engage child.sendline('c') child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, [_pause_msg, "('breakpoint_forever'"] ) @@ -629,7 +449,7 @@ def test_multi_subactors( # process should exit child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) # repeat of previous multierror for final output assert_before(child, [ @@ -659,25 +479,28 @@ def test_multi_daemon_subactors( # the root's tty lock first so anticipate either crash # message on the first entry. - bp_forev_parts = [_pause_msg, "('bp_forever'"] + bp_forev_parts = [ + _pause_msg, + "('bp_forever'", + ] bp_forev_in_msg = partial( in_prompt_msg, parts=bp_forev_parts, ) - name_error_msg = "NameError: name 'doggypants' is not defined" - name_error_parts = [name_error_msg] + name_error_msg: str = "NameError: name 'doggypants' is not defined" + name_error_parts: list[str] = [name_error_msg] before = str(child.before.decode()) - if bp_forev_in_msg(prompt=before): + if bp_forev_in_msg(child=child): next_parts = name_error_parts elif name_error_msg in before: next_parts = bp_forev_parts else: - raise ValueError("Neither log msg was found !?") + raise ValueError('Neither log msg was found !?') if ctlc: do_ctlc(child) @@ -746,14 +569,12 @@ def test_multi_daemon_subactors( # wait for final error in root # where it crashs with boxed error while True: - try: - child.sendline('c') - child.expect(PROMPT) - assert_before( - child, - bp_forev_parts - ) - except AssertionError: + child.sendline('c') + child.expect(PROMPT) + if not in_prompt_msg( + child, + bp_forev_parts + ): break assert_before( @@ -769,7 +590,7 @@ def test_multi_daemon_subactors( ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) @has_nested_actors @@ -845,7 +666,7 @@ def test_multi_subactors_root_errors( ]) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) assert_before(child, [ # "Attaching to pdb in crashed actor: ('root'", @@ -934,10 +755,13 @@ def test_root_nursery_cancels_before_child_releases_tty_lock( child = spawn('root_cancelled_but_child_is_in_tty_lock') child.expect(PROMPT) - - before = str(child.before.decode()) - assert "NameError: name 'doggypants' is not defined" in before - assert "tractor._exceptions.RemoteActorError: ('name_error'" not in before + assert_before( + child, + [ + "NameError: name 'doggypants' is not defined", + "tractor._exceptions.RemoteActorError: ('name_error'", + ], + ) time.sleep(0.5) if ctlc: @@ -975,7 +799,7 @@ def test_root_nursery_cancels_before_child_releases_tty_lock( for i in range(3): try: - child.expect(pexpect.EOF, timeout=0.5) + child.expect(EOF, timeout=0.5) break except TIMEOUT: child.sendline('c') @@ -1017,7 +841,7 @@ def test_root_cancels_child_context_during_startup( do_ctlc(child) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) def test_different_debug_mode_per_actor( @@ -1028,9 +852,8 @@ def test_different_debug_mode_per_actor( 
child.expect(PROMPT) # only one actor should enter the debugger - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, [_crash_msg, "('debugged_boi'", "RuntimeError"], ) @@ -1038,9 +861,7 @@ def test_different_debug_mode_per_actor( do_ctlc(child) child.sendline('c') - child.expect(pexpect.EOF) - - before = str(child.before.decode()) + child.expect(EOF) # NOTE: this debugged actor error currently WON'T show up since the # root will actually cancel and terminate the nursery before the error @@ -1059,103 +880,6 @@ def test_different_debug_mode_per_actor( ) -def test_pause_from_sync( - spawn, - ctlc: bool -): - ''' - Verify we can use the `pdbp` REPL from sync functions AND from - any thread spawned with `trio.to_thread.run_sync()`. - - `examples/debugging/sync_bp.py` - - ''' - child = spawn('sync_bp') - - # first `sync_pause()` after nurseries open - child.expect(PROMPT) - assert_before( - child, - [ - # pre-prompt line - _pause_msg, - " None: + try: + yield + except TIMEOUT: + # breakpoint() + if ctlc: + pytest.xfail( + 'Some kinda redic threading SIGINT bug i think?\n' + 'See the notes in `examples/debugging/sync_bp.py`..\n' + ) + raise + + +@pytest.mark.ctlcs_bish +def test_pause_from_sync( + spawn, + ctlc: bool, +): + ''' + Verify we can use the `pdbp` REPL from sync functions AND from + any thread spawned with `trio.to_thread.run_sync()`. + + `examples/debugging/sync_bp.py` + + ''' + child = spawn('sync_bp') + + # first `sync_pause()` after nurseries open + child.expect(PROMPT) + assert_before( + child, + [ + # pre-prompt line + _pause_msg, + " similar to the `delay` input to `do_ctlc()` below, setting + # this too low can cause the test to fail since the `subactor` + # suffers a race where the root/parent sends an actor-cancel + # prior to the context task hitting its pause point (and thus + # engaging the `sigint_shield()` handler in time); this value + # seems be good enuf? + time.sleep(0.6) + + # one of the bg thread or subactor should have + # `Lock.acquire()`-ed + # (NOT both, which will result in REPL clobbering!) + attach_patts: dict[str, list[str]] = { + 'subactor': [ + "'start_n_sync_pause'", + "('subactor'", + ], + 'inline_root_bg_thread': [ + " list[str]: + ''' + Receive any of a `list[str]` of patterns provided in + `attach_patts`. + + Used to test racing prompts from multiple actors and/or + tasks using a common root process' `pdbp` REPL. + + ''' + assert attach_patts + + child.expect(PROMPT) + before = str(child.before.decode()) + + for attach_key in attach_patts: + if attach_key in before: + expected_patts: str = attach_patts.pop(attach_key) + assert_before( + child, + expected_patts + ) + break # from for + else: + pytest.fail( + f'No keys found?\n\n' + f'{attach_patts.keys()}\n\n' + f'{before}\n' + ) + + # ensure no other task/threads engaged a REPL + # at the same time as the one that was detected above. + for key, other_patts in attach_patts.copy().items(): + assert not in_prompt_msg( + child, + other_patts, + ) + + if ctlc: + do_ctlc( + child, + patt=prompt, + # NOTE same as comment above + delay=ctlc_delay, + ) + + return expected_patts + + +@pytest.mark.ctlcs_bish +def test_sync_pause_from_aio_task( + spawn, + + ctlc: bool + # ^TODO, fix for `asyncio`!! +): + ''' + Verify we can use the `pdbp` REPL from an `asyncio.Task` spawned using + APIs in `.to_asyncio`. 
+ + `examples/debugging/asycio_bp.py` + + ''' + child = spawn('asyncio_bp') + + # RACE on whether trio/asyncio task bps first + attach_patts: dict[str, list[str]] = { + + # first pause in guest-mode (aka "infecting") + # `trio.Task`. + 'trio-side': [ + _pause_msg, + " int: + if debug_mode: + return 999 + else: + return 1 + + async def sleep_and_err( sleep_for: float = 0.1, # just signature placeholders for compat with # ``to_asyncio.open_channel_from()`` - to_trio: Optional[trio.MemorySendChannel] = None, - from_trio: Optional[asyncio.Queue] = None, + to_trio: trio.MemorySendChannel|None = None, + from_trio: asyncio.Queue|None = None, ): if to_trio: @@ -36,7 +58,7 @@ async def sleep_and_err( assert 0 -async def sleep_forever(): +async def aio_sleep_forever(): await asyncio.sleep(float('inf')) @@ -44,36 +66,50 @@ async def trio_cancels_single_aio_task(): # spawn an ``asyncio`` task to run a func and return result with trio.move_on_after(.2): - await tractor.to_asyncio.run_task(sleep_forever) + await tractor.to_asyncio.run_task(aio_sleep_forever) -def test_trio_cancels_aio_on_actor_side(reg_addr): +def test_trio_cancels_aio_on_actor_side( + reg_addr: tuple[str, int], + delay: int, + debug_mode: bool, +): ''' Spawn an infected actor that is cancelled by the ``trio`` side task using std cancel scope apis. ''' async def main(): - async with tractor.open_nursery( - registry_addrs=[reg_addr] - ) as n: - await n.run_in_actor( - trio_cancels_single_aio_task, - infect_asyncio=True, - ) + with trio.fail_after(1 + delay): + async with tractor.open_nursery( + registry_addrs=[reg_addr], + debug_mode=debug_mode, + ) as an: + await an.run_in_actor( + trio_cancels_single_aio_task, + infect_asyncio=True, + ) trio.run(main) async def asyncio_actor( - target: str, expect_err: Exception|None = None ) -> None: - assert tractor.current_actor().is_infected_aio() - target = globals()[target] + # ensure internal runtime state is consistent + actor: Actor = tractor.current_actor() + assert ( + actor.is_infected_aio() + and + actor._infected_aio + and + _state._runtime_vars['_is_infected_aio'] + ) + + target: Callable = globals()[target] if '.' in expect_err: modpath, _, name = expect_err.rpartition('.') @@ -89,12 +125,17 @@ async def asyncio_actor( except BaseException as err: if expect_err: - assert isinstance(err, error_type) + assert isinstance(err, error_type), ( + f'{type(err)} is not {error_type}?' + ) raise -def test_aio_simple_error(reg_addr): +def test_aio_simple_error( + reg_addr: tuple[str, int], + debug_mode: bool, +): ''' Verify a simple remote asyncio error propagates back through trio to the parent actor. @@ -103,9 +144,10 @@ def test_aio_simple_error(reg_addr): ''' async def main(): async with tractor.open_nursery( - registry_addrs=[reg_addr] - ) as n: - await n.run_in_actor( + registry_addrs=[reg_addr], + debug_mode=debug_mode, + ) as an: + await an.run_in_actor( asyncio_actor, target='sleep_and_err', expect_err='AssertionError', @@ -128,19 +170,24 @@ def test_aio_simple_error(reg_addr): assert err assert isinstance(err, RemoteActorError) - assert err.boxed_type == AssertionError + assert err.boxed_type is AssertionError -def test_tractor_cancels_aio(reg_addr): +def test_tractor_cancels_aio( + reg_addr: tuple[str, int], + debug_mode: bool, +): ''' Verify we can cancel a spawned asyncio task gracefully. 
''' async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: + portal = await an.run_in_actor( asyncio_actor, - target='sleep_forever', + target='aio_sleep_forever', expect_err='trio.Cancelled', infect_asyncio=True, ) @@ -150,7 +197,9 @@ def test_tractor_cancels_aio(reg_addr): trio.run(main) -def test_trio_cancels_aio(reg_addr): +def test_trio_cancels_aio( + reg_addr: tuple[str, int], +): ''' Much like the above test with ``tractor.Portal.cancel_actor()`` except we just use a standard ``trio`` cancellation api. @@ -161,10 +210,10 @@ def test_trio_cancels_aio(reg_addr): with trio.move_on_after(1): # cancel the nursery shortly after boot - async with tractor.open_nursery() as n: - await n.run_in_actor( + async with tractor.open_nursery() as tn: + await tn.run_in_actor( asyncio_actor, - target='sleep_forever', + target='aio_sleep_forever', expect_err='trio.Cancelled', infect_asyncio=True, ) @@ -181,23 +230,35 @@ async def trio_ctx( # this will block until the ``asyncio`` task sends a "first" # message. - with trio.fail_after(2): - async with ( - trio.open_nursery() as n, + delay: int = 999 if tractor.debug_mode() else 1 + with trio.fail_after(1 + delay): + try: + async with ( + trio.open_nursery( + # TODO, for new `trio` / py3.13 + # strict_exception_groups=False, + ) as tn, + tractor.to_asyncio.open_channel_from( + sleep_and_err, + ) as (first, chan), + ): - tractor.to_asyncio.open_channel_from( - sleep_and_err, - ) as (first, chan), - ): + assert first == 'start' - assert first == 'start' + # spawn another asyncio task for the cuck of it. + tn.start_soon( + tractor.to_asyncio.run_task, + aio_sleep_forever, + ) + await trio.sleep_forever() - # spawn another asyncio task for the cuck of it. - n.start_soon( - tractor.to_asyncio.run_task, - sleep_forever, - ) - await trio.sleep_forever() + # TODO, factor this into a `trionics.collapse()`? 
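+        # a hedged sketch of what such a (hypothetical) helper
+        # could look like; NB it uses a plain `except` clause
+        # since any exception raised *inside* an `except*` clause
+        # gets re-wrapped into a new group by the interpreter:
+        #
+        # from contextlib import asynccontextmanager as acm
+        #
+        # @acm
+        # async def collapse_eg():
+        #     try:
+        #         yield
+        #     except BaseExceptionGroup as beg:
+        #         if len(excs := beg.exceptions) == 1:
+        #             raise excs[0] from beg
+        #         raise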
+ except* BaseException as beg: + # await tractor.pause(shield=True) + if len(excs := beg.exceptions) == 1: + raise excs[0] + else: + raise @pytest.mark.parametrize( @@ -206,8 +267,10 @@ async def trio_ctx( ids='parent_actor_cancels_child={}'.format ) def test_context_spawns_aio_task_that_errors( - reg_addr, + reg_addr: tuple[str, int], + delay: int, parent_cancels: bool, + debug_mode: bool, ): ''' Verify that spawning a task via an intertask channel ctx mngr that @@ -216,14 +279,13 @@ def test_context_spawns_aio_task_that_errors( ''' async def main(): - - with trio.fail_after(2): - async with tractor.open_nursery() as n: - p = await n.start_actor( + with trio.fail_after(1 + delay): + async with tractor.open_nursery() as an: + p = await an.start_actor( 'aio_daemon', enable_modules=[__name__], infect_asyncio=True, - # debug_mode=True, + debug_mode=debug_mode, loglevel='cancel', ) async with ( @@ -272,7 +334,7 @@ def test_context_spawns_aio_task_that_errors( err = excinfo.value assert isinstance(err, expect) - assert err.boxed_type == AssertionError + assert err.boxed_type is AssertionError async def aio_cancel(): @@ -281,23 +343,38 @@ async def aio_cancel(): ''' await asyncio.sleep(0.5) - task = asyncio.current_task() # cancel and enter sleep + task = asyncio.current_task() task.cancel() - await sleep_forever() + await aio_sleep_forever() -def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr): +def test_aio_cancelled_from_aio_causes_trio_cancelled( + reg_addr: tuple, + delay: int, +): + ''' + When the `asyncio.Task` cancels itself the `trio` side should + also cancel and teardown and relay the cancellation cross-process + to the parent caller. + ''' async def main(): - async with tractor.open_nursery() as n: - await n.run_in_actor( + + an: tractor.ActorNursery + async with tractor.open_nursery() as an: + p: tractor.Portal = await an.run_in_actor( asyncio_actor, target='aio_cancel', expect_err='tractor.to_asyncio.AsyncioCancelled', infect_asyncio=True, ) + # NOTE: normally the `an.__aexit__()` waits on the + # portal's result but we do it explicitly here + # to avoid indent levels. + with trio.fail_after(1 + delay): + await p.wait_for_result() with pytest.raises( expected_exception=(RemoteActorError, ExceptionGroup), @@ -305,15 +382,15 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr): trio.run(main) # might get multiple `trio.Cancelled`s as well inside an inception - err = excinfo.value + err: RemoteActorError|ExceptionGroup = excinfo.value if isinstance(err, ExceptionGroup): - err = next(itertools.dropwhile( - lambda exc: not isinstance(exc, tractor.RemoteActorError), - err.exceptions - )) - assert err + excs = err.exceptions + assert len(excs) == 1 + final_exc = excs[0] + assert isinstance(final_exc, tractor.RemoteActorError) - # ensure boxed error is correct + # relayed boxed error should be our `trio`-task's + # cancel-signal-proxy-equivalent of `asyncio.CancelledError`. 
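+    # (the relay path under test, roughly:
+    #  `asyncio.Task.cancel()` -> `asyncio.CancelledError`
+    #  -> translated to `AsyncioCancelled` on the `trio` side
+    #  -> boxed in a `RemoteActorError` relayed to this parent.)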
assert err.boxed_type == to_asyncio.AsyncioCancelled @@ -323,15 +400,18 @@ async def no_to_trio_in_args(): async def push_from_aio_task( - sequence: Iterable, to_trio: trio.abc.SendChannel, expect_cancel: False, fail_early: bool, + exit_early: bool, ) -> None: try: + # print('trying breakpoint') + # breakpoint() + # sync caller ctx manager to_trio.send_nowait(True) @@ -340,10 +420,27 @@ async def push_from_aio_task( to_trio.send_nowait(i) await asyncio.sleep(0.001) - if i == 50 and fail_early: - raise Exception + if ( + i == 50 + ): + if fail_early: + print('Raising exc from aio side!') + raise Exception - print('asyncio streamer complete!') + if exit_early: + # TODO? really you could enforce the same + # SC-proto we use for actors here with asyncio + # such that a Return[None] msg would be + # implicitly delivered to the trio side? + # + # XXX => this might be the end-all soln for + # converting any-inter-task system (regardless + # of maybe-remote runtime or language) to be + # SC-compat no? + print(f'asyncio breaking early @ {i!r}') + break + + print('asyncio streaming complete!') except asyncio.CancelledError: if not expect_cancel: @@ -355,10 +452,10 @@ async def push_from_aio_task( async def stream_from_aio( - - exit_early: bool = False, - raise_err: bool = False, + trio_exit_early: bool = False, + trio_raise_err: bool = False, aio_raise_err: bool = False, + aio_exit_early: bool = False, fan_out: bool = False, ) -> None: @@ -371,8 +468,18 @@ async def stream_from_aio( async with to_asyncio.open_channel_from( push_from_aio_task, sequence=seq, - expect_cancel=raise_err or exit_early, + expect_cancel=trio_raise_err or trio_exit_early, fail_early=aio_raise_err, + exit_early=aio_exit_early, + + # such that we can test exit early cases + # for each side explicitly. + suppress_graceful_exits=(not( + aio_exit_early + or + trio_exit_early + )) + ) as (first, chan): assert first is True @@ -384,17 +491,28 @@ async def stream_from_aio( ], ): async for value in chan: - print(f'trio received {value}') + print(f'trio received: {value!r}') + + # XXX, debugging EoC not being handled correctly + # in `transate_aio_errors()`.. + # if value is None: + # await tractor.pause(shield=True) + pulled.append(value) if value == 50: - if raise_err: + if trio_raise_err: raise Exception - elif exit_early: + elif trio_exit_early: + print('`consume()` breaking early!\n') break + print('returning from `consume()`..\n') + + # run 2 tasks each pulling from + # the inter-task-channel with the 2nd + # using a fan-out `BroadcastReceiver`. if fan_out: - # start second task that get's the same stream value set. async with ( # NOTE: this has to come first to avoid @@ -402,19 +520,31 @@ async def stream_from_aio( # tasks are joined.. chan.subscribe() as br, - trio.open_nursery() as n, + trio.open_nursery() as tn, ): - n.start_soon(consume, br) + # start 2nd task that get's broadcast the same + # value set. 
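+                # NOTE: `chan.subscribe()` above wraps the channel
+                # in a broadcast receiver so BOTH `consume()` tasks
+                # see every value; this is why the non-error
+                # `fan_out` case below asserts double the `pulled`
+                # values.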
+ tn.start_soon(consume, br) await consume(chan) else: await consume(chan) + except BaseException as err: + import logging + log = logging.getLogger() + log.exception('aio-subactor errored!\n') + raise err + finally: - if ( - not raise_err and - not exit_early and - not aio_raise_err + if not ( + trio_raise_err + or + trio_exit_early + or + aio_raise_err + or + aio_exit_early ): if fan_out: # we get double the pulled values in the @@ -424,22 +554,27 @@ async def stream_from_aio( assert list(sorted(pulled)) == expect else: + # await tractor.pause() assert pulled == expect else: assert not fan_out assert pulled == expect[:51] - print('trio guest mode task completed!') + print('trio guest-mode task completed!') + assert chan._aio_task.done() @pytest.mark.parametrize( 'fan_out', [False, True], ids='fan_out_w_chan_subscribe={}'.format ) -def test_basic_interloop_channel_stream(reg_addr, fan_out): +def test_basic_interloop_channel_stream( + reg_addr: tuple[str, int], + fan_out: bool, +): async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( + async with tractor.open_nursery() as an: + portal = await an.run_in_actor( stream_from_aio, infect_asyncio=True, fan_out=fan_out, @@ -453,10 +588,10 @@ def test_basic_interloop_channel_stream(reg_addr, fan_out): # TODO: parametrize the above test and avoid the duplication here? def test_trio_error_cancels_intertask_chan(reg_addr): async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( + async with tractor.open_nursery() as an: + portal = await an.run_in_actor( stream_from_aio, - raise_err=True, + trio_raise_err=True, infect_asyncio=True, ) # should trigger remote actor error @@ -466,28 +601,119 @@ def test_trio_error_cancels_intertask_chan(reg_addr): trio.run(main) # ensure boxed error type - excinfo.value.boxed_type == Exception + excinfo.value.boxed_type is Exception -def test_trio_closes_early_and_channel_exits(reg_addr): +def test_trio_closes_early_causes_aio_checkpoint_raise( + reg_addr: tuple[str, int], + delay: int, + debug_mode: bool, +): + ''' + Check that if the `trio`-task "exits early and silently" (in this + case during `async for`-ing the inter-task-channel via + a `break`-from-loop), we raise `TrioTaskExited` on the + `asyncio`-side which also then bubbles up through the + `open_channel_from()` block indicating that the `asyncio.Task` + hit a ran another checkpoint despite the `trio.Task` exit. + + ''' async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( - stream_from_aio, - exit_early=True, - infect_asyncio=True, - ) - # should raise RAE diectly - await portal.result() + with trio.fail_after(1 + delay): + async with tractor.open_nursery( + debug_mode=debug_mode, + # enable_stack_on_sig=True, + ) as an: + portal = await an.run_in_actor( + stream_from_aio, + trio_exit_early=True, + infect_asyncio=True, + ) + # should raise RAE diectly + print('waiting on final infected subactor result..') + res: None = await portal.wait_for_result() + assert res is None + print(f'infected subactor returned result: {res!r}\n') # should be a quiet exit on a simple channel exit - trio.run(main) + with pytest.raises(RemoteActorError) as excinfo: + trio.run(main) + + # ensure remote error is an explicit `AsyncioCancelled` sub-type + # which indicates to the aio task that the trio side exited + # silently WITHOUT raising a `trio.Cancelled` (which would + # normally be raised instead as a `AsyncioCancelled`). 
+ excinfo.value.boxed_type is to_asyncio.TrioTaskExited -def test_aio_errors_and_channel_propagates_and_closes(reg_addr): +def test_aio_exits_early_relays_AsyncioTaskExited( + # TODO, parametrize the 3 possible trio side conditions: + # - trio blocking on receive, aio exits early + # - trio cancelled AND aio exits early on its next tick + # - trio errors AND aio exits early on its next tick + reg_addr: tuple[str, int], + debug_mode: bool, + delay: int, +): + ''' + Check that if the `asyncio`-task "exits early and silently" (in this + case during `push_from_aio_task()` pushing to the `InterLoopTaskChannel` + it `break`s from the loop), we raise `AsyncioTaskExited` on the + `trio`-side which then DOES NOT BUBBLE up through the + `open_channel_from()` block UNLESS, + + - the trio.Task also errored/cancelled, in which case we wrap + both errors in an eg + - the trio.Task was blocking on rxing a value from the + `InterLoopTaskChannel`. + + ''' async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( + with trio.fail_after(1 + delay): + async with tractor.open_nursery( + debug_mode=debug_mode, + # enable_stack_on_sig=True, + ) as an: + portal = await an.run_in_actor( + stream_from_aio, + infect_asyncio=True, + trio_exit_early=False, + aio_exit_early=True, + ) + # should raise RAE diectly + print('waiting on final infected subactor result..') + res: None = await portal.wait_for_result() + assert res is None + print(f'infected subactor returned result: {res!r}\n') + + # should be a quiet exit on a simple channel exit + with pytest.raises(RemoteActorError) as excinfo: + trio.run(main) + + exc = excinfo.value + + # TODO, wow bug! + # -[ ] bp handler not replaced!?!? + # breakpoint() + + # import pdbp; pdbp.set_trace() + + # ensure remote error is an explicit `AsyncioCancelled` sub-type + # which indicates to the aio task that the trio side exited + # silently WITHOUT raising a `trio.Cancelled` (which would + # normally be raised instead as a `AsyncioCancelled`). + assert exc.boxed_type is to_asyncio.AsyncioTaskExited + + +def test_aio_errors_and_channel_propagates_and_closes( + reg_addr: tuple[str, int], + debug_mode: bool, +): + async def main(): + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: + portal = await an.run_in_actor( stream_from_aio, aio_raise_err=True, infect_asyncio=True, @@ -502,44 +728,49 @@ def test_aio_errors_and_channel_propagates_and_closes(reg_addr): ) as excinfo: trio.run(main) - excinfo.value.boxed_type == Exception + excinfo.value.boxed_type is Exception + + +async def aio_echo_server( + to_trio: trio.MemorySendChannel, + from_trio: asyncio.Queue, +) -> None: + + to_trio.send_nowait('start') + + while True: + try: + msg = await from_trio.get() + except to_asyncio.TrioTaskExited: + print( + 'breaking aio echo loop due to `trio` exit!' 
+ ) + break + + # echo the msg back + to_trio.send_nowait(msg) + + # if we get the terminate sentinel + # break the echo loop + if msg is None: + print('breaking aio echo loop') + break + + print('exiting asyncio task') @tractor.context async def trio_to_aio_echo_server( - ctx: tractor.Context, + ctx: tractor.Context|None, ): - - async def aio_echo_server( - to_trio: trio.MemorySendChannel, - from_trio: asyncio.Queue, - ) -> None: - - to_trio.send_nowait('start') - - while True: - msg = await from_trio.get() - - # echo the msg back - to_trio.send_nowait(msg) - - # if we get the terminate sentinel - # break the echo loop - if msg is None: - print('breaking aio echo loop') - break - - print('exiting asyncio task') - async with to_asyncio.open_channel_from( aio_echo_server, ) as (first, chan): - assert first == 'start' + await ctx.started(first) async with ctx.open_stream() as stream: - async for msg in stream: print(f'asyncio echoing {msg}') await chan.send(msg) @@ -563,13 +794,15 @@ async def trio_to_aio_echo_server( ids='raise_error={}'.format, ) def test_echoserver_detailed_mechanics( - reg_addr, + reg_addr: tuple[str, int], + debug_mode: bool, raise_error_mid_stream, ): - async def main(): - async with tractor.open_nursery() as n: - p = await n.start_actor( + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: + p = await an.start_actor( 'aio_server', enable_modules=[__name__], infect_asyncio=True, @@ -618,6 +851,243 @@ def test_echoserver_detailed_mechanics( trio.run(main) +@tractor.context +async def manage_file( + ctx: tractor.Context, + tmp_path_str: str, + send_sigint_to: str, + trio_side_is_shielded: bool = True, + bg_aio_task: bool = False, +): + ''' + Start an `asyncio` task that just sleeps after registering a context + with `Actor.lifetime_stack`. Trigger a SIGINT to kill the actor tree + and ensure the stack is closed in the infected mode child. + + To verify the teardown state just write a tmpfile to the `testdir` + and delete it on actor close. + + ''' + + tmp_path: Path = Path(tmp_path_str) + tmp_file: Path = tmp_path / f'{" ".join(ctx._actor.uid)}.file' + + # create a the tmp file and tell the parent where it's at + assert not tmp_file.is_file() + tmp_file.touch() + + stack: ExitStack = current_actor().lifetime_stack + stack.callback(tmp_file.unlink) + + await ctx.started(( + str(tmp_file), + os.getpid(), + )) + + # expect to be cancelled from here! + try: + + # NOTE: turns out you don't even need to sched an aio task + # since the original issue, even though seemingly was due to + # the guest-run being abandoned + a `._debug.pause()` inside + # `._runtime._async_main()` (which was originally trying to + # debug the `.lifetime_stack` not closing), IS NOT actually + # the core issue? + # + # further notes: + # + # - `trio` only issues the " RuntimeWarning: Trio guest run + # got abandoned without properly finishing... weird stuff + # might happen" IFF you DO run a asyncio task here, BUT + # - the original issue of the `.lifetime_stack` not closing + # will still happen even if you don't run an `asyncio` task + # here even though the "abandon" messgage won't be shown.. + # + # => ????? honestly i'm lost but it seems to be some issue + # with `asyncio` and SIGINT.. + # + # honestly, this REALLY reminds me why i haven't used + # `asyncio` by choice in years.. 
XD + # + async with trio.open_nursery() as tn: + if bg_aio_task: + tn.start_soon( + tractor.to_asyncio.run_task, + aio_sleep_forever, + ) + + # XXX don't-need/doesn't-make-a-diff right + # since we're already doing it from parent? + # if send_sigint_to == 'child': + # os.kill( + # os.getpid(), + # signal.SIGINT, + # ) + + # XXX spend a half sec doing shielded checkpointing to + # ensure that despite the `trio`-side task ignoring the + # SIGINT, the `asyncio` side won't abandon the guest-run! + if trio_side_is_shielded: + with trio.CancelScope(shield=True): + for i in range(5): + await trio.sleep(0.1) + + await trio.sleep_forever() + + # signalled manually at the OS level (aka KBI) by the parent actor. + except KeyboardInterrupt: + print('child raised KBI..') + assert tmp_file.exists() + raise + + raise RuntimeError('shoulda received a KBI?') + + +@pytest.mark.parametrize( + 'trio_side_is_shielded', + [ + False, + True, + ], + ids=[ + 'trio_side_no_shielding', + 'trio_side_does_shielded_work', + ], +) +@pytest.mark.parametrize( + 'send_sigint_to', + [ + 'child', + 'parent', + ], + ids='send_SIGINT_to={}'.format, +) +@pytest.mark.parametrize( + 'bg_aio_task', + [ + False, + + # NOTE: (and see notes in `manage_file()` above as well) if + # we FOR SURE SPAWN AN AIO TASK in the child it seems the + # "silent-abandon" case (as is described in detail in + # `to_asyncio.run_as_asyncio_guest()`) does not happen and + # `asyncio`'s loop will at least abandon the `trio` side + # loudly? .. prolly the state-spot to start looking for + # a soln that results in NO ABANDONMENT.. XD + True, + ], + ids=[ + 'bg_aio_task', + 'just_trio_slee', + ], +) +@pytest.mark.parametrize( + 'wait_for_ctx', + [ + False, + True, + ], + ids=[ + 'raise_KBI_in_rent', + 'wait_for_ctx', + ], +) +def test_sigint_closes_lifetime_stack( + tmp_path: Path, + wait_for_ctx: bool, + bg_aio_task: bool, + trio_side_is_shielded: bool, + debug_mode: bool, + send_sigint_to: str, +): + ''' + Ensure that an infected child can use the `Actor.lifetime_stack` + to make a file on boot and it's automatically cleaned up by the + actor-lifetime-linked exit stack closure. + + ''' + async def main(): + + delay = 999 if tractor.debug_mode() else 1 + try: + an: tractor.ActorNursery + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: + p: tractor.Portal = await an.start_actor( + 'file_mngr', + enable_modules=[__name__], + infect_asyncio=True, + ) + async with p.open_context( + manage_file, + tmp_path_str=str(tmp_path), + send_sigint_to=send_sigint_to, + bg_aio_task=bg_aio_task, + trio_side_is_shielded=trio_side_is_shielded, + ) as (ctx, first): + + path_str, cpid = first + tmp_file: Path = Path(path_str) + assert tmp_file.exists() + + # XXX originally to simulate what (hopefully) + # the below now triggers.. had to manually + # trigger a SIGINT from a ctl-c in the root. + # await trio.sleep_forever() + + # XXX NOTE XXX signal infected-`asyncio` child to + # OS-cancel with SIGINT; this should trigger the + # bad `asyncio` cancel behaviour that can cause + # a guest-run abandon as was seen causing + # shm-buffer leaks in `piker`'s live quote stream + # susbys! + # + await trio.sleep(.2) + pid: int = ( + cpid if send_sigint_to == 'child' + else os.getpid() + ) + os.kill( + pid, + signal.SIGINT, + ) + + # XXX CASE 1: without the bug fixed, in + # the non-KBI-raised-in-parent case, this + # timeout should trigger! 
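+                    # (i.e. if the guest-run is abandoned the ctx
+                    #  result never arrives and the `fail_after`
+                    #  just below should raise.)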
+ if wait_for_ctx:
+ print('waiting for ctx outcome in parent..')
+ try:
+ with trio.fail_after(1 + delay):
+ await ctx.wait_for_result()
+ except tractor.ContextCancelled as ctxc:
+ assert ctxc.canceller == ctx.chan.uid
+ raise
+
+ # XXX CASE 2: this seems to be the source of the
+ # original issue which exhibited BEFORE we put
+ # a `Actor.cancel_soon()` inside
+ # `run_as_asyncio_guest()`..
+ else:
+ raise KeyboardInterrupt
+
+ pytest.fail('should have raised some kinda error?!?')
+
+ except (
+ KeyboardInterrupt,
+ ContextCancelled,
+ ):
+ # XXX CASE 2: without the bug fixed, in the
+ # KBI-raised-in-parent case, the actor teardown should
+ # never get run (silently abandoned by `asyncio`..) and
+ # thus the file should leak!
+ assert not tmp_file.exists()
+ assert ctx.maybe_error
+
+ trio.run(main)
+
+
 # TODO: debug_mode tests once we get support for `asyncio`!
 #
 # -[ ] need tests to wrap both scripts:
diff --git a/tests/test_inter_peer_cancellation.py b/tests/test_inter_peer_cancellation.py
index 7bf9a2bd..bac9a791 100644
--- a/tests/test_inter_peer_cancellation.py
+++ b/tests/test_inter_peer_cancellation.py
@@ -170,7 +170,7 @@ def test_do_not_swallow_error_before_started_by_remote_contextcancelled(
 trio.run(main)

 rae = excinfo.value
- assert rae.boxed_type == TypeError
+ assert rae.boxed_type is TypeError


 @tractor.context
diff --git a/tests/test_root_infect_asyncio.py b/tests/test_root_infect_asyncio.py
new file mode 100644
index 00000000..93deba13
--- /dev/null
+++ b/tests/test_root_infect_asyncio.py
@@ -0,0 +1,248 @@
+'''
+Special attention cases for using "infect `asyncio`" mode from a root
+actor; i.e. not using a std `trio.run()` bootstrap.
+
+'''
+import asyncio
+from functools import partial
+
+import pytest
+import trio
+import tractor
+from tractor import (
+ to_asyncio,
+)
+from tests.test_infected_asyncio import (
+ aio_echo_server,
+)
+
+
+@pytest.mark.parametrize(
+ 'raise_error_mid_stream',
+ [
+ False,
+ Exception,
+ KeyboardInterrupt,
+ ],
+ ids='raise_error={}'.format,
+)
+def test_infected_root_actor(
+ raise_error_mid_stream: bool|Exception,
+
+ # conftest wide
+ loglevel: str,
+ debug_mode: bool,
+):
+ '''
+ Verify you can run the `tractor` runtime with `Actor.is_infected_aio() == True`
+ in the root actor.
+
+ '''
+ async def _trio_main():
+ with trio.fail_after(2 if not debug_mode else 999):
+ first: str
+ chan: to_asyncio.LinkedTaskChannel
+ async with (
+ tractor.open_root_actor(
+ debug_mode=debug_mode,
+ loglevel=loglevel,
+ ),
+ to_asyncio.open_channel_from(
+ aio_echo_server,
+ ) as (first, chan),
+ ):
+ assert first == 'start'
+
+ for i in range(1000):
+ await chan.send(i)
+ out = await chan.receive()
+ assert out == i
+ print(f'asyncio echoing {i}')
+
+ if (
+ raise_error_mid_stream
+ and
+ i == 500
+ ):
+ raise raise_error_mid_stream
+
+ if out is None:
+ try:
+ out = await chan.receive()
+ except trio.EndOfChannel:
+ break
+ else:
+ raise RuntimeError(
+ 'aio channel never stopped?'
+ ) + + if raise_error_mid_stream: + with pytest.raises(raise_error_mid_stream): + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + else: + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + + + +async def sync_and_err( + # just signature placeholders for compat with + # ``to_asyncio.open_channel_from()`` + to_trio: trio.MemorySendChannel, + from_trio: asyncio.Queue, + ev: asyncio.Event, + +): + if to_trio: + to_trio.send_nowait('start') + + await ev.wait() + raise RuntimeError('asyncio-side') + + +@pytest.mark.parametrize( + 'aio_err_trigger', + [ + 'before_start_point', + 'after_trio_task_starts', + 'after_start_point', + ], + ids='aio_err_triggered={}'.format +) +def test_trio_prestarted_task_bubbles( + aio_err_trigger: str, + + # conftest wide + loglevel: str, + debug_mode: bool, +): + async def pre_started_err( + raise_err: bool = False, + pre_sleep: float|None = None, + aio_trigger: asyncio.Event|None = None, + task_status=trio.TASK_STATUS_IGNORED, + ): + ''' + Maybe pre-started error then sleep. + + ''' + if pre_sleep is not None: + print(f'Sleeping from trio for {pre_sleep!r}s !') + await trio.sleep(pre_sleep) + + # signal aio-task to raise JUST AFTER this task + # starts but has not yet `.started()` + if aio_trigger: + print('Signalling aio-task to raise from `trio`!!') + aio_trigger.set() + + if raise_err: + print('Raising from trio!') + raise TypeError('trio-side') + + task_status.started() + await trio.sleep_forever() + + async def _trio_main(): + # with trio.fail_after(2): + with trio.fail_after(999): + first: str + chan: to_asyncio.LinkedTaskChannel + aio_ev = asyncio.Event() + + async with ( + tractor.open_root_actor( + debug_mode=False, + loglevel=loglevel, + ), + ): + # TODO, tests for this with 3.13 egs? + # from tractor.devx import open_crash_handler + # with open_crash_handler(): + async with ( + # where we'll start a sub-task that errors BEFORE + # calling `.started()` such that the error should + # bubble before the guest run terminates! + trio.open_nursery() as tn, + + # THEN start an infect task which should error just + # after the trio-side's task does. + to_asyncio.open_channel_from( + partial( + sync_and_err, + ev=aio_ev, + ) + ) as (first, chan), + ): + + for i in range(5): + pre_sleep: float|None = None + last_iter: bool = (i == 4) + + # TODO, missing cases? + # -[ ] error as well on + # 'after_start_point' case as well for + # another case? + raise_err: bool = False + + if last_iter: + raise_err: bool = True + + # trigger aio task to error on next loop + # tick/checkpoint + if aio_err_trigger == 'before_start_point': + aio_ev.set() + + pre_sleep: float = 0 + + await tn.start( + pre_started_err, + raise_err, + pre_sleep, + (aio_ev if ( + aio_err_trigger == 'after_trio_task_starts' + and + last_iter + ) else None + ), + ) + + if ( + aio_err_trigger == 'after_start_point' + and + last_iter + ): + aio_ev.set() + + with pytest.raises( + expected_exception=ExceptionGroup, + ) as excinfo: + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + + eg = excinfo.value + rte_eg, rest_eg = eg.split(RuntimeError) + + # ensure the trio-task's error bubbled despite the aio-side + # having (maybe) errored first. 
+ if aio_err_trigger in (
+ 'after_trio_task_starts',
+ 'after_start_point',
+ ):
+ assert len(errs := rest_eg.exceptions) == 1
+ typerr = errs[0]
+ assert (
+ type(typerr) is TypeError
+ and
+ 'trio-side' in typerr.args
+ )
+
+ # when aio errors BEFORE (last) trio task is scheduled, we should
+ # never see anything but the aio-side.
+ else:
+ assert len(rtes := rte_eg.exceptions) == 1
+ assert 'asyncio-side' in rtes[0].args[0]
diff --git a/tests/test_task_broadcasting.py b/tests/test_task_broadcasting.py
index d7a29134..4a2209eb 100644
--- a/tests/test_task_broadcasting.py
+++ b/tests/test_task_broadcasting.py
@@ -271,7 +271,7 @@ def test_faster_task_to_recv_is_cancelled_by_slower(
 # the faster subtask was cancelled
 break

- # await tractor.breakpoint()
+ # await tractor.pause()
 # await stream.receive()
 print(f'final value: {value}')
diff --git a/tests/test_trioisms.py b/tests/test_trioisms.py
index 27dc6c34..fad99f11 100644
--- a/tests/test_trioisms.py
+++ b/tests/test_trioisms.py
@@ -3,6 +3,10 @@ Reminders for oddities in `trio` that we need to stay aware of and/or
 want to see changed.

 '''
+from contextlib import (
+ asynccontextmanager as acm,
+)
+
 import pytest
 import trio
 from trio import TaskStatus
@@ -80,3 +84,115 @@ def test_stashed_child_nursery(use_start_soon):

 with pytest.raises(NameError):
 trio.run(main)
+
+
+@pytest.mark.parametrize(
+ ('unmask_from_canc', 'canc_from_finally'),
+ [
+ (True, False),
+ (True, True),
+ pytest.param(False, True,
+ marks=pytest.mark.xfail(reason="never raises!")
+ ),
+ ],
+ # TODO, ask ronny how to impl this .. XD
+ # ids='unmask_from_canc={0}, canc_from_finally={1}',#.format,
+)
+def test_acm_embedded_nursery_propagates_enter_err(
+ canc_from_finally: bool,
+ unmask_from_canc: bool,
+):
+ '''
+ Demo how a masking `trio.Cancelled` could be handled by unmasking from the
+ `.__context__` field when a user (by accident) re-raises from a `finally:`.
+
+ '''
+ import tractor
+
+ @acm
+ async def maybe_raise_from_masking_exc(
+ tn: trio.Nursery,
+ unmask_from: BaseException|None = trio.Cancelled
+
+ # TODO, maybe offer a collection?
+ # unmask_from: set[BaseException] = {
+ # trio.Cancelled,
+ # },
+ ):
+ if not unmask_from:
+ yield
+ return
+
+ try:
+ yield
+ except* unmask_from as be_eg:
+
+ # TODO, if we offer `unmask_from: set`
+ # for masker_exc_type in unmask_from:
+
+ matches, rest = be_eg.split(unmask_from)
+ if not matches:
+ raise
+
+ for exc_match in be_eg.exceptions:
+ if (
+ (exc_ctx := exc_match.__context__)
+ and
+ type(exc_ctx) not in {
+ # trio.Cancelled, # always by default?
+ unmask_from,
+ }
+ ):
+ exc_ctx.add_note(
+ f'\n'
+ f'WARNING: the above error was masked by a {unmask_from!r} !?!\n'
+ f'Are you always cancelling?
Say from a `finally:` ?\n\n' + + f'{tn!r}' + ) + raise exc_ctx from exc_match + + + @acm + async def wraps_tn_that_always_cancels(): + async with ( + trio.open_nursery() as tn, + maybe_raise_from_masking_exc( + tn=tn, + unmask_from=( + trio.Cancelled + if unmask_from_canc + else None + ), + ) + ): + try: + yield tn + finally: + if canc_from_finally: + tn.cancel_scope.cancel() + await trio.lowlevel.checkpoint() + + async def _main(): + with tractor.devx.open_crash_handler() as bxerr: + assert not bxerr.value + + async with ( + wraps_tn_that_always_cancels() as tn, + ): + assert not tn.cancel_scope.cancel_called + assert 0 + + assert ( + (err := bxerr.value) + and + type(err) is AssertionError + ) + + with pytest.raises(ExceptionGroup) as excinfo: + trio.run(_main) + + eg: ExceptionGroup = excinfo.value + assert_eg, rest_eg = eg.split(AssertionError) + + assert len(assert_eg.exceptions) == 1 diff --git a/tractor/_context.py b/tractor/_context.py index 31db2bad..d4cad88e 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1703,15 +1703,28 @@ class Context: # TODO: expose as mod func instead! structfmt = pretty_struct.Struct.pformat if self._in_overrun: - log.warning( - f'Queueing OVERRUN msg on caller task:\n\n' - + report: str = ( f'{flow_body}' - f'{structfmt(msg)}\n' ) + over_q: deque = self._overflow_q self._overflow_q.append(msg) + if len(over_q) == over_q.maxlen: + report = ( + 'FAILED to queue OVERRUN msg, OVERAN the OVERRUN QUEUE !!\n\n' + + report + ) + # log.error(report) + log.debug(report) + + else: + report = ( + 'Queueing OVERRUN msg on caller task:\n\n' + + report + ) + log.debug(report) + # XXX NOTE XXX # overrun is the ONLY case where returning early is fine! return False diff --git a/tractor/_entry.py b/tractor/_entry.py index a072706c..19dcb9f6 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -20,6 +20,7 @@ Sub-process entry points. """ from __future__ import annotations from functools import partial +import multiprocessing as mp import os import textwrap from typing import ( @@ -64,20 +65,22 @@ def _mp_main( ''' actor._forkserver_info = forkserver_info from ._spawn import try_set_start_method - spawn_ctx = try_set_start_method(start_method) + spawn_ctx: mp.context.BaseContext = try_set_start_method(start_method) + assert spawn_ctx if actor.loglevel is not None: log.info( - f"Setting loglevel for {actor.uid} to {actor.loglevel}") + f'Setting loglevel for {actor.uid} to {actor.loglevel}' + ) get_console_log(actor.loglevel) - assert spawn_ctx + # TODO: use scops headers like for `trio` below! + # (well after we libify it maybe..) log.info( - f"Started new {spawn_ctx.current_process()} for {actor.uid}") - - _state._current_actor = actor - - log.debug(f"parent_addr is {parent_addr}") + f'Started new {spawn_ctx.current_process()} for {actor.uid}' + # f"parent_addr is {parent_addr}" + ) + _state._current_actor: Actor = actor trio_main = partial( async_main, actor=actor, @@ -94,7 +97,9 @@ def _mp_main( pass # handle it the same way trio does? 
finally:
- log.info(f"Subactor {actor.uid} terminated")
+ log.info(
+ f'`mp`-subactor {actor.uid} exited'
+ )


 # TODO: move this func to some kinda `.devx._conc_lang.py` eventually
diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 108134ca..f90df5fe 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -82,6 +82,48 @@ class InternalError(RuntimeError):
 '''


+class AsyncioCancelled(Exception):
+ '''
+ Asyncio cancelled translation (non-base) error
+ for use with the ``to_asyncio`` module
+ to be raised in the ``trio`` side task
+
+ NOTE: this should NOT inherit from `asyncio.CancelledError` or
+ tests should break!
+
+ '''
+
+
+class AsyncioTaskExited(Exception):
+ '''
+ asyncio.Task "exited" translation error for use with the
+ `to_asyncio` APIs to be raised in the `trio` side task indicating
+ on `.run_task()`/`.open_channel_from()` exit that the aio side
+ exited early/silently.
+
+ '''
+
+class TrioCancelled(Exception):
+ '''
+ Trio cancelled translation (non-base) error
+ for use with the `to_asyncio` module
+ to be raised in the `asyncio.Task` to indicate
+ that the `trio` side raised `Cancelled` or an error.
+
+ '''
+
+class TrioTaskExited(Exception):
+ '''
+ The `trio`-side task exited without explicitly cancelling the
+ `asyncio.Task` peer.
+
+ This is very similar to how `trio.ClosedResource` acts as
+ a "clean shutdown" signal to the consumer side of a mem-chan,
+
+ https://trio.readthedocs.io/en/stable/reference-core.html#clean-shutdown-with-channels
+
+ '''
+

 # NOTE: more or less should be close to these:
 # 'boxed_type',
@@ -127,8 +169,8 @@ _body_fields: list[str] = list(

 def get_err_type(type_name: str) -> BaseException|None:
 '''
- Look up an exception type by name from the set of locally
- known namespaces:
+ Look up an exception type by name from the set of locally known
+ namespaces:

 - `builtins`
 - `tractor._exceptions`
@@ -358,6 +400,13 @@ class RemoteActorError(Exception):
 self._ipc_msg.src_type_str
 )

+ if not self._src_type:
+ raise TypeError(
+ f'Failed to lookup src error type with '
+ f'`tractor._exceptions.get_err_type()` :\n'
+ f'{self.src_type_str}'
+ )
+
 return self._src_type

 @property
@@ -366,6 +415,9 @@ class RemoteActorError(Exception):
 String-name of the (last hop's) boxed error type.

 '''
+ # TODO, maybe support also serializing the
+ # `ExceptionGroup.exceptions: list[BaseException]` set under
+ # certain conditions?
 bt: Type[BaseException] = self.boxed_type
 if bt:
 return str(bt.__name__)
@@ -609,6 +661,7 @@ class RemoteActorError(Exception):
 # just after <Type(
def is_multi_cancelled(
- exc: BaseException,
-) -> bool:
+ exc: BaseException|BaseExceptionGroup,
+
+ ignore_nested: set[BaseException] = set(),
+
+) -> bool|BaseExceptionGroup:
 '''
- Predicate to determine if a possible ``BaseExceptionGroup`` contains
- only ``trio.Cancelled`` sub-exceptions (and is likely the result of
- cancelling a collection of subtasks.
+ Predicate to determine if a `BaseExceptionGroup` only contains
+ some (maybe nested) set of sub-grouped exceptions (like only
+ `trio.Cancelled`s which get swallowed silently by default) and is
+ thus the result of "gracefully cancelling" a collection of
+ sub-tasks (or other conc primitives) and receiving a "cancelled
+ ACK" from each after termination.
+
+ Docs:
+ ----
+ - https://docs.python.org/3/library/exceptions.html#exception-groups
+ - https://docs.python.org/3/library/exceptions.html#BaseExceptionGroup.subgroup

 '''
+
+ if (
+ not ignore_nested
+ or
+ trio.Cancelled in ignore_nested
+ # XXX always count-in `trio`'s native signal
+ ):
+ ignore_nested.update({trio.Cancelled})
+
 if isinstance(exc, BaseExceptionGroup):
- return exc.subgroup(
- lambda exc: isinstance(exc, trio.Cancelled)
- ) is not None
+ matched_exc: BaseExceptionGroup|None = exc.subgroup(
+ tuple(ignore_nested),
+ # TODO, complain about why not allowed XD
+ # condition=tuple(ignore_nested),
+ )
+ if matched_exc is not None:
+ return matched_exc
+
+ # NOTE, IFF no exc types match (throughout the error-tree)
+ # -> return `False`, OW return the matched sub-eg.
+ #
+ # IOW, for the inverse of ^ for the purpose of
+ # maybe-enter-REPL--logic: "only debug when the err-tree contains
+ # at least one exc-type NOT in `ignore_nested`" ; i.e. the case where
+ # we fallthrough and return `False` here.
 return False
@@ -1375,7 +1447,9 @@ def _mk_recv_mte(
 any_pld: Any = msgpack.decode(msg.pld)
 message: str = (
 f'invalid `{msg_type.__qualname__}` msg payload\n\n'
- f'value: `{any_pld!r}` does not match type-spec: '
+ f'{any_pld!r}\n\n'
+ f'has type {type(any_pld)!r}\n\n'
+ f'and does not match type-spec '
 f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`'
 )
 bad_msg = msg
diff --git a/tractor/_root.py b/tractor/_root.py
index 882285a5..e10b02ef 100644
--- a/tractor/_root.py
+++ b/tractor/_root.py
@@ -80,7 +80,7 @@ async def open_root_actor(

 # enables the multi-process debugger support
 debug_mode: bool = False,
- maybe_enable_greenback: bool = False, # `.pause_from_sync()/breakpoint()` support
+ maybe_enable_greenback: bool = True, # `.pause_from_sync()/breakpoint()` support
 enable_stack_on_sig: bool = False,

 # internal logging
@@ -95,6 +95,17 @@ async def open_root_actor(

 hide_tb: bool = True,

+ # XXX, proxied directly to `.devx._debug._maybe_enter_pm()`
+ # for REPL-entry logic.
+ debug_filter: Callable[
+ [BaseException|BaseExceptionGroup],
+ bool,
+ ] = lambda err: not is_multi_cancelled(err),
+
+ # TODO, a way for actors to augment passing derived
+ # read-only state to sublayers?
+ # extra_rt_vars: dict|None = None,
+
) -> Actor:
 '''
 Runtime init entry point for ``tractor``.
@@ -233,14 +244,8 @@ async def open_root_actor(
 and
 enable_stack_on_sig
 ):
- try:
- logger.info('Enabling `stackscope` traces on SIGUSR1')
- from .devx import enable_stack_on_sig
- enable_stack_on_sig()
- except ImportError:
- logger.warning(
- '`stackscope` not installed for use in debug mode!'
- )
+ from .devx._stackscope import enable_stack_on_sig
+ enable_stack_on_sig()

 # closed into below ping task-func
 ponged_addrs: list[tuple[str, int]] = []
@@ -336,6 +341,10 @@ async def open_root_actor(
 loglevel=loglevel,
 enable_modules=enable_modules,
 )
+ # XXX, in case the root actor runtime was actually run from
+ # `tractor.to_asyncio.run_as_asyncio_guest()` and NOT
+ # `trio.run()`.
+ actor._infected_aio = _state._runtime_vars['_is_infected_aio']

 # Start up main task set via core actor-runtime nurseries.
try: @@ -377,6 +386,7 @@ async def open_root_actor( Exception, BaseExceptionGroup, ) as err: + # XXX NOTE XXX see equiv note inside # `._runtime.Actor._stream_handler()` where in the # non-root or root-that-opened-this-mahually case we @@ -385,11 +395,15 @@ async def open_root_actor( entered: bool = await _debug._maybe_enter_pm( err, api_frame=inspect.currentframe(), + debug_filter=debug_filter, ) + if ( not entered and - not is_multi_cancelled(err) + not is_multi_cancelled( + err, + ) ): logger.exception('Root actor crashed\n') diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 662dd67a..7a00d613 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -59,6 +59,7 @@ from types import ModuleType import warnings import trio +from trio._core import _run as trio_runtime from trio import ( CancelScope, Nursery, @@ -80,6 +81,7 @@ from ._context import ( from .log import get_logger from ._exceptions import ( ContextCancelled, + InternalError, ModuleNotExposed, MsgTypeError, unpack_error, @@ -98,6 +100,7 @@ from ._rpc import ( if TYPE_CHECKING: from ._supervise import ActorNursery + from trio._channel import MemoryChannelState log = get_logger('tractor') @@ -896,11 +899,15 @@ class Actor: f'peer: {chan.uid}\n' f'cid:{cid}\n' ) - ctx._allow_overruns = allow_overruns + ctx._allow_overruns: bool = allow_overruns # adjust buffer size if specified - state = ctx._send_chan._state # type: ignore - if msg_buffer_size and state.max_buffer_size != msg_buffer_size: + state: MemoryChannelState = ctx._send_chan._state # type: ignore + if ( + msg_buffer_size + and + state.max_buffer_size != msg_buffer_size + ): state.max_buffer_size = msg_buffer_size except KeyError: @@ -1094,7 +1101,36 @@ class Actor: '`tractor.pause_from_sync()` not available!' ) - rvs['_is_root'] = False + # XXX ensure the "infected `asyncio` mode" setting + # passed down from our spawning parent is consistent + # with `trio`-runtime initialization: + # - during sub-proc boot, the entrypoint func + # (`._entry._main()`) should set + # `._infected_aio = True` before calling + # `run_as_asyncio_guest()`, + # - the value of `infect_asyncio: bool = True` as + # passed to `ActorNursery.start_actor()` must be + # the same as `_runtime_vars['_is_infected_aio']` + if ( + (aio_rtv := rvs['_is_infected_aio']) + != + (aio_attr := self._infected_aio) + ): + raise InternalError( + 'Parent sent runtime-vars that mismatch for the ' + '"infected `asyncio` mode" settings ?!?\n\n' + + f'rvs["_is_infected_aio"] = {aio_rtv}\n' + f'self._infected_aio = {aio_attr}\n' + ) + if aio_rtv: + assert trio_runtime.GLOBAL_RUN_CONTEXT.runner.is_guest + # ^TODO^ possibly add a `sniffio` or + # `trio` pub-API for `is_guest_mode()`? 
+ + rvs['_is_root'] = False # obvi XD + + # update process-wide globals _state._runtime_vars.update(rvs) # XXX: ``msgspec`` doesn't support serializing tuples diff --git a/tractor/_state.py b/tractor/_state.py index 9f896005..a87ad36b 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -44,6 +44,8 @@ _runtime_vars: dict[str, Any] = { '_root_mailbox': (None, None), '_registry_addrs': [], + '_is_infected_aio': False, + # for `tractor.pause_from_sync()` & `breakpoint()` support 'use_greenback': False, } @@ -70,7 +72,8 @@ def current_actor( ''' if ( err_on_no_runtime - and _current_actor is None + and + _current_actor is None ): msg: str = 'No local actor has been initialized yet?\n' from ._exceptions import NoRuntime diff --git a/tractor/_supervise.py b/tractor/_supervise.py index 4a538e9f..de268078 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -158,6 +158,7 @@ class ActorNursery: # configure and pass runtime state _rtv = _state._runtime_vars.copy() _rtv['_is_root'] = False + _rtv['_is_infected_aio'] = infect_asyncio # allow setting debug policy per actor if debug_mode is not None: diff --git a/tractor/_testing/__init__.py b/tractor/_testing/__init__.py index fd79fe20..1f6624e9 100644 --- a/tractor/_testing/__init__.py +++ b/tractor/_testing/__init__.py @@ -54,6 +54,25 @@ def examples_dir() -> pathlib.Path: return repodir() / 'examples' +def mk_cmd( + ex_name: str, + exs_subpath: str = 'debugging', +) -> str: + ''' + Generate a shell command suitable to pass to ``pexpect.spawn()``. + + ''' + script_path: pathlib.Path = ( + examples_dir() + / exs_subpath + / f'{ex_name}.py' + ) + return ' '.join([ + 'python', + str(script_path) + ]) + + @acm async def expect_ctxc( yay: bool, diff --git a/tractor/devx/__init__.py b/tractor/devx/__init__.py index cfcff931..7047dbdb 100644 --- a/tractor/devx/__init__.py +++ b/tractor/devx/__init__.py @@ -26,7 +26,7 @@ from ._debug import ( breakpoint as breakpoint, pause as pause, pause_from_sync as pause_from_sync, - shield_sigint_handler as shield_sigint_handler, + sigint_shield as sigint_shield, open_crash_handler as open_crash_handler, maybe_open_crash_handler as maybe_open_crash_handler, maybe_init_greenback as maybe_init_greenback, diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 1135932c..04df000f 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -20,6 +20,7 @@ Multi-core debugging for da peeps! 
""" from __future__ import annotations +import asyncio import bdb from contextlib import ( asynccontextmanager as acm, @@ -67,9 +68,15 @@ from trio import ( TaskStatus, ) import tractor +from tractor.to_asyncio import run_trio_task_in_future from tractor.log import get_logger from tractor._context import Context from tractor import _state +from tractor._exceptions import ( + InternalError, + NoRuntime, + is_multi_cancelled, +) from tractor._state import ( current_actor, is_root_process, @@ -296,10 +303,9 @@ class Lock: ) @classmethod - @pdbp.hideframe + # @pdbp.hideframe def release( cls, - force: bool = False, raise_on_thread: bool = True, ) -> bool: @@ -311,48 +317,47 @@ class Lock: we_released: bool = False ctx_in_debug: Context|None = cls.ctx_in_debug repl_task: Task|Thread|None = DebugStatus.repl_task - if not DebugStatus.is_main_trio_thread(): - thread: threading.Thread = threading.current_thread() - message: str = ( - '`Lock.release()` can not be called from a non-main-`trio` thread!\n' - f'{thread}\n' - ) - if raise_on_thread: - raise RuntimeError(message) - - log.devx(message) - return False - - task: Task = current_task() - - # sanity check that if we're the root actor - # the lock is marked as such. - # note the pre-release value may be diff the the - # post-release task. - if repl_task is task: - assert cls._owned_by_root - message: str = ( - 'TTY lock held by root-actor on behalf of local task\n' - f'|_{repl_task}\n' - ) - else: - assert DebugStatus.repl_task is not task - - message: str = ( - 'TTY lock was NOT released on behalf of caller\n' - f'|_{task}\n' - ) + message: str = '' try: + if not DebugStatus.is_main_trio_thread(): + thread: threading.Thread = threading.current_thread() + message: str = ( + '`Lock.release()` can not be called from a non-main-`trio` thread!\n' + f'{thread}\n' + ) + if raise_on_thread: + raise RuntimeError(message) + + log.devx(message) + return False + + task: Task = current_task() + + # sanity check that if we're the root actor + # the lock is marked as such. + # note the pre-release value may be diff the the + # post-release task. + if repl_task is task: + assert cls._owned_by_root + message: str = ( + 'TTY lock held by root-actor on behalf of local task\n' + f'|_{repl_task}\n' + ) + else: + assert DebugStatus.repl_task is not task + + message: str = ( + 'TTY lock was NOT released on behalf of caller\n' + f'|_{task}\n' + ) + lock: trio.StrictFIFOLock = cls._debug_lock owner: Task = lock.statistics().owner if ( - (lock.locked() or force) - # ^-TODO-NOTE-^ should we just remove this, since the - # RTE case above will always happen when you force - # from the wrong task? - - and (owner is task) + lock.locked() + and + (owner is task) # ^-NOTE-^ if we do NOT ensure this, `trio` will # raise a RTE when a non-owner tries to releasee the # lock. 
@@ -413,9 +418,9 @@ class Lock:
 repl_task
 )
 message += (
- f'\nA non-caller task still owns this lock on behalf of '
- f'{behalf_of_task}\n'
- f'|_{lock_stats.owner}\n'
+ f'A non-caller task still owns this lock on behalf of '
+ f'`{behalf_of_task}`\n'
+ f'lock owner task: {lock_stats.owner}\n'
 )

 if (
@@ -440,7 +445,10 @@ class Lock:
 f'|_{repl_task}\n'
 )

- log.devx(message)
+ if message:
+ log.devx(message)
+ else:
+ import pdbp; pdbp.set_trace()

 return we_released
@@ -527,6 +535,10 @@ class Lock:
 )


+def get_lock() -> Lock:
+ return Lock
+
+
 @tractor.context(
 # enable the locking msgspec
 pld_spec=__pld_spec__,
@@ -553,6 +565,7 @@ async def lock_stdio_for_peer(
 # can try to avoid clobbering any connection from a child
 # that's currently relying on it.
 we_finished = Lock.req_handler_finished = trio.Event()
+ lock_blocked: bool = False
 try:
 if ctx.cid in Lock._blocked:
 raise RuntimeError(
@@ -565,7 +578,8 @@ async def lock_stdio_for_peer(
 'Consider that an internal bug exists given the TTY '
 '`Lock`ing IPC dialog..\n'
 )
-
+ Lock._blocked.add(ctx.cid)
+ lock_blocked = True
 root_task_name: str = current_task().name
 if tuple(subactor_uid) in Lock._blocked:
 log.warning(
@@ -575,7 +589,11 @@ async def lock_stdio_for_peer(
 )
 ctx._enter_debugger_on_cancel: bool = False
 message: str = (
- f'Debug lock blocked for {subactor_uid}\n'
+ f'Debug lock blocked for subactor\n\n'
+ f'x)<= {subactor_uid}\n\n'
+
+ f'Likely because the root actor already started shutdown and is '
+ 'closing IPC connections for this child!\n\n'
 'Cancelling debug request!\n'
 )
 log.cancel(message)
@@ -589,7 +607,6 @@ async def lock_stdio_for_peer(
 f'remote task: {subactor_task_uid}\n'
 )
 DebugStatus.shield_sigint()
- Lock._blocked.add(ctx.cid)

 # NOTE: we use the IPC ctx's cancel scope directly in order to
 # ensure that on any transport failure, or cancellation request
@@ -648,31 +665,34 @@ async def lock_stdio_for_peer(
 )

 except BaseException as req_err:
- message: str = (
- f'On behalf of remote peer {subactor_task_uid!r}@{ctx.chan.uid!r}\n\n'
- 'Forcing `Lock.release()` for req-ctx since likely an '
- 'internal error!\n\n'
- f'{ctx}'
+ fail_reason: str = (
+ f'on behalf of peer\n\n'
+ f'x)<=\n'
+ f' |_{subactor_task_uid!r}@{ctx.chan.uid!r}\n\n'
+
+ 'Forcing `Lock.release()` due to acquire failure!\n\n'
+ f'x)=> {ctx}\n'
 )
 if isinstance(req_err, trio.Cancelled):
- message = (
- 'Cancelled during root TTY-lock dialog\n'
+ fail_reason = (
+ 'Cancelled during stdio-mutex request '
 +
- message
+ fail_reason
 )
 else:
- message = (
- 'Errored during root TTY-lock dialog\n'
+ fail_reason = (
+ 'Failed to deliver stdio-mutex request '
 +
- message
+ fail_reason
 )

- log.exception(message)
- Lock.release() #force=True)
+ log.exception(fail_reason)
+ Lock.release()
 raise

 finally:
- Lock._blocked.remove(ctx.cid)
+ if lock_blocked:
+ Lock._blocked.remove(ctx.cid)

 # wakeup any waiters since the lock was (presumably)
 # released, possibly only temporarily.
@@ -680,6 +700,14 @@ async def lock_stdio_for_peer(
 DebugStatus.unshield_sigint()


+class DebugStateError(InternalError):
+ '''
+ Something inconsistent or unexpected happened with a sub-actor's
+ debug mutex request to the root actor.
+
+ '''
+
+
# TODO: rename to ReplState or somethin?
# DebugRequest, make it a singleton instance?
class DebugStatus:
@@ -707,6 +735,9 @@ class DebugStatus:
 # -[ ] see if we can get our proto oco task-mngr to work for
 # this?
 repl_task: Task|None = None
+ # repl_thread: Thread|None = None
+ # ^TODO?
+ repl_release: trio.Event|None = None req_task: Task|None = None @@ -780,17 +811,24 @@ class DebugStatus: # in which case schedule the SIGINT shielding override # to in the main thread. # https://docs.python.org/3/library/signal.html#signals-and-threads - if not cls.is_main_trio_thread(): + if ( + not cls.is_main_trio_thread() + and + not _state._runtime_vars.get( + '_is_infected_aio', + False, + ) + ): cls._orig_sigint_handler: Callable = trio.from_thread.run_sync( signal.signal, signal.SIGINT, - shield_sigint_handler, + sigint_shield, ) else: cls._orig_sigint_handler = signal.signal( signal.SIGINT, - shield_sigint_handler, + sigint_shield, ) @classmethod @@ -805,7 +843,17 @@ class DebugStatus: # always restore ``trio``'s sigint handler. see notes below in # the pdb factory about the nightmare that is that code swapping # out the handler when the repl activates... - if not cls.is_main_trio_thread(): + # if not cls.is_main_trio_thread(): + if ( + not cls.is_main_trio_thread() + and + not _state._runtime_vars.get( + '_is_infected_aio', + False, + ) + # not current_actor().is_infected_aio() + # ^XXX, since for bg-thr case will always raise.. + ): trio.from_thread.run_sync( signal.signal, signal.SIGINT, @@ -833,20 +881,37 @@ class DebugStatus: `trio.to_thread.run_sync()`. ''' + try: + async_lib: str = sniffio.current_async_library() + except sniffio.AsyncLibraryNotFoundError: + async_lib = None + + is_main_thread: bool = trio._util.is_main_thread() + # ^TODO, since this is private, @oremanj says + # we should just copy the impl for now..? + if is_main_thread: + thread_name: str = 'main' + else: + thread_name: str = threading.current_thread().name + is_trio_main = ( - # TODO: since this is private, @oremanj says - # we should just copy the impl for now.. - (is_main_thread := trio._util.is_main_thread()) + is_main_thread and - (async_lib := sniffio.current_async_library()) == 'trio' + (async_lib == 'trio') ) - if ( - not is_trio_main - and is_main_thread - ): - log.warning( + + report: str = f'Running thread: {thread_name!r}\n' + if async_lib: + report += ( f'Current async-lib detected by `sniffio`: {async_lib}\n' ) + else: + report += ( + 'No async-lib detected (by `sniffio`) ??\n' + ) + if not is_trio_main: + log.warning(report) + return is_trio_main # XXX apparently unreliable..see ^ # ( @@ -863,7 +928,7 @@ class DebugStatus: return False @classmethod - @pdbp.hideframe + # @pdbp.hideframe def release( cls, cancel_req_task: bool = False, @@ -872,17 +937,51 @@ class DebugStatus: try: # sometimes the task might already be terminated in # which case this call will raise an RTE? + # See below for reporting on that.. if ( repl_release is not None + and + not repl_release.is_set() ): if cls.is_main_trio_thread(): repl_release.set() + + elif ( + _state._runtime_vars.get( + '_is_infected_aio', + False, + ) + # ^XXX, again bc we need to not except + # but for bg-thread case it will always raise.. + # + # TODO, is there a better api then using + # `err_on_no_runtime=False` in the below? + # current_actor().is_infected_aio() + ): + async def _set_repl_release(): + repl_release.set() + + fute: asyncio.Future = run_trio_task_in_future( + _set_repl_release + ) + if not fute.done(): + log.warning('REPL release state unknown..?') + else: # XXX NOTE ONLY used for bg root-actor sync # threads, see `.pause_from_sync()`. 
trio.from_thread.run_sync( repl_release.set ) + + except RuntimeError as rte: + log.exception( + f'Failed to release debug-request ??\n\n' + f'{cls.repr()}\n' + ) + # pdbp.set_trace() + raise rte + finally: # if req_ctx := cls.req_ctx: # req_ctx._scope.cancel() @@ -896,10 +995,29 @@ class DebugStatus: # actor-local state, irrelevant for non-root. cls.repl_task = None + + # XXX WARNING needs very special caughtion, and we should + # prolly make a more explicit `@property` API? + # + # - if unset in root multi-threaded case can cause + # issues with detecting that some root thread is + # using a REPL, + # + # - what benefit is there to unsetting, it's always + # set again for the next task in some actor.. + # only thing would be to avoid in the sigint-handler + # logging when we don't need to? cls.repl = None - # restore original sigint handler - cls.unshield_sigint() + # maybe restore original sigint handler + # XXX requires runtime check to avoid crash! + if current_actor(err_on_no_runtime=False): + cls.unshield_sigint() + + +# TODO: use the new `@lowlevel.singleton` for this! +def get_debug_req() -> DebugStatus|None: + return DebugStatus class TractorConfig(pdbp.DefaultConfig): @@ -982,7 +1100,7 @@ class PdbREPL(pdbp.Pdb): # Lock.release(raise_on_thread=False) Lock.release() - # XXX after `Lock.release()` for root local repl usage + # XXX AFTER `Lock.release()` for root local repl usage DebugStatus.release() def set_quit(self): @@ -1167,7 +1285,7 @@ async def request_root_stdio_lock( ): log.cancel( 'Debug lock request was CANCELLED?\n\n' - f'{req_ctx}\n' + f'<=c) {req_ctx}\n' # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' # f'{pformat_cs(req_ctx._scope, var_name="req_ctx._scope")}\n\n' ) @@ -1179,22 +1297,26 @@ async def request_root_stdio_lock( message: str = ( 'Failed during debug request dialog with root actor?\n\n' ) - - if req_ctx: + if (req_ctx := DebugStatus.req_ctx): message += ( - f'{req_ctx}\n' + f'<=x) {req_ctx}\n\n' f'Cancelling IPC ctx!\n' ) - await req_ctx.cancel() + try: + await req_ctx.cancel() + except trio.ClosedResourceError as terr: + ctx_err.add_note( + # f'Failed with {type(terr)!r} x)> `req_ctx.cancel()` ' + f'Failed with `req_ctx.cancel()` bool: return False -def shield_sigint_handler( +_ctlc_ignore_header: str = ( + 'Ignoring SIGINT while debug REPL in use' +) + +def sigint_shield( signum: int, frame: 'frame', # type: ignore # noqa *args, @@ -1342,13 +1469,17 @@ def shield_sigint_handler( # root actor branch that reports whether or not a child # has locked debugger. if is_root_process(): + # log.warning( + log.devx( + 'Handling SIGINT in root actor\n' + f'{Lock.repr()}' + f'{DebugStatus.repr()}\n' + ) # try to see if the supposed (sub)actor in debug still # has an active connection to *this* actor, and if not # it's likely they aren't using the TTY lock / debugger # and we should propagate SIGINT normally. any_connected: bool = any_connected_locker_child() - # if not any_connected: - # return do_cancel() problem = ( f'root {actor.uid} handling SIGINT\n' @@ -1379,7 +1510,9 @@ def shield_sigint_handler( # NOTE: don't emit this with `.pdb()` level in # root without a higher level. log.runtime( - f'Ignoring SIGINT while debug REPL in use by child ' + _ctlc_ignore_header + + + f' by child ' f'{uid_in_debug}\n' ) problem = None @@ -1397,19 +1530,27 @@ def shield_sigint_handler( # an actor using the `Lock` (a bug state) ?? # => so immediately cancel any stale lock cs and revert # the handler! 
- if not repl: + if not DebugStatus.repl: # TODO: WHEN should we revert back to ``trio`` # handler if this one is stale? # -[ ] maybe after a counts work of ctl-c mashes? # -[ ] use a state var like `stale_handler: bool`? problem += ( - '\n' 'No subactor is using a `pdb` REPL according `Lock.ctx_in_debug`?\n' - 'BUT, the root should be using it, WHY this handler ??\n' + 'BUT, the root should be using it, WHY this handler ??\n\n' + 'So either..\n' + '- some root-thread is using it but has no `.repl` set?, OR\n' + '- something else weird is going on outside the runtime!?\n' ) else: + # NOTE: since we emit this msg on ctl-c, we should + # also always re-print the prompt the tail block! log.pdb( - 'Ignoring SIGINT while pdb REPL in use by root actor..\n' + _ctlc_ignore_header + + + f' by root actor..\n' + f'{DebugStatus.repl_task}\n' + f' |_{repl}\n' ) problem = None @@ -1459,7 +1600,6 @@ def shield_sigint_handler( 'Allowing SIGINT propagation..' ) DebugStatus.unshield_sigint() - # do_cancel() repl_task: str|None = DebugStatus.repl_task req_task: str|None = DebugStatus.req_task @@ -1469,15 +1609,24 @@ def shield_sigint_handler( repl ): log.pdb( - f'Ignoring SIGINT while local task using debug REPL\n' - f'|_{repl_task}\n' - f' |_{repl}\n' + _ctlc_ignore_header + + + f' by local task\n\n' + f'{repl_task}\n' + f' |_{repl}\n' ) elif req_task: - log.pdb( - f'Ignoring SIGINT while debug request task is open\n' - f'|_{req_task}\n' + log.debug( + _ctlc_ignore_header + + + f' by local request-task and either,\n' + f'- someone else is already REPL-in and has the `Lock`, or\n' + f'- some other local task already is replin?\n\n' + f'{req_task}\n' ) + + # TODO can we remove this now? + # -[ ] does this path ever get hit any more? else: msg: str = ( 'SIGINT shield handler still active BUT, \n\n' @@ -1513,37 +1662,53 @@ def shield_sigint_handler( # https://github.com/goodboy/tractor/issues/320 # elif debug_mode(): - # NOTE: currently (at least on ``fancycompleter`` 0.9.2) - # it looks to be that the last command that was run (eg. ll) - # will be repeated by default. - # maybe redraw/print last REPL output to console since # we want to alert the user that more input is expect since # nothing has been done dur to ignoring sigint. if ( - repl # only when current actor has a REPL engaged + DebugStatus.repl # only when current actor has a REPL engaged ): + flush_status: str = ( + 'Flushing stdout to ensure new prompt line!\n' + ) + # XXX: yah, mega hack, but how else do we catch this madness XD - if repl.shname == 'xonsh': + if ( + repl.shname == 'xonsh' + ): + flush_status += ( + '-> ALSO re-flushing due to `xonsh`..\n' + ) repl.stdout.write(repl.prompt) + # log.warning( + log.devx( + flush_status + ) repl.stdout.flush() - # TODO: make this work like sticky mode where if there is output - # detected as written to the tty we redraw this part underneath - # and erase the past draw of this same bit above? + # TODO: better console UX to match the current "mode": + # -[ ] for example if in sticky mode where if there is output + # detected as written to the tty we redraw this part underneath + # and erase the past draw of this same bit above? 
# repl.sticky = True
 # repl._print_if_sticky()

- # also see these links for an approach from ``ptk``:
+ # also see these links for an approach from `ptk`:
 # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
 # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
+ else:
+ log.devx(
+ # log.warning(
+ 'Not flushing stdout since not needed?\n'
+ f'|_{repl}\n'
+ )

 # XXX only for tracing this handler
 log.devx('exiting SIGINT')


-_pause_msg: str = 'Attaching to pdb REPL in actor'
+_pause_msg: str = 'Opening a pdb REPL in paused actor'


 class DebugRequestError(RuntimeError):
@@ -1553,7 +1718,7 @@ class DebugRequestError(RuntimeError):
 '''


-_repl_fail_msg: str = (
+_repl_fail_msg: str|None = (
 'Failed to REPL via `_pause()` '
 )
@@ -1583,7 +1748,7 @@ async def _pause(
 ] = trio.TASK_STATUS_IGNORED,

 **debug_func_kwargs,

-) -> tuple[PdbREPL, Task]|None:
+) -> tuple[Task, PdbREPL]|None:
 '''
 Inner impl for `pause()` to avoid the `trio.CancelScope.__exit__()`
 stack frame when not shielded (since apparently i can't figure out
@@ -1593,22 +1758,29 @@ async def _pause(

 '''
 __tracebackhide__: bool = hide_tb
+ pause_err: BaseException|None = None
 actor: Actor = current_actor()
 try:
 task: Task = current_task()
 except RuntimeError as rte:
+ # NOTE, 2 cases we might get here:
+ #
+ # - ACTUALLY not a `trio.lowlevel.Task` nor runtime caller,
+ # |_ error out as normal
+ #
+ # - an infected `asyncio` actor calls it from an actual
+ # `asyncio.Task`
+ # |_ in this case we DO NOT want to RTE!
 __tracebackhide__: bool = False
- log.exception(
- 'Failed to get current `trio`-task?'
- )
- # if actor.is_infected_aio():
- # mk_pdb().set_trace()
- # raise RuntimeError(
- # '`tractor.pause[_from_sync]()` not yet supported '
- # 'directly (infected) `asyncio` tasks!'
- # ) from rte
-
- raise
+ if actor.is_infected_aio():
+ log.exception(
+ 'Failed to get current `trio`-task?'
+ )
+ raise RuntimeError(
+ 'An `asyncio` task should not be calling this!?'
+ ) from rte
+ else:
+ task = asyncio.current_task()

 if debug_func is not None:
 debug_func = partial(debug_func)
@@ -1616,9 +1788,13 @@ async def _pause(
 # XXX NOTE XXX set it here to avoid ctl-c from cancelling a debug
 # request from a subactor BEFORE the REPL is entered by that
 # process.
- if not repl:
+ if (
+ not repl
+ and
+ debug_func
+ ):
+ repl: PdbREPL = mk_pdb()
 DebugStatus.shield_sigint()
- repl: PdbREPL = repl or mk_pdb()

 # TODO: move this into a `open_debug_request()` @acm?
 # -[ ] prolly makes the most sense to do the request
 # recurrent entries/requests from the same
 # actor-local task.
 DebugStatus.repl_task = task
- DebugStatus.repl = repl
+ if repl:
+ DebugStatus.repl = repl
+ else:
+ log.error(
+ 'No REPL instance set before entering `debug_func`?\n'
+ f'{debug_func}\n'
+ )

 # invoke the low-level REPL activation routine which itself
 # should call into a `Pdb.set_trace()` of some sort.
@@ -1752,7 +1934,7 @@ async def _pause(
 )
 with trio.CancelScope(shield=shield):
 await trio.lowlevel.checkpoint()
- return repl, task
+ return (repl, task)

 # elif repl_task:
 # log.warning(
@@ -1959,11 +2141,13 @@ async def _pause(

 # TODO: prolly factor this plus the similar block from
 # `_enter_repl_sync()` into a common @cm?
- except BaseException as pause_err:
+ except BaseException as _pause_err:
+ pause_err: BaseException = _pause_err
 if isinstance(pause_err, bdb.BdbQuit):
 log.devx(
- 'REPL for pdb was quit!\n'
+ 'REPL for pdb was explicitly quit!\n'
 )
+ _repl_fail_msg = None

 # when the actor is mid-runtime cancellation the
 # `Actor._service_n` might get closed before we can spawn
@@ -1982,26 +2166,32 @@ async def _pause(
 )
 return

- else:
- log.exception(
- _repl_fail_msg
- +
- f'on behalf of {repl_task} ??\n'
+ elif isinstance(pause_err, trio.Cancelled):
+ _repl_fail_msg = (
+ 'You called `tractor.pause()` from an already cancelled scope!\n\n'
+ 'Consider `await tractor.pause(shield=True)` to make it work B)\n'
 )
- DebugStatus.release(cancel_req_task=True)
+ else:
+ _repl_fail_msg += f'on behalf of {repl_task} ??\n'
+
+ if _repl_fail_msg:
+ log.exception(_repl_fail_msg)
+
+ if not actor.is_infected_aio():
+ DebugStatus.release(cancel_req_task=True)

 # sanity checks for ^ on request/status teardown
- assert DebugStatus.repl is None
+ # assert DebugStatus.repl is None # XXX no more bc bg thread cases?
 assert DebugStatus.repl_task is None

 # sanity, for when hackin on all this?
 if not isinstance(pause_err, trio.Cancelled):
 req_ctx: Context = DebugStatus.req_ctx
- if req_ctx:
- # XXX, bc the child-task in root might cancel it?
- # assert req_ctx._scope.cancel_called
- assert req_ctx.maybe_error
+ # if req_ctx:
+ # # XXX, bc the child-task in root might cancel it?
+ # # assert req_ctx._scope.cancel_called
+ # assert req_ctx.maybe_error

 raise
@@ -2016,6 +2206,8 @@ async def _pause(
 DebugStatus.req_err
 or
 repl_err
+ or
+ pause_err
 ):
 __tracebackhide__: bool = False
@@ -2041,11 +2233,13 @@ def _set_trace(
 # root here? Bo
 log.pdb(
 f'{_pause_msg}\n'
- '|\n'
- # TODO: more compact pformating?
+ f'>(\n'
+ f'|_{actor.uid}\n'
+ f' |_{task}\n' # @ {actor.uid}\n'
+ # f'|_{task}\n'
+ # ^-TODO-^ more compact pformating?
 # -[ ] make an `Actor.__repr()__`
 # -[ ] should we use `log.pformat_task_uid()`?
- f'|_ {task} @ {actor.uid}\n'
 )
 # presuming the caller passed in the "api frame"
 # (the last frame before user code - like `.pause()`)
@@ -2231,7 +2425,12 @@ async def _pause_from_bg_root_thread(
 'Trying to acquire `Lock` on behalf of bg thread\n'
 f'|_{behalf_of_thread}\n'
 )
- # DebugStatus.repl_task = behalf_of_thread
+
+ # NOTE: this is already a task inside the main-`trio`-thread, so
+ # we don't need to worry about calling it another time from the
+ # bg thread on whose behalf this task is operating.
+ DebugStatus.shield_sigint()
+
 out = await _pause(
 debug_func=None,
 repl=repl,
@@ -2240,6 +2439,8 @@ async def _pause_from_bg_root_thread(
 called_from_bg_thread=True,
 **_pause_kwargs
 )
+ DebugStatus.repl_task = behalf_of_thread
+
 lock: trio.FIFOLock = Lock._debug_lock
 stats: trio.LockStatistics= lock.statistics()
 assert stats.owner is task
@@ -2273,7 +2474,6 @@ async def _pause_from_bg_root_thread(
 f'|_{behalf_of_thread}\n'
 )
 task_status.started(out)
- DebugStatus.shield_sigint()

 # wait for bg thread to exit REPL sesh.
try:
@@ -2291,6 +2491,8 @@ def pause_from_sync(
 called_from_builtin: bool = False,
 api_frame: FrameType|None = None,

+ allow_no_runtime: bool = False,
+
 # proxy to `._pause()`, for ex:
 # shield: bool = False,
 # api_frame: FrameType|None = None,
@@ -2309,40 +2511,41 @@ def pause_from_sync(

 '''
 __tracebackhide__: bool = hide_tb
+ repl_owner: Task|Thread|None = None
 try:
 actor: tractor.Actor = current_actor(
 err_on_no_runtime=False,
 )
- message: str = (
- f'{actor.uid} task called `tractor.pause_from_sync()`\n\n'
- )
- if not actor:
- raise RuntimeError(
- 'Not inside the `tractor`-runtime?\n'
+ if (
+ not actor
+ and
+ not allow_no_runtime
+ ):
+ raise NoRuntime(
+ 'The actor runtime has not been opened?\n\n'
 '`tractor.pause_from_sync()` is not functional without a wrapping\n'
 '- `async with tractor.open_nursery()` or,\n'
- '- `async with tractor.open_root_actor()`\n'
- )
+ '- `async with tractor.open_root_actor()`\n\n'

- # TODO: once supported, remove this AND the one
- # inside `._pause()`!
- # outstanding impl fixes:
- # -[ ] need to make `.shield_sigint()` below work here!
- # -[ ] how to handle `asyncio`'s new SIGINT-handler
- # injection?
- # -[ ] should `breakpoint()` work and what does it normally
- # do in `asyncio` ctxs?
- if actor.is_infected_aio():
- raise RuntimeError(
- '`tractor.pause[_from_sync]()` not yet supported '
- 'for infected `asyncio` mode!'
+ 'If you are getting this from a builtin `breakpoint()` call\n'
+ 'it might mean the runtime was started then '
+ 'stopped prematurely?\n'
 )
+ message: str = (
+ f'{actor.uid} task called `tractor.pause_from_sync()`\n'
+ )

- DebugStatus.shield_sigint()
 repl: PdbREPL = mk_pdb()
 # message += f'-> created local REPL {repl}\n'
+ is_trio_thread: bool = DebugStatus.is_main_trio_thread()
 is_root: bool = is_root_process()
+ is_infected_aio: bool = actor.is_infected_aio()
+ thread: Thread = threading.current_thread()
+
+ asyncio_task: asyncio.Task|None = None
+ if is_infected_aio:
+ asyncio_task = asyncio.current_task()

 # TODO: we could also check for a non-`.to_thread` context
 # using `trio.from_thread.check_cancelled()` (says
@@ -2355,17 +2558,33 @@ def pause_from_sync(
 # when called from a (bg) thread, run an async task in a new
 # thread which will call `._pause()` manually with special
 # handling for root-actor caller usage.
- if not DebugStatus.is_main_trio_thread():
- thread: threading.Thread = threading.current_thread()
- repl_owner = thread
+ if (
+ not is_trio_thread
+ and
+ not asyncio_task
+ ):
+ # TODO: `threading.Lock()` this so we don't get races in
+ # multi-thr cases where they're acquiring/releasing the
+ # REPL and setting request/`Lock` state, etc..
+ repl_owner: Thread = thread

 # TODO: make root-actor bg thread usage work!
 if is_root:
 message += (
 f'-> called from a root-actor bg {thread}\n'
- f'-> scheduling `._pause_from_sync_thread()`..\n'
 )
- bg_task, repl = trio.from_thread.run(
+
+ message += (
+ '-> scheduling `._pause_from_bg_root_thread()`..\n'
+ )
+ # XXX SUBTLE BADNESS XXX that should really change!
+ # don't over-write the `repl` here since when
+ # this behalf-of-bg_thread-task calls pause it will
+ # pass `debug_func=None` which will result in it
+ # returning a `repl==None` output and that gets also
+ # `.started(out)` back here! So instead just ignore
+ # that output and assign the `repl` created above!
+ bg_task, _ = trio.from_thread.run( afn=partial( actor._service_n.start, partial( @@ -2375,10 +2594,11 @@ def pause_from_sync( hide_tb=hide_tb, **_pause_kwargs, ), - ) + ), ) + DebugStatus.shield_sigint() message += ( - f'-> `._pause_from_sync_thread()` started bg task {bg_task}\n' + f'-> `._pause_from_bg_root_thread()` started bg task {bg_task}\n' ) else: message += f'-> called from a bg {thread}\n' @@ -2387,7 +2607,7 @@ def pause_from_sync( # `request_root_stdio_lock()` and we don't need to # worry about all the special considerations as with # the root-actor per above. - bg_task, repl = trio.from_thread.run( + bg_task, _ = trio.from_thread.run( afn=partial( _pause, debug_func=None, @@ -2402,8 +2622,101 @@ def pause_from_sync( **_pause_kwargs ), ) + # ?TODO? XXX where do we NEED to call this in the + # subactor-bg-thread case? + DebugStatus.shield_sigint() assert bg_task is not DebugStatus.repl_task + # TODO: once supported, remove this AND the one + # inside `._pause()`! + # outstanding impl fixes: + # -[ ] need to make `.shield_sigint()` below work here! + # -[ ] how to handle `asyncio`'s new SIGINT-handler + # injection? + # -[ ] should `breakpoint()` work and what does it normally + # do in `asyncio` ctxs? + # if actor.is_infected_aio(): + # raise RuntimeError( + # '`tractor.pause[_from_sync]()` not yet supported ' + # 'for infected `asyncio` mode!' + # ) + elif ( + not is_trio_thread + and + is_infected_aio # as in, the special actor-runtime mode + # ^NOTE XXX, that doesn't mean the caller is necessarily + # an `asyncio.Task` just that `trio` has been embedded on + # the `asyncio` event loop! + and + asyncio_task # transitive caller is an actual `asyncio.Task` + ): + greenback: ModuleType = maybe_import_greenback() + + if greenback.has_portal(): + DebugStatus.shield_sigint() + fute: asyncio.Future = run_trio_task_in_future( + partial( + _pause, + debug_func=None, + repl=repl, + hide_tb=hide_tb, + + # XXX to prevent `._pause()` for setting + # `DebugStatus.repl_task` to the gb task! + called_from_sync=True, + called_from_bg_thread=True, + + **_pause_kwargs + ) + ) + repl_owner = asyncio_task + bg_task, _ = greenback.await_(fute) + # TODO: ASYNC version -> `.pause_from_aio()`? + # bg_task, _ = await fute + + # handle the case where an `asyncio` task has been + # spawned WITHOUT enabling a `greenback` portal.. + # => can often happen in 3rd party libs. + else: + bg_task = repl_owner + + # TODO, ostensibly we can just acquire the + # debug lock directly presuming we're the + # root actor running in infected asyncio + # mode? + # + # TODO, this would be a special case where + # a `_pause_from_root()` would come in very + # handy! + # if is_root: + # import pdbp; pdbp.set_trace() + # log.warning( + # 'Allowing `asyncio` task to acquire debug-lock in root-actor..\n' + # 'This is not fully implemented yet; there may be teardown hangs!\n\n' + # ) + # else: + + # simply unsupported, since there exists no hack (i + # can think of) to workaround this in a subactor + # which needs to lock the root's REPL ow we're sure + # to get prompt stdstreams clobbering.. 
+ cf_repr: str = '' + if api_frame: + caller_frame: FrameType = api_frame.f_back + cf_repr: str = f'caller_frame: {caller_frame!r}\n' + + raise RuntimeError( + f"CAN'T USE `greenback._await()` without a portal !?\n\n" + f'Likely this task was NOT spawned via the `tractor.to_asyncio` API..\n' + f'{asyncio_task}\n' + f'{cf_repr}\n' + + f'Prolly the task was started out-of-band (from some lib?)\n' + f'AND one of the below was never called ??\n' + f'- greenback.ensure_portal()\n' + f'- greenback.bestow_portal()\n' + ) + else: # we are presumably the `trio.run()` + main thread # raises on not-found by default greenback: ModuleType = maybe_import_greenback() @@ -2414,7 +2727,12 @@ def pause_from_sync( # greenback: ModuleType = await maybe_init_greenback() message += f'-> imported {greenback}\n' + + # NOTE XXX seems to need to be set BEFORE the `_pause()` + # invoke using gb below? + DebugStatus.shield_sigint() repl_owner: Task = current_task() + message += '-> calling `greenback.await_(_pause(debug_func=None))` from sync caller..\n' try: out = greenback.await_( @@ -2439,9 +2757,20 @@ def pause_from_sync( raise if out: - bg_task, repl = out - assert repl is repl - assert bg_task is repl_owner + bg_task, _ = out + else: + bg_task: Task = current_task() + + # assert repl is repl + # assert bg_task is repl_owner + if bg_task is not repl_owner: + raise DebugStateError( + f'The registered bg task for this debug request is NOT its owner ??\n' + f'bg_task: {bg_task}\n' + f'repl_owner: {repl_owner}\n\n' + + f'{DebugStatus.repr()}\n' + ) # NOTE: normally set inside `_enter_repl_sync()` DebugStatus.repl_task: str = repl_owner @@ -2455,7 +2784,10 @@ def pause_from_sync( ) log.devx(message) + # NOTE set as late as possible to avoid state clobbering + # in the multi-threaded case! DebugStatus.repl = repl + _set_trace( api_frame=api_frame or inspect.currentframe(), repl=repl, @@ -2470,6 +2802,10 @@ def pause_from_sync( # -[ ] tried to use `@pdbp.hideframe` decoration but # still doesn't work except BaseException as err: + log.exception( + 'Failed to sync-pause from\n\n' + f'{repl_owner}\n' + ) __tracebackhide__: bool = False raise err @@ -2513,13 +2849,12 @@ async def breakpoint( _crash_msg: str = ( - 'Attaching to pdb REPL in crashed actor' + 'Opening a pdb REPL in crashed actor' ) def _post_mortem( - # provided and passed by `_pause()` - repl: PdbREPL, + repl: PdbREPL, # normally passed by `_pause()` # XXX all `partial`-ed in by `post_mortem()` below! tb: TracebackType, @@ -2535,19 +2870,30 @@ def _post_mortem( ''' __tracebackhide__: bool = hide_tb - actor: tractor.Actor = current_actor() + try: + actor: tractor.Actor = current_actor() + actor_repr: str = str(actor.uid) + # ^TODO, instead a nice runtime-info + maddr + uid? + # -[ ] impl a `Actor.__repr()__`?? + # |_ : @ + # no_runtime: bool = False + + except NoRuntime: + actor_repr: str = '' + # no_runtime: bool = True + + try: + task_repr: Task = current_task() + except RuntimeError: + task_repr: str = '' # TODO: print the actor supervion tree up to the root # here! 
Bo log.pdb( f'{_crash_msg}\n' - '|\n' - # f'|_ {current_task()}\n' - f'|_ {current_task()} @ {actor.uid}\n' + f'x>(\n' + f' |_ {task_repr} @ {actor_repr}\n' - # f'|_ @{actor.uid}\n' - # TODO: make an `Actor.__repr()__` - # f'|_ {current_task()} @ {actor.name}\n' ) # NOTE only replacing this from `pdbp.xpm()` to add the @@ -2570,6 +2916,8 @@ def _post_mortem( # Since we presume the post-mortem was enaged to a task-ending # error, we MUST release the local REPL request so that not other # local task nor the root remains blocked! + # if not no_runtime: + # DebugStatus.release() DebugStatus.release() @@ -2618,8 +2966,14 @@ async def _maybe_enter_pm( tb: TracebackType|None = None, api_frame: FrameType|None = None, hide_tb: bool = False, + + # only enter debugger REPL when returns `True` + debug_filter: Callable[ + [BaseException|BaseExceptionGroup], + bool, + ] = lambda err: not is_multi_cancelled(err), + ): - from tractor._exceptions import is_multi_cancelled if ( debug_mode() @@ -2636,7 +2990,8 @@ async def _maybe_enter_pm( # Really we just want to mostly avoid catching KBIs here so there # might be a simpler check we can do? - and not is_multi_cancelled(err) + and + debug_filter(err) ): api_frame: FrameType = api_frame or inspect.currentframe() tb: TracebackType = tb or sys.exc_info()[2] @@ -2658,7 +3013,8 @@ async def acquire_debug_lock( tuple, ]: ''' - Request to acquire the TTY `Lock` in the root actor, release on exit. + Request to acquire the TTY `Lock` in the root actor, release on + exit. This helper is for actor's who don't actually need to acquired the debugger but want to wait until the lock is free in the @@ -2670,10 +3026,14 @@ async def acquire_debug_lock( yield None return + task: Task = current_task() async with trio.open_nursery() as n: ctx: Context = await n.start( - request_root_stdio_lock, - subactor_uid, + partial( + request_root_stdio_lock, + actor_uid=subactor_uid, + task_uid=(task.name, id(task)), + ) ) yield ctx ctx.cancel() @@ -2802,20 +3162,23 @@ async def maybe_wait_for_debugger( # pass return False + # TODO: better naming and what additionals? # - [ ] optional runtime plugging? # - [ ] detection for sync vs. async code? # - [ ] specialized REPL entry when in distributed mode? +# -[x] hide tb by def # - [x] allow ignoring kbi Bo @cm def open_crash_handler( catch: set[BaseException] = { - Exception, + # Exception, BaseException, }, ignore: set[BaseException] = { KeyboardInterrupt, }, + tb_hide: bool = True, ): ''' Generic "post mortem" crash handler using `pdbp` REPL debugger. @@ -2828,19 +3191,52 @@ def open_crash_handler( `trio.run()`. ''' + __tracebackhide__: bool = tb_hide + + class BoxedMaybeException(Struct): + value: BaseException|None = None + + # TODO, yield a `outcome.Error`-like boxed type? + # -[~] use `outcome.Value/Error` X-> frozen! + # -[x] write our own..? + # -[ ] consider just wtv is used by `pytest.raises()`? + # + boxed_maybe_exc = BoxedMaybeException() err: BaseException try: - yield + yield boxed_maybe_exc except tuple(catch) as err: - if type(err) not in ignore: - pdbp.xpm() + boxed_maybe_exc.value = err + if ( + type(err) not in ignore + and + not is_multi_cancelled( + err, + ignore_nested=ignore + ) + ): + try: + # use our re-impl-ed version + _post_mortem( + repl=mk_pdb(), + tb=sys.exc_info()[2], + api_frame=inspect.currentframe().f_back, + ) + except bdb.BdbQuit: + __tracebackhide__: bool = False + raise err - raise + # XXX NOTE, `pdbp`'s version seems to lose the up-stack + # tb-info? 
+ # pdbp.xpm() + + raise err @cm def maybe_open_crash_handler( pdb: bool = False, + tb_hide: bool = True, ): ''' Same as `open_crash_handler()` but with bool input flag @@ -2849,6 +3245,8 @@ def maybe_open_crash_handler( Normally this is used with CLI endpoints such that if the --pdb flag is passed the pdb REPL is engaed on any crashes B) ''' + __tracebackhide__: bool = tb_hide + rtctx = nullcontext if pdb: rtctx = open_crash_handler diff --git a/tractor/devx/_frame_stack.py b/tractor/devx/_frame_stack.py index 89a9e849..8e9bf46f 100644 --- a/tractor/devx/_frame_stack.py +++ b/tractor/devx/_frame_stack.py @@ -234,7 +234,7 @@ def find_caller_info( _frame2callerinfo_cache: dict[FrameType, CallerInfo] = {} -# TODO: -[x] move all this into new `.devx._code`! +# TODO: -[x] move all this into new `.devx._frame_stack`! # -[ ] consider rename to _callstack? # -[ ] prolly create a `@runtime_api` dec? # |_ @api_frame seems better? @@ -286,3 +286,18 @@ def api_frame( wrapped._call_infos: dict[FrameType, CallerInfo] = _frame2callerinfo_cache wrapped.__api_func__: bool = True return wrapper(wrapped) + + +# TODO: something like this instead of the adhoc frame-unhiding +# blocks all over the runtime!! XD +# -[ ] ideally we can expect a certain error (set) and if something +# else is raised then all frames below the wrapped one will be +# un-hidden via `__tracebackhide__: bool = False`. +# |_ might need to dynamically mutate the code objs like +# `pdbp.hideframe()` does? +# -[ ] use this as a `@acm` decorator as introed in 3.10? +# @acm +# async def unhide_frame_when_not( +# error_set: set[BaseException], +# ) -> TracebackType: +# ... diff --git a/tractor/devx/_stackscope.py b/tractor/devx/_stackscope.py index e8e97d1a..ccc46534 100644 --- a/tractor/devx/_stackscope.py +++ b/tractor/devx/_stackscope.py @@ -24,19 +24,32 @@ disjoint, parallel executing tasks in separate actors. ''' from __future__ import annotations +# from functools import partial +from threading import ( + current_thread, + Thread, + RLock, +) import multiprocessing as mp from signal import ( signal, + getsignal, SIGUSR1, + SIGINT, +) +# import traceback +from types import ModuleType +from typing import ( + Callable, + TYPE_CHECKING, ) -import traceback -from typing import TYPE_CHECKING import trio from tractor import ( _state, log as logmod, ) +from tractor.devx import _debug log = logmod.get_logger(__name__) @@ -51,26 +64,68 @@ if TYPE_CHECKING: @trio.lowlevel.disable_ki_protection def dump_task_tree() -> None: - import stackscope - from tractor.log import get_console_log + ''' + Do a classic `stackscope.extract()` task-tree dump to console at + `.devx()` level. + ''' + import stackscope tree_str: str = str( stackscope.extract( trio.lowlevel.current_root_task(), recurse_child_tasks=True ) ) - log = get_console_log( - name=__name__, - level='cancel', - ) actor: Actor = _state.current_actor() + thr: Thread = current_thread() + current_sigint_handler: Callable = getsignal(SIGINT) + if ( + current_sigint_handler + is not + _debug.DebugStatus._trio_handler + ): + sigint_handler_report: str = ( + 'The default `trio` SIGINT handler was replaced?!' + ) + else: + sigint_handler_report: str = ( + 'The default `trio` SIGINT handler is in use?!' 
+ ) + + # sclang symbology + # |_ + # |_(Task/Thread/Process/Actor + # |_{Supervisor/Scope + # |_[Storage/Memory/IPC-Stream/Data-Struct + log.devx( f'Dumping `stackscope` tree for actor\n' - f'{actor.name}: {actor}\n' - f' |_{mp.current_process()}\n\n' - f'{tree_str}\n' + f'(>: {actor.uid!r}\n' + f' |_{mp.current_process()}\n' + f' |_{thr}\n' + f' |_{actor}\n' + f'\n' + f'{sigint_handler_report}\n' + f'signal.getsignal(SIGINT) -> {current_sigint_handler!r}\n' + # f'\n' + # start-of-trace-tree delimiter (mostly for testing) + # f'------ {actor.uid!r} ------\n' + f'\n' + f'------ start-of-{actor.uid!r} ------\n' + f'|\n' + f'{tree_str}' + # end-of-trace-tree delimiter (mostly for testing) + f'|\n' + f'|_____ end-of-{actor.uid!r} ______\n' ) + # TODO: can remove this right? + # -[ ] was original code from author + # + # print( + # 'DUMPING FROM PRINT\n' + # + + # content + # ) # import logging # try: # with open("/dev/tty", "w") as tty: @@ -80,58 +135,130 @@ def dump_task_tree() -> None: # "task_tree" # ).exception("Error printing task tree") +_handler_lock = RLock() +_tree_dumped: bool = False -def signal_handler( + +def dump_tree_on_sig( sig: int, frame: object, relay_to_subs: bool = True, ) -> None: - try: - trio.lowlevel.current_trio_token( - ).run_sync_soon(dump_task_tree) - except RuntimeError: - # not in async context -- print a normal traceback - traceback.print_stack() + global _tree_dumped, _handler_lock + with _handler_lock: + # if _tree_dumped: + # log.warning( + # 'Already dumped for this actor...??' + # ) + # return + + _tree_dumped = True + + # actor: Actor = _state.current_actor() + log.devx( + 'Trying to dump `stackscope` tree..\n' + ) + try: + dump_task_tree() + # await actor._service_n.start_soon( + # partial( + # trio.to_thread.run_sync, + # dump_task_tree, + # ) + # ) + # trio.lowlevel.current_trio_token().run_sync_soon( + # dump_task_tree + # ) + + except RuntimeError: + log.exception( + 'Failed to dump `stackscope` tree..\n' + ) + # not in async context -- print a normal traceback + # traceback.print_stack() + raise + + except BaseException: + log.exception( + 'Failed to dump `stackscope` tree..\n' + ) + raise + + # log.devx( + # 'Supposedly we dumped just fine..?' + # ) if not relay_to_subs: return an: ActorNursery for an in _state.current_actor()._actoruid2nursery.values(): - subproc: ProcessType subactor: Actor for subactor, subproc, _ in an._children.values(): - log.devx( + log.warning( f'Relaying `SIGUSR1`[{sig}] to sub-actor\n' f'{subactor}\n' f' |_{subproc}\n' ) - if isinstance(subproc, trio.Process): - subproc.send_signal(sig) + # bc of course stdlib can't have a std API.. XD + match subproc: + case trio.Process(): + subproc.send_signal(sig) - elif isinstance(subproc, mp.Process): - subproc._send_signal(sig) + case mp.Process(): + subproc._send_signal(sig) def enable_stack_on_sig( - sig: int = SIGUSR1 -) -> None: + sig: int = SIGUSR1, +) -> ModuleType: ''' Enable `stackscope` tracing on reception of a signal; by default this is SIGUSR1. + HOT TIP: a task/ctx-tree dump can be triggered from a shell with + fancy cmds. + + For ex. from `bash` using `pgrep` and cmd-sustitution + (https://www.gnu.org/software/bash/manual/bash.html#Command-Substitution) + you could use: + + >> kill -SIGUSR1 $(pgrep -f ) + + OR without a sub-shell, + + >> pkill --signal SIGUSR1 -f + ''' + try: + import stackscope + except ImportError: + log.warning( + '`stackscope` not installed for use in debug mode!' 
+ ) + return None + + handler: Callable|int = getsignal(sig) + if handler is dump_tree_on_sig: + log.devx( + 'A `SIGUSR1` handler already exists?\n' + f'|_ {handler!r}\n' + ) + return + signal( sig, - signal_handler, + dump_tree_on_sig, ) - # NOTE: not the above can be triggered from - # a (xonsh) shell using: - # kill -SIGUSR1 @$(pgrep -f '') - # - # for example if you were looking to trace a `pytest` run - # kill -SIGUSR1 @$(pgrep -f 'pytest') + log.devx( + 'Enabling trace-trees on `SIGUSR1` ' + 'since `stackscope` is installed @ \n' + f'{stackscope!r}\n\n' + f'With `SIGUSR1` handler\n' + f'|_{dump_tree_on_sig}\n' + ) + return stackscope diff --git a/tractor/devx/pformat.py b/tractor/devx/pformat.py index 5fe9bc62..1530ef02 100644 --- a/tractor/devx/pformat.py +++ b/tractor/devx/pformat.py @@ -53,6 +53,7 @@ def pformat_boxed_tb( tb_box_indent: int|None = None, tb_body_indent: int = 1, + boxer_header: str = '-' ) -> str: ''' @@ -88,10 +89,10 @@ def pformat_boxed_tb( tb_box: str = ( f'|\n' - f' ------ - ------\n' + f' ------ {boxer_header} ------\n' f'{tb_body}' - f' ------ - ------\n' - f'_|\n' + f' ------ {boxer_header}- ------\n' + f'_|' ) tb_box_indent: str = ( tb_box_indent diff --git a/tractor/log.py b/tractor/log.py index 47f1f259..74e0321b 100644 --- a/tractor/log.py +++ b/tractor/log.py @@ -258,20 +258,28 @@ class ActorContextInfo(Mapping): def get_logger( - - name: str | None = None, + name: str|None = None, _root_name: str = _proj_name, + logger: Logger|None = None, + + # TODO, using `.config.dictConfig()` api? + # -[ ] SO answer with docs links + # |_https://stackoverflow.com/questions/7507825/where-is-a-complete-example-of-logging-config-dictconfig + # |_https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema + subsys_spec: str|None = None, + ) -> StackLevelAdapter: '''Return the package log or a sub-logger for ``name`` if provided. ''' log: Logger - log = rlog = logging.getLogger(_root_name) + log = rlog = logger or logging.getLogger(_root_name) if ( name - and name != _proj_name + and + name != _proj_name ): # NOTE: for handling for modules that use ``get_logger(__name__)`` @@ -283,7 +291,7 @@ def get_logger( # since in python the {filename} is always this same # module-file. - sub_name: None | str = None + sub_name: None|str = None rname, _, sub_name = name.partition('.') pkgpath, _, modfilename = sub_name.rpartition('.') @@ -306,7 +314,10 @@ def get_logger( # add our actor-task aware adapter which will dynamically look up # the actor and task names at each log emit - logger = StackLevelAdapter(log, ActorContextInfo()) + logger = StackLevelAdapter( + log, + ActorContextInfo(), + ) # additional levels for name, val in CUSTOM_LEVELS.items(): @@ -319,15 +330,25 @@ def get_logger( def get_console_log( - level: str | None = None, + level: str|None = None, + logger: Logger|None = None, **kwargs, -) -> LoggerAdapter: - '''Get the package logger and enable a handler which writes to stderr. - Yeah yeah, i know we can use ``DictConfig``. You do it. +) -> LoggerAdapter: ''' - log = get_logger(**kwargs) # our root logger - logger = log.logger + Get a `tractor`-style logging instance: a `Logger` wrapped in + a `StackLevelAdapter` which injects various concurrency-primitive + (process, thread, task) fields and enables a `StreamHandler` that + writes on stderr using `colorlog` formatting. + + Yeah yeah, i know we can use `logging.config.dictConfig()`. You do it. 
+ + ''' + log = get_logger( + logger=logger, + **kwargs + ) # set a root logger + logger: Logger = log.logger if not level: return log @@ -346,9 +367,13 @@ def get_console_log( None, ) ): + fmt = LOG_FORMAT + # if logger: + # fmt = None + handler = StreamHandler() formatter = colorlog.ColoredFormatter( - LOG_FORMAT, + fmt=fmt, datefmt=DATE_FORMAT, log_colors=STD_PALETTE, secondary_log_colors=BOLD_PALETTE, @@ -365,7 +390,7 @@ def get_loglevel() -> str: # global module logger for tractor itself -log = get_logger('tractor') +log: StackLevelAdapter = get_logger('tractor') def at_least_level( diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index c1301bd2..32f690f1 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -41,8 +41,10 @@ import textwrap from typing import ( Any, Callable, + Protocol, Type, TYPE_CHECKING, + TypeVar, Union, ) from types import ModuleType @@ -181,7 +183,11 @@ def mk_dec( dec_hook: Callable|None = None, ) -> MsgDec: + ''' + Create an IPC msg decoder, normally used as the + `PayloadMsg.pld: PayloadT` field decoder inside a `PldRx`. + ''' return MsgDec( _dec=msgpack.Decoder( type=spec, # like `MsgType[Any]` @@ -227,6 +233,13 @@ def pformat_msgspec( join_char: str = '\n', ) -> str: + ''' + Pretty `str` format the `msgspec.msgpack.Decoder.type` attribute + for display in (console) log messages as a nice (maybe multiline) + presentation of all supported `Struct`s (subtypes) available for + typed decoding. + + ''' dec: msgpack.Decoder = getattr(codec, 'dec', codec) return join_char.join( mk_msgspec_table( @@ -630,31 +643,57 @@ def limit_msg_spec( # # import pdbp; pdbp.set_trace() # assert ext_codec.pld_spec == extended_spec # yield ext_codec +# +# ^-TODO-^ is it impossible to make something like this orr!? + +# TODO: make an auto-custom hook generator from a set of input custom +# types? +# -[ ] below is a proto design using a `TypeCodec` idea? +# +# type var for the expected interchange-lib's +# IPC-transport type when not available as a built-in +# serialization output. +WireT = TypeVar('WireT') -# TODO: make something similar to this inside `._codec` such that -# user can just pass a type table of some sort? -# -[ ] we would need to decode all msgs to `pretty_struct.Struct` -# and then call `.to_dict()` on them? -# -[x] we're going to need to re-impl all the stuff changed in the -# runtime port such that it can handle dicts or `Msg`s? -# -# def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]: -# ''' -# Deliver a `enc_hook()`/`dec_hook()` pair which does -# manual convertion from our above native `Msg` set -# to `dict` equivalent (wire msgs) in order to keep legacy compat -# with the original runtime implementation. -# -# Note: this is is/was primarly used while moving the core -# runtime over to using native `Msg`-struct types wherein we -# start with the send side emitting without loading -# a typed-decoder and then later flipping the switch over to -# load to the native struct types once all runtime usage has -# been adjusted appropriately. -# -# ''' -# return ( -# # enc_to_dict, -# dec_from_dict, -# ) +# TODO: some kinda (decorator) API for built-in subtypes +# that builds this implicitly by inspecting the `mro()`? +class TypeCodec(Protocol): + ''' + A per-custom-type wire-transport serialization translator + description type. + + ''' + src_type: Type + wire_type: WireT + + def encode(obj: Type) -> WireT: + ... + + def decode( + obj_type: Type[WireT], + obj: WireT, + ) -> Type: + ... + + +class MsgpackTypeCodec(TypeCodec): + ... 
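+
+
+# for ex. a (purely hypothetical) concrete codec for
+# `decimal.Decimal` over a `str` wire-type might look something
+# like the following sketch (NOT part of the API above, just an
+# illustration of the protocol's shape):
+#
+# from decimal import Decimal
+#
+# class DecimalCodec(TypeCodec):
+#     src_type: Type = Decimal
+#     wire_type: WireT = str
+#
+#     def encode(obj: Decimal) -> str:
+#         return str(obj)
+#
+#     def decode(
+#         obj_type: Type[str],
+#         obj: str,
+#     ) -> Decimal:
+#         return Decimal(obj)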
+
+
+def mk_codec_hooks(
+    type_codecs: list[TypeCodec],
+
+) -> tuple[Callable, Callable]:
+    '''
+    Deliver an `enc_hook()`/`dec_hook()` pair which handles manual
+    conversion for an input `Type` set such that whenever the
+    `TypeCodec.filter()` predicate matches, `TypeCodec.decode()` is
+    called on the input object by the `dec_hook()`; and whenever
+    `isinstance(obj, TypeCodec.type)` matches inside the
+    `enc_hook(obj=obj)`, the return value is taken from a
+    `TypeCodec.encode(obj)` callback.
+
+    '''
+    ...
diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py
index 15e469e2..91eba8bd 100644
--- a/tractor/msg/pretty_struct.py
+++ b/tractor/msg/pretty_struct.py
@@ -30,9 +30,9 @@ from msgspec import (
     Struct as _Struct,
     structs,
 )
-from pprint import (
-    saferepr,
-)
+# from pprint import (
+#     saferepr,
+# )

 from tractor.log import get_logger

@@ -75,8 +75,8 @@ class DiffDump(UserList):
         for k, left, right in self:
             repstr += (
                 f'({k},\n'
-                f'\t{repr(left)},\n'
-                f'\t{repr(right)},\n'
+                f' |_{repr(left)},\n'
+                f' |_{repr(right)},\n'
                 ')\n'
             )
         repstr += ']\n'
@@ -144,15 +144,22 @@ def pformat(
                 field_indent=indent + field_indent,
             )

-        else:  # the `pprint` recursion-safe format:
+        else:
+            val_str: str = repr(v)
+
+            # XXX LOL, below just seems to be f#$%in causing
+            # recursion errs..
+            #
+            # the `pprint` recursion-safe format:
             # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr
-            try:
-                val_str: str = saferepr(v)
-            except Exception:
-                log.exception(
-                    'Failed to `saferepr({type(struct)})` !?\n'
-                )
-                return _Struct.__repr__(struct)
+            # try:
+            #     val_str: str = saferepr(v)
+            # except Exception:
+            #     log.exception(
+            #         'Failed to `saferepr({type(struct)})` !?\n'
+            #     )
+            #     raise
+            # return _Struct.__repr__(struct)

         # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg!
         obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n')
@@ -203,12 +210,7 @@ class Struct(
             return sin_props

     pformat = pformat
-    # __repr__ = pformat
-    # __str__ = __repr__ = pformat
-    # TODO: use a pprint.PrettyPrinter instance around ONLY rendering
-    # inside a known tty?
-    # def __repr__(self) -> str:
-    #     ...
+
     def __repr__(self) -> str:
         try:
             return pformat(self)
@@ -218,6 +220,13 @@ class Struct(
             )
             return _Struct.__repr__(self)

+    # __repr__ = pformat
+    # __str__ = __repr__ = pformat
+    # TODO: use a pprint.PrettyPrinter instance around ONLY rendering
+    # inside a known tty?
+    # def __repr__(self) -> str:
+    #     ...
+
     def copy(
         self,
         update: dict | None = None,
@@ -267,13 +276,15 @@ class Struct(
                 fi.type(getattr(self, fi.name)),
             )

+    # TODO: make a mod func instead and just point to it here for
+    # method impl?
     def __sub__(
         self,
         other: Struct,

     ) -> DiffDump[tuple[str, Any, Any]]:
         '''
-        Compare fields/items key-wise and return a ``DiffDump``
+        Compare fields/items key-wise and return a `DiffDump`
         for easy visual REPL comparison B)

         '''
@@ -290,3 +301,42 @@ class Struct(
             ))

         return diffs
+
+    @classmethod
+    def fields_diff(
+        cls,
+        other: dict|Struct,
+
+    ) -> DiffDump[tuple[str, Any, Any]]:
+        '''
+        Very similar to `PrettyStruct.__sub__()` except it accepts an
+        input `other: dict` (presumably one that would normally be
+        passed like `Struct(**other)`) and returns a `DiffDump` of the
+        struct's fields vs. the `dict`'s fields.
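+
+        For ex. (a hypothetical struct),
+
+        >>> class Point(Struct):
+        ...     x: int
+        ...     y: int
+        >>> Point.fields_diff({'x': 1, 'z': 3})
+
+        should deliver a `DiffDump` marking `y` as not defined in
+        `other` and `z` as not defined for `Point`.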
+ + ''' + nullish = object() + consumed: dict = other.copy() + diffs: DiffDump[tuple[str, Any, Any]] = DiffDump() + for fi in structs.fields(cls): + field_name: str = fi.name + # ours: Any = getattr(self, field_name) + theirs: Any = consumed.pop(field_name, nullish) + if theirs is nullish: + diffs.append(( + field_name, + f'{fi.type!r}', + 'NOT-DEFINED in `other: dict`', + )) + + # when there are lingering fields in `other` that this struct + # DOES NOT define we also append those. + if consumed: + for k, v in consumed.items(): + diffs.append(( + k, + f'NOT-DEFINED for `{cls.__name__}`', + f'`other: dict` has value = {v!r}', + )) + + return diffs diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index d1451b4c..08b1ed25 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -18,11 +18,16 @@ Infection apis for ``asyncio`` loops running ``trio`` using guest mode. ''' +from __future__ import annotations import asyncio -from asyncio.exceptions import CancelledError +from asyncio.exceptions import ( + CancelledError, +) from contextlib import asynccontextmanager as acm from dataclasses import dataclass import inspect +import platform +import traceback from typing import ( Any, Callable, @@ -30,29 +35,85 @@ from typing import ( Awaitable, ) -import trio -from outcome import Error - -from tractor.log import get_logger -from tractor._state import ( - current_actor, - debug_mode, +import tractor +from tractor._exceptions import ( + InternalError, + is_multi_cancelled, + TrioTaskExited, + TrioCancelled, + AsyncioTaskExited, + AsyncioCancelled, ) +from tractor._state import ( + debug_mode, + _runtime_vars, +) +from tractor._context import Unresolved from tractor.devx import _debug -from tractor._exceptions import AsyncioCancelled +from tractor.log import ( + get_logger, + StackLevelAdapter, +) +# TODO, wite the equiv of `trio.abc.Channel` but without attrs.. +# -[ ] `trionics.chan_types.ChanStruct` maybe? +# from tractor.msg import ( +# pretty_struct, +# ) from tractor.trionics._broadcast import ( broadcast_receiver, BroadcastReceiver, ) +import trio +from outcome import ( + Error, + Outcome, +) -log = get_logger(__name__) +log: StackLevelAdapter = get_logger(__name__) -__all__ = ['run_task', 'run_as_asyncio_guest'] +__all__ = [ + 'run_task', + 'run_as_asyncio_guest', +] +if (_py_313 := ( + ('3', '13') + == + platform.python_version_tuple()[:-1] + ) +): + # 3.13+ only.. lel. + # https://docs.python.org/3.13/library/asyncio-queue.html#asyncio.QueueShutDown + from asyncio import ( + QueueShutDown, + ) +else: + QueueShutDown = False + + +# TODO, generally speaking we can generalize this abstraction, a "SC linked +# parent->child task pair", as the same "supervision scope primitive" +# **that is** our `._context.Context` with the only difference being +# in how the tasks conduct msg-passing comms. +# +# For `LinkedTaskChannel` we are passing the equivalent of (once you +# include all the recently added `._trio/aio_to_raise` +# exd-as-signals) our SC-dialog-proto over each asyncIO framework's +# mem-chan impl, +# +# verus in `Context` +# +# We are doing the same thing but msg-passing comms happens over an +# IPC transport between tasks in different memory domains. @dataclass -class LinkedTaskChannel(trio.abc.Channel): +class LinkedTaskChannel( + trio.abc.Channel, + + # XXX LAME! meta-base conflict.. 
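+    # (presumably since `trio.abc.Channel` is an `abc.ABCMeta`
+    # type while `msgspec.Struct` uses its own metaclass, so
+    # multi-inheriting both hits a metaclass conflict..)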
+ # pretty_struct.Struct, +): ''' A "linked task channel" which allows for two-way synchronized msg passing between a ``trio``-in-guest-mode task and an ``asyncio`` @@ -61,43 +122,135 @@ class LinkedTaskChannel(trio.abc.Channel): ''' _to_aio: asyncio.Queue _from_aio: trio.MemoryReceiveChannel - _to_trio: trio.MemorySendChannel + _to_trio: trio.MemorySendChannel _trio_cs: trio.CancelScope + _trio_task: trio.Task _aio_task_complete: trio.Event + + _suppress_graceful_exits: bool = True + + _trio_err: BaseException|None = None + _trio_to_raise: ( + AsyncioTaskExited| # aio task exits while trio ongoing + AsyncioCancelled| # aio task is (self-)cancelled + BaseException| + None + ) = None _trio_exited: bool = False - # set after ``asyncio.create_task()`` + # set after `asyncio.create_task()` _aio_task: asyncio.Task|None = None _aio_err: BaseException|None = None + _aio_to_raise: ( + TrioTaskExited| # trio task exits while aio ongoing + BaseException| + None + ) = None + # _aio_first: Any|None = None # TODO? + _aio_result: Any|Unresolved = Unresolved + + def _final_result_is_set(self) -> bool: + return self._aio_result is not Unresolved + + # TODO? equiv from `Context`? + # @property + # def has_outcome(self) -> bool: + # return ( + # bool(self.maybe_error) + # or + # self._final_result_is_set() + # ) + + async def wait_for_result( + self, + hide_tb: bool = True, + + ) -> Any: + ''' + Wait for the `asyncio.Task.result()` from `trio` + + ''' + __tracebackhide__: bool = hide_tb + assert self._portal, ( + '`Context.wait_for_result()` can not be called from callee side!' + ) + if self._final_result_is_set(): + return self._aio_result + + async with translate_aio_errors( + chan=self, + wait_aio_task=False, + ): + await self._aio_task_complete.wait() + + if ( + not self._final_result_is_set() + ): + if (trio_to_raise := self._trio_to_raise): + raise trio_to_raise from self._aio_err + + elif aio_err := self._aio_err: + raise aio_err + + else: + raise InternalError( + f'Asyncio-task has no result or error set !?\n' + f'{self._aio_task}' + ) + + return self._aio_result + _broadcaster: BroadcastReceiver|None = None async def aclose(self) -> None: await self._from_aio.aclose() + def started( + self, + val: Any = None, + ) -> None: + self._aio_started_val = val + return self._to_trio.send_nowait(val) + + # TODO, mk this side-agnostic? + # + # -[ ] add private meths for both sides and dynamically + # determine which to use based on task-type read at calltime? + # -[ ] `._recv_trio()`: receive to trio<-asyncio + # -[ ] `._send_trio()`: send from trio->asyncio + # -[ ] `._recv_aio()`: send from asyncio->trio + # -[ ] `._send_aio()`: receive to asyncio<-trio + # + # -[ ] pass the instance to the aio side instead of the separate + # per-side chan types? + # async def receive(self) -> Any: - async with translate_aio_errors( - self, - - # XXX: obviously this will deadlock if an on-going stream is - # being procesed. - # wait_on_aio_task=False, - ): + ''' + Receive a value from the paired `asyncio.Task` with + exception/cancel handling to teardown both sides on any + unexpected error. + ''' + try: # TODO: do we need this to guarantee asyncio code get's # cancelled in the case where the trio side somehow creates # a state where the asyncio cycle-task isn't getting the # cancel request sent by (in theory) the last checkpoint # cycle on the trio side? 
# await trio.lowlevel.checkpoint() - return await self._from_aio.receive() + except BaseException as err: + async with translate_aio_errors( + chan=self, + # NOTE, determined by `open_channel_from()` input arg + suppress_graceful_exits=self._suppress_graceful_exits, - async def wait_asyncio_complete(self) -> None: - await self._aio_task_complete.wait() - - # def cancel_asyncio_task(self) -> None: - # self._aio_task.cancel() + # XXX: obviously this will deadlock if an on-going stream is + # being procesed. + # wait_on_aio_task=False, + ): + raise err async def send(self, item: Any) -> None: ''' @@ -108,6 +261,18 @@ class LinkedTaskChannel(trio.abc.Channel): ''' self._to_aio.put_nowait(item) + # TODO? needed? + # async def wait_aio_complete(self) -> None: + # await self._aio_task_complete.wait() + + def cancel_asyncio_task( + self, + msg: str = '', + ) -> None: + self._aio_task.cancel( + msg=msg, + ) + def closed(self) -> bool: return self._from_aio._closed # type: ignore @@ -147,21 +312,23 @@ class LinkedTaskChannel(trio.abc.Channel): def _run_asyncio_task( - func: Callable, *, qsize: int = 1, provide_channels: bool = False, + suppress_graceful_exits: bool = True, + hide_tb: bool = False, **kwargs, ) -> LinkedTaskChannel: ''' - Run an ``asyncio`` async function or generator in a task, return - or stream the result back to ``trio``. + Run an `asyncio`-compat async function or generator in a task, + return or stream the result back to the caller + `trio.lowleve.Task`. ''' - __tracebackhide__ = True - if not current_actor().is_infected_aio(): + __tracebackhide__: bool = hide_tb + if not tractor.current_actor().is_infected_aio(): raise RuntimeError( "`infect_asyncio` mode is not enabled!?" ) @@ -172,7 +339,6 @@ def _run_asyncio_task( to_trio, from_aio = trio.open_memory_channel(qsize) # type: ignore args = tuple(inspect.getfullargspec(func).args) - if getattr(func, '_tractor_steam_function', None): # the assumption is that the target async routine accepts the # send channel then it intends to yield more then one return @@ -191,37 +357,39 @@ def _run_asyncio_task( coro = func(**kwargs) - cancel_scope = trio.CancelScope() + trio_task: trio.Task = trio.lowlevel.current_task() + trio_cs = trio.CancelScope() aio_task_complete = trio.Event() - aio_err: BaseException|None = None chan = LinkedTaskChannel( - aio_q, # asyncio.Queue - from_aio, # recv chan - to_trio, # send chan - - cancel_scope, - aio_task_complete, + _to_aio=aio_q, # asyncio.Queue + _from_aio=from_aio, # recv chan + _to_trio=to_trio, # send chan + _trio_cs=trio_cs, + _trio_task=trio_task, + _aio_task_complete=aio_task_complete, + _suppress_graceful_exits=suppress_graceful_exits, ) async def wait_on_coro_final_result( - to_trio: trio.MemorySendChannel, coro: Awaitable, aio_task_complete: trio.Event, ) -> None: ''' - Await ``coro`` and relay result back to ``trio``. + Await input `coro` as/in an `asyncio.Task` and deliver final + `return`-ed result back to `trio`. 
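+
+        Any error raised by `coro` is stashed on `chan._aio_err`
+        so the `trio` side can relay it (see
+        `translate_aio_errors()`).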
''' - nonlocal aio_err nonlocal chan orig = result = id(coro) try: - result = await coro + result: Any = await coro + chan._aio_result = result except BaseException as aio_err: + chan._aio_err = aio_err if isinstance(aio_err, CancelledError): log.runtime( '`asyncio` task was cancelled..\n' @@ -230,35 +398,70 @@ def _run_asyncio_task( log.exception( '`asyncio` task errored\n' ) - chan._aio_err = aio_err raise - else: if ( - result != orig and - aio_err is None and + result != orig + and + chan._aio_err is None + and - # in the ``open_channel_from()`` case we don't + # in the `open_channel_from()` case we don't # relay through the "return value". not provide_channels ): to_trio.send_nowait(result) finally: - # if the task was spawned using ``open_channel_from()`` + # if the task was spawned using `open_channel_from()` # then we close the channels on exit. if provide_channels: + # breakpoint() # TODO! why no work!? + # import pdbp; pdbp.set_trace() + + # IFF there is a blocked trio waiter, we set the + # aio-side error to be an explicit "exited early" + # (much like a `Return` in our SC IPC proto) for the + # `.open_channel_from()` case where the parent trio + # task might not wait directly for a final returned + # result (i.e. the trio side might be waiting on + # a streamed value) - this is a signal that the + # asyncio.Task has returned early! + # + # TODO, solve other cases where trio side might, + # - raise Cancelled but aio side exits on next tick. + # - raise error but aio side exits on next tick. + # - raise error and aio side errors "independently" + # on next tick (SEE draft HANDLER BELOW). + stats: trio.MemoryChannelStatistics = to_trio.statistics() + if ( + stats.tasks_waiting_receive + and + not chan._aio_err + ): + chan._trio_to_raise = AsyncioTaskExited( + f'Task exited with final result: {result!r}\n' + ) + # only close the sender side which will relay - # a ``trio.EndOfChannel`` to the trio (consumer) side. + # a `trio.EndOfChannel` to the trio (consumer) side. to_trio.close() aio_task_complete.set() - log.runtime(f'`asyncio` task: {task.get_name()} is complete') + log.runtime( + f'`asyncio` task completed\n' + f')>\n' + f' |_{task}\n' + ) # start the asyncio task we submitted from trio if not inspect.isawaitable(coro): - raise TypeError(f"No support for invoking {coro}") + raise TypeError( + f'Pass the async-fn NOT a coroutine\n' + f'{coro!r}' + ) + # schedule the (bg) `asyncio.Task` task: asyncio.Task = asyncio.create_task( wait_on_coro_final_result( to_trio, @@ -266,10 +469,13 @@ def _run_asyncio_task( aio_task_complete ) ) - chan._aio_task = task + chan._aio_task: asyncio.Task = task # XXX TODO XXX get this actually workin.. XD - # maybe setup `greenback` for `asyncio`-side task REPLing + # -[ ] we need logic to setup `greenback` for `asyncio`-side task + # REPLing.. which should normally be nearly the same as for + # `trio`? + # -[ ] add to a new `.devx._greenback.maybe_init_for_asyncio()`? if ( debug_mode() and @@ -278,173 +484,648 @@ def _run_asyncio_task( raise_not_found=False, )) ): + log.info( + f'Bestowing `greenback` portal for `asyncio`-task\n' + f'{task}\n' + ) greenback.bestow_portal(task) - def cancel_trio(task: asyncio.Task) -> None: + def signal_trio_when_done( + task: asyncio.Task, + ) -> None: ''' - Cancel the calling ``trio`` task on error. + Maybe-cancel, relay-and-raise an error to, OR pack a final + `return`-value for the parent (in SC terms) `trio.Task` on + completion of the `asyncio.Task`. 
+ + Note for certain "edge" scheduling-race-conditions we allow + the aio side to dictate dedicated `tractor`-defined excs to + be raised in the `trio` parent task; the intention is to + indicate those races in a VERY pedantic manner! ''' nonlocal chan - aio_err = chan._aio_err - task_err: BaseException|None = None + trio_err: BaseException|None = chan._trio_err - # only to avoid ``asyncio`` complaining about uncaptured + # XXX, since the original error we read from the asyncio.Task + # might change between BEFORE and AFTER we here call + # `asyncio.Task.result()` + # + # -> THIS is DUE TO US in `translate_aio_errors()`! + # + # => for example we might set a special exc + # (`AsyncioCancelled|AsyncioTaskExited`) meant to be raised + # in trio (and maybe absorbed depending on the called API) + # BEFORE this done-callback is invoked by `asyncio`'s + # runtime. + trio_to_raise: BaseException|None = chan._trio_to_raise + orig_aio_err: BaseException|None = chan._aio_err + aio_err: BaseException|None = None + + # only to avoid `asyncio` complaining about uncaptured # task exceptions try: - task.exception() - except BaseException as terr: - task_err = terr + res: Any = task.result() + log.info( + f'`trio` received final result from `asyncio` task,\n' + f')> {res}\n' + f' |_{task}\n' + ) + if not chan._aio_result: + chan._aio_result = res + + # ?TODO, should we also raise `AsyncioTaskExited[res]` + # in any case where trio is NOT blocking on the + # `._to_trio` chan? + # + # -> ?NO RIGHT? since the + # `open_channel_from().__aexit__()` should detect this + # and then set any final `res` from above as a field + # that can optionally be read by the trio-paren-task as + # needed (just like in our + # `Context.wait_for_result()/.result` API yah? + # + # if provide_channels: + + except BaseException as _aio_err: + aio_err: BaseException = _aio_err + + # READ AGAIN, AFTER the `asyncio` side errors, in case + # it was cancelled due to an error from `trio` (or + # some other out of band exc) and then set to something + # else? + curr_aio_err: BaseException|None = chan._aio_err + + # always true right? + assert ( + type(aio_err) + is type(orig_aio_err) + is type(curr_aio_err) + ), ( + f'`asyncio`-side task errors mismatch?!?\n\n' + f'(caught) aio_err: {aio_err}\n' + f'ORIG chan._aio_err: {orig_aio_err}\n' + f'chan._aio_err: {curr_aio_err}\n' + ) msg: str = ( - 'Infected `asyncio` task {etype_str}\n' - f'|_{task}\n' + '`trio`-side reports that the `asyncio`-side ' + '{etype_str}\n' + # ^NOTE filled in below ) - if isinstance(terr, CancelledError): + if isinstance(aio_err, CancelledError): + msg += ( + f'c)>\n' + f' |_{task}\n' + ) log.cancel( msg.format(etype_str='cancelled') ) - else: - log.exception( - msg.format(etype_str='cancelled') + + # XXX when the asyncio.Task exits early (before the trio + # side) we relay through an exc-as-signal which is + # normally suppressed unless the trio.Task also errors + # + # ?TODO, is this even needed (does it happen) now? + elif ( + _py_313 + and + isinstance(aio_err, QueueShutDown) + ): + # import pdbp; pdbp.set_trace() + trio_err = AsyncioTaskExited( + 'Task exited before `trio` side' + ) + if not chan._trio_err: + chan._trio_err = trio_err + + msg += ( + f')>\n' + f' |_{task}\n' + ) + log.info( + msg.format(etype_str='exited') ) - assert type(terr) is type(aio_err), ( - '`asyncio` task error mismatch?!?' - ) + else: + msg += ( + f'x)>\n' + f' |_{task}\n' + ) + log.exception( + msg.format(etype_str='errored') + ) - if aio_err is not None: - # XXX: uhh is this true? 
- # assert task_err, f'Asyncio task {task.get_name()} discrepancy!?' + # is trio the src of the aio task's exc-as-outcome? + trio_err: BaseException|None = chan._trio_err + curr_aio_err: BaseException|None = chan._aio_err + if ( + curr_aio_err + or + trio_err + or + trio_to_raise + ): + # XXX, if not already, ALWAYs cancel the trio-side on an + # aio-side error or early return. In the case where the trio task is + # blocking on a checkpoint or `asyncio.Queue.get()`. # NOTE: currently mem chan closure may act as a form - # of error relay (at least in the ``asyncio.CancelledError`` - # case) since we have no way to directly trigger a ``trio`` + # of error relay (at least in the `asyncio.CancelledError` + # case) since we have no way to directly trigger a `trio` # task error without creating a nursery to throw one. # We might want to change this in the future though. from_aio.close() - if task_err is None: - assert aio_err - aio_err.with_traceback(aio_err.__traceback__) - # log.error( - # 'infected task errorred' - # ) + if ( + not trio_cs.cancelled_caught + or + not trio_cs.cancel_called + ): + log.cancel( + f'Cancelling `trio` side due to aio-side src exc\n' + f'{curr_aio_err}\n' + f'\n' + f'(c>\n' + f' |_{trio_task}\n' + ) + trio_cs.cancel() - # TODO: show that the cancellation originated - # from the ``trio`` side? right? - # elif type(aio_err) is CancelledError: - # log.cancel( - # 'infected task was cancelled' - # ) + # maybe the `trio` task errored independent from the + # `asyncio` one and likely in between + # a guest-run-sched-tick. + # + # The obvious ex. is where one side errors during + # the current tick and then the other side immediately + # errors before its next checkpoint; i.e. the 2 errors + # are "independent". + # + # "Independent" here means in the sense that neither task + # was the explicit cause of the other side's exception + # according to our `tractor.to_asyncio` SC API's error + # relaying mechanism(s); the error pair is *possibly + # due-to* but **not necessarily** inter-related by some + # (subsys) state between the tasks, + # + # NOTE, also see the `test_trio_prestarted_task_bubbles` + # for reproducing detailed edge cases as per the above + # cases. + # + trio_to_raise: AsyncioCancelled|AsyncioTaskExited = chan._trio_to_raise + aio_to_raise: TrioTaskExited|TrioCancelled = chan._aio_to_raise + if ( + not chan._aio_result + and + not trio_cs.cancelled_caught + and ( + (aio_err and type(aio_err) not in { + asyncio.CancelledError + }) + or + aio_to_raise + ) + and ( + ((trio_err := chan._trio_err) and type(trio_err) not in { + trio.Cancelled, + }) + or + trio_to_raise + ) + ): + eg = ExceptionGroup( + 'Both the `trio` and `asyncio` tasks errored independently!!\n', + ( + trio_to_raise or trio_err, + aio_to_raise or aio_err, + ), + ) + # chan._trio_err = eg + # chan._aio_err = eg + raise eg - # if cancel_scope.cancelled: - # raise aio_err from err + elif aio_err: + # XXX raise any `asyncio` side error IFF it doesn't + # match the one we just caught from the task above! + # (that would indicate something weird/very-wrong + # going on?) + if ( + aio_err is not trio_to_raise + and ( + not suppress_graceful_exits + and ( + chan._aio_result is not Unresolved + and + isinstance(trio_to_raise, AsyncioTaskExited) + ) + ) + ): + # raise aio_err from relayed_aio_err + raise trio_to_raise from curr_aio_err - # XXX: alway cancel the scope on error - # in case the trio task is blocking - # on a checkpoint. 
- cancel_scope.cancel() + raise aio_err - # raise any ``asyncio`` side error. - raise aio_err - - task.add_done_callback(cancel_trio) + task.add_done_callback(signal_trio_when_done) return chan @acm async def translate_aio_errors( - chan: LinkedTaskChannel, wait_on_aio_task: bool = False, + cancel_aio_task_on_trio_exit: bool = True, + suppress_graceful_exits: bool = True, + + hide_tb: bool = True, ) -> AsyncIterator[None]: ''' - Error handling context around ``asyncio`` task spawns which + An error handling to cross-loop propagation context around + `asyncio.Task` spawns via one of this module's APIs: + + - `open_channel_from()` + - `run_task()` + appropriately translates errors and cancels into ``trio`` land. ''' + __tracebackhide__: bool = hide_tb + trio_task = trio.lowlevel.current_task() - - aio_err: BaseException|None = None - - # TODO: make thisi a channel method? - def maybe_raise_aio_err( - err: Exception|None = None - ) -> None: - aio_err = chan._aio_err - if ( - aio_err is not None and - type(aio_err) != CancelledError - ): - # always raise from any captured asyncio error - if err: - raise aio_err from err - else: - raise aio_err - - task = chan._aio_task - assert task + aio_err: BaseException|None = chan._aio_err + aio_task: asyncio.Task = chan._aio_task + aio_done_before_trio: bool = aio_task.done() + assert aio_task + trio_err: BaseException|None = None try: - yield + yield # back to one of the cross-loop apis + except trio.Cancelled as taskc: + trio_err = taskc + chan._trio_err = trio_err - except ( - trio.Cancelled, - ): - # relay cancel through to called ``asyncio`` task + # should NEVER be the case that `trio` is cancel-handling + # BEFORE the other side's task-ref was set!? assert chan._aio_task - chan._aio_task.cancel( - msg=f'the `trio` caller task was cancelled: {trio_task.name}' + + # import pdbp; pdbp.set_trace() # lolevel-debug + + # relay cancel through to called `asyncio` task + chan._aio_to_raise = TrioCancelled( + f'trio`-side cancelled the `asyncio`-side,\n' + f'c)>\n' + f' |_{trio_task}\n' + f'\n' + f'trio src exc: {trio_err!r}\n' ) - raise + # XXX NOTE XXX seems like we can get all sorts of unreliable + # behaviour from `asyncio` under various cancellation + # conditions (like SIGINT/kbi) when this is used.. + # SO FOR NOW, try to avoid it at most costs! + # + # aio_task.cancel( + # msg=f'the `trio` parent task was cancelled: {trio_task.name}' + # ) + # raise + + # XXX always passthrough EoC since this translator is often + # called from `LinkedTaskChannel.receive()` which we want + # passthrough and further we have no special meaning for it in + # terms of relaying errors or signals from the aio side! + except trio.EndOfChannel as eoc: + trio_err = chan._trio_err = eoc + raise eoc + + # NOTE ALSO SEE the matching note in the `cancel_trio()` asyncio + # task-done-callback. + # + # when the aio side is (possibly self-)cancelled it will close + # the `chan._to_trio` and thus trigger the trio side to raise + # a dedicated `AsyncioCancelled` except ( - # NOTE: see the note in the ``cancel_trio()`` asyncio task - # termination callback trio.ClosedResourceError, - # trio.BrokenResourceError, - ): + ) as cre: + chan._trio_err = cre aio_err = chan._aio_err - if ( - task.cancelled() and - type(aio_err) is CancelledError - ): - # if an underlying ``asyncio.CancelledError`` triggered this - # channel close, raise our (non-``BaseException``) wrapper - # error: ``AsyncioCancelled`` from that source error. 
- raise AsyncioCancelled from aio_err + # XXX if an underlying `asyncio.CancelledError` triggered + # this channel close, raise our (non-`BaseException`) wrapper + # exception (`AsyncioCancelled`) from that source error. + if ( + # aio-side is cancelled? + # |_ first not set until it terminates?? + aio_task.cancelled() + and + type(aio_err) is CancelledError + + # TODO, if we want suppression of the + # silent-exit-by-`trio` case? + # -[ ] the parent task can also just catch it though? + # -[ ] OR, offer a `signal_aio_side_on_exit=True` ?? + ): + # await tractor.pause(shield=True) + chan._trio_to_raise = AsyncioCancelled( + f'asyncio`-side cancelled the `trio`-side,\n' + f'c(>\n' + f' |_{aio_task}\n\n' + + f'(triggered on the `trio`-side by a {cre!r})\n' + ) + # TODO?? needed or does this just get reraised in the + # `finally:` block below? + # raise to_raise_trio from aio_err + + # maybe the chan-closure is due to something else? else: - raise + raise cre + + except BaseException as _trio_err: + trio_err = chan._trio_err = _trio_err + # await tractor.pause(shield=True) # workx! + entered: bool = await _debug._maybe_enter_pm( + trio_err, + api_frame=inspect.currentframe(), + ) + if ( + not entered + and + not is_multi_cancelled(trio_err) + ): + log.exception( + '`trio`-side task errored?' + ) + # __tracebackhide__: bool = False + + # TODO, just a log msg here indicating the scope closed + # and that the trio-side expects that and what the final + # result from the aio side was? + # + # if isinstance(chan._aio_err, AsyncioTaskExited): + # await tractor.pause(shield=True) + + # if aio side is still active cancel it due to the trio-side + # error! + # ?TODO, mk `AsyncioCancelled[typeof(trio_err)]` embed the + # current exc? + if ( + # not aio_task.cancelled() + # and + not aio_task.done() # TODO? only need this one? + + # XXX LOL, so if it's not set it's an error !? + # yet another good jerb by `ascyncio`.. + # and + # not aio_task.exception() + ): + aio_taskc = TrioCancelled( + f'The `trio`-side task crashed!\n' + f'{trio_err}' + ) + # ??TODO? move this into the func that tries to use + # `Task._fut_waiter: Future` instead?? + # + # aio_task.set_exception(aio_taskc) + # wait_on_aio_task = False + try: + aio_task.set_exception(aio_taskc) + except ( + asyncio.InvalidStateError, + RuntimeError, + # ^XXX, uhh bc apparently we can't use `.set_exception()` + # any more XD .. ?? + ): + wait_on_aio_task = False finally: + # record wtv `trio`-side error transpired + if trio_err: + assert chan._trio_err is trio_err + # if chan._trio_err is not trio_err: + # await tractor.pause(shield=True) + + ya_trio_exited: bool = chan._trio_exited + graceful_trio_exit: bool = ( + ya_trio_exited + and + not chan._trio_err # XXX CRITICAL, `asyncio.Task.cancel()` is cucked man.. + ) + + # XXX NOTE! XXX by default always cancel the `asyncio` task if + # we've made it this far and it's not done. + # TODO, how to detect if there's an out-of-band error that + # caused the exit? if ( - # NOTE: always cancel the ``asyncio`` task if we've made it - # this far and it's not done. - not task.done() and aio_err + not aio_task.done() + and ( + cancel_aio_task_on_trio_exit + # and + # chan._aio_err # TODO, if it's not .done() is this possible? - # or the trio side has exited it's surrounding cancel scope - # indicating the lifetime of the ``asyncio``-side task - # should also be terminated. 
-            or chan._trio_exited
-        ):
-            log.runtime(
-                f'Cancelling `asyncio`-task: {task.get_name()}'
+            # did the `.open_channel_from()` parent caller already
+            # (gracefully) exit scope before this translator was
+            # invoked?
+            # => since we couple the lifetime of the `asyncio.Task`
+            #   to the `trio` parent task, it should also be
+            #   terminated via either,
+            #
+            #   1. raising an explicit `TrioTaskExited|TrioCancelled`
+            #      in task via `asyncio.Task._fut_waiter.set_exception()`
+            #
+            #   2. or (worst case) by cancelling the aio task using
+            #      the std-but-never-working `asyncio.Task.cancel()`
+            #      (which i can't figure out why that nor
+            #      `Task.set_exception()` seem to never ever do the
+            #      right thing! XD).
+            or
+            graceful_trio_exit
+        )
+    ):
+        report: str = (
+            'trio-side exited silently!'
+        )
+        assert not chan._aio_err, (
+            'WTF why duz asyncio have err but not dun?!'
         )
-        # assert not aio_err, 'WTF how did asyncio do this?!'
-        task.cancel()

-    # Required to sync with the far end ``asyncio``-task to ensure
-    # any error is captured (via monkeypatching the
-    # ``channel._aio_err``) before calling ``maybe_raise_aio_err()``
-    # below!
+        # if the `trio.Task` terminated without raising
+        # `trio.Cancelled` (currently handled above) there's
+        # 2 possibilities,
+        #
+        # i. it raised a `trio_err`
+        # ii. it did a "silent exit" where the
+        #     `open_channel_from().__aexit__()` phase ran without
+        #     any raise or taskc (task cancel) and no final result
+        #     was collected (yet) from the aio side.
+        #
+        # SO, ensure the asyncio-side is notified and terminated
+        # by a dedicated exc-as-signal which distinguishes
+        # various aio-task-state at termination cases.
+        #
+        # Consequently if the aio task doesn't absorb said
+        # exc-as-signal, the trio side should then see the same exc
+        # propagate up through the .open_channel_from() call to
+        # the parent task.
+        #
+        # if the `trio.Task` already exited the block (which can
+        # only happen for the `open_channel_from()` use case), due
+        # to either plain ol' graceful `__aexit__()`, taskc, or an
+        # error, we ensure the aio-side gets signalled via an
+        # explicit exception and its `Queue` is shut down.
+        if ya_trio_exited:
+            # XXX py3.13+ ONLY..
+            # raise `QueueShutDown` on next `Queue.get/put()`
+            if _py_313:
+                chan._to_aio.shutdown()
+
+            # pump this event-loop (well `Runner` but ya)
+            #
+            # TODO? is this actually needed?
+            # -[ ] theory is this lets the aio side error on
+            #     next tick and then we sync task states from
+            #     here onward?
+            await trio.lowlevel.checkpoint()
+
+        # TODO? factor the next 2 branches into a func like
+        # `try_terminate_aio_task()` and use it for the taskc
+        # case above as well?
+        fut: asyncio.Future|None = aio_task._fut_waiter
+        if (
+            fut
+            and
+            not fut.done()
+        ):
+            # await tractor.pause()
+            if graceful_trio_exit:
+                fut.set_exception(
+                    TrioTaskExited(
+                        f'the `trio.Task` gracefully exited but '
+                        f'its `asyncio` peer is not done?\n'
+                        f')>\n'
+                        f' |_{trio_task}\n'
+                        f'\n'
+                        f'>>\n'
+                        f' |_{aio_task!r}\n'
+                    )
+                )
+
+            # TODO? should this need to exist given the equiv
+            # `TrioCancelled` handling in the `BaseException`
+            # handler above??
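+            # (NOTE, `Task._fut_waiter` is a private `asyncio`
+            # impl detail: the `Future` the task is currently
+            # awaiting on; `.set_exception()` on it should wake
+            # the task with that error on its next loop tick.)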
+ else: + fut.set_exception( + TrioTaskExited( + f'The `trio`-side task crashed!\n' + f'{trio_err}' + ) + ) + else: + aio_taskc_warn: str = ( + f'\n' + f'MANUALLY Cancelling `asyncio`-task: {aio_task.get_name()}!\n\n' + f'**THIS CAN SILENTLY SUPPRESS ERRORS FYI\n\n' + ) + # await tractor.pause() + report += aio_taskc_warn + # TODO XXX, figure out the case where calling this makes the + # `test_infected_asyncio.py::test_trio_closes_early_and_channel_exits` + # hang and then don't call it in that case! + # + aio_task.cancel(msg=aio_taskc_warn) + + log.warning(report) + + # sync with the `asyncio.Task`'s completion to ensure any + # error is captured and relayed (via + # `channel._aio_err/._trio_to_raise`) BEFORE calling + # `maybe_raise_aio_side_err()` below! + # + # XXX WARNING NOTE + # the `task.set_exception(aio_taskc)` call above MUST NOT + # EXCEPT or this WILL HANG!! SO, if you get a hang maybe step + # through and figure out why it erroed out up there! + # if wait_on_aio_task: await chan._aio_task_complete.wait() + log.info( + 'asyncio-task is done and unblocked trio-side!\n' + ) - # NOTE: if any ``asyncio`` error was caught, raise it here inline - # here in the ``trio`` task - maybe_raise_aio_err() + # NOTE, was a `maybe_raise_aio_side_err()` closure that + # i moved inline BP + ''' + Raise any `trio`-side-caused cancellation or legit task + error normally propagated from the caller of either, + - `open_channel_from()` + - `run_task()` + + ''' + aio_err: BaseException|None = chan._aio_err + trio_to_raise: ( + AsyncioCancelled| + AsyncioTaskExited| + None + ) = chan._trio_to_raise + + if not suppress_graceful_exits: + raise trio_to_raise from (aio_err or trio_err) + + if trio_to_raise: + match ( + trio_to_raise, + trio_err, + ): + case ( + AsyncioTaskExited(), + trio.Cancelled()| + None, + ): + log.info( + 'Ignoring aio exit signal since trio also exited!' + ) + return + + case ( + AsyncioTaskExited(), + trio.EndOfChannel(), + ): + raise trio_err + + case ( + AsyncioCancelled(), + trio.Cancelled(), + ): + if not aio_done_before_trio: + log.info( + 'Ignoring aio cancelled signal since trio was also cancelled!' + ) + return + case _: + raise trio_to_raise from (aio_err or trio_err) + + # Check if the asyncio-side is the cause of the trio-side + # error. + elif ( + aio_err is not None + and + type(aio_err) is not AsyncioCancelled + ): + # always raise from any captured asyncio error + if trio_err: + raise trio_err from aio_err + + # XXX NOTE! above in the `trio.ClosedResourceError` + # handler we specifically set the + # `aio_err = AsyncioCancelled` such that it is raised + # as that special exc here! + raise aio_err + + if trio_err: + raise trio_err + + # ^^TODO?? case where trio_err is not None and + # aio_err is AsyncioTaskExited => raise eg! + # -[x] maybe use a match bc this get's real + # complex fast XD + # => i did this above for silent exit cases ya? async def run_task( @@ -456,12 +1137,12 @@ async def run_task( ) -> Any: ''' - Run an ``asyncio`` async function or generator in a task, return - or stream the result back to ``trio``. + Run an `asyncio`-compat async function or generator in a task, + return or stream the result back to `trio`. 
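+
+    For ex. (a hypothetical fn) from a `trio` task inside an actor
+    spawned with `infect_asyncio=True`,
+
+    >>> async def aio_sleep_then_ret(sec: float) -> float:
+    ...     await asyncio.sleep(sec)
+    ...     return sec
+    ...
+    >>> await to_asyncio.run_task(aio_sleep_then_ret, sec=0.5)
+    0.5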
''' # simple async func - chan = _run_asyncio_task( + chan: LinkedTaskChannel = _run_asyncio_task( func, qsize=1, **kwargs, @@ -470,20 +1151,24 @@ async def run_task( async with translate_aio_errors( chan, wait_on_aio_task=True, + suppress_graceful_exits=chan._suppress_graceful_exits, ): # return single value that is the output from the - # ``asyncio`` function-as-task. Expect the mem chan api to - # do the job of handling cross-framework cancellations + # ``asyncio`` function-as-task. Expect the mem chan api + # to do the job of handling cross-framework cancellations # / errors via closure and translation in the - # ``translate_aio_errors()`` in the above ctx mngr. - return await chan.receive() + # `translate_aio_errors()` in the above ctx mngr. + + return await chan._from_aio.receive() + # return await chan.receive() @acm async def open_channel_from( target: Callable[..., Any], - **kwargs, + suppress_graceful_exits: bool = True, + **target_kwargs, ) -> AsyncIterator[Any]: ''' @@ -491,35 +1176,182 @@ async def open_channel_from( spawned ``asyncio`` task and ``trio``. ''' - chan = _run_asyncio_task( + chan: LinkedTaskChannel = _run_asyncio_task( target, qsize=2**8, provide_channels=True, - **kwargs, + suppress_graceful_exits=suppress_graceful_exits, + **target_kwargs, ) + # TODO, tuple form here? async with chan._from_aio: async with translate_aio_errors( chan, wait_on_aio_task=True, + suppress_graceful_exits=suppress_graceful_exits, ): # sync to a "started()"-like first delivered value from the # ``asyncio`` task. try: - with chan._trio_cs: + with (cs := chan._trio_cs): first = await chan.receive() # deliver stream handle upward yield first, chan + except trio.Cancelled as taskc: + # await tractor.pause(shield=True) # ya it worx ;) + if cs.cancel_called: + if isinstance(chan._trio_to_raise, AsyncioCancelled): + log.cancel( + f'trio-side was manually cancelled by aio side\n' + f'|_c>}}{cs!r}?\n' + ) + # TODO, maybe a special `TrioCancelled`??? + + raise taskc + finally: chan._trio_exited = True - chan._to_trio.close() + + # when the aio side is still ongoing but trio exits + # early we signal with a special exc (kinda like + # a `Return`-msg for IPC ctxs) + aio_task: asyncio.Task = chan._aio_task + if not aio_task.done(): + fut: asyncio.Future|None = aio_task._fut_waiter + if fut: + fut.set_exception( + TrioTaskExited( + f'but the child `asyncio` task is still running?\n' + f'>>\n' + f' |_{aio_task!r}\n' + ) + ) + else: + # XXX SHOULD NEVER HAPPEN! + await tractor.pause() + else: + chan._to_trio.close() + + +class AsyncioRuntimeTranslationError(RuntimeError): + ''' + We failed to correctly relay runtime semantics and/or maintain SC + supervision rules cross-event-loop. + + ''' + + +def run_trio_task_in_future( + async_fn, + *args, +) -> asyncio.Future: + ''' + Run an async-func as a `trio` task from an `asyncio.Task` wrapped + in a `asyncio.Future` which is returned to the caller. + + Another astounding feat by the great @oremanj !! + + Bo + + ''' + result_future = asyncio.Future() + cancel_scope = trio.CancelScope() + finished: bool = False + + # monkey-patch the future's `.cancel()` meth to + # allow cancellation relay to `trio`-task. 
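+    # => the wrapped `.cancel()` below forwards the request to the
+    # `trio` task's `cancel_scope`; the future is only marked
+    # cancelled once the `trio` task has actually completed.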
+ cancel_message: str|None = None + orig_cancel = result_future.cancel + + def wrapped_cancel( + msg: str|None = None, + ): + nonlocal cancel_message + if finished: + # We're being called back after the task completed + if msg is not None: + return orig_cancel(msg) + elif cancel_message is not None: + return orig_cancel(cancel_message) + else: + return orig_cancel() + + if result_future.done(): + return False + + # Forward cancellation to the Trio task, don't mark + # future as cancelled until it completes + cancel_message = msg + cancel_scope.cancel() + return True + + result_future.cancel = wrapped_cancel + + async def trio_task() -> None: + nonlocal finished + try: + with cancel_scope: + try: + # TODO: type this with new tech in 3.13 + result: Any = await async_fn(*args) + finally: + finished = True + + # Propagate result or cancellation to the Future + if cancel_scope.cancelled_caught: + result_future.cancel() + + elif not result_future.cancelled(): + result_future.set_result(result) + + except BaseException as exc: + # the result future gets all the non-Cancelled + # exceptions. Any Cancelled need to keep propagating + # out of this stack frame in order to reach the cancel + # scope for which they're intended. + cancelled: BaseException|None + rest: BaseException|None + if isinstance(exc, BaseExceptionGroup): + cancelled, rest = exc.split(trio.Cancelled) + + elif isinstance(exc, trio.Cancelled): + cancelled, rest = exc, None + + else: + cancelled, rest = None, exc + + if not result_future.cancelled(): + if rest: + result_future.set_exception(rest) + else: + result_future.cancel() + + if cancelled: + raise cancelled + + trio.lowlevel.spawn_system_task( + trio_task, + name=async_fn, + ) + return result_future def run_as_asyncio_guest( - trio_main: Callable, + # ^-NOTE-^ when spawned with `infected_aio=True` this func is + # normally `Actor._async_main()` as is passed by some boostrap + # entrypoint like `._entry._trio_main()`. + + _sigint_loop_pump_delay: float = 0, ) -> None: + # ^-TODO-^ technically whatever `trio_main` returns.. we should + # try to use func-typevar-params at leaast by 3.13! + # -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#callback-protocols + # -[ ] https://peps.python.org/pep-0646/#using-type-variable-tuples-in-functions + # -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#unpack-for-keyword-arguments + # -[ ] https://peps.python.org/pep-0718/ ''' Entry for an "infected ``asyncio`` actor". @@ -532,64 +1364,263 @@ def run_as_asyncio_guest( # Uh, oh. # # :o - - # It looks like your event loop has caught a case of the ``trio``s. - - # :() - - # Don't worry, we've heard you'll barely notice. You might - # hallucinate a few more propagating errors and feel like your - # digestion has slowed but if anything get's too bad your parents - # will know about it. - + # + # looks like your stdlib event loop has caught a case of "the trios" ! + # + # :O + # + # Don't worry, we've heard you'll barely notice. + # # :) - + # + # You might hallucinate a few more propagating errors and feel + # like your digestion has slowed, but if anything get's too bad + # your parents will know about it. + # + # B) + # async def aio_main(trio_main): + ''' + Main `asyncio.Task` which calls + `trio.lowlevel.start_guest_run()` to "infect" the `asyncio` + event-loop by embedding the `trio` scheduler allowing us to + boot the `tractor` runtime and connect back to our parent. 
+        '''
         loop = asyncio.get_running_loop()
-        trio_done_fut = asyncio.Future()
+        trio_done_fute = asyncio.Future()
+        startup_msg: str = (
+            'Starting `asyncio` guest-loop-run\n'
+            '-> got running loop\n'
+            '-> built a `trio`-done future\n'
+        )

+        # TODO: is this ever run or needed?
+        # -[ ] pretty sure it never gets run for root-infected-aio
+        #     since this main task is always the parent of any
+        #     eventual `open_root_actor()` call?
         if debug_mode():
-            # XXX make it obvi we know this isn't supported yet!
             log.error(
-                'Attempting to enter unsupported `greenback` init '
-                'from `asyncio` task..'
+                'Attempting to enter non-required `greenback` init '
+                'from `asyncio` task ???'
             )
-            await _debug.maybe_init_greenback(
-                force_reload=True,
+            # XXX make it obvi we know this isn't supported yet!
+            assert 0
+            # await _debug.maybe_init_greenback(
+            #     force_reload=True,
+            # )
+
+        def trio_done_callback(main_outcome: Outcome):
+            log.runtime(
+                f'`trio` guest-run finishing with outcome\n'
+                f'>) {main_outcome}\n'
+                f'|_{trio_done_fute}\n'
             )

-        def trio_done_callback(main_outcome):
-
+            # import pdbp; pdbp.set_trace()
             if isinstance(main_outcome, Error):
-                error = main_outcome.error
-                trio_done_fut.set_exception(error)
+                error: BaseException = main_outcome.error

-                # TODO: explicit asyncio tb?
-                # traceback.print_exception(error)
-
-                # XXX: do we need this?
-                # actor.cancel_soon()
+                # show a dedicated `asyncio`-side tb from the error
+                tb_str: str = ''.join(traceback.format_exception(error))
+                log.exception(
+                    'Guest-run errored!?\n\n'
+                    f'{main_outcome}\n'
+                    f'{error}\n\n'
+                    f'{tb_str}\n'
+                )
+                trio_done_fute.set_exception(error)

+                # raise inline
                 main_outcome.unwrap()
+
             else:
-                trio_done_fut.set_result(main_outcome)
-                log.runtime(f"trio_main finished: {main_outcome!r}")
+                trio_done_fute.set_result(main_outcome)
+
+            log.info(
+                f'`trio` guest-run finished with,\n'
+                f')>\n'
+                f'|_{trio_done_fute}\n'
+            )
+
+        startup_msg += (
+            f'-> created {trio_done_callback!r}\n'
+            f'-> scheduling `trio_main`: {trio_main!r}\n'
+        )

         # start the infection: run trio on the asyncio loop in "guest mode"
         log.runtime(
-            'Infecting `asyncio`-process with a `trio` guest-run of\n\n'
-            f'{trio_main!r}\n\n'
-
-            f'{trio_done_callback}\n'
+            f'{startup_msg}\n\n'
+            +
+            'Infecting `asyncio`-process with a `trio` guest-run!\n'
         )
+
+        # TODO, somehow bootstrap this!
+        _runtime_vars['_is_infected_aio'] = True
+
         trio.lowlevel.start_guest_run(
             trio_main,
             run_sync_soon_threadsafe=loop.call_soon_threadsafe,
             done_callback=trio_done_callback,
         )
-        # NOTE `.unwrap()` will raise on error
-        return (await trio_done_fut).unwrap()
+        fute_err: BaseException|None = None
+        try:
+            out: Outcome = await asyncio.shield(trio_done_fute)
+            # ^TODO still don't really understand why the `.shield()`
+            # is required ... ??
+            # https://docs.python.org/3/library/asyncio-task.html#asyncio.shield
+            # ^ seems as though in combo with the try/except here
+            # we're BOLDLY IGNORING cancel of the trio fute?
+            #
+            # I guess it makes sense bc we don't want `asyncio` to
+            # cancel trio just because they can't handle SIGINT
+            # sanely? XD .. kk
+
+            # XXX, sin-shield causes guest-run abandons on SIGINT..
+            # out: Outcome = await trio_done_fute
+
+            # NOTE will raise (via `Error.unwrap()`) from any
+            # exception packed into the guest-run's `main_outcome`.
+            return out.unwrap()
+
+        except (
+            # XXX special SIGINT-handling is required since
+            # `asyncio.shield()`-ing seems to NOT handle that case as
+            # per recent changes in 3.11:
+            # https://docs.python.org/3/library/asyncio-runner.html#handling-keyboard-interruption
+            #
+            # NOTE: further, apparently we ONLY need to handle this
+            # special SIGINT case since all other `asyncio`-side
+            # errors can be processed via our `chan._aio_err`
+            # relaying (right?); SIGINT seems to be a totally diff
+            # error path in `asyncio`'s runtime..?
+            asyncio.CancelledError,
+
+        ) as _fute_err:
+            fute_err = _fute_err
+            err_message: str = (
+                'main `asyncio` task '
+                'was cancelled!\n'
+            )
+
+            # TODO, handle possible edge cases with
+            # `open_root_actor()` closing before this is run!
+            #
+            actor: tractor.Actor = tractor.current_actor()
+
+            log.exception(
+                err_message
+                +
+                'Cancelling `trio`-side `tractor`-runtime..\n'
+                f'c(>\n'
+                f'  |_{actor}.cancel_soon()\n'
+            )
+
+            # XXX WARNING XXX the next LOCs are super important!
+            #
+            # SINCE without them, we can get guest-run ABANDONMENT
+            # cases where `asyncio` will not schedule or wait on the
+            # guest-run `trio.Task` nor invoke its registered
+            # `trio_done_callback()` before final shutdown!
+            #
+            # This is particularly true if the `trio` side has tasks
+            # in shielded sections when an OC-cancel (SIGINT)
+            # condition occurs!
+            #
+            # We now have the
+            # `test_infected_asyncio.test_sigint_closes_lifetime_stack()`
+            # suite to ensure we do not suffer this issue
+            # (hopefully) ever again.
+            #
+            # The original abandonment issue surfaced as 2 different
+            # race-condition-dependent scenarios, both to do with
+            # `asyncio` handling SIGINT from the system:
+            #
+            # - "silent-abandon" (WORST CASE):
+            #   `asyncio` abandons the `trio` guest-run task silently
+            #   and no `trio`-guest-run or `tractor`-actor-runtime
+            #   teardown happens whatsoever..
+            #
+            # - "loud-abandon" (BEST-ish CASE):
+            #   the guest run gets abandoned "loudly" with `trio`
+            #   reporting a console traceback and further tbs of all
+            #   the (failed) GC-triggered shutdown routines which
+            #   thankfully does get dumped to console..
+            #
+            # The abandonment is most easily reproduced if the `trio`
+            # side has tasks doing shielded work where those tasks
+            # ignore the normal `Cancelled` condition and continue to
+            # run, but obviously `asyncio` isn't aware of this and at
+            # some point bails on the guest-run unless we take manual
+            # intervention..
+            #
+            # To repeat, *WITHOUT THIS* stuff below the guest-run can
+            # get race-conditionally abandoned!!
+            #
+            # XXX SOLUTION XXX
+            # ------ - ------
+            # XXX FIRST PART:
+            # ------ - ------
+            # the obvious fix to the "silent-abandon" case is to
+            # explicitly cancel the actor runtime such that no
+            # runtime tasks are even left unaware that the guest-run
+            # should be terminated due to OS cancellation.
+            #
+            actor.cancel_soon()
+
+            # ------ - ------
+            # XXX SECOND PART:
+            # ------ - ------
+            # Pump the `asyncio` event-loop to allow the `trio`-side
+            # guest-run to complete and teardown !!
+            #
+            # oh `asyncio`, how i don't miss you at all XD
+            while not trio_done_fute.done():
+                log.runtime(
+                    'Waiting on main guest-run `asyncio` task to complete..\n'
+                    f'|_trio_done_fut: {trio_done_fute}\n'
+                )
+                await asyncio.sleep(_sigint_loop_pump_delay)
+
+            # XXX is there any alt API/approach like the internal
+            # call below but that doesn't block indefinitely..?
+            # loop._run_once()
+
+            try:
+                return trio_done_fute.result()
+            except (
+                asyncio.InvalidStateError,
+                # asyncio.CancelledError,
+                # ^^XXX `.shield()` call above prevents this??
+
+            ) as state_err:
+
+                # XXX be super duper noisy about abandonment issues!
+                aio_task: asyncio.Task = asyncio.current_task()
+                message: str = (
+                    'The `asyncio`-side task likely exited before the '
+                    '`trio`-side guest-run completed!\n\n'
+                )
+                if fute_err:
+                    message += (
+                        f'The main {aio_task}\n'
+                        f'STOPPED due to {type(fute_err)}\n\n'
+                    )
+
+                message += (
+                    f'Likely something inside our guest-run-as-task impl is '
+                    f'not effectively waiting on the `trio`-side to complete ?!\n'
+                    f'This code -> {aio_main!r}\n\n'
+
+                    'Below you will likely see a '
+                    '"RuntimeWarning: Trio guest run got abandoned.." !!\n'
+                )
+                raise AsyncioRuntimeTranslationError(message) from state_err
+
+                # XXX, should never get here ;)
+                # else:
+                #     import pdbp; pdbp.set_trace()

     # might as well if it's installed.
     try:
@@ -597,6 +1628,8 @@ def run_as_asyncio_guest(
         loop = uvloop.new_event_loop()
         asyncio.set_event_loop(loop)
     except ImportError:
-        pass
+        log.runtime('`uvloop` not available..')

-    return asyncio.run(aio_main(trio_main))
+    return asyncio.run(
+        aio_main(trio_main),
+    )
diff --git a/tractor/trionics/_broadcast.py b/tractor/trionics/_broadcast.py
index 977b6828..154b037d 100644
--- a/tractor/trionics/_broadcast.py
+++ b/tractor/trionics/_broadcast.py
@@ -382,7 +382,7 @@ class BroadcastReceiver(ReceiveChannel):
                 # likely it makes sense to unwind back to the
                 # underlying?
                 # import tractor
-                # await tractor.breakpoint()
+                # await tractor.pause()
                 log.warning(
                     f'Only one sub left for {self}?\n'
                     'We can probably unwind from breceiver?'
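
To make the `to_asyncio` API changes above concrete, here's a hedged usage sketch of both entrypoints; `aio_echo()` and `trio_caller()` are made-up names, the channel-endpoint arg order follows the `examples/debugging/asyncio_bp.py` example touched earlier in this patch, and treating `LinkedTaskChannel.send()` as the trio-side send method is an assumption:

```python
import asyncio
import trio
from tractor import to_asyncio


async def aio_echo(
    to_trio: trio.MemorySendChannel,
    from_trio: asyncio.Queue,
) -> None:
    # with `provide_channels=True` both endpoints are passed in;
    # the first delivered value acts like a `Context.started()`.
    to_trio.send_nowait('started')
    while True:
        msg = await from_trio.get()
        to_trio.send_nowait(msg)


async def trio_caller() -> None:
    # must run `trio`-side inside an actor spawned with
    # `infect_asyncio=True`.
    #
    # single-result case: schedule an `asyncio` func and await it,
    result = await to_asyncio.run_task(asyncio.sleep, delay=0.1)

    # streaming case: a bidirectional linked-task channel,
    async with to_asyncio.open_channel_from(aio_echo) as (first, chan):
        assert first == 'started'
        await chan.send('ping')
        assert await chan.receive() == 'ping'
```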
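Likewise, a sketch of driving the new module-local `run_trio_task_in_future()` from `asyncio` code; it presumes an `asyncio` loop already hosting a `trio` guest-run (e.g. an `infect_asyncio=True` actor) and the timeout/cancel flow is illustrative only:

```python
import asyncio
import trio


async def consume_trio_result() -> None:
    # wrap a `trio`-side async fn in an `asyncio.Future`,
    fut: asyncio.Future = run_trio_task_in_future(
        trio.sleep,
        2,
    )
    try:
        # give the `trio` task a little time to complete,
        await asyncio.wait_for(asyncio.shield(fut), timeout=0.1)
    except asyncio.TimeoutError:
        # relays into the wrapped task's `trio.CancelScope` via the
        # monkey-patched `fut.cancel()`; the future only resolves
        # once the `trio` side has fully unwound.
        fut.cancel()
```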
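For orientation, the guest-run boot inside `aio_main()` follows the canonical pattern from `trio`'s guest-mode docs; a minimal standalone sketch (no `tractor` involved) looks roughly like:

```python
import asyncio
import trio
from outcome import Outcome


async def trio_main() -> str:
    await trio.sleep(0.1)
    return 'trio-done'


async def aio_host() -> str:
    loop = asyncio.get_running_loop()
    trio_done_fut: asyncio.Future = asyncio.Future()

    def trio_done_callback(main_outcome: Outcome) -> None:
        # pack the final result/error; `.unwrap()` re-raises errors.
        trio_done_fut.set_result(main_outcome)

    # embed the `trio` scheduler in the running `asyncio` loop,
    trio.lowlevel.start_guest_run(
        trio_main,
        run_sync_soon_threadsafe=loop.call_soon_threadsafe,
        done_callback=trio_done_callback,
    )
    return (await trio_done_fut).unwrap()


if __name__ == '__main__':
    print(asyncio.run(aio_host()))
```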
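Finally, the post-SIGINT "loop pump" above reduces to this idiom: keep yielding to the `asyncio` loop until the `trio`-done future is resolved by its `done_callback`, rather than letting the guest-run get abandoned at shutdown:

```python
import asyncio


async def pump_until_done(
    fut: asyncio.Future,
    delay: float = 0,
) -> None:
    # yield to the `asyncio` event-loop so the `trio` guest-run
    # keeps getting scheduled until its `done_callback` fires.
    while not fut.done():
        await asyncio.sleep(delay)
```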