Merge pull request #165 from goodboy/signint_saviour

Ignore SIGINT when in a debugger REPL
2022-08-03 09:26:54 -04:00 · 2022-08-03 09:26:54 -04:00 · 641ed7a32a
parent 4902e184e9 cc5f60bba0
commit 641ed7a32a
21 changed files with 1282 additions and 454 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -73,8 +73,11 @@ jobs:
      - name: Install dependencies
        run: pip install -U . -r requirements-test.txt -r requirements-docs.txt --upgrade-strategy eager
      - name: List dependencies
        run: pip freeze
      - name: Run tests
-        run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rs
+        run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rsx
  # We skip 3.10 on windows for now due to
  # https://github.com/pytest-dev/pytest/issues/8733
@ -110,5 +113,8 @@ jobs:
      - name: Install dependencies
        run: pip install -U . -r requirements-test.txt -r requirements-docs.txt --upgrade-strategy eager
      - name: List dependencies
        run: pip freeze
      - name: Run tests
-        run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rs
+        run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rs --full-trace
--- a/docs/README.rst
+++ b/docs/README.rst
@ -3,13 +3,14 @@
 |gh_actions|
 |docs|
-``tractor`` is a `structured concurrent`_, multi-processing_ runtime built on trio_.
+``tractor`` is a `structured concurrent`_, multi-processing_ runtime
 built on trio_.
 Fundamentally ``tractor`` gives you parallelism via ``trio``-"*actors*":
 our nurseries_ let you spawn new Python processes which each run a ``trio``
 scheduled runtime - a call to ``trio.run()``.
-We believe the system adhere's to the `3 axioms`_ of an "`actor model`_"
+We believe the system adheres to the `3 axioms`_ of an "`actor model`_"
 but likely *does not* look like what *you* probably think an "actor
 model" looks like, and that's *intentional*.
@ -577,13 +578,13 @@ say hi, please feel free to reach us in our `matrix channel`_.  If
 matrix seems too hip, we're also mostly all in the the `trio gitter
 channel`_!
 .. _structured concurrent: https://trio.discourse.group/t/concise-definition-of-structured-concurrency/228
 .. _multi-processing: https://en.wikipedia.org/wiki/Multiprocessing
 .. _trio: https://github.com/python-trio/trio
 .. _nurseries: https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/#nurseries-a-structured-replacement-for-go-statements
 .. _actor model: https://en.wikipedia.org/wiki/Actor_model
 .. _trio: https://github.com/python-trio/trio
 .. _multi-processing: https://en.wikipedia.org/wiki/Multiprocessing
 .. _trionic: https://trio.readthedocs.io/en/latest/design.html#high-level-design-principles
 .. _async sandwich: https://trio.readthedocs.io/en/latest/tutorial.html#async-sandwich
 .. _structured concurrent: https://trio.discourse.group/t/concise-definition-of-structured-concurrency/228
 .. _3 axioms: https://www.youtube.com/watch?v=7erJ1DV_Tlo&t=162s
 .. .. _3 axioms: https://en.wikipedia.org/wiki/Actor_model#Fundamental_concepts
 .. _adherance to: https://www.youtube.com/watch?v=7erJ1DV_Tlo&t=1821s
--- a/examples/init.py
+++ b/examples/init.py
--- a/examples/debugging/open_ctx_modnofound.py
+++ b/examples/debugging/open_ctx_modnofound.py
@ -0,0 +1,40 @@
 import trio
 import tractor
@tractor.context
 async def just_sleep(
    ctx: tractor.Context,
    **kwargs,
 ) -> None:
    '''
    Start and sleep.
    '''
    await ctx.started()
    await trio.sleep_forever()
 async def main() -> None:
    async with tractor.open_nursery(
        debug_mode=True,
    ) as n:
        portal = await n.start_actor(
            'ctx_child',
            # XXX: we don't enable the current module in order
            # to trigger `ModuleNotFound`.
            enable_modules=[],
        )
        async with portal.open_context(
            just_sleep,  # taken from pytest parameterization
        ) as (ctx, sent):
            raise KeyboardInterrupt
 if __name__ == '__main__':
    trio.run(main)
--- a/examples/debugging/subactor_bp_in_ctx.py
+++ b/examples/debugging/subactor_bp_in_ctx.py
@ -0,0 +1,50 @@
 import tractor
 import trio
 async def gen():
    yield 'yo'
    await tractor.breakpoint()
    yield 'yo'
    await tractor.breakpoint()
@tractor.context
 async def just_bp(
    ctx: tractor.Context,
 ) -> None:
    await ctx.started()
    await tractor.breakpoint()
    # TODO: bps and errors in this call..
    async for val in gen():
        print(val)
    # await trio.sleep(0.5)
    # prematurely destroy the connection
    await ctx.chan.aclose()
    # THIS CAUSES AN UNRECOVERABLE HANG
    # without latest ``pdbpp``:
    assert 0
 async def main():
    async with tractor.open_nursery(
        debug_mode=True,
    ) as n:
        p = await n.start_actor(
            'bp_boi',
            enable_modules=[__name__],
        )
        async with p.open_context(
            just_bp,
        ) as (ctx, first):
            await trio.sleep_forever()
 if __name__ == '__main__':
    trio.run(main)
--- a/examples/integration/open_context_and_sleep.py
+++ b/examples/integration/open_context_and_sleep.py
@ -0,0 +1,49 @@
 import trio
 import click
 import tractor
 import pydantic
 # from multiprocessing import shared_memory
@tractor.context
 async def just_sleep(
    ctx: tractor.Context,
    **kwargs,
 ) -> None:
    '''
    Test a small ping-pong 2-way streaming server.
    '''
    await ctx.started()
    await trio.sleep_forever()
 async def main() -> None:
    proc = await trio.open_process( (
        'python',
        '-c',
        'import trio; trio.run(trio.sleep_forever)',
    ))
    await proc.wait()
    # await trio.sleep_forever()
    # async with tractor.open_nursery() as n:
    #     portal = await n.start_actor(
    #         'rpc_server',
    #         enable_modules=[__name__],
    #     )
    #     async with portal.open_context(
    #         just_sleep,  # taken from pytest parameterization
    #     ) as (ctx, sent):
    #         await trio.sleep_forever()
 if __name__ == '__main__':
    import time
    # time.sleep(999)
    trio.run(main)
--- a/nooz/165.feature.rst
+++ b/nooz/165.feature.rst
@ -0,0 +1,36 @@
 Add SIGINT protection to our `pdbpp` based debugger subystem such that
 for (single-depth) actor trees in debug mode we ignore interrupts in any
 actor currently holding the TTY lock thus avoiding clobbering IPC
 connections and/or task and process state when working in the REPL.
 As a big note currently so called "nested" actor trees (trees with
 actors having more then one parent/ancestor) are not fully supported
 since we don't yet have a mechanism to relay the debug mode knowledge
 "up" the actor tree (for eg. when handling a crash in a leaf actor).
 As such currently there is a set of tests and known scenarios which will
 result in process cloberring by the zombie repaing machinery and these
 have been documented in https://github.com/goodboy/tractor/issues/320.
 The implementation details include:
 - utilizing a custom SIGINT handler which we apply whenever an actor's
  runtime enters the debug machinery, which we also make sure the
  stdlib's `pdb` configuration doesn't override (which it does by
  default without special instance config).
 - litter the runtime with `maybe_wait_for_debugger()` mostly in spots
  where the root actor should block before doing embedded nursery
  teardown ops which both cancel potential-children-in-deubg as well
  as eventually trigger zombie reaping machinery.
 - hardening of the TTY locking semantics/API both in terms of IPC
  terminations and cancellation and lock release determinism from
  sync debugger instance methods.
 - factoring of locking infrastructure into a new `._debug.Lock` global
  which encapsulates all details of the ``trio`` sync primitives and
  task/actor uid management and tracking.
 We also add `ctrl-c` cases throughout the test suite though these are
 disabled for py3.9 (`pdbpp` UX differences that don't seem worth
 compensating for, especially since this will be our last 3.9 supported
 release) and there are a slew of marked cases that aren't expected to
 work in CI more generally (as mentioned in the "nested" tree note
 above) despite seemingly working  when run manually on linux.
--- a/requirements-test.txt
+++ b/requirements-test.txt
@ -1,5 +1,6 @@
 pytest
 pytest-trio
 pytest-timeout
 pdbpp
 mypy<0.920
 trio_typing<0.7.0
--- a/setup.py
+++ b/setup.py
@ -43,7 +43,7 @@ setup(
    install_requires=[
        # trio related
-        'trio>0.8',
+        'trio >= 0.20',
        'async_generator',
        'trio_typing',
@ -54,12 +54,23 @@ setup(
        # tooling
        'colorlog',
        'wrapt',
-        'pdbpp',
+
        # pip ref docs on these specs:
        # https://pip.pypa.io/en/stable/reference/requirement-specifiers/#examples
        # and pep:
        # https://peps.python.org/pep-0440/#version-specifiers
        'pdbpp <= 0.10.1; python_version < "3.10"',
        # windows deps workaround for ``pdbpp``
        # https://github.com/pdbpp/pdbpp/issues/498
        # https://github.com/pdbpp/fancycompleter/issues/37
        'pyreadline3 ; platform_system == "Windows"',
        # 3.10 has an outstanding unreleased issue and `pdbpp` itself
        #   pins to patched forks of its own dependencies as well..and
        #   we need a specific patch on master atm.
        'pdbpp @ git+https://github.com/pdbpp/pdbpp@76c4be5#egg=pdbpp ; python_version > "3.9"',  # noqa: E501
        # serialization
        'msgspec >= "0.4.0"'
@ -83,8 +94,8 @@ setup(
        "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Intended Audience :: Science/Research",
        "Intended Audience :: Developers",
        "Topic :: System :: Distributed Computing",
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -85,11 +85,14 @@ def spawn_backend(request):
    return request.config.option.spawn_backend
 _ci_env: bool = os.environ.get('CI', False)
@pytest.fixture(scope='session')
 def ci_env() -> bool:
    """Detect CI envoirment.
    """
-    return os.environ.get('TRAVIS', False) or os.environ.get('CI', False)
+    return _ci_env
@pytest.fixture(scope='session')
--- a/tests/test_context_stream_semantics.py
+++ b/tests/test_context_stream_semantics.py
@ -265,6 +265,7 @@ async def test_callee_closes_ctx_after_stream_open():
            enable_modules=[__name__],
        )
        with trio.fail_after(2):
            async with portal.open_context(
                close_ctx_immediately,
@ -297,6 +298,7 @@ async def test_callee_closes_ctx_after_stream_open():
                # of a stream to the context (at least until a time of
                # if/when we decide that's a good idea?)
                try:
                    with trio.fail_after(0.5):
                        async with ctx.open_stream() as stream:
                            pass
                except trio.ClosedResourceError:
--- a/tests/test_debugger.py
+++ b/tests/test_debugger.py
@ -1,5 +1,5 @@
 """
-That native debug better work!
+That "native" debug mode better work!
 All these tests can be understood (somewhat) by running the equivalent
 `examples/debugging/` scripts manually.
@ -10,15 +10,20 @@ TODO:
    - wonder if any of it'll work on OS X?
 """
 import time
 from os import path
 from typing import Optional
 import platform
 import sys
 import time
 import pytest
 import pexpect
 from pexpect.exceptions import (
    TIMEOUT,
    EOF,
 )
-from conftest import repodir
+from conftest import repodir, _ci_env
 # TODO: The next great debugger audit could be done by you!
 # - recurrent entry to breakpoint() from single actor *after* and an
@ -52,6 +57,24 @@ def mk_cmd(ex_name: str) -> str:
    )
 # TODO: was trying to this xfail style but some weird bug i see in CI
 # that's happening at collect time.. pretty soon gonna dump actions i'm
 # thinkin...
 # in CI we skip tests which >= depth 1 actor trees due to there
 # still being an oustanding issue with relaying the debug-mode-state
 # through intermediary parents.
 has_nested_actors = pytest.mark.has_nested_actors
 # .xfail(
 #     os.environ.get('CI', False),
 #     reason=(
 #         'This test uses nested actors and fails in CI\n'
 #         'The test seems to run fine locally but until we solve the '
 #         'following issue this CI test will be xfail:\n'
 #         'https://github.com/goodboy/tractor/issues/320'
 #     )
 # )
@pytest.fixture
 def spawn(
    start_method,
@ -73,6 +96,96 @@ def spawn(
    return _spawn
 PROMPT = r"\(Pdb\+\+\)"
 def expect(
    child,
    # prompt by default
    patt: str = PROMPT,
    **kwargs,
 ) -> None:
    '''
    Expect wrapper that prints last seen console
    data before failing.
    '''
    try:
        child.expect(
            patt,
            **kwargs,
        )
    except TIMEOUT:
        before = str(child.before.decode())
        print(before)
        raise
 def assert_before(
    child,
    patts: list[str],
 ) -> None:
    before = str(child.before.decode())
    for patt in patts:
        try:
            assert patt in before
        except AssertionError:
            print(before)
            raise
@pytest.fixture(
    params=[False, True],
    ids='ctl-c={}'.format,
 )
 def ctlc(
    request,
    ci_env: bool,
 ) -> bool:
    use_ctlc = request.param
    if (
        sys.version_info <= (3, 10)
        and use_ctlc
    ):
        # on 3.9 it seems the REPL UX
        # is highly unreliable and frankly annoying
        # to test for. It does work from manual testing
        # but i just don't think it's wroth it to try
        # and get this working especially since we want to
        # be 3.10+ mega-asap.
        pytest.skip('Py3.9 and `pdbpp` son no bueno..')
    if ci_env:
        node = request.node
        markers = node.own_markers
        for mark in markers:
            if mark.name == 'has_nested_actors':
                pytest.skip(
                    f'Test for {node} uses nested actors and fails in CI\n'
                    f'The test seems to run fine locally but until we solve'
                    'this issue this CI test will be xfail:\n'
                    'https://github.com/goodboy/tractor/issues/320'
                )
    if use_ctlc:
        # XXX: disable pygments highlighting for auto-tests
        # since some envs (like actions CI) will struggle
        # the the added color-char encoding..
        from tractor._debug import TractorConfig
        TractorConfig.use_pygements = False
    yield use_ctlc
@pytest.mark.parametrize(
    'user_in_out',
    [
@ -89,7 +202,7 @@ def test_root_actor_error(spawn, user_in_out):
    child = spawn('root_actor_error')
    # scan for the pdbpp prompt
-    child.expect(r"\(Pdb\+\+\)")
+    expect(child, PROMPT)
    before = str(child.before.decode())
@ -101,7 +214,7 @@ def test_root_actor_error(spawn, user_in_out):
    child.sendline(user_input)
    # process should exit
-    child.expect(pexpect.EOF)
+    expect(child, EOF)
    assert expect_err_str in str(child.before)
@ -137,20 +250,68 @@ def test_root_actor_bp(spawn, user_in_out):
        assert expect_err_str in str(child.before)
-def test_root_actor_bp_forever(spawn):
+def do_ctlc(
    child,
    count: int = 3,
    delay: float = 0.1,
    patt: Optional[str] = None,
    # expect repl UX to reprint the prompt after every
    # ctrl-c send.
    # XXX: no idea but, in CI this never seems to work even on 3.10 so
    # needs some further investigation potentially...
    expect_prompt: bool = not _ci_env,
 ) -> None:
    # make sure ctl-c sends don't do anything but repeat output
    for _ in range(count):
        time.sleep(delay)
        child.sendcontrol('c')
        # TODO: figure out why this makes CI fail..
        # if you run this test manually it works just fine..
        if expect_prompt:
            before = str(child.before.decode())
            time.sleep(delay)
            child.expect(r"\(Pdb\+\+\)")
            time.sleep(delay)
            if patt:
                # should see the last line on console
                assert patt in before
 def test_root_actor_bp_forever(
    spawn,
    ctlc: bool,
 ):
    "Re-enter a breakpoint from the root actor-task."
    child = spawn('root_actor_breakpoint_forever')
    # do some "next" commands to demonstrate recurrent breakpoint
    # entries
    for _ in range(10):
-        child.sendline('next')
+
        child.expect(r"\(Pdb\+\+\)")
-    # do one continue which should trigger a new task to lock the tty
+        if ctlc:
            do_ctlc(child)
        child.sendline('next')
    # do one continue which should trigger a
    # new task to lock the tty
    child.sendline('continue')
    child.expect(r"\(Pdb\+\+\)")
    # seems that if we hit ctrl-c too fast the
    # sigint guard machinery might not kick in..
    time.sleep(0.001)
    if ctlc:
        do_ctlc(child)
    # XXX: this previously caused a bug!
    child.sendline('n')
    child.expect(r"\(Pdb\+\+\)")
@ -158,10 +319,25 @@ def test_root_actor_bp_forever(spawn):
    child.sendline('n')
    child.expect(r"\(Pdb\+\+\)")
    # quit out of the loop
    child.sendline('q')
    child.expect(pexpect.EOF)
 def test_subactor_error(spawn):
    "Single subactor raising an error"
@pytest.mark.parametrize(
    'do_next',
    (True, False),
    ids='do_next={}'.format,
 )
 def test_subactor_error(
    spawn,
    ctlc: bool,
    do_next: bool,
 ):
    '''
    Single subactor raising an error
    '''
    child = spawn('subactor_error')
    # scan for the pdbpp prompt
@ -170,23 +346,33 @@ def test_subactor_error(spawn):
    before = str(child.before.decode())
    assert "Attaching to pdb in crashed actor: ('name_error'" in before
-    # send user command
+    if do_next:
-    # (in this case it's the same for 'continue' vs. 'quit')
+        child.sendline('n')
    else:
        # make sure ctl-c sends don't do anything but repeat output
        if ctlc:
            do_ctlc(
                child,
            )
        # send user command and (in this case it's the same for 'continue'
        # vs. 'quit') the debugger should enter a second time in the nursery
        # creating actor
        child.sendline('continue')
    # the debugger should enter a second time in the nursery
    # creating actor
    child.expect(r"\(Pdb\+\+\)")
    before = str(child.before.decode())
    # root actor gets debugger engaged
    assert "Attaching to pdb in crashed actor: ('root'" in before
    # error is a remote error propagated from the subactor
    assert "RemoteActorError: ('name_error'" in before
    # another round
    if ctlc:
        do_ctlc(child)
    child.sendline('c')
    child.expect('\r\n')
@ -194,7 +380,10 @@ def test_subactor_error(spawn):
    child.expect(pexpect.EOF)
-def test_subactor_breakpoint(spawn):
+def test_subactor_breakpoint(
    spawn,
    ctlc: bool,
 ):
    "Single subactor with an infinite breakpoint loop"
    child = spawn('subactor_breakpoint')
@ -211,6 +400,9 @@ def test_subactor_breakpoint(spawn):
        child.sendline('next')
        child.expect(r"\(Pdb\+\+\)")
        if ctlc:
            do_ctlc(child)
    # now run some "continues" to show re-entries
    for _ in range(5):
        child.sendline('continue')
@ -218,6 +410,9 @@ def test_subactor_breakpoint(spawn):
        before = str(child.before.decode())
        assert "Attaching pdb to actor: ('breakpoint_forever'" in before
        if ctlc:
            do_ctlc(child)
    # finally quit the loop
    child.sendline('q')
@ -228,6 +423,9 @@ def test_subactor_breakpoint(spawn):
    assert "RemoteActorError: ('breakpoint_forever'" in before
    assert 'bdb.BdbQuit' in before
    if ctlc:
        do_ctlc(child)
    # quit the parent
    child.sendline('c')
@ -239,11 +437,16 @@ def test_subactor_breakpoint(spawn):
    assert 'bdb.BdbQuit' in before
-def test_multi_subactors(spawn):
+@has_nested_actors
-    """
+def test_multi_subactors(
-    Multiple subactors, both erroring and breakpointing as well as
+    spawn,
-    a nested subactor erroring.
+    ctlc: bool,
-    """
+):
    '''
    Multiple subactors, both erroring and
    breakpointing as well as a nested subactor erroring.
    '''
    child = spawn(r'multi_subactors')
    # scan for the pdbpp prompt
@ -252,12 +455,18 @@ def test_multi_subactors(spawn):
    before = str(child.before.decode())
    assert "Attaching pdb to actor: ('breakpoint_forever'" in before
    if ctlc:
        do_ctlc(child)
    # do some "next" commands to demonstrate recurrent breakpoint
    # entries
    for _ in range(10):
        child.sendline('next')
        child.expect(r"\(Pdb\+\+\)")
        if ctlc:
            do_ctlc(child)
    # continue to next error
    child.sendline('c')
@ -267,14 +476,22 @@ def test_multi_subactors(spawn):
    assert "Attaching to pdb in crashed actor: ('name_error'" in before
    assert "NameError" in before
    if ctlc:
        do_ctlc(child)
    # continue again
    child.sendline('c')
    # 2nd name_error failure
    child.expect(r"\(Pdb\+\+\)")
-    before = str(child.before.decode())
+
-    assert "Attaching to pdb in crashed actor: ('name_error_1'" in before
+    assert_before(child, [
-    assert "NameError" in before
+        "Attaching to pdb in crashed actor: ('name_error_1'",
        "NameError",
    ])
    if ctlc:
        do_ctlc(child)
    # breakpoint loop should re-engage
    child.sendline('c')
@ -282,20 +499,33 @@ def test_multi_subactors(spawn):
    before = str(child.before.decode())
    assert "Attaching pdb to actor: ('breakpoint_forever'" in before
    if ctlc:
        do_ctlc(child)
    # wait for spawn error to show up
    spawn_err = "Attaching to pdb in crashed actor: ('spawn_error'"
-    while spawn_err not in before:
+    start = time.time()
    while (
        spawn_err not in before
        and (time.time() - start) < 3  # timeout eventually
    ):
        child.sendline('c')
        time.sleep(0.1)
        child.expect(r"\(Pdb\+\+\)")
        before = str(child.before.decode())
        if ctlc:
            do_ctlc(child)
    # 2nd depth nursery should trigger
-    # child.sendline('c')
+    # (XXX: this below if guard is technically a hack that makes the
-    # child.expect(r"\(Pdb\+\+\)")
+    # nested case seem to work locally on linux but ideally in the long
-    # before = str(child.before.decode())
+    # run this can be dropped.)
-    assert spawn_err in before
+    if not ctlc:
-    assert "RemoteActorError: ('name_error_1'" in before
+        assert_before(child, [
            spawn_err,
            "RemoteActorError: ('name_error_1'",
        ])
    # now run some "continues" to show re-entries
    for _ in range(5):
@ -306,31 +536,46 @@ def test_multi_subactors(spawn):
    child.sendline('q')
    child.expect(r"\(Pdb\+\+\)")
    before = str(child.before.decode())
    assert_before(child, [
        # debugger attaches to root
-    assert "Attaching to pdb in crashed actor: ('root'" in before
+        "Attaching to pdb in crashed actor: ('root'",
        # expect a multierror with exceptions for each sub-actor
-    assert "RemoteActorError: ('breakpoint_forever'" in before
+        "RemoteActorError: ('breakpoint_forever'",
-    assert "RemoteActorError: ('name_error'" in before
+        "RemoteActorError: ('name_error'",
-    assert "RemoteActorError: ('spawn_error'" in before
+        "RemoteActorError: ('spawn_error'",
-    assert "RemoteActorError: ('name_error_1'" in before
+        "RemoteActorError: ('name_error_1'",
-    assert 'bdb.BdbQuit' in before
+        'bdb.BdbQuit',
    ])
    if ctlc:
        do_ctlc(child)
    # process should exit
    child.sendline('c')
    child.expect(pexpect.EOF)
    # repeat of previous multierror for final output
-    before = str(child.before.decode())
+    assert_before(child, [
-    assert "RemoteActorError: ('breakpoint_forever'" in before
+        "RemoteActorError: ('breakpoint_forever'",
-    assert "RemoteActorError: ('name_error'" in before
+        "RemoteActorError: ('name_error'",
-    assert "RemoteActorError: ('spawn_error'" in before
+        "RemoteActorError: ('spawn_error'",
-    assert "RemoteActorError: ('name_error_1'" in before
+        "RemoteActorError: ('name_error_1'",
-    assert 'bdb.BdbQuit' in before
+        'bdb.BdbQuit',
    ])
-def test_multi_daemon_subactors(spawn, loglevel):
+def test_multi_daemon_subactors(
-    """Multiple daemon subactors, both erroring and breakpointing within a
+    spawn,
    loglevel: str,
    ctlc: bool
 ):
    '''
    Multiple daemon subactors, both erroring and breakpointing within a
    stream.
-    """
+
    '''
    child = spawn('multi_daemon_subactors')
    child.expect(r"\(Pdb\+\+\)")
@ -352,6 +597,9 @@ def test_multi_daemon_subactors(spawn, loglevel):
    else:
        raise ValueError("Neither log msg was found !?")
    if ctlc:
        do_ctlc(child)
    # NOTE: previously since we did not have clobber prevention
    # in the root actor this final resume could result in the debugger
    # tearing down since both child actors would be cancelled and it was
@ -371,7 +619,7 @@ def test_multi_daemon_subactors(spawn, loglevel):
    # now the root actor won't clobber the bp_forever child
    # during it's first access to the debug lock, but will instead
    # wait for the lock to release, by the edge triggered
-    # ``_debug._no_remote_has_tty`` event before sending cancel messages
+    # ``_debug.Lock.no_remote_has_tty`` event before sending cancel messages
    # (via portals) to its underlings B)
    # at some point here there should have been some warning msg from
@ -379,6 +627,9 @@ def test_multi_daemon_subactors(spawn, loglevel):
    # it seems unreliable in testing here to gnab it:
    # assert "in use by child ('bp_forever'," in before
    if ctlc:
        do_ctlc(child)
    # wait for final error in root
    while True:
@ -394,17 +645,24 @@ def test_multi_daemon_subactors(spawn, loglevel):
        except AssertionError:
            assert bp_forever_msg in before
        if ctlc:
            do_ctlc(child)
    try:
        child.sendline('c')
        child.expect(pexpect.EOF)
-    except pexpect.exceptions.TIMEOUT:
+    except TIMEOUT:
        # Failed to exit using continue..?
        child.sendline('q')
        child.expect(pexpect.EOF)
-def test_multi_subactors_root_errors(spawn):
+@has_nested_actors
 def test_multi_subactors_root_errors(
    spawn,
    ctlc: bool
 ):
    '''
    Multiple subactors, both erroring and breakpointing as well as
    a nested subactor erroring.
@ -419,33 +677,48 @@ def test_multi_subactors_root_errors(spawn):
    before = str(child.before.decode())
    assert "NameError: name 'doggypants' is not defined" in before
    if ctlc:
        do_ctlc(child)
    # continue again to catch 2nd name error from
    # actor 'name_error_1' (which is 2nd depth).
    child.sendline('c')
    child.expect(r"\(Pdb\+\+\)")
-    before = str(child.before.decode())
+    assert_before(child, [
-    assert "Attaching to pdb in crashed actor: ('name_error_1'" in before
+        "Attaching to pdb in crashed actor: ('name_error_1'",
-    assert "NameError" in before
+        "NameError",
    ])
    if ctlc:
        do_ctlc(child)
    child.sendline('c')
    child.expect(r"\(Pdb\+\+\)")
-    before = str(child.before.decode())
+    assert_before(child, [
-    assert "Attaching to pdb in crashed actor: ('spawn_error'" in before
+        "Attaching to pdb in crashed actor: ('spawn_error'",
        # boxed error from previous step
-    assert "RemoteActorError: ('name_error_1'" in before
+        "RemoteActorError: ('name_error_1'",
-    assert "NameError" in before
+        "NameError",
    ])
    if ctlc:
        do_ctlc(child)
    child.sendline('c')
    child.expect(r"\(Pdb\+\+\)")
-    before = str(child.before.decode())
+    assert_before(child, [
-    assert "Attaching to pdb in crashed actor: ('root'" in before
+        "Attaching to pdb in crashed actor: ('root'",
-    # boxed error from first level failure
+        # boxed error from previous step
-    assert "RemoteActorError: ('name_error'" in before
+        "RemoteActorError: ('name_error'",
-    assert "NameError" in before
+        "NameError",
    ])
    # warnings assert we probably don't need
    # assert "Cancelling nursery in ('spawn_error'," in before
    if ctlc:
        do_ctlc(child)
    # continue again
    child.sendline('c')
    child.expect(pexpect.EOF)
@ -455,7 +728,14 @@ def test_multi_subactors_root_errors(spawn):
    assert "AssertionError" in before
-def test_multi_nested_subactors_error_through_nurseries(spawn):
+@has_nested_actors
 def test_multi_nested_subactors_error_through_nurseries(
    spawn,
    # TODO: address debugger issue for nested tree:
    # https://github.com/goodboy/tractor/issues/320
    # ctlc: bool,
 ):
    """Verify deeply nested actors that error trigger debugger entries
    at each actor nurserly (level) all the way up the tree.
@ -476,7 +756,7 @@ def test_multi_nested_subactors_error_through_nurseries(spawn):
            child.sendline('c')
            time.sleep(0.1)
-        except pexpect.exceptions.EOF:
+        except EOF:
            # race conditions on how fast the continue is sent?
            print(f"Failed early on {i}?")
@ -490,14 +770,19 @@ def test_multi_nested_subactors_error_through_nurseries(spawn):
        assert "NameError" in before
@pytest.mark.timeout(15)
@has_nested_actors
 def test_root_nursery_cancels_before_child_releases_tty_lock(
    spawn,
-    start_method
+    start_method,
    ctlc: bool,
 ):
-    """Test that when the root sends a cancel message before a nested
+    '''
-    child has unblocked (which can happen when it has the tty lock and
+    Test that when the root sends a cancel message before a nested child
-    is engaged in pdb) it is indeed cancelled after exiting the debugger.
+    has unblocked (which can happen when it has the tty lock and is
-    """
+    engaged in pdb) it is indeed cancelled after exiting the debugger.
    '''
    timed_out_early = False
    child = spawn('root_cancelled_but_child_is_in_tty_lock')
@ -509,6 +794,9 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
    assert "tractor._exceptions.RemoteActorError: ('name_error'" not in before
    time.sleep(0.5)
    if ctlc:
        do_ctlc(child)
    child.sendline('c')
    for i in range(4):
@ -517,8 +805,8 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
            child.expect(r"\(Pdb\+\+\)")
        except (
-            pexpect.exceptions.EOF,
+            EOF,
-            pexpect.exceptions.TIMEOUT,
+            TIMEOUT,
        ):
            # races all over..
@ -533,26 +821,37 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
        before = str(child.before.decode())
        assert "NameError: name 'doggypants' is not defined" in before
-        child.sendline('c')
+        if ctlc:
            do_ctlc(child)
    while True:
        try:
            child.expect(pexpect.EOF)
            break
        except pexpect.exceptions.TIMEOUT:
        child.sendline('c')
        time.sleep(0.1)
    for i in range(3):
        try:
            child.expect(pexpect.EOF, timeout=0.5)
            break
        except TIMEOUT:
            child.sendline('c')
            time.sleep(0.1)
            print('child was able to grab tty lock again?')
    else:
        print('giving up on child releasing, sending `quit` cmd')
        child.sendline('q')
        expect(child, EOF)
    if not timed_out_early:
        before = str(child.before.decode())
-        assert "tractor._exceptions.RemoteActorError: ('spawner0'" in before
+        assert_before(child, [
-        assert "tractor._exceptions.RemoteActorError: ('name_error'" in before
+            "tractor._exceptions.RemoteActorError: ('spawner0'",
-        assert "NameError: name 'doggypants' is not defined" in before
+            "tractor._exceptions.RemoteActorError: ('name_error'",
            "NameError: name 'doggypants' is not defined",
        ])
 def test_root_cancels_child_context_during_startup(
    spawn,
    ctlc: bool,
 ):
    '''Verify a fast fail in the root doesn't lock up the child reaping
    and all while using the new context api.
@ -565,12 +864,16 @@ def test_root_cancels_child_context_during_startup(
    before = str(child.before.decode())
    assert "AssertionError" in before
    if ctlc:
        do_ctlc(child)
    child.sendline('c')
    child.expect(pexpect.EOF)
 def test_different_debug_mode_per_actor(
    spawn,
    ctlc: bool,
 ):
    child = spawn('per_actor_debug')
    child.expect(r"\(Pdb\+\+\)")
@ -580,6 +883,9 @@ def test_different_debug_mode_per_actor(
    assert "Attaching to pdb in crashed actor: ('debugged_boi'" in before
    assert "RuntimeError" in before
    if ctlc:
        do_ctlc(child)
    child.sendline('c')
    child.expect(pexpect.EOF)
--- a/tests/test_docs_examples.py
+++ b/tests/test_docs_examples.py
@ -81,11 +81,14 @@ def run_example_in_subproc(loglevel, testdir, arb_addr):
    'example_script',
    # walk yields: (dirpath, dirnames, filenames)
-    [(p[0], f) for p in os.walk(examples_dir()) for f in p[2]
+    [
        (p[0], f) for p in os.walk(examples_dir()) for f in p[2]
        if '__' not in f
        and f[0] != '_'
-        and 'debugging' not in p[0]],
+        and 'debugging' not in p[0]
        and 'integration' not in p[0]
    ],
    ids=lambda t: t[1],
 )
@ -113,9 +116,19 @@ def test_example(run_example_in_subproc, example_script):
            # print(f'STDOUT: {out}')
            # if we get some gnarly output let's aggregate and raise
            if err:
                errmsg = err.decode()
                errlines = errmsg.splitlines()
-            if err and 'Error' in errlines[-1]:
+                last_error = errlines[-1]
                if (
                    'Error' in last_error
                    # XXX: currently we print this to console, but maybe
                    # shouldn't eventually once we figure out what's
                    # a better way to be explicit about aio side
                    # cancels?
                    and 'asyncio.exceptions.CancelledError' not in last_error
                ):
                    raise Exception(errmsg)
            assert proc.returncode == 0
--- a/tests/test_spawning.py
+++ b/tests/test_spawning.py
@ -150,13 +150,13 @@ def test_loglevel_propagated_to_subactor(
    async def main():
        async with tractor.open_nursery(
            name='arbiter',
            loglevel=level,
            start_method=start_method,
            arbiter_addr=arb_addr,
        ) as tn:
            await tn.run_in_actor(
                check_loglevel,
                loglevel=level,
                level=level,
            )
--- a/tractor/_actor.py
+++ b/tractor/_actor.py
@ -26,8 +26,11 @@ import importlib
 import importlib.util
 import inspect
 import uuid
-import typing
+from typing import (
-from typing import Any, Optional, Union
+    Any, Optional,
    Union, TYPE_CHECKING,
    Callable,
 )
 from types import ModuleType
 import sys
 import os
@ -57,6 +60,10 @@ from . import _state
 from . import _mp_fixup_main
 if TYPE_CHECKING:
    from ._supervise import ActorNursery
 log = get_logger('tractor')
@ -65,7 +72,7 @@ async def _invoke(
    actor: 'Actor',
    cid: str,
    chan: Channel,
-    func: typing.Callable,
+    func: Callable,
    kwargs: dict[str, Any],
    is_rpc: bool = True,
@ -80,9 +87,10 @@ async def _invoke(
    '''
    __tracebackhide__ = True
-    treat_as_gen = False
+    treat_as_gen: bool = False
    failed_resp: bool = False
-    # possible a traceback (not sure what typing is for this..)
+    # possibly a traceback (not sure what typing is for this..)
    tb = None
    cancel_scope = trio.CancelScope()
@ -183,7 +191,8 @@ async def _invoke(
                    ctx._scope_nursery = scope_nursery
                    cs = scope_nursery.cancel_scope
                    task_status.started(cs)
-                    await chan.send({'return': await coro, 'cid': cid})
+                    res = await coro
                    await chan.send({'return': res, 'cid': cid})
            except trio.MultiError:
                # if a context error was set then likely
@ -197,10 +206,15 @@ async def _invoke(
                # XXX: only pop the context tracking if
                # a ``@tractor.context`` entrypoint was called
                assert chan.uid
                # don't pop the local context until we know the
                # associated child isn't in debug any more
                await _debug.maybe_wait_for_debugger()
                ctx = actor._contexts.pop((chan.uid, cid))
                if ctx:
                    log.runtime(
-                        f'Context entrypoint for {func} was terminated:\n{ctx}'
+                        f'Context entrypoint {func} was terminated:\n{ctx}'
                    )
            assert cs
@ -228,12 +242,29 @@ async def _invoke(
        else:
            # regular async function
            try:
                await chan.send({'functype': 'asyncfunc', 'cid': cid})
            except trio.BrokenResourceError:
                failed_resp = True
                if is_rpc:
                    raise
                else:
                    log.warning(
                        f'Failed to respond to non-rpc request: {func}'
                    )
            with cancel_scope as cs:
                task_status.started(cs)
-                await chan.send({'return': await coro, 'cid': cid})
+                result = await coro
                log.cancel(f'result: {result}')
                if not failed_resp:
                    # only send result if we know IPC isn't down
                    await chan.send({'return': result, 'cid': cid})
-    except (Exception, trio.MultiError) as err:
+    except (
        Exception,
        trio.MultiError
    ) as err:
        if not is_multi_cancelled(err):
@ -267,7 +298,14 @@ async def _invoke(
        try:
            await chan.send(err_msg)
-        except trio.ClosedResourceError:
+        # TODO: tests for this scenario:
        # - RPC caller closes connection before getting a response
        # should **not** crash this actor..
        except (
            trio.ClosedResourceError,
            trio.BrokenResourceError,
            BrokenPipeError,
        ):
            # if we can't propagate the error that's a big boo boo
            log.error(
                f"Failed to ship error to caller @ {chan.uid} !?"
@ -316,7 +354,9 @@ async def try_ship_error_to_parent(
            trio.ClosedResourceError,
            trio.BrokenResourceError,
        ):
-            log.error(
+            # in SC terms this is one of the worst things that can
            # happen and creates the 2-general's dilemma.
            log.critical(
                f"Failed to ship error to parent "
                f"{channel.uid}, channel was closed"
            )
@ -424,7 +464,7 @@ class Actor:
        # (chan, cid) -> (cancel_scope, func)
        self._rpc_tasks: dict[
            tuple[Channel, str],
-            tuple[trio.CancelScope, typing.Callable, trio.Event]
+            tuple[trio.CancelScope, Callable, trio.Event]
        ] = {}
        # map {actor uids -> Context}
@ -491,10 +531,17 @@ class Actor:
            mne = ModuleNotExposed(*err.args)
            if ns == '__main__':
                modpath = '__name__'
            else:
                modpath = f"'{ns}'"
            msg = (
-                    "\n\nMake sure you exposed the current module using:\n\n"
+                "\n\nMake sure you exposed the target module, `{ns}`, "
-                    "ActorNursery.start_actor(<name>, enable_modules="
+                "using:\n"
-                    "[__name__])"
+                "ActorNursery.start_actor(<name>, enable_modules=[{mod}])"
            ).format(
                ns=ns,
                mod=modpath,
            )
            mne.msg += msg
@ -513,6 +560,7 @@ class Actor:
        self._no_more_peers = trio.Event()  # unset
        chan = Channel.from_stream(stream)
        uid: Optional[tuple[str, str]] = chan.uid
        log.runtime(f"New connection to us {chan}")
        # send/receive initial handshake response
@ -560,39 +608,51 @@ class Actor:
        # append new channel
        self._peers[uid].append(chan)
        local_nursery: Optional[ActorNursery] = None  # noqa
        disconnected: bool = False
        # Begin channel management - respond to remote requests and
        # process received reponses.
        try:
-            await self._process_messages(chan)
+            disconnected = await self._process_messages(chan)
-        except trio.Cancelled:
+        except (
            trio.Cancelled,
        ):
            log.cancel(f"Msg loop was cancelled for {chan}")
            raise
        finally:
            local_nursery = self._actoruid2nursery.get(uid, local_nursery)
            # This is set in ``Portal.cancel_actor()``. So if
            # the peer was cancelled we try to wait for them
            # to tear down their side of the connection before
            # moving on with closing our own side.
            local_nursery = self._actoruid2nursery.get(chan.uid)
            if (
                local_nursery
            ):
                log.cancel(f"Waiting on cancel request to peer {chan.uid}")
                # XXX: this is a soft wait on the channel (and its
-                # underlying transport protocol) to close from the remote
+                # underlying transport protocol) to close from the
-                # peer side since we presume that any channel which
+                # remote peer side since we presume that any channel
-                # is mapped to a sub-actor (i.e. it's managed by
+                # which is mapped to a sub-actor (i.e. it's managed by
-                # one of our local nurseries)
+                # one of our local nurseries) has a message is sent to
-                # message is sent to the peer likely by this actor which is
+                # the peer likely by this actor (which is now in
-                # now in a cancelled condition) when the local runtime here
+                # a cancelled condition) when the local runtime here is
-                # is now cancelled while (presumably) in the middle of msg
+                # now cancelled while (presumably) in the middle of msg
                # loop processing.
                with trio.move_on_after(0.5) as cs:
                    cs.shield = True
                    # Attempt to wait for the far end to close the channel
                    # and bail after timeout (2-generals on closure).
                    assert chan.msgstream
                    log.runtime(
                        f'Draining lingering msgs from stream {chan.msgstream}'
                    )
                    async for msg in chan.msgstream.drain():
                        # try to deliver any lingering msgs
                        # before we destroy the channel.
@ -609,6 +669,21 @@ class Actor:
                    await local_nursery.exited.wait()
                if disconnected:
                    # if the transport died and this actor is still
                    # registered within a local nursery, we report that the
                    # IPC layer may have failed unexpectedly since it may be
                    # the cause of other downstream errors.
                    entry = local_nursery._children.get(uid)
                    if entry:
                        _, proc, _ = entry
                        poll = getattr(proc, 'poll', None)
                        if poll and poll() is None:
                            log.cancel(
                                f'Actor {uid} IPC broke but proc is alive?'
                            )
            # ``Channel`` teardown and closure sequence
            # Drop ref to channel so it can be gc-ed and disconnected
@ -618,7 +693,7 @@ class Actor:
            if not chans:
                log.runtime(f"No more channels for {chan.uid}")
-                self._peers.pop(chan.uid, None)
+                self._peers.pop(uid, None)
                # for (uid, cid) in self._contexts.copy():
                #     if chan.uid == uid:
@ -626,11 +701,13 @@ class Actor:
            log.runtime(f"Peers is {self._peers}")
-            if not self._peers:  # no more channels connected
+            # No more channels to other actors (at all) registered
            # as connected.
            if not self._peers:
                log.runtime("Signalling no more peer channels")
                self._no_more_peers.set()
-            # # XXX: is this necessary (GC should do it?)
+            # XXX: is this necessary (GC should do it)?
            if chan.connected():
                # if the channel is still connected it may mean the far
                # end has not closed and we may have gotten here due to
@ -647,7 +724,7 @@ class Actor:
                    # await chan.aclose()
                except trio.BrokenResourceError:
-                    log.warning(f"Channel for {chan.uid} was already closed")
+                    log.runtime(f"Channel {chan.uid} was already closed")
    async def _push_result(
        self,
@ -665,7 +742,7 @@ class Actor:
            ctx = self._contexts[(uid, cid)]
        except KeyError:
            log.warning(
-                    f'Ignoring msg from [no-longer/un]known context with {uid}:'
+                f'Ignoring msg from [no-longer/un]known context {uid}:'
                f'\n{msg}')
            return
@ -813,7 +890,7 @@ class Actor:
        shield: bool = False,
        task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED,
-    ) -> None:
+    ) -> bool:
        '''
        Process messages for the channel async-RPC style.
@ -839,7 +916,7 @@ class Actor:
                    if msg is None:  # loop terminate sentinel
                        log.cancel(
-                            f"Channerl to {chan.uid} terminated?\n"
+                            f"Channel to {chan.uid} terminated?\n"
                            "Cancelling all associated tasks..")
                        for (channel, cid) in self._rpc_tasks.copy():
@ -881,14 +958,16 @@ class Actor:
                    log.runtime(
                        f"Processing request from {actorid}\n"
                        f"{ns}.{funcname}({kwargs})")
                    if ns == 'self':
                        func = getattr(self, funcname)
                        if funcname == 'cancel':
                            # don't start entire actor runtime
                            # cancellation if this actor is in debug
                            # mode
-                            pdb_complete = _debug._local_pdb_complete
+                            pdb_complete = _debug.Lock.local_pdb_complete
                            if pdb_complete:
                                await pdb_complete.wait()
@ -919,12 +998,22 @@ class Actor:
                                # ``_async_main()``
                                kwargs['chan'] = chan
                                log.cancel(
-                                    f'{self.uid} was remotely cancelled by\n'
+                                    f'Remote request to cancel task\n'
-                                    f'{chan.uid}!'
+                                    f'remote actor: {chan.uid}\n'
                                    f'task: {cid}'
                                )
                                try:
                                    await _invoke(
-                                    self, cid, chan, func, kwargs, is_rpc=False
+                                        self,
                                        cid,
                                        chan,
                                        func,
                                        kwargs,
                                        is_rpc=False,
                                    )
                                except BaseException:
                                    log.exception("failed to cancel task?")
                                continue
                    else:
                        # complain to client about restricted modules
@ -986,6 +1075,9 @@ class Actor:
            # up.
            log.runtime(f'channel from {chan.uid} closed abruptly:\n{chan}')
            # transport **was** disconnected
            return True
        except (Exception, trio.MultiError) as err:
            if nursery_cancelled_before_task:
                sn = self._service_n
@ -1010,6 +1102,9 @@ class Actor:
                f"Exiting msg loop for {chan} from {chan.uid} "
                f"with last msg:\n{msg}")
        # transport **was not** disconnected
        return False
    async def _from_parent(
        self,
        parent_addr: Optional[tuple[str, int]],
@ -1323,7 +1418,7 @@ class Actor:
            # kill any debugger request task to avoid deadlock
            # with the root actor in this tree
-            dbcs = _debug._debugger_request_cs
+            dbcs = _debug.Lock._debugger_request_cs
            if dbcs is not None:
                log.cancel("Cancelling active debugger request")
                dbcs.cancel()
@ -1356,12 +1451,14 @@ class Actor:
    #             n.cancel_scope.cancel()
    async def _cancel_task(self, cid, chan):
-        """Cancel a local task by call-id / channel.
+        '''
        Cancel a local task by call-id / channel.
        Note this method will be treated as a streaming function
        by remote actor-callers due to the declaration of ``ctx``
        in the signature (for now).
-        """
+
        '''
        # right now this is only implicitly called by
        # streaming IPC but it should be called
        # to cancel any remotely spawned task
--- a/tractor/_debug.py
+++ b/tractor/_debug.py
@ -18,8 +18,10 @@
 Multi-core debugging for da peeps!
 """
 from __future__ import annotations
 import bdb
 import sys
 import signal
 from functools import partial
 from contextlib import asynccontextmanager as acm
 from typing import (
@ -29,16 +31,18 @@ from typing import (
    AsyncIterator,
    AsyncGenerator,
 )
 from types import FrameType
 import tractor
 import trio
 from trio_typing import TaskStatus
 from .log import get_logger
 from . import _state
 from ._discovery import get_root
 from ._state import is_root_process, debug_mode
 from ._exceptions import is_multi_cancelled
 from ._ipc import Channel
 try:
    # wtf: only exported when installed in dev mode?
@ -46,7 +50,8 @@ try:
 except ImportError:
    # pdbpp is installed in regular mode...it monkey patches stuff
    import pdb
-    assert pdb.xpm, "pdbpp is not installed?"  # type: ignore
+    xpm = getattr(pdb, 'xpm', None)
    assert xpm, "pdbpp is not installed?"  # type: ignore
    pdbpp = pdb
 log = get_logger(__name__)
@ -55,102 +60,122 @@ log = get_logger(__name__)
 __all__ = ['breakpoint', 'post_mortem']
-# TODO: wrap all these in a static global class: ``DebugLock`` maybe?
+class Lock:
    '''
    Actor global debug lock state.
-# placeholder for function to set a ``trio.Event`` on debugger exit
+    Mostly to avoid a lot of ``global`` declarations for now XD.
 _pdb_release_hook: Optional[Callable] = None
-# actor-wide variable pointing to current task name using debugger
+    '''
-_local_task_in_debug: Optional[str] = None
+    # placeholder for function to set a ``trio.Event`` on debugger exit
    # pdb_release_hook: Optional[Callable] = None
-# actor tree-wide actor uid that supposedly has the tty lock
+    # actor-wide variable pointing to current task name using debugger
-_global_actor_in_debug: Optional[Tuple[str, str]] = None
+    local_task_in_debug: Optional[str] = None
-# lock in root actor preventing multi-access to local tty
+    # actor tree-wide actor uid that supposedly has the tty lock
-_debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock()
+    global_actor_in_debug: Optional[Tuple[str, str]] = None
 _local_pdb_complete: Optional[trio.Event] = None
 _no_remote_has_tty: Optional[trio.Event] = None
-# XXX: set by the current task waiting on the root tty lock
+    local_pdb_complete: Optional[trio.Event] = None
-# and must be cancelled if this actor is cancelled via message
+    no_remote_has_tty: Optional[trio.Event] = None
-# otherwise deadlocks with the parent actor may ensure
+
-_debugger_request_cs: Optional[trio.CancelScope] = None
+    # lock in root actor preventing multi-access to local tty
    _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock()
    # XXX: set by the current task waiting on the root tty lock
    # and must be cancelled if this actor is cancelled via message
    # otherwise deadlocks with the parent actor may ensure
    _debugger_request_cs: Optional[trio.CancelScope] = None
    _orig_sigint_handler: Optional[Callable] = None
    @classmethod
    def shield_sigint(cls):
        cls._orig_sigint_handler = signal.signal(
                signal.SIGINT,
                shield_sigint,
            )
    @classmethod
    def unshield_sigint(cls):
        if cls._orig_sigint_handler is not None:
            # restore original sigint handler
            signal.signal(
                signal.SIGINT,
                cls._orig_sigint_handler
            )
        cls._orig_sigint_handler = None
    @classmethod
    def release(cls):
        try:
            cls._debug_lock.release()
        except RuntimeError:
            # uhhh makes no sense but been seeing the non-owner
            # release error even though this is definitely the task
            # that locked?
            owner = cls._debug_lock.statistics().owner
            if owner:
                raise
        # actor-local state, irrelevant for non-root.
        cls.global_actor_in_debug = None
        cls.local_task_in_debug = None
        try:
            # sometimes the ``trio`` might already be terminated in
            # which case this call will raise.
            cls.local_pdb_complete.set()
        finally:
            # restore original sigint handler
            cls.unshield_sigint()
 class TractorConfig(pdbpp.DefaultConfig):
-    """Custom ``pdbpp`` goodness.
+    '''
-    """
+    Custom ``pdbpp`` goodness.
    '''
    # use_pygments = True
    # sticky_by_default = True
    enable_hidden_frames = False
-class PdbwTeardown(pdbpp.Pdb):
+class MultiActorPdb(pdbpp.Pdb):
-    """Add teardown hooks to the regular ``pdbpp.Pdb``.
+    '''
-    """
+    Add teardown hooks to the regular ``pdbpp.Pdb``.
    '''
    # override the pdbpp config with our coolio one
    DefaultConfig = TractorConfig
    # def preloop(self):
    #     print('IN PRELOOP')
    #     super().preloop()
    # TODO: figure out how to disallow recursive .set_trace() entry
    # since that'll cause deadlock for us.
    def set_continue(self):
        try:
            super().set_continue()
        finally:
-            global _local_task_in_debug
+            Lock.release()
            _local_task_in_debug = None
            _pdb_release_hook()
    def set_quit(self):
        try:
            super().set_quit()
        finally:
-            global _local_task_in_debug
+            Lock.release()
            _local_task_in_debug = None
            _pdb_release_hook()
 # TODO: will be needed whenever we get to true remote debugging.
 # XXX see https://github.com/goodboy/tractor/issues/130
 # # TODO: is there some way to determine this programatically?
 # _pdb_exit_patterns = tuple(
 #     str.encode(patt + "\n") for patt in (
 #         'c', 'cont', 'continue', 'q', 'quit')
 # )
 # def subactoruid2proc(
 #     actor: 'Actor',  # noqa
 #     uid: Tuple[str, str]
 # ) -> trio.Process:
 #     n = actor._actoruid2nursery[uid]
 #     _, proc, _ = n._children[uid]
 #     return proc
 # async def hijack_stdin():
 #     log.info(f"Hijacking stdin from {actor.uid}")
 #     trap std in and relay to subproc
 #     async_stdin = trio.wrap_file(sys.stdin)
 #     async with aclosing(async_stdin):
 #         async for msg in async_stdin:
 #             log.runtime(f"Stdin input:\n{msg}")
 #             # encode to bytes
 #             bmsg = str.encode(msg)
 #             # relay bytes to subproc over pipe
 #             # await proc.stdin.send_all(bmsg)
 #             if bmsg in _pdb_exit_patterns:
 #                 log.info("Closing stdin hijack")
 #                 break
@acm
-async def _acquire_debug_lock(
+async def _acquire_debug_lock_from_root_task(
    uid: Tuple[str, str]
 ) -> AsyncIterator[trio.StrictFIFOLock]:
-    '''Acquire a root-actor local FIFO lock which tracks mutex access of
+    '''
    Acquire a root-actor local FIFO lock which tracks mutex access of
    the process tree's global debugger breakpoint.
    This lock avoids tty clobbering (by preventing multiple processes
@ -158,94 +183,86 @@ async def _acquire_debug_lock(
    to the ``pdb`` repl.
    '''
    global _debug_lock, _global_actor_in_debug, _no_remote_has_tty
    task_name = trio.lowlevel.current_task().name
-    log.debug(
+    log.runtime(
        f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}"
    )
    we_acquired = False
    if _no_remote_has_tty is None:
        # mark the tty lock as being in use so that the runtime
        # can try to avoid clobbering any connection from a child
        # that's currently relying on it.
        _no_remote_has_tty = trio.Event()
    try:
-        log.debug(
+        log.runtime(
            f"entering lock checkpoint, remote task: {task_name}:{uid}"
        )
        we_acquired = True
-        await _debug_lock.acquire()
+        await Lock._debug_lock.acquire()
-        _global_actor_in_debug = uid
+        if Lock.no_remote_has_tty is None:
-        log.debug(f"TTY lock acquired, remote task: {task_name}:{uid}")
+            # mark the tty lock as being in use so that the runtime
            # can try to avoid clobbering any connection from a child
            # that's currently relying on it.
            Lock.no_remote_has_tty = trio.Event()
        Lock.global_actor_in_debug = uid
        log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}")
        # NOTE: critical section: this yield is unshielded!
        # IF we received a cancel during the shielded lock entry of some
        # next-in-queue requesting task, then the resumption here will
        # result in that ``trio.Cancelled`` being raised to our caller
-        # (likely from ``_hijack_stdin_for_child()`` below)!  In
+        # (likely from ``lock_tty_for_child()`` below)!  In
        # this case the ``finally:`` below should trigger and the
        # surrounding caller side context should cancel normally
        # relaying back to the caller.
-        yield _debug_lock
+        yield Lock._debug_lock
    finally:
-        # if _global_actor_in_debug == uid:
+        if (
-        if we_acquired and _debug_lock.locked():
+            we_acquired
-            _debug_lock.release()
+            and Lock._debug_lock.locked()
        ):
            Lock._debug_lock.release()
        # IFF there are no more requesting tasks queued up fire, the
        # "tty-unlocked" event thereby alerting any monitors of the lock that
        # we are now back in the "tty unlocked" state. This is basically
        # and edge triggered signal around an empty queue of sub-actor
        # tasks that may have tried to acquire the lock.
-        stats = _debug_lock.statistics()
+        stats = Lock._debug_lock.statistics()
        if (
            not stats.owner
        ):
-            log.debug(f"No more tasks waiting on tty lock! says {uid}")
+            log.runtime(f"No more tasks waiting on tty lock! says {uid}")
-            _no_remote_has_tty.set()
+            if Lock.no_remote_has_tty is not None:
-            _no_remote_has_tty = None
+                Lock.no_remote_has_tty.set()
                Lock.no_remote_has_tty = None
-        _global_actor_in_debug = None
+        Lock.global_actor_in_debug = None
-        log.debug(f"TTY lock released, remote task: {task_name}:{uid}")
+        log.runtime(
-
+            f"TTY lock released, remote task: {task_name}:{uid}"
 def handler(signum, frame, *args):
    """Specialized debugger compatible SIGINT handler.
    In childred we always ignore to avoid deadlocks since cancellation
    should always be managed by the parent supervising actor. The root
    is always cancelled on ctrl-c.
    """
    if is_root_process():
        tractor.current_actor().cancel_soon()
    else:
        print(
            "tractor ignores SIGINT while in debug mode\n"
            "If you have a special need for it please open an issue.\n"
        )
@tractor.context
-async def _hijack_stdin_for_child(
+async def lock_tty_for_child(
    ctx: tractor.Context,
    subactor_uid: Tuple[str, str]
 ) -> str:
    '''
-    Hijack the tty in the root process of an actor tree such that
+    Lock the TTY in the root process of an actor tree in a new
-    the pdbpp debugger console can be allocated to a sub-actor for repl
+    inter-actor-context-task such that the ``pdbpp`` debugger console
-    bossing.
+    can be mutex-allocated to the calling sub-actor for REPL control
    without interference by other processes / threads.
    NOTE: this task must be invoked in the root process of the actor
    tree. It is meant to be invoked as an rpc-task and should be
    highly reliable at releasing the mutex complete!
    '''
    task_name = trio.lowlevel.current_task().name
@ -259,68 +276,50 @@ async def _hijack_stdin_for_child(
    )
    log.debug(f"Actor {subactor_uid} is WAITING on stdin hijack lock")
-
+    Lock.shield_sigint()
    with trio.CancelScope(shield=True):
    try:
-            lock = None
+        with (
-            async with _acquire_debug_lock(subactor_uid) as lock:
+            trio.CancelScope(shield=True),
        ):
            async with _acquire_debug_lock_from_root_task(subactor_uid):
                # indicate to child that we've locked stdio
                await ctx.started('Locked')
-                log.debug(f"Actor {subactor_uid} acquired stdin hijack lock")
+                log.debug(
                    f"Actor {subactor_uid} acquired stdin hijack lock"
                )
                # wait for unlock pdb by child
                async with ctx.open_stream() as stream:
                    assert await stream.receive() == 'pdb_unlock'
                # try:
                #     assert await stream.receive() == 'pdb_unlock'
        except (
            # BaseException,
            trio.MultiError,
            trio.BrokenResourceError,
            trio.Cancelled,  # by local cancellation
            trio.ClosedResourceError,  # by self._rx_chan
        ) as err:
            # XXX: there may be a race with the portal teardown
            # with the calling actor which we can safely ignore.
            # The alternative would be sending an ack message
            # and allowing the client to wait for us to teardown
            # first?
            if lock and lock.locked():
                lock.release()
            if isinstance(err, trio.Cancelled):
                raise
        finally:
            log.debug(
                "TTY lock released, remote task:"
                f"{task_name}:{subactor_uid}")
        return "pdb_unlock_complete"
    finally:
        Lock.unshield_sigint()
 async def wait_for_parent_stdin_hijack(
    actor_uid: Tuple[str, str],
    task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED
 ):
    '''
-    Connect to the root actor via a ctx and invoke a task which locks
+    Connect to the root actor via a ``Context`` and invoke a task which
-    a root-local TTY lock.
+    locks a root-local TTY lock: ``lock_tty_for_child()``; this func
    should be called in a new task from a child actor **and never the
    root*.
    This function is used by any sub-actor to acquire mutex access to
-    pdb and the root's TTY for interactive debugging (see below inside
+    the ``pdb`` REPL and thus the root's TTY for interactive debugging
-    ``_breakpoint()``). It can be used to ensure that an intermediate
+    (see below inside ``_breakpoint()``). It can be used to ensure that
-    nursery-owning actor does not clobber its children if they are in
+    an intermediate nursery-owning actor does not clobber its children
-    debug (see below inside ``maybe_wait_for_debugger()``).
+    if they are in debug (see below inside
    ``maybe_wait_for_debugger()``).
    '''
    global _debugger_request_cs
    with trio.CancelScope(shield=True) as cs:
-        _debugger_request_cs = cs
+        Lock._debugger_request_cs = cs
        try:
            async with get_root() as portal:
@ -328,7 +327,7 @@ async def wait_for_parent_stdin_hijack(
                # this syncs to child's ``Context.started()`` call.
                async with portal.open_context(
-                    tractor._debug._hijack_stdin_for_child,
+                    tractor._debug.lock_tty_for_child,
                    subactor_uid=actor_uid,
                ) as (ctx, val):
@ -338,28 +337,45 @@ async def wait_for_parent_stdin_hijack(
                    async with ctx.open_stream() as stream:
                        # unblock local caller
                        task_status.started(cs)
                        try:
-                            assert _local_pdb_complete
+                            assert Lock.local_pdb_complete
-                            await _local_pdb_complete.wait()
+                            task_status.started(cs)
                            await Lock.local_pdb_complete.wait()
                        finally:
                            # TODO: shielding currently can cause hangs...
-                            with trio.CancelScope(shield=True):
+                            # with trio.CancelScope(shield=True):
                            await stream.send('pdb_unlock')
                        # sync with callee termination
                        assert await ctx.result() == "pdb_unlock_complete"
                log.pdb('unlocked context')
        except tractor.ContextCancelled:
            log.warning('Root actor cancelled debug lock')
        finally:
-            log.debug(f"Exiting debugger for actor {actor_uid}")
+            log.pdb(f"Exiting debugger for actor {actor_uid}")
-            global _local_task_in_debug
+            Lock.local_task_in_debug = None
-            _local_task_in_debug = None
+            log.pdb(f"Child {actor_uid} released parent stdio lock")
-            log.debug(f"Child {actor_uid} released parent stdio lock")
+
 def mk_mpdb() -> tuple[MultiActorPdb, Callable]:
    pdb = MultiActorPdb()
    # signal.signal = pdbpp.hideframe(signal.signal)
    Lock.shield_sigint()
    # XXX: These are the important flags mentioned in
    # https://github.com/python-trio/trio/issues/1155
    # which resolve the traceback spews to console.
    pdb.allow_kbdint = True
    pdb.nosigint = True
    return pdb, Lock.unshield_sigint
 async def _breakpoint(
@ -370,34 +386,39 @@ async def _breakpoint(
    # shield: bool = False
 ) -> None:
-    '''``tractor`` breakpoint entry for engaging pdb machinery
+    '''
-    in the root or a subactor.
+    Breakpoint entry for engaging debugger instance sync-interaction,
    from async code, executing in actor runtime (task).
    '''
    __tracebackhide__ = True
    pdb, undo_sigint = mk_mpdb()
    actor = tractor.current_actor()
    task_name = trio.lowlevel.current_task().name
    # TODO: is it possible to debug a trio.Cancelled except block?
    # right now it seems like we can kinda do with by shielding
    # around ``tractor.breakpoint()`` but not if we move the shielded
    # scope here???
    # with trio.CancelScope(shield=shield):
    #     await trio.lowlevel.checkpoint()
-    actor = tractor.current_actor()
+    if not Lock.local_pdb_complete or Lock.local_pdb_complete.is_set():
-    task_name = trio.lowlevel.current_task().name
+        Lock.local_pdb_complete = trio.Event()
    global _local_pdb_complete, _pdb_release_hook
    global _local_task_in_debug, _global_actor_in_debug
    await trio.lowlevel.checkpoint()
    if not _local_pdb_complete or _local_pdb_complete.is_set():
        _local_pdb_complete = trio.Event()
    # TODO: need a more robust check for the "root" actor
-    if actor._parent_chan and not is_root_process():
+    if (
        not is_root_process()
        and actor._parent_chan  # a connected child
    ):
-        if _local_task_in_debug:
+        if Lock.local_task_in_debug:
-            if _local_task_in_debug == task_name:
+
-                # this task already has the lock and is
+            # Recurrence entry case: this task already has the lock and
-                # likely recurrently entering a breakpoint
+            # is likely recurrently entering a breakpoint
            if Lock.local_task_in_debug == task_name:
                # noop on recurrent entry case
                return
            # if **this** actor is already in debug mode block here
@ -405,15 +426,12 @@ async def _breakpoint(
            # support for recursive entries to `tractor.breakpoint()`
            log.warning(f"{actor.uid} already has a debug lock, waiting...")
-            await _local_pdb_complete.wait()
+            await Lock.local_pdb_complete.wait()
            await trio.sleep(0.1)
        # mark local actor as "in debug mode" to avoid recurrent
        # entries/requests to the root process
-        _local_task_in_debug = task_name
+        Lock.local_task_in_debug = task_name
        # assign unlock callback for debugger teardown hooks
        _pdb_release_hook = _local_pdb_complete.set
        # this **must** be awaited by the caller and is done using the
        # root nursery so that the debugger can continue to run without
@ -423,101 +441,232 @@ async def _breakpoint(
        # we have to figure out how to avoid having the service nursery
        # cancel on this task start? I *think* this works below?
        # actor._service_n.cancel_scope.shield = shield
        try:
            with trio.CancelScope(shield=True):
                await actor._service_n.start(
                    wait_for_parent_stdin_hijack,
                    actor.uid,
                )
        except RuntimeError:
            Lock.release()
            raise
    elif is_root_process():
        # we also wait in the root-parent for any child that
        # may have the tty locked prior
        global _debug_lock
        # TODO: wait, what about multiple root tasks acquiring it though?
-        # root process (us) already has it; ignore
+        if Lock.global_actor_in_debug == actor.uid:
-        if _global_actor_in_debug == actor.uid:
+            # re-entrant root process already has it: noop.
            return
        # XXX: since we need to enter pdb synchronously below,
        # we have to release the lock manually from pdb completion
        # callbacks. Can't think of a nicer way then this atm.
-        if _debug_lock.locked():
+        if Lock._debug_lock.locked():
            log.warning(
                'Root actor attempting to shield-acquire active tty lock'
-                f' owned by {_global_actor_in_debug}')
+                f' owned by {Lock.global_actor_in_debug}')
            # must shield here to avoid hitting a ``Cancelled`` and
            # a child getting stuck bc we clobbered the tty
            with trio.CancelScope(shield=True):
-                await _debug_lock.acquire()
+                await Lock._debug_lock.acquire()
        else:
            # may be cancelled
-            await _debug_lock.acquire()
+            await Lock._debug_lock.acquire()
-        _global_actor_in_debug = actor.uid
+        Lock.global_actor_in_debug = actor.uid
-        _local_task_in_debug = task_name
+        Lock.local_task_in_debug = task_name
        # the lock must be released on pdb completion
        def teardown():
            global _local_pdb_complete, _debug_lock
            global _global_actor_in_debug, _local_task_in_debug
            _debug_lock.release()
            _global_actor_in_debug = None
            _local_task_in_debug = None
            _local_pdb_complete.set()
        _pdb_release_hook = teardown
    try:
        # block here one (at the appropriate frame *up*) where
        # ``breakpoint()`` was awaited and begin handling stdio.
        log.debug("Entering the synchronous world of pdb")
-    debug_func(actor)
+        debug_func(actor, pdb)
    except bdb.BdbQuit:
        Lock.release()
        raise
    # XXX: apparently we can't do this without showing this frame
    # in the backtrace on first entry to the REPL? Seems like an odd
    # behaviour that should have been fixed by now. This is also why
    # we scrapped all the @cm approaches that were tried previously.
    # finally:
    #     __tracebackhide__ = True
    #     # frame = sys._getframe()
    #     # last_f = frame.f_back
    #     # last_f.f_globals['__tracebackhide__'] = True
    #     # signal.signal = pdbpp.hideframe(signal.signal)
    #     signal.signal(
    #         signal.SIGINT,
    #         orig_handler
    #     )
-def _mk_pdb() -> PdbwTeardown:
+def shield_sigint(
    signum: int,
    frame: 'frame',  # type: ignore # noqa
    pdb_obj: Optional[MultiActorPdb] = None,
    *args,
-    # XXX: setting these flags on the pdb instance are absolutely
+) -> None:
-    # critical to having ctrl-c work in the ``trio`` standard way!  The
+    '''
-    # stdlib's pdb supports entering the current sync frame on a SIGINT,
+    Specialized debugger compatible SIGINT handler.
    # with ``trio`` we pretty much never want this and if we did we can
    # handle it in the ``tractor`` task runtime.
-    pdb = PdbwTeardown()
+    In childred we always ignore to avoid deadlocks since cancellation
-    pdb.allow_kbdint = True
+    should always be managed by the parent supervising actor. The root
-    pdb.nosigint = True
+    is always cancelled on ctrl-c.
-    return pdb
+    '''
    __tracebackhide__ = True
    uid_in_debug = Lock.global_actor_in_debug
-def _set_trace(actor=None):
+    actor = tractor.current_actor()
    pdb = _mk_pdb()
-    if actor is not None:
+    def do_cancel():
-        log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
+        # If we haven't tried to cancel the runtime then do that instead
        # of raising a KBI (which may non-gracefully destroy
        # a ``trio.run()``).
        if not actor._cancel_called:
            actor.cancel_soon()
-        pdb.set_trace(
+        # If the runtime is already cancelled it likely means the user
-            # start 2 levels up in user code
+        # hit ctrl-c again because teardown didn't full take place in
-            frame=sys._getframe().f_back.f_back,
+        # which case we do the "hard" raising of a local KBI.
        else:
            raise KeyboardInterrupt
    any_connected = False
    if uid_in_debug is not None:
        # try to see if the supposed (sub)actor in debug still
        # has an active connection to *this* actor, and if not
        # it's likely they aren't using the TTY lock / debugger
        # and we should propagate SIGINT normally.
        chans = actor._peers.get(tuple(uid_in_debug))
        if chans:
            any_connected = any(chan.connected() for chan in chans)
            if not any_connected:
                log.warning(
                    'A global actor reported to be in debug '
                    'but no connection exists for this child:\n'
                    f'{uid_in_debug}\n'
                    'Allowing SIGINT propagation..'
                )
                return do_cancel()
    # root actor branch that reports whether or not a child
    # has locked debugger.
    if (
        is_root_process()
        and uid_in_debug is not None
        # XXX: only if there is an existing connection to the
        # (sub-)actor in debug do we ignore SIGINT in this
        # parent! Otherwise we may hang waiting for an actor
        # which has already terminated to unlock.
        and any_connected
    ):
        name = uid_in_debug[0]
        if name != 'root':
            log.pdb(
                f"Ignoring SIGINT while child in debug mode: `{uid_in_debug}`"
            )
        else:
-        # we entered the global ``breakpoint()`` built-in from sync code
+            log.pdb(
-        global _local_task_in_debug, _pdb_release_hook
+                "Ignoring SIGINT while in debug mode"
        _local_task_in_debug = 'sync'
        def nuttin():
            pass
        _pdb_release_hook = nuttin
        pdb.set_trace(
            # start 2 levels up in user code
            frame=sys._getframe().f_back,
            )
    # child actor that has locked the debugger
    elif not is_root_process():
        chan: Channel = actor._parent_chan
        if not chan or not chan.connected():
            log.warning(
                'A global actor reported to be in debug '
                'but no connection exists for its parent:\n'
                f'{uid_in_debug}\n'
                'Allowing SIGINT propagation..'
            )
            return do_cancel()
        task = Lock.local_task_in_debug
        if task:
            log.pdb(
                f"Ignoring SIGINT while task in debug mode: `{task}`"
            )
        # TODO: how to handle the case of an intermediary-child actor
        # that **is not** marked in debug mode? See oustanding issue:
        # https://github.com/goodboy/tractor/issues/320
        # elif debug_mode():
    else:
        log.pdb(
            "Ignoring SIGINT since debug mode is enabled"
        )
    # NOTE: currently (at least on ``fancycompleter`` 0.9.2)
    # it lookks to be that the last command that was run (eg. ll)
    # will be repeated by default.
    # TODO: maybe redraw/print last REPL output to console
    if (
        pdb_obj
        and sys.version_info <= (3, 10)
    ):
        # TODO: make this work like sticky mode where if there is output
        # detected as written to the tty we redraw this part underneath
        # and erase the past draw of this same bit above?
        # pdb_obj.sticky = True
        # pdb_obj._print_if_sticky()
        # also see these links for an approach from ``ptk``:
        # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
        # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
        # XXX: lol, see ``pdbpp`` issue:
        # https://github.com/pdbpp/pdbpp/issues/496
        # TODO: pretty sure this is what we should expect to have to run
        # in total but for now we're just going to wait until `pdbpp`
        # figures out it's own stuff on 3.10 (and maybe we'll help).
        # pdb_obj.do_longlist(None)
        # XXX: we were doing this but it shouldn't be required..
        print(pdb_obj.prompt, end='', flush=True)
 def _set_trace(
    actor: Optional[tractor._actor.Actor] = None,
    pdb: Optional[MultiActorPdb] = None,
 ):
    __tracebackhide__ = True
    actor = actor or tractor.current_actor()
    # start 2 levels up in user code
    frame: Optional[FrameType] = sys._getframe()
    if frame:
        frame = frame.f_back  # type: ignore
    if frame and pdb and actor is not None:
        log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
        # no f!#$&* idea, but when we're in async land
        # we need 2x frames up?
        frame = frame.f_back
    else:
        pdb, undo_sigint = mk_mpdb()
        # we entered the global ``breakpoint()`` built-in from sync code?
        Lock.local_task_in_debug = 'sync'
    pdb.set_trace(frame=frame)
 breakpoint = partial(
    _breakpoint,
@ -525,11 +674,24 @@ breakpoint = partial(
 )
-def _post_mortem(actor):
+def _post_mortem(
-    log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
+    actor: tractor._actor.Actor,
-    pdb = _mk_pdb()
+    pdb: MultiActorPdb,
 ) -> None:
    '''
    Enter the ``pdbpp`` port mortem entrypoint using our custom
    debugger instance.
    '''
    log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
    # TODO: you need ``pdbpp`` master (at least this commit
    # https://github.com/pdbpp/pdbpp/commit/b757794857f98d53e3ebbe70879663d7d843a6c2)
    # to fix this and avoid the hang it causes. See issue:
    # https://github.com/pdbpp/pdbpp/issues/480
    # TODO: help with a 3.10+ major release if/when it arrives.
    # custom Pdb post-mortem entry
    pdbpp.xpm(Pdb=lambda: pdb)
@ -560,6 +722,7 @@ async def _maybe_enter_pm(err):
    ):
        log.debug("Actor crashed, entering debug mode")
        await post_mortem()
        Lock.release()
        return True
    else:
@ -575,7 +738,7 @@ async def acquire_debug_lock(
    This helper is for actor's who don't actually need
    to acquired the debugger but want to wait until the
-    lock is free in the tree root.
+    lock is free in the process-tree root.
    '''
    if not debug_mode():
@ -604,8 +767,6 @@ async def maybe_wait_for_debugger(
    if (
        is_root_process()
    ):
        global _no_remote_has_tty, _global_actor_in_debug, _wait_all_tasks_lock
        # If we error in the root but the debugger is
        # engaged we don't want to prematurely kill (and
        # thus clobber access to) the local tty since it
@ -617,11 +778,10 @@ async def maybe_wait_for_debugger(
        for _ in range(poll_steps):
-            if _global_actor_in_debug:
+            if Lock.global_actor_in_debug:
-                sub_in_debug = tuple(_global_actor_in_debug)
+                sub_in_debug = tuple(Lock.global_actor_in_debug)
-            log.debug(
+            log.debug('Root polling for debug')
                'Root polling for debug')
            with trio.CancelScope(shield=True):
                await trio.sleep(poll_delay)
@ -632,7 +792,7 @@ async def maybe_wait_for_debugger(
                # XXX: doesn't seem to work
                # await trio.testing.wait_all_tasks_blocked(cushion=0)
-                debug_complete = _no_remote_has_tty
+                debug_complete = Lock.no_remote_has_tty
                if (
                    (debug_complete and
                     not debug_complete.is_set())
--- a/tractor/_portal.py
+++ b/tractor/_portal.py
@ -24,7 +24,8 @@ import importlib
 import inspect
 from typing import (
    Any, Optional,
-    Callable, AsyncGenerator
+    Callable, AsyncGenerator,
    Type,
 )
 from functools import partial
 from dataclasses import dataclass
@ -442,6 +443,10 @@ class Portal:
        _err: Optional[BaseException] = None
        ctx._portal = self
        uid = self.channel.uid
        cid = ctx.cid
        etype: Optional[Type[BaseException]] = None
        # deliver context instance and .started() msg value in open tuple.
        try:
            async with trio.open_nursery() as scope_nursery:
@ -477,13 +482,24 @@ class Portal:
            # KeyboardInterrupt,
        ) as err:
-            _err = err
+            etype = type(err)
            # the context cancels itself on any cancel
            # causing error.
            log.cancel(
                f'Context to {self.channel.uid} sending cancel request..')
            if ctx.chan.connected():
                log.cancel(
                    'Context cancelled for task, sending cancel request..\n'
                    f'task:{cid}\n'
                    f'actor:{uid}'
                )
                await ctx.cancel()
            else:
                log.warning(
                    'IPC connection for context is broken?\n'
                    f'task:{cid}\n'
                    f'actor:{uid}'
                )
            raise
        finally:
@ -492,6 +508,12 @@ class Portal:
            # sure we get the error the underlying feeder mem chan.
            # if it's not raised here it *should* be raised from the
            # msg loop nursery right?
            if ctx.chan.connected():
                log.info(
                    'Waiting on final context-task result for\n'
                    f'task: {cid}\n'
                    f'actor: {uid}'
                )
                result = await ctx.result()
            # though it should be impossible for any tasks
@ -502,14 +524,17 @@ class Portal:
                # should we encapsulate this in the context api?
                await ctx._recv_chan.aclose()
-            if _err:
+            if etype:
                if ctx._cancel_called:
                    log.cancel(
-                        f'Context {fn_name} cancelled by caller with\n{_err}'
+                        f'Context {fn_name} cancelled by caller with\n{etype}'
                    )
                elif _err is not None:
                    log.cancel(
-                        f'Context {fn_name} cancelled by callee with\n{_err}'
+                        f'Context for task cancelled by callee with {etype}\n'
                        f'target: `{fn_name}`\n'
                        f'task:{cid}\n'
                        f'actor:{uid}'
                    )
            else:
                log.runtime(
@ -517,6 +542,17 @@ class Portal:
                    f'value from callee `{result}`'
                )
            # XXX: (MEGA IMPORTANT) if this is a root opened process we
            # wait for any immediate child in debug before popping the
            # context from the runtime msg loop otherwise inside
            # ``Actor._push_result()`` the msg will be discarded and in
            # the case where that msg is global debugger unlock (via
            # a "stop" msg for a stream), this can result in a deadlock
            # where the root is waiting on the lock to clear but the
            # child has already cleared it and clobbered IPC.
            from ._debug import maybe_wait_for_debugger
            await maybe_wait_for_debugger()
            # remove the context from runtime tracking
            self.actor._contexts.pop((self.channel.uid, ctx.cid))
--- a/tractor/_root.py
+++ b/tractor/_root.py
@ -103,13 +103,7 @@ async def open_root_actor(
        _default_arbiter_port,
    )
-    if loglevel is None:
+    loglevel = (loglevel or log._default_loglevel).upper()
        loglevel = log.get_loglevel()
    else:
        log._default_loglevel = loglevel
        log.get_console_log(loglevel)
    assert loglevel
    if debug_mode and _spawn._spawn_method == 'trio':
        _state._runtime_vars['_debug_mode'] = True
@ -124,7 +118,7 @@ async def open_root_actor(
            logging.getLevelName(
                # lul, need the upper case for the -> int map?
                # sweet "dynamic function behaviour" stdlib...
-                loglevel.upper()
+                loglevel,
            ) > logging.getLevelName('PDB')
        ):
            loglevel = 'PDB'
@ -134,19 +128,24 @@ async def open_root_actor(
            "Debug mode is only supported for the `trio` backend!"
        )
-    # make a temporary connection to see if an arbiter exists
+    log.get_console_log(loglevel)
    arbiter_found = False
    try:
        # make a temporary connection to see if an arbiter exists,
        # if one can't be made quickly we assume none exists.
        arbiter_found = False
        # TODO: this connect-and-bail forces us to have to carefully
        # rewrap TCP 104-connection-reset errors as EOF so as to avoid
        # propagating cancel-causing errors to the channel-msg loop
        # machinery.  Likely it would be better to eventually have
        # a "discovery" protocol with basic handshake instead.
        with trio.move_on_after(1):
            async with _connect_chan(host, port):
                arbiter_found = True
    except OSError:
        # TODO: make this a "discovery" log level?
        logger.warning(f"No actor could be found @ {host}:{port}")
    # create a local actor and start up its main routine/task
@ -216,7 +215,8 @@ async def open_root_actor(
            finally:
                # NOTE: not sure if we'll ever need this but it's
                # possibly better for even more determinism?
-                # logger.cancel(f'Waiting on {len(nurseries)} nurseries in root..')
+                # logger.cancel(
                #     f'Waiting on {len(nurseries)} nurseries in root..')
                # nurseries = actor._actoruid2nursery.values()
                # async with trio.open_nursery() as tempn:
                #     for an in nurseries:
--- a/tractor/_spawn.py
+++ b/tractor/_spawn.py
@ -307,7 +307,8 @@ async def new_proc(
        proc: Optional[trio.Process] = None
        try:
            try:
-                proc = await trio.open_process(spawn_cmd)
+                # TODO: needs ``trio_typing`` patch?
                proc = await trio.lowlevel.open_process(spawn_cmd)  # type: ignore
                log.runtime(f"Started {proc}")
@ -334,6 +335,9 @@ async def new_proc(
                                    await proc.wait()
                raise
            # a sub-proc ref **must** exist now
            assert proc
            portal = Portal(chan)
            actor_nursery._children[subactor.uid] = (
                subactor, proc, portal)
--- a/tractor/_streaming.py
+++ b/tractor/_streaming.py
@ -601,10 +601,18 @@ class Context:
            finally:
                if self._portal:
-                    self._portal._streams.remove(rchan)
+                    try:
                        self._portal._streams.remove(stream)
                    except KeyError:
                        log.warning(
                            f'Stream was already destroyed?\n'
                            f'actor: {self.chan.uid}\n'
                            f'ctx id: {self.cid}'
                        )
    async def result(self) -> Any:
-        '''From a caller side, wait for and return the final result from
+        '''
        From a caller side, wait for and return the final result from
        the callee side task.
        '''
--- a/tractor/to_asyncio.py
+++ b/tractor/to_asyncio.py
@ -271,7 +271,12 @@ def _run_asyncio_task(
            task.exception()
        except BaseException as terr:
            task_err = terr
            if isinstance(terr, CancelledError):
                log.cancel(f'`asyncio` task cancelled: {task.get_name()}')
            else:
                log.exception(f'`asyncio` task: {task.get_name()} errored')
            assert type(terr) is type(aio_err), 'Asyncio task error mismatch?'
        if aio_err is not None: