Compare commits


No commits in common. "3d12a7e005963371ff777325b5b078dcc8643ed0" and "72df312e71dd3d29b16118b6233e40e7512347bf" have entirely different histories.

3 changed files with 44 additions and 105 deletions

View File

@@ -444,7 +444,7 @@ class Actor:
 # inside ``open_root_actor()`` where there is a check for
 # a bound listener on the "arbiter" addr. the reset will be
 # because the handshake was never meant took place.
-log.runtime(
+log.warning(
     con_status
     +
     ' -> But failed to handshake? Ignoring..\n'
@@ -520,29 +520,8 @@ class Actor:
 # the peer was cancelled we try to wait for them
 # to tear down their side of the connection before
 # moving on with closing our own side.
-if (
-    local_nursery
-    and (
-        self._cancel_called
-        or
-        chan._cancel_called
-    )
-    #
-    # ^-TODO-^ along with this is there another condition
-    # that we should filter with to avoid entering this
-    # waiting block needlessly?
-    # -[ ] maybe `and local_nursery.cancelled` and/or
-    # only if the `._children` table is empty or has
-    # only `Portal`s with .chan._cancel_called ==
-    # True` as per what we had below; the MAIN DIFF
-    # BEING that just bc one `Portal.cancel_actor()`
-    # was called, doesn't mean the whole actor-nurse
-    # is gonna exit any time soon right!?
-    #
-    # or
-    # all(chan._cancel_called for chan in chans)
-):
+if local_nursery:
+    if chan._cancel_called:
 log.cancel(
     'Waiting on cancel request to peer\n'
     f'`Portal.cancel_actor()` => {chan.uid}\n'
@@ -551,19 +530,14 @@ class Actor:
 # XXX: this is a soft wait on the channel (and its
 # underlying transport protocol) to close from the
 # remote peer side since we presume that any channel
-# which is mapped to a sub-actor (i.e. it's managed
-# by local actor-nursery) has a message that is sent
-# to the peer likely by this actor (which may be in
-# a shutdown sequence due to cancellation) when the
-# local runtime here is now cancelled while
-# (presumably) in the middle of msg loop processing.
-chan_info: str = (
-    f'{chan.uid}\n'
-    f'|_{chan}\n'
-    f' |_{chan.transport}\n\n'
-)
-with trio.move_on_after(0.5) as drain_cs:
-    drain_cs.shield = True
+# which is mapped to a sub-actor (i.e. it's managed by
+# one of our local nurseries) has a message is sent to
+# the peer likely by this actor (which is now in
+# a cancelled condition) when the local runtime here is
+# now cancelled while (presumably) in the middle of msg
+# loop processing.
+with trio.move_on_after(0.5) as cs:
+    cs.shield = True
 # attempt to wait for the far end to close the
 # channel and bail after timeout (a 2-generals
@@ -580,7 +554,10 @@ class Actor:
 # TODO: factor this into a helper?
 log.warning(
     'Draining msg from disconnected peer\n'
-    f'{chan_info}'
+    f'{chan.uid}\n'
+    f'|_{chan}\n'
+    f' |_{chan.transport}\n\n'
     f'{pformat(msg)}\n'
 )
 # cid: str|None = msg.get('cid')
@@ -592,60 +569,29 @@ class Actor:
 cid,
 msg,
 )
-if drain_cs.cancelled_caught:
-    log.warning(
-        'Timed out waiting on IPC transport channel to drain?\n'
-        f'{chan_info}'
-    )
-# XXX NOTE XXX when no explicit call to
-# `open_root_actor()` was made by the application
-# (normally we implicitly make that call inside
-# the first `.open_nursery()` in root-actor
-# user/app code), we can assume that either we
-# are NOT the root actor or are root but the
-# runtime was started manually. and thus DO have
-# to wait for the nursery-enterer to exit before
-# shutting down the local runtime to avoid
-# clobbering any ongoing subactor
-# teardown/debugging/graceful-cancel.
+# NOTE: when no call to `open_root_actor()` was
+# made, we implicitly make that call inside
+# the first `.open_nursery()`, in this case we
+# can assume that we are the root actor and do
+# not have to wait for the nursery-enterer to
+# exit before shutting down the actor runtime.
 #
 # see matching note inside `._supervise.open_nursery()`
-#
-# TODO: should we have a separate cs + timeout
-# block here?
-if (
-    # XXX SO either,
-    # - not root OR,
-    # - is root but `open_root_actor()` was
-    # entered manually (in which case we do
-    # the equiv wait there using the
-    # `devx._debug` sub-sys APIs).
-    not local_nursery._implicit_runtime_started
-):
+if not local_nursery._implicit_runtime_started:
 log.runtime(
     'Waiting on local actor nursery to exit..\n'
     f'|_{local_nursery}\n'
 )
-with trio.move_on_after(0.5) as an_exit_cs:
-    an_exit_cs.shield = True
 await local_nursery.exited.wait()
-# TODO: currently this is always triggering for every
-# sub-daemon spawned from the `piker.services._mngr`?
-# -[ ] how do we ensure that the IPC is supposed to
-# be long lived and isn't just a register?
-# |_ in the register case how can we signal that the
-# ephemeral msg loop was intentional?
 if (
-    # not local_nursery._implicit_runtime_started
-    # and
-    an_exit_cs.cancelled_caught
+    cs.cancelled_caught
+    and not local_nursery._implicit_runtime_started
 ):
 log.warning(
-    'Timed out waiting on local actor-nursery to exit?\n'
-    f'{local_nursery}\n'
-    f' |_{pformat(local_nursery._children)}\n'
+    'Failed to exit local actor nursery?\n'
+    f'|_{local_nursery}\n'
 )
 # await _debug.pause()
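
The recurring pattern these hunks toggle is a shielded `trio.move_on_after()` "soft wait": give the peer a bounded window to finish its side of teardown, but don't let an already-cancelled enclosing scope abort the wait itself, then check `cancelled_caught` to decide whether to warn. A minimal standalone sketch of that pattern; the `peer_closed` event, `soft_wait_for_peer()` name, and timeout value are illustrative assumptions, not taken from the diff:

    import trio

    async def soft_wait_for_peer(
        peer_closed: trio.Event,
        timeout: float = 0.5,
    ) -> bool:
        # Shield the wait so an outer, already-triggered cancellation
        # can't abort it; only the local deadline can cut it short.
        with trio.move_on_after(timeout) as cs:
            cs.shield = True
            await peer_closed.wait()

        # `cancelled_caught` is True only when the deadline fired,
        # i.e. the peer never confirmed closure in time.
        return not cs.cancelled_caught

    async def main() -> None:
        closed = trio.Event()

        async def peer() -> None:
            await trio.sleep(0.1)
            closed.set()

        async with trio.open_nursery() as nursery:
            nursery.start_soon(peer)
            print('peer closed in time:', await soft_wait_for_peer(closed))

    if __name__ == '__main__':
        trio.run(main)

The `cancelled_caught` check is the same condition both timed-out-warning branches in the hunks key off of.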

View File

@@ -577,18 +577,14 @@ def run_as_asyncio_guest(
 log.runtime(f"trio_main finished: {main_outcome!r}")
 # start the infection: run trio on the asyncio loop in "guest mode"
-log.runtime(
-    'Infecting `asyncio`-process with a `trio` guest-run of\n\n'
-    f'{trio_main!r}\n\n'
-    f'{trio_done_callback}\n'
-)
+log.info(f"Infecting asyncio process with {trio_main}")
 trio.lowlevel.start_guest_run(
     trio_main,
     run_sync_soon_threadsafe=loop.call_soon_threadsafe,
     done_callback=trio_done_callback,
 )
-# NOTE `.unwrap()` will raise on error
+# ``.unwrap()`` will raise here on error
 return (await trio_done_fut).unwrap()
 # might as well if it's installed.
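
For context on this second file: `run_as_asyncio_guest()` runs a trio program on top of a live asyncio loop via trio's guest mode, with a done-callback resolving an asyncio future that holds the final `outcome.Outcome`. A self-contained sketch of that wiring, using the same `trio.lowlevel.start_guest_run()` keyword arguments shown in the hunk; the toy `trio_main` and `asyncio_main` bodies here are illustrative, not tractor's:

    import asyncio
    import trio

    async def trio_main() -> str:
        await trio.sleep(0.1)
        return 'hello from trio'

    async def asyncio_main() -> str:
        loop = asyncio.get_running_loop()
        trio_done_fut: asyncio.Future = loop.create_future()

        def trio_done_callback(main_outcome) -> None:
            # Invoked on the host loop once the trio run finishes;
            # `main_outcome` is an `outcome.Outcome` wrapping either
            # the return value or the raised exception.
            trio_done_fut.set_result(main_outcome)

        # run trio "inside" the asyncio loop in guest mode.
        trio.lowlevel.start_guest_run(
            trio_main,
            run_sync_soon_threadsafe=loop.call_soon_threadsafe,
            done_callback=trio_done_callback,
        )
        # `.unwrap()` returns the value or re-raises, as in the diff's
        # final `return (await trio_done_fut).unwrap()` line.
        return (await trio_done_fut).unwrap()

    if __name__ == '__main__':
        print(asyncio.run(asyncio_main()))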

View File

@@ -271,11 +271,8 @@ async def maybe_open_context(
     yield False, yielded
 else:
+    log.info(f'Reusing _Cached resource for {ctx_key}')
     _Cache.users += 1
-    log.runtime(
-        f'Reusing resource for `_Cache` user {_Cache.users}\n\n'
-        f'{ctx_key!r} -> {yielded!r}\n'
-    )
     lock.release()
     yield True, yielded
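
This last hunk only swaps the "cache hit" log line inside `maybe_open_context()`, but the surrounding bookkeeping is the interesting part: bump the user count, release the lock, then yield `(True, <cached value>)` so the caller knows it got a reused resource rather than a freshly created one. A stripped-down sketch of that protocol, assuming a hypothetical single-slot `_Cache` class and `make()` factory; teardown, error handling, and the keyed multi-entry cache of the real implementation are deliberately elided:

    from contextlib import asynccontextmanager
    from typing import Any, AsyncIterator, Awaitable, Callable

    import trio

    class _Cache:
        # illustrative single-slot cache: a lock serializing first
        # creation, a user count, and the cached value itself.
        lock = trio.Lock()
        users: int = 0
        value: Any = None

    @asynccontextmanager
    async def maybe_open(
        make: Callable[[], Awaitable[Any]],
    ) -> AsyncIterator[tuple[bool, Any]]:
        await _Cache.lock.acquire()
        if _Cache.value is None:
            # first caller: build the resource while holding the lock
            # so concurrent entrants can't double-create it.
            _Cache.value = await make()
            _Cache.users += 1
            _Cache.lock.release()
            yield False, _Cache.value    # False => freshly created
        else:
            # cache hit: same ordering as the hunk above, i.e. bump
            # the user count, drop the lock, yield the reused value.
            _Cache.users += 1
            _Cache.lock.release()
            yield True, _Cache.value     # True => reused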