Use `._entry` proto-ed "lifetime ops" in logging

As per a WIP scribbled out TODO in `._entry.nest_from_op()`, change a bunch of "supervisor/lifetime mgmt ops" related log messages to contain some supervisor-annotation "headers" in an effort to give a terser "visual indication" of how some execution/scope/storage primitive entity (like an actor/task/ctx/connection) is being operated on (like, opening/started/closed/cancelled/erroring) from a "supervisor action" POV. Also tweak a bunch more emissions to lower levels to reduce noise around normal inter-actor operations like process and IPC ctx supervision.
2024-07-02 16:31:58 -04:00 · 2024-07-02 16:31:58 -04:00 · b46400a86f
parent 02812b9f51
commit b46400a86f
4 changed files with 62 additions and 33 deletions
--- a/tractor/_context.py
+++ b/tractor/_context.py
@ -933,11 +933,12 @@ class Context:
        self.cancel_called = True

        header: str = (
-            f'Cancelling ctx with peer from {side.upper()} side\n\n'
+            f'Cancelling ctx from {side.upper()}-side\n'
        )
        reminfo: str = (
            # ' =>\n'
-            f'Context.cancel() => {self.chan.uid}\n'
+            # f'Context.cancel() => {self.chan.uid}\n'
+            f'c)=> {self.chan.uid}\n'
            # f'{self.chan.uid}\n'
            f' |_ @{self.dst_maddr}\n'
            f'    >> {self.repr_rpc}\n'
@ -1267,6 +1268,12 @@ class Context:

    @property
    def maybe_error(self) -> BaseException|None:
+        '''
+        Return the (remote) error as outcome or `None`.
+
+        Remote errors take precedence over local ones.
+
+        '''
        le: BaseException|None = self._local_error
        re: RemoteActorError|ContextCancelled|None = self._remote_error

@ -2182,9 +2189,16 @@ async def open_context_from_portal(
        # handled in the block above ^^^ !!
        # await _debug.pause()
        # log.cancel(
-        log.exception(
-            f'{ctx.side}-side of `Context` terminated with '
-            f'.outcome => {ctx.repr_outcome()}\n'
+        match scope_err:
+            case trio.Cancelled:
+                logmeth = log.cancel
+
+            # XXX explicitly report on any non-graceful-taskc cases
+            case _:
+                logmeth = log.exception
+
+        logmeth(
+            f'ctx {ctx.side!r}-side exited with {ctx.repr_outcome()}\n'
        )

        if debug_mode():
--- a/tractor/_entry.py
+++ b/tractor/_entry.py
@ -265,7 +265,7 @@ def _trio_main(
    except BaseException as err:
        logmeth = log.error
        exit_status: str = (
-            'Main actor task crashed during exit?\n'
+            'Main actor task exited due to crash?\n'
            +
            nest_from_op(
                input_op='x)>',  # closed by error
--- a/tractor/_runtime.py
+++ b/tractor/_runtime.py
@ -66,10 +66,11 @@ from trio import (
 )

 from tractor.msg import (
-    pretty_struct,
-    NamespacePath,
-    types as msgtypes,
    MsgType,
+    NamespacePath,
+    Stop,
+    pretty_struct,
+    types as msgtypes,
 )
 from ._ipc import Channel
 from ._context import (
@ -545,7 +546,8 @@ class Actor:
            ):
                log.cancel(
                    'Waiting on cancel request to peer\n'
-                    f'`Portal.cancel_actor()` => {chan.uid}\n'
+                    f'c)=>\n'
+                    f' |_{chan.uid}\n'
                )

                # XXX: this is a soft wait on the channel (and its
@ -642,12 +644,14 @@ class Actor:
                        # and
                        an_exit_cs.cancelled_caught
                    ):
-                        log.warning(
+                        report: str = (
                            'Timed out waiting on local actor-nursery to exit?\n'
                            f'{local_nursery}\n'
-                            f' |_{pformat(local_nursery._children)}\n'
                        )
-                        # await _debug.pause()
+                        if children := local_nursery._children:
+                            report += f' |_{pformat(children)}\n'
+
+                        log.warning(report)

                if disconnected:
                    # if the transport died and this actor is still
@ -819,14 +823,17 @@ class Actor:
                # side,
            )]
        except KeyError:
-            log.warning(
+            report: str = (
                'Ignoring invalid IPC ctx msg!\n\n'
-                f'<= sender: {uid}\n\n'
-                # XXX don't need right since it's always in msg?
-                # f'=> cid: {cid}\n\n'
-
-                f'{pretty_struct.pformat(msg)}\n'
+                f'<=? {uid}\n\n'
+                f'  |_{pretty_struct.pformat(msg)}\n'
            )
+            match msg:
+                case Stop():
+                    log.runtime(report)
+                case _:
+                    log.warning(report)
+
            return

        # if isinstance(msg, MsgTypeError):
@ -1338,10 +1345,11 @@ class Actor:
            return True

        log.cancel(
-            'Cancel request for RPC task\n\n'
-            f'<= Actor._cancel_task(): {requesting_uid}\n\n'
-            f'=> {ctx._task}\n'
-            f'  |_ >> {ctx.repr_rpc}\n'
+            'Rxed cancel request for RPC task\n'
+            f'<=c) {requesting_uid}\n'
+            f'  |_{ctx._task}\n'
+            f'    >> {ctx.repr_rpc}\n'
+            # f'=> {ctx._task}\n'
            # f'  >> Actor._cancel_task() => {ctx._task}\n'
            # f'  |_ {ctx._task}\n\n'

--- a/tractor/_spawn.py
+++ b/tractor/_spawn.py
@ -246,7 +246,8 @@ async def hard_kill(

    '''
    log.cancel(
-        'Terminating sub-proc:\n'
+        'Terminating sub-proc\n'
+        f'>x)\n'
        f' |_{proc}\n'
    )
    # NOTE: this timeout used to do nothing since we were shielding
@ -293,7 +294,7 @@ async def hard_kill(
        log.critical(
            # 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n'
            '#T-800 deployed to collect zombie B0\n'
-            f'|\n'
+            f'>x)\n'
            f' |_{proc}\n'
        )
        proc.kill()
@ -322,7 +323,8 @@ async def soft_kill(
    uid: tuple[str, str] = portal.channel.uid
    try:
        log.cancel(
-            'Soft killing sub-actor via `Portal.cancel_actor()`\n'
+            'Soft killing sub-actor via portal request\n'
+            f'c)> {portal.chan.uid}\n'
            f' |_{proc}\n'
        )
        # wait on sub-proc to signal termination
@ -552,8 +554,9 @@ async def trio_proc(
            # cancel result waiter that may have been spawned in
            # tandem if not done already
            log.cancel(
-                'Cancelling existing result waiter task for '
-                f'{subactor.uid}'
+                'Cancelling portal result reaper task\n'
+                f'>c)\n'
+                f' |_{subactor.uid}\n'
            )
            nursery.cancel_scope.cancel()

@ -562,7 +565,11 @@ async def trio_proc(
        # allowed! Do this **after** cancellation/teardown to avoid
        # killing the process too early.
        if proc:
-            log.cancel(f'Hard reap sequence starting for {subactor.uid}')
+            log.cancel(
+                f'Hard reap sequence starting for subactor\n'
+                f'>x)\n'
+                f' |_{subactor}@{subactor.uid}\n'
+            )

            with trio.CancelScope(shield=True):
                # don't clobber an ongoing pdb