Add `shield: bool` support to `.pause()`

It's been on the todo for a while and I've given up trying to properly hide the `trio.CancelScope.__exit__()` frame for now instead opting to just `log.pdb()` a big apology XD Users can obvi still just not use the flag and wrap `tractor.pause()` in their own cs block if they want to avoid having to hit `'up'` in the pdb REPL if needed in a cancelled task-scope. Impl deatz: - factor orig `.pause()` impl into new `._pause()` so that we can more tersely wrap the original content depending on `shield: bool` input; only open the cancel-scope when shield is set to avoid aforemented extra strack frame annoyance. - pass through `shield` to underlying `_pause` and `debug_func()` so we can actually know when so log our apology. - add a buncha notes to new `.pause()` wrapper regarding the inability to hide the cancel-scope `.__exit__()`, inluding that overriding the code in `trio._core._run.CancelScope` doesn't seem to solve the issue either.. Unrelated `maybe_wait_for_debugger()` tweaks: - don't read `Lock.global_actor_in_debug` more then needed, rename local read var to `in_debug` (since it can also hold the root actor uid, not just sub-actors). - shield the `await debug_complete.wait()` since ideally we avoid the root cancellation child-actors in debug even when the root calls this func in a cancelled scope.
2024-03-06 14:37:54 -05:00 · 2024-03-06 14:37:54 -05:00 · 6156ff95f8
parent 9e3f41a5b1
commit 6156ff95f8
1 changed files with 145 additions and 61 deletions
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@ -619,13 +619,15 @@ def _set_trace(
    actor: tractor.Actor | None = None,
    pdb: MultiActorPdb | None = None,
    shield: bool = False,
    extra_frames_up_when_async: int = 1,
 ):
    __tracebackhide__: bool = True
    actor: tractor.Actor = actor or current_actor()
-    # start 2 levels up in user code
+    # always start 1 level up from THIS in user code.
-    frame: FrameType | None = sys._getframe()
+    frame: FrameType|None
-    if frame:
+    if frame := sys._getframe():
        frame: FrameType = frame.f_back  # type: ignore
    if (
@ -633,23 +635,39 @@ def _set_trace(
        and (
            pdb
            and actor is not None
-        ) or shield
+        )
        # or shield
    ):
        msg: str = _pause_msg
        if shield:
            # log.warning(
            msg = (
                '\n\n'
                '            ------ - ------\n'
                'Debugger invoked with `shield=True` so an extra\n'
                '`trio.CancelScope.__exit__()` frame is shown..\n'
                '\n'
                'Try going up one frame to see your pause point!\n'
                '\n'
                '          SORRY we need to fix this!\n'
                '            ------ - ------\n\n'
            ) + msg
        # pdbp.set_trace()
        # TODO: maybe print the actor supervion tree up to the
        # root here? Bo
        log.pdb(
-            f'{_pause_msg}\n'
+            f'{msg}\n'
            '|\n'
            f'|_ {actor.uid}\n'
        )
        # no f!#$&* idea, but when we're in async land
        # we need 2x frames up?
-        frame = frame.f_back
+        for i in range(extra_frames_up_when_async):
-        # frame = frame.f_back
+            frame: FrameType = frame.f_back
-
+            log.debug(
-        # if shield:
+                f'Going up frame {i} -> {frame}\n'
-        #     frame = frame.f_back
+            )
    else:
        pdb, undo_sigint = mk_mpdb()
@ -659,10 +677,9 @@ def _set_trace(
        Lock.local_task_in_debug = 'sync'
    pdb.set_trace(frame=frame)
    # undo_
-async def pause(
+async def _pause(
    debug_func: Callable = _set_trace,
    release_lock_signal: trio.Event | None = None,
@ -676,27 +693,19 @@ async def pause(
    # be no way to override it?..
    # shield: bool = False,
-    # TODO:
+    shield: bool = False,
    # shield: bool = False
    task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED
 ) -> None:
    '''
-    A pause point (more commonly known as a "breakpoint") interrupt
+    Inner impl for `pause()` to avoid the `trio.CancelScope.__exit__()`
-    instruction for engaging a blocking debugger instance to
+    stack frame when not shielded (since apparently i can't figure out
-    conduct manual console-based-REPL-interaction from within
+    how to hide it using the normal mechanisms..)
    `tractor`'s async runtime, normally from some single-threaded
    and currently executing actor-hosted-`trio`-task in some
    (remote) process.
-    NOTE: we use the semantics "pause" since it better encompasses
+    Hopefully we won't need this in the long run.
    the entirety of the necessary global-runtime-state-mutation any
    actor-task must access and lock in order to get full isolated
    control over the process tree's root TTY:
    https://en.wikipedia.org/wiki/Breakpoint
    '''
-    # __tracebackhide__ = True
+    __tracebackhide__: bool = True
    actor = current_actor()
    pdb, undo_sigint = mk_mpdb()
    task_name: str = trio.lowlevel.current_task().name
@ -707,23 +716,10 @@ async def pause(
    ):
        Lock.local_pdb_complete = trio.Event()
    # if shield:
    debug_func = partial(
        debug_func,
        # shield=shield,
    )
    # def _exit(self, *args, **kwargs):
    #     __tracebackhide__: bool = True
    #     super().__exit__(*args, **kwargs)
    # trio.CancelScope.__exit__.__tracebackhide__ = True
    # import types
    # with trio.CancelScope(shield=shield) as cs:
        # cs.__exit__ = types.MethodType(_exit, cs)
        # cs.__exit__.__tracebackhide__ = True
    # TODO: need a more robust check for the "root" actor
    if (
        not is_root_process()
@ -818,7 +814,7 @@ async def pause(
            #     'trace func provided!'
            # )
            print(f"{actor.uid} ENTERING WAIT")
-            task_status.started()
+            task_status.started(cs)
            # with trio.CancelScope(shield=True):
            #     await release_lock_signal.wait()
@ -827,7 +823,13 @@ async def pause(
            # block here one (at the appropriate frame *up*) where
            # ``breakpoint()`` was awaited and begin handling stdio.
            log.debug("Entering the synchronous world of pdb")
-            debug_func(actor, pdb)
+            debug_func(
                actor,
                pdb,
                extra_frames_up_when_async=2,
                shield=shield,
            )
            assert cs
    except bdb.BdbQuit:
        Lock.release()
@ -845,6 +847,81 @@ async def pause(
 #     # signal.signal = pdbp.hideframe(signal.signal)
 async def pause(
    debug_func: Callable = _set_trace,
    release_lock_signal: trio.Event | None = None,
    # TODO: allow caller to pause despite task cancellation,
    # exactly the same as wrapping with:
    # with CancelScope(shield=True):
    #     await pause()
    # => the REMAINING ISSUE is that the scope's .__exit__() frame
    # is always show in the debugger on entry.. and there seems to
    # be no way to override it?..
    # shield: bool = False,
    shield: bool = False,
    task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED
 ) -> None:
    '''
    A pause point (more commonly known as a "breakpoint") interrupt
    instruction for engaging a blocking debugger instance to
    conduct manual console-based-REPL-interaction from within
    `tractor`'s async runtime, normally from some single-threaded
    and currently executing actor-hosted-`trio`-task in some
    (remote) process.
    NOTE: we use the semantics "pause" since it better encompasses
    the entirety of the necessary global-runtime-state-mutation any
    actor-task must access and lock in order to get full isolated
    control over the process tree's root TTY:
    https://en.wikipedia.org/wiki/Breakpoint
    '''
    __tracebackhide__: bool = True
    if shield:
        # NOTE XXX: even hard coding this inside the `class CancelScope:`
        # doesn't seem to work for me!?
        # ^ XXX ^
        # def _exit(self, *args, **kwargs):
        #     __tracebackhide__: bool = True
        #     super().__exit__(*args, **kwargs)
        trio.CancelScope.__enter__.__tracebackhide__ = True
        trio.CancelScope.__exit__.__tracebackhide__ = True
        # import types
        # with trio.CancelScope(shield=shield) as cs:
            # cs.__exit__ = types.MethodType(_exit, cs)
            # cs.__exit__.__tracebackhide__ = True
        with trio.CancelScope(shield=shield) as cs:
            # setattr(cs.__exit__.__func__, '__tracebackhide__', True)
            # setattr(cs.__enter__.__func__, '__tracebackhide__', True)
            # NOTE: so the caller can always cancel even if shielded
            task_status.started(cs)
            await _pause(
                debug_func=debug_func,
                release_lock_signal=release_lock_signal,
                shield=True,
                task_status=task_status,
            )
    else:
        await _pause(
            debug_func=debug_func,
            release_lock_signal=release_lock_signal,
            shield=False,
            task_status=task_status,
        )
 # TODO: allow pausing from sync code.
 # normally by remapping python's builtin breakpoint() hook to this
 # runtime aware version which takes care of all .
@ -1043,12 +1120,20 @@ async def maybe_wait_for_debugger(
        # will make the pdb repl unusable.
        # Instead try to wait for pdb to be released before
        # tearing down.
-        sub_in_debug: tuple[str, str]|None = Lock.global_actor_in_debug
+        in_debug: tuple[str, str]|None = Lock.global_actor_in_debug
        debug_complete: trio.Event|None = Lock.no_remote_has_tty
-        if sub_in_debug := Lock.global_actor_in_debug:
+        if in_debug == current_actor().uid:
            log.debug(
                msg
                +
                'Root already owns the TTY LOCK'
            )
            return True
        elif in_debug:
            msg += (
-                f'Debug `Lock` in use by subactor: {sub_in_debug}\n'
+                f'Debug `Lock` in use by subactor: {in_debug}\n'
            )
            # TODO: could this make things more deterministic?
            # wait to see if a sub-actor task will be
@ -1065,27 +1150,26 @@ async def maybe_wait_for_debugger(
            return False
        for istep in range(poll_steps):
            if (
                debug_complete
                and not debug_complete.is_set()
-                and sub_in_debug is not None
+                and in_debug is not None
            ):
                log.pdb(
                    msg
                    +
                    'Root is waiting on tty lock to release..\n'
                )
                with trio.CancelScope(shield=True):
                    await debug_complete.wait()
                log.pdb(
                    f'Child subactor released debug lock:'
-                    f'|_{sub_in_debug}\n'
+                    f'|_{in_debug}\n'
                )
            # is no subactor locking debugger currently?
            if (
-                 sub_in_debug is None
+                in_debug is None
                and (
                    debug_complete is None
                    or debug_complete.is_set()