2021-12-13 18:08:32 +00:00
|
|
|
# tractor: structured concurrent "actors".
|
|
|
|
# Copyright 2018-eternity Tyler Goodlet.
|
|
|
|
|
2024-02-20 20:39:45 +00:00
|
|
|
# This program is free software: you can redistribute it and/or
|
|
|
|
# modify it under the terms of the GNU Affero General Public License
|
|
|
|
# as published by the Free Software Foundation, either version 3 of
|
|
|
|
# the License, or (at your option) any later version.
|
2021-12-13 18:08:32 +00:00
|
|
|
|
2024-02-20 20:39:45 +00:00
|
|
|
# This program is distributed in the hope that it will be useful, but
|
|
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
# Affero General Public License for more details.
|
2021-12-13 18:08:32 +00:00
|
|
|
|
2024-02-20 20:39:45 +00:00
|
|
|
# You should have received a copy of the GNU Affero General Public
|
|
|
|
# License along with this program. If not, see
|
|
|
|
# <https://www.gnu.org/licenses/>.
|
2021-12-13 18:08:32 +00:00
|
|
|
|
2020-07-23 17:23:55 +00:00
|
|
|
"""
|
|
|
|
Multi-core debugging for da peeps!
|
2021-05-12 16:01:43 +00:00
|
|
|
|
2020-07-23 17:23:55 +00:00
|
|
|
"""
|
2022-02-07 03:14:16 +00:00
|
|
|
from __future__ import annotations
|
2020-09-12 15:47:14 +00:00
|
|
|
import bdb
|
2023-01-26 20:26:43 +00:00
|
|
|
import os
|
2020-07-23 17:23:55 +00:00
|
|
|
import sys
|
2022-01-23 22:04:49 +00:00
|
|
|
import signal
|
2023-01-26 20:26:43 +00:00
|
|
|
from functools import (
|
|
|
|
partial,
|
|
|
|
cached_property,
|
|
|
|
)
|
2023-09-28 19:36:24 +00:00
|
|
|
from contextlib import (
|
|
|
|
asynccontextmanager as acm,
|
|
|
|
contextmanager as cm,
|
2023-10-02 22:10:34 +00:00
|
|
|
nullcontext,
|
2023-09-28 19:36:24 +00:00
|
|
|
)
|
2021-10-14 16:07:09 +00:00
|
|
|
from typing import (
|
2022-10-13 19:41:38 +00:00
|
|
|
Any,
|
2021-10-14 16:07:09 +00:00
|
|
|
Callable,
|
|
|
|
AsyncIterator,
|
|
|
|
AsyncGenerator,
|
|
|
|
)
|
2022-02-09 15:04:37 +00:00
|
|
|
from types import FrameType
|
2020-07-23 17:23:55 +00:00
|
|
|
|
2023-04-15 23:49:25 +00:00
|
|
|
import pdbp
|
2020-07-23 17:23:55 +00:00
|
|
|
import tractor
|
|
|
|
import trio
|
2024-02-20 20:39:45 +00:00
|
|
|
from trio.lowlevel import current_task
|
2023-07-07 18:51:44 +00:00
|
|
|
from trio_typing import (
|
|
|
|
TaskStatus,
|
|
|
|
# Task,
|
|
|
|
)
|
2020-07-23 17:23:55 +00:00
|
|
|
|
2023-09-28 18:14:50 +00:00
|
|
|
from ..log import get_logger
|
|
|
|
from .._state import (
|
2024-02-20 20:39:45 +00:00
|
|
|
current_actor,
|
2022-10-11 19:22:19 +00:00
|
|
|
is_root_process,
|
|
|
|
debug_mode,
|
|
|
|
)
|
2023-09-28 18:14:50 +00:00
|
|
|
from .._exceptions import (
|
2022-10-11 19:22:19 +00:00
|
|
|
is_multi_cancelled,
|
|
|
|
ContextCancelled,
|
|
|
|
)
|
2023-09-28 18:14:50 +00:00
|
|
|
from .._ipc import Channel
|
2022-02-07 03:14:16 +00:00
|
|
|
|
2020-07-23 17:23:55 +00:00
|
|
|
log = get_logger(__name__)
|
|
|
|
|
|
|
|
|
2023-09-28 18:14:50 +00:00
|
|
|
__all__ = [
|
|
|
|
'breakpoint',
|
|
|
|
'post_mortem',
|
|
|
|
]
|
2020-07-23 17:23:55 +00:00
|
|
|
|
2021-05-12 16:01:43 +00:00
|
|
|
|
2022-07-29 20:03:26 +00:00
|
|
|
class Lock:
    '''
    Actor global debug lock state.

    Mostly to avoid a lot of ``global`` declarations for now XD.

    All state is held in class attributes and mutated through
    classmethods; nothing in this class requires an instance.

    '''
    # the REPL instance engaged in this actor; cleared by `.release()`.
    repl: MultiActorPdb | None = None
    # placeholder for function to set a ``trio.Event`` on debugger exit
    # pdb_release_hook: Callable | None = None

    # handler (re-)installed for SIGINT by `.unshield_sigint()`.
    # NOTE(review): never assigned inside this class, presumably set
    # by the runtime before any REPL is entered - confirm.
    _trio_handler: Callable[
        [int, FrameType | None], Any
    ] | int | None = None

    # actor-wide variable pointing to current task name using debugger
    local_task_in_debug: str | None = None

    # NOTE: set by the current task waiting on the root tty lock from
    # the CALLER side of the `lock_tty_for_child()` context entry-call
    # and must be cancelled if this actor is cancelled via IPC
    # request-message otherwise deadlocks with the parent actor may
    # ensue
    _debugger_request_cs: trio.CancelScope | None = None

    # NOTE: set only in the root actor for the **local** root spawned task
    # which has acquired the lock (i.e. this is on the callee side of
    # the `lock_tty_for_child()` context entry).
    _root_local_task_cs_in_debug: trio.CancelScope | None = None

    # actor tree-wide actor uid that supposedly has the tty lock
    global_actor_in_debug: tuple[str, str] | None = None

    # set by `.release()` once the local REPL session is done.
    local_pdb_complete: trio.Event | None = None
    no_remote_has_tty: trio.Event | None = None

    # lock in root actor preventing multi-access to local tty
    _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock()

    # whatever handler `signal.signal()` reported as replaced by
    # `.shield_sigint()`.
    _orig_sigint_handler: Callable | None = None
    _blocked: set[tuple[str, str]] = set()

    @classmethod
    def shield_sigint(cls):
        '''
        Install ``shield_sigint_handler()`` for SIGINT and remember the
        handler it replaced.

        '''
        cls._orig_sigint_handler = signal.signal(
            signal.SIGINT,
            shield_sigint_handler,
        )

    @classmethod
    @pdbp.hideframe  # XXX NOTE XXX see below in `.pause_from_sync()`
    def unshield_sigint(cls):
        '''
        Re-install ``cls._trio_handler`` for SIGINT and drop the
        reference to the previously replaced handler.

        '''
        # always restore ``trio``'s sigint handler. see notes below in
        # the pdb factory about the nightmare that is that code swapping
        # out the handler when the repl activates...
        signal.signal(signal.SIGINT, cls._trio_handler)
        cls._orig_sigint_handler = None

    @classmethod
    def release(cls):
        '''
        Release the debug lock and reset this actor's debugger state.

        A ``RuntimeError`` from releasing the lock is swallowed when
        the lock reports no owner and re-raised otherwise. In both
        cases the state reset, the ``local_pdb_complete`` event and the
        SIGINT handler restoration still run so that local waiters and
        the signal handling are never left stuck behind a failed
        release.

        '''
        try:
            try:
                cls._debug_lock.release()
            except RuntimeError:
                # uhhh makes no sense but been seeing the non-owner
                # release error even though this is definitely the task
                # that locked?
                owner = cls._debug_lock.statistics().owner
                if owner:
                    raise

        finally:
            # actor-local state, irrelevant for non-root.
            cls.global_actor_in_debug = None
            cls.local_task_in_debug = None

            try:
                # sometimes the ``trio`` might already be terminated in
                # which case this call will raise.
                if cls.local_pdb_complete is not None:
                    cls.local_pdb_complete.set()
            finally:
                # hand SIGINT back to ``cls._trio_handler``
                cls.unshield_sigint()
                cls.repl = None
|
|
|
|
|
|
|
|
|
2020-07-23 17:23:55 +00:00
|
|
|
|
2023-04-15 23:49:25 +00:00
|
|
|
class TractorConfig(pdbp.DefaultConfig):
    '''
    The ``pdbp`` configuration overrides applied to every
    ``MultiActorPdb`` REPL :surfer:

    '''
    # much thanks @mdmintz for the hot tip!
    # fixes line spacing issue when resizing terminal B)
    truncate_long_lines: bool = False

    enable_hidden_frames: bool = True
    sticky_by_default: bool = False
    use_pygments: bool = True
|
2023-04-19 19:31:02 +00:00
|
|
|
|
2020-07-26 21:46:55 +00:00
|
|
|
|
2023-04-15 23:49:25 +00:00
|
|
|
class MultiActorPdb(pdbp.Pdb):
    '''
    A ``pdbp.Pdb`` which runs ``Lock.release()`` whenever the REPL
    session is continued or quit.

    '''
    # override the pdbp config with our coolio one
    DefaultConfig = TractorConfig

    # TODO: figure out how to disallow recursive .set_trace() entry
    # since that'll cause deadlock for us.
    def set_continue(self):
        '''
        Continue execution, then always release the debug lock.

        '''
        try:
            super().set_continue()
        finally:
            Lock.release()

    def set_quit(self):
        '''
        Quit the debugger, then always release the debug lock.

        '''
        try:
            super().set_quit()
        finally:
            Lock.release()

    # XXX NOTE: only overridden because the stdlib ``pdb`` version
    # apparently fiddles with the SIGINT handler; here we go straight
    # to ``.cmdloop()`` instead.
    def _cmdloop(self):
        self.cmdloop()

    @cached_property
    def shname(self) -> str | None:
        '''
        Best-effort name of the login shell, or ``None`` when the
        ``SHELL`` env var is unset or empty.

        ``xonsh`` gets special detection since it seems to behave
        differently from std shells when it comes to flushing the
        prompt.

        '''
        # SUPER HACKY and only really works if `xonsh` is not used
        # before spawning further sub-shells..
        shell_path = os.getenv('SHELL', None)
        if not shell_path:
            return None

        looks_like_xonsh = (
            os.getenv('XONSH_LOGIN', default=False)
            or 'xonsh' in shell_path
        )
        if looks_like_xonsh:
            return 'xonsh'

        return os.path.basename(shell_path)
|
2023-01-26 20:26:43 +00:00
|
|
|
|
2020-07-26 21:46:55 +00:00
|
|
|
|
2021-10-14 03:32:02 +00:00
|
|
|
@acm
async def _acquire_debug_lock_from_root_task(
    uid: tuple[str, str]

) -> AsyncIterator[trio.StrictFIFOLock]:
    '''
    Hold the root-actor local FIFO lock guarding the process tree's
    global debugger breakpoint for the duration of the context.

    Serializing on this lock prevents tty clobbering (multiple
    processes reading from stdstreams at once) and gives each actor
    sequential access to the ``pdb`` repl. ``uid`` is recorded as
    ``Lock.global_actor_in_debug`` while the lock is held.

    '''
    task_name: str = current_task().name
    holding_lock: bool = False

    log.runtime(
        f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}"
    )
    try:
        log.runtime(
            f"entering lock checkpoint, remote task: {task_name}:{uid}"
        )
        # NOTE: if the surrounding cancel scope from the
        # `lock_tty_for_child()` caller is cancelled, this line should
        # unblock and NOT leave us in some kind of
        # a "child-locked-TTY-but-child-is-uncontactable-over-IPC"
        # condition.
        await Lock._debug_lock.acquire()
        holding_lock = True

        # mark the tty lock as being in use so that the runtime
        # can try to avoid clobbering any connection from a child
        # that's currently relying on it.
        if Lock.no_remote_has_tty is None:
            Lock.no_remote_has_tty = trio.Event()

        Lock.global_actor_in_debug = uid
        log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}")

        # NOTE: critical section: this yield is unshielded!

        # IF we received a cancel during the shielded lock entry of some
        # next-in-queue requesting task, then the resumption here will
        # result in that ``trio.Cancelled`` being raised to our caller
        # (likely from ``lock_tty_for_child()`` below)! In
        # this case the ``finally:`` below should trigger and the
        # surrounding caller side context should cancel normally
        # relaying back to the caller.

        yield Lock._debug_lock

    finally:
        # only give the lock back if this task actually took it.
        if holding_lock and Lock._debug_lock.locked():
            Lock._debug_lock.release()

        # IFF there are no more requesting tasks queued up, fire the
        # "tty-unlocked" event thereby alerting any monitors of the lock that
        # we are now back in the "tty unlocked" state. This is basically
        # an edge triggered signal around an empty queue of sub-actor
        # tasks that may have tried to acquire the lock.
        if not Lock._debug_lock.statistics().owner:
            log.runtime(f"No more tasks waiting on tty lock! says {uid}")
            if Lock.no_remote_has_tty is not None:
                Lock.no_remote_has_tty.set()
                Lock.no_remote_has_tty = None

        Lock.global_actor_in_debug = None

        log.runtime(
            f"TTY lock released, remote task: {task_name}:{uid}"
        )
|
2020-07-23 17:23:55 +00:00
|
|
|
|
|
|
|
|
2021-05-10 11:25:55 +00:00
|
|
|
@tractor.context
async def lock_tty_for_child(

    ctx: tractor.Context,
    subactor_uid: tuple[str, str]

) -> str:
    '''
    Lock the TTY in the root process of an actor tree in a new
    inter-actor-context-task such that the ``pdbp`` debugger console
    can be mutex-allocated to the calling sub-actor for REPL control
    without interference by other processes / threads.

    NOTE: this task must be invoked in the root process of the actor
    tree. It is meant to be invoked as an rpc-task and should be
    highly reliable at releasing the mutex completely!

    Returns ``'pdb_lock_blocked'`` (after cancelling ``ctx``) when
    ``subactor_uid`` is listed in ``Lock._blocked``, otherwise
    ``"pdb_unlock_complete"`` once the child has sent its
    ``'pdb_unlock'`` message.

    '''
    task_name: str = current_task().name
    if tuple(subactor_uid) in Lock._blocked:
        log.warning(
            f'Actor {subactor_uid} is blocked from acquiring debug lock\n'
            f"remote task: {task_name}:{subactor_uid}"
        )
        ctx._enter_debugger_on_cancel = False
        await ctx.cancel(f'Debug lock blocked for {subactor_uid}')
        return 'pdb_lock_blocked'

    # TODO: when we get to true remote debugging
    # this will deliver stdin data?

    log.debug(
        "Attempting to acquire TTY lock\n"
        f"remote task: {task_name}:{subactor_uid}"
    )

    log.debug(f"Actor {subactor_uid} is WAITING on stdin hijack lock")
    Lock.shield_sigint()

    try:
        with trio.CancelScope(shield=True) as debug_lock_cs:
            # expose the scope so the SIGINT handler can cancel us.
            Lock._root_local_task_cs_in_debug = debug_lock_cs
            async with _acquire_debug_lock_from_root_task(subactor_uid):

                # indicate to child that we've locked stdio
                await ctx.started('Locked')
                log.debug(
                    f"Actor {subactor_uid} acquired stdin hijack lock"
                )

                # wait for unlock pdb by child
                async with ctx.open_stream() as stream:
                    # NOTE: the receive MUST live outside the
                    # `assert`; under `python -O` asserts are stripped
                    # and the wait would vanish with it, releasing the
                    # TTY lock while the child is still in its REPL.
                    unlock_msg = await stream.receive()
                    assert unlock_msg == 'pdb_unlock', (
                        f'Unexpected debug-lock msg: {unlock_msg!r}'
                    )

        return "pdb_unlock_complete"

    finally:
        Lock._root_local_task_cs_in_debug = None
        Lock.unshield_sigint()
|
2020-07-23 17:23:55 +00:00
|
|
|
|
|
|
|
|
2021-10-14 03:08:58 +00:00
|
|
|
async def wait_for_parent_stdin_hijack(
    actor_uid: tuple[str, str],
    task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED
):
    '''
    Connect to the root actor via a ``Context`` and invoke a task which
    locks a root-local TTY lock: ``lock_tty_for_child()``; this func
    should be called in a new task from a child actor **and never the
    root**.

    This function is used by any sub-actor to acquire mutex access to
    the ``pdb`` REPL and thus the root's TTY for interactive debugging
    (see below inside ``pause()``). It can be used to ensure that
    an intermediate nursery-owning actor does not clobber its children
    if they are in debug (see below inside
    ``maybe_wait_for_debugger()``).

    ``task_status.started()`` is called with the shielded cancel scope
    wrapping the whole request once the root reports ``'Locked'``; the
    task then blocks until ``Lock.local_pdb_complete`` is set and
    finally sends ``'pdb_unlock'`` back to the root.

    '''
    from .._discovery import get_root

    with trio.CancelScope(shield=True) as cs:
        Lock._debugger_request_cs = cs

        try:
            async with get_root() as portal:

                # this syncs to child's ``Context.started()`` call.
                async with portal.open_context(

                    lock_tty_for_child,
                    subactor_uid=actor_uid,

                ) as (ctx, val):

                    log.debug('locked context')
                    assert val == 'Locked'

                    async with ctx.open_stream() as stream:
                        try:
                            # unblock local caller
                            assert Lock.local_pdb_complete
                            task_status.started(cs)

                            # wait for local task to exit and
                            # release the REPL
                            await Lock.local_pdb_complete.wait()

                        finally:
                            # TODO: shielding currently can cause hangs...
                            # with trio.CancelScope(shield=True):
                            await stream.send('pdb_unlock')

                        # sync with callee termination.
                        # NOTE: await outside the `assert` so the sync
                        # point is not stripped under `python -O`.
                        result = await ctx.result()
                        assert result == "pdb_unlock_complete"

                log.debug('exitting child side locking task context')

        except ContextCancelled:
            log.warning('Root actor cancelled debug lock')
            raise

        finally:
            Lock.local_task_in_debug = None
            log.debug('Exiting debugger from child')
|
2021-10-14 03:08:58 +00:00
|
|
|
|
|
|
|
|
2022-02-09 15:04:37 +00:00
|
|
|
def mk_mpdb() -> tuple[MultiActorPdb, Callable]:
    '''
    Create a new ``MultiActorPdb`` REPL with SIGINT shielded via
    ``Lock.shield_sigint()``.

    Returns the REPL instance together with ``Lock.unshield_sigint``,
    which the caller can invoke to undo the shielding.

    '''
    repl = MultiActorPdb()
    # signal.signal = pdbp.hideframe(signal.signal)

    Lock.shield_sigint()

    # XXX: both flags below are the important ones mentioned in
    # https://github.com/python-trio/trio/issues/1155
    # which resolve the traceback spews to console.
    repl.allow_kbdint = repl.nosigint = True

    return repl, Lock.unshield_sigint
|
2022-02-09 12:51:34 +00:00
|
|
|
|
|
|
|
|
2023-01-26 16:55:32 +00:00
|
|
|
def shield_sigint_handler(
    signum: int,
    frame: 'frame',  # type: ignore # noqa
    # pdb_obj: MultiActorPdb | None = None,
    *args,

) -> None:
    '''
    Specialized, debugger-aware SIGINT handler.

    In children we always ignore to avoid deadlocks since cancellation
    should always be managed by the parent supervising actor. The root
    is always cancelled on ctrl-c.

    When the actor recorded as being in debug (or, in a child, the
    parent channel) is no longer connected, the signal is instead
    turned into a runtime cancel request, or a ``KeyboardInterrupt``
    if cancellation was already requested.

    '''
    __tracebackhide__ = True

    uid_in_debug: tuple[str, str] | None = Lock.global_actor_in_debug

    actor = current_actor()
    # print(f'{actor.uid} in HANDLER with ')

    def do_cancel():
        # If we haven't tried to cancel the runtime then do that instead
        # of raising a KBI (which may non-gracefully destroy
        # a ``trio.run()``).
        if not actor._cancel_called:
            actor.cancel_soon()

        # If the runtime is already cancelled it likely means the user
        # hit ctrl-c again because teardown didn't fully take place in
        # which case we do the "hard" raising of a local KBI.
        else:
            raise KeyboardInterrupt

    any_connected: bool = False

    if uid_in_debug is not None:
        # try to see if the supposed (sub)actor in debug still
        # has an active connection to *this* actor, and if not
        # it's likely they aren't using the TTY lock / debugger
        # and we should propagate SIGINT normally.
        chans: list[tractor.Channel] | None = actor._peers.get(
            tuple(uid_in_debug)
        )
        if chans:
            any_connected = any(chan.connected() for chan in chans)
            if not any_connected:
                log.warning(
                    'A global actor reported to be in debug '
                    'but no connection exists for this child:\n'
                    f'{uid_in_debug}\n'
                    'Allowing SIGINT propagation..'
                )
                return do_cancel()

    # only set in the actor actually running the REPL
    pdb_obj: MultiActorPdb | None = Lock.repl

    # evaluated once; every branch below keys off of it.
    is_root: bool = is_root_process()

    # root actor branch that reports whether or not a child
    # has locked debugger.
    if (
        is_root
        and uid_in_debug is not None

        # XXX: only if there is an existing connection to the
        # (sub-)actor in debug do we ignore SIGINT in this
        # parent! Otherwise we may hang waiting for an actor
        # which has already terminated to unlock.
        and any_connected
    ):
        # we are root and some actor is in debug mode
        if pdb_obj:
            name = uid_in_debug[0]
            if name != 'root':
                log.pdb(
                    f"Ignoring SIGINT, child in debug mode: `{uid_in_debug}`"
                )

            else:
                log.pdb(
                    "Ignoring SIGINT while in debug mode"
                )

    # root actor with no (connected) actor recorded as in debug
    elif is_root:
        if pdb_obj:
            log.pdb(
                "Ignoring SIGINT since debug mode is enabled"
            )

        if (
            Lock._root_local_task_cs_in_debug
            and not Lock._root_local_task_cs_in_debug.cancel_called
        ):
            Lock._root_local_task_cs_in_debug.cancel()

            # revert back to ``trio`` handler asap!
            Lock.unshield_sigint()

    # child actor that has locked the debugger
    else:
        chan: Channel = actor._parent_chan
        if not chan or not chan.connected():
            log.warning(
                'A global actor reported to be in debug '
                'but no connection exists for its parent:\n'
                f'{uid_in_debug}\n'
                'Allowing SIGINT propagation..'
            )
            return do_cancel()

        task: str | None = Lock.local_task_in_debug
        if (
            task
            and pdb_obj
        ):
            log.pdb(
                f"Ignoring SIGINT while task in debug mode: `{task}`"
            )

        # TODO: how to handle the case of an intermediary-child actor
        # that **is not** marked in debug mode? See outstanding issue:
        # https://github.com/goodboy/tractor/issues/320
        # elif debug_mode():

    # NOTE: currently (at least on ``fancycompleter`` 0.9.2)
    # it looks to be that the last command that was run (eg. ll)
    # will be repeated by default.

    # maybe redraw/print last REPL output to console since
    # we want to alert the user that more input is expected since
    # nothing has been done due to ignoring sigint.
    if (
        pdb_obj  # only when this actor has a REPL engaged
    ):
        # XXX: yah, mega hack, but how else do we catch this madness XD
        if pdb_obj.shname == 'xonsh':
            pdb_obj.stdout.write(pdb_obj.prompt)

        pdb_obj.stdout.flush()

        # TODO: make this work like sticky mode where if there is output
        # detected as written to the tty we redraw this part underneath
        # and erase the past draw of this same bit above?
        # pdb_obj.sticky = True
        # pdb_obj._print_if_sticky()

        # also see these links for an approach from ``ptk``:
        # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
        # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
|
|
|
|
|
2022-01-23 22:33:09 +00:00
|
|
|
|
2022-02-07 11:55:38 +00:00
|
|
|
def _set_trace(
    actor: tractor.Actor | None = None,
    pdb: MultiActorPdb | None = None,
    shield: bool = False,
):
    '''
    Enter the REPL in the frame of the code which requested it.

    With a caller-provided ``pdb`` (or with ``shield`` set) the REPL
    is attached one extra frame up from this function's caller;
    otherwise a new REPL is created via ``mk_mpdb()``,
    ``Lock.local_task_in_debug`` is marked as ``'sync'`` and the REPL
    is attached at the direct caller's frame.

    '''
    __tracebackhide__: bool = True
    actor: tractor.Actor = actor or current_actor()

    # start in the frame of whoever called us
    frame: FrameType | None = sys._getframe()
    if frame:
        frame = frame.f_back  # type: ignore

    # NOTE: explicitly grouped; a bare `a and b or c` parses as
    # `(a and b) or c` which would let `shield` bypass the `frame`
    # check and blow up on `frame.f_back` below.
    if (
        frame
        and (
            (pdb and actor is not None)
            or shield
        )
    ):
        # `shield` alone does not guarantee a REPL was passed in.
        if pdb is None:
            pdb, _ = mk_mpdb()

        # pdbp.set_trace()
        log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
        # no f!#$&* idea, but when we're in async land
        # we need 2x frames up?
        frame = frame.f_back
        # frame = frame.f_back

        # if shield:
        #     frame = frame.f_back

    else:
        pdb, _ = mk_mpdb()

        # we entered the global ``breakpoint()`` built-in from sync
        # code?
        Lock.local_task_in_debug = 'sync'

    pdb.set_trace(frame=frame)
|
2020-07-23 17:23:55 +00:00
|
|
|
|
|
|
|
|
2023-10-06 19:49:23 +00:00
|
|
|
async def pause(

    debug_func: Callable = _set_trace,
    release_lock_signal: trio.Event | None = None,

    # TODO: allow caller to pause despite task cancellation,
    # exactly the same as wrapping with:
    # with CancelScope(shield=True):
    #     await pause()
    # => the REMAINING ISSUE is that the scope's .__exit__() frame
    # is always show in the debugger on entry.. and there seems to
    # be no way to override it?..
    # shield: bool = False,

    task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED

) -> None:
    '''
    A pause point (more commonly known as a "breakpoint") interrupt
    instruction for engaging a blocking debugger instance to
    conduct manual console-based-REPL-interaction from within
    `tractor`'s async runtime, normally from some single-threaded
    and currently executing actor-hosted-`trio`-task in some
    (remote) process.

    NOTE: we use the semantics "pause" since it better encompasses
    the entirety of the necessary global-runtime-state-mutation any
    actor-task must access and lock in order to get full isolated
    control over the process tree's root TTY:
    https://en.wikipedia.org/wiki/Breakpoint

    Parameters:
      debug_func: sync callable invoked as `debug_func(actor, pdb)`
        once the debug lock state is set up. When `None` no REPL is
        entered by this task; `task_status.started()` is called
        instead.
      release_lock_signal: currently unused (only referenced by
        commented-out code).
      task_status: `trio` task-status handle, only used on the
        `debug_func=None` path.

    Raises:
      bdb.BdbQuit: re-raised (after `Lock.release()`) when the user
        quits the REPL.
      RuntimeError: re-raised from the service-nursery start unless
        the actor is already being cancelled.

    '''
    # __tracebackhide__ = True
    actor = current_actor()
    pdb, undo_sigint = mk_mpdb()
    task_name: str = trio.lowlevel.current_task().name

    if (
        not Lock.local_pdb_complete
        or Lock.local_pdb_complete.is_set()
    ):
        Lock.local_pdb_complete = trio.Event()

    # XXX: `functools.partial(None)` raises `TypeError` so only wrap
    # a real callable; otherwise the `debug_func is None` branch
    # further below could never be reached.
    if debug_func is not None:
        debug_func = partial(
            debug_func,
            # shield=shield,
        )

    # TODO: hide the cancel-scope `.__exit__()` frame from the REPL,
    # previous attempt kept for reference:
    # def _exit(self, *args, **kwargs):
    #     __tracebackhide__: bool = True
    #     super().__exit__(*args, **kwargs)
    #
    # trio.CancelScope.__exit__.__tracebackhide__ = True
    #
    # import types
    # with trio.CancelScope(shield=shield) as cs:
    #     cs.__exit__ = types.MethodType(_exit, cs)
    #     cs.__exit__.__tracebackhide__ = True

    # TODO: need a more robust check for the "root" actor
    if (
        not is_root_process()
        and actor._parent_chan  # a connected child
    ):

        if Lock.local_task_in_debug:

            # Recurrence entry case: this task already has the lock and
            # is likely recurrently entering a breakpoint
            if Lock.local_task_in_debug == task_name:
                # noop on recurrent entry case but we want to trigger
                # a checkpoint to allow other actors error-propagate and
                # potentially avoid infinite re-entries in some subactor.
                await trio.lowlevel.checkpoint()
                return

            # if **this** actor is already in debug mode block here
            # waiting for the control to be released - this allows
            # support for recursive entries to `tractor.breakpoint()`
            log.warning(f"{actor.uid} already has a debug lock, waiting...")

            await Lock.local_pdb_complete.wait()
            await trio.sleep(0.1)

        # mark local actor as "in debug mode" to avoid recurrent
        # entries/requests to the root process
        Lock.local_task_in_debug = task_name

        # this **must** be awaited by the caller and is done using the
        # root nursery so that the debugger can continue to run without
        # being restricted by the scope of a new task nursery.

        # TODO: if we want to debug a trio.Cancelled triggered exception
        # we have to figure out how to avoid having the service nursery
        # cancel on this task start? I *think* this works below:
        # ```python
        #   actor._service_n.cancel_scope.shield = shield
        # ```
        # but not entirely sure if that's a sane way to implement it?
        try:
            with trio.CancelScope(shield=True):
                await actor._service_n.start(
                    wait_for_parent_stdin_hijack,
                    actor.uid,
                )
                Lock.repl = pdb
        except RuntimeError:
            Lock.release()

            if actor._cancel_called:
                # service nursery won't be usable and we
                # don't want to lock up the root either way since
                # we're in (the midst of) cancellation.
                return

            raise

    elif is_root_process():

        # we also wait in the root-parent for any child that
        # may have the tty locked prior
        # TODO: wait, what about multiple root tasks acquiring it though?
        if Lock.global_actor_in_debug == actor.uid:
            # re-entrant root process already has it: noop.
            return

        # XXX: since we need to enter pdb synchronously below,
        # we have to release the lock manually from pdb completion
        # callbacks. Can't think of a nicer way then this atm.
        if Lock._debug_lock.locked():
            log.warning(
                'Root actor attempting to shield-acquire active tty lock'
                f' owned by {Lock.global_actor_in_debug}')

            # must shield here to avoid hitting a ``Cancelled`` and
            # a child getting stuck bc we clobbered the tty
            with trio.CancelScope(shield=True):
                await Lock._debug_lock.acquire()
        else:
            # may be cancelled
            await Lock._debug_lock.acquire()

        Lock.global_actor_in_debug = actor.uid
        Lock.local_task_in_debug = task_name
        Lock.repl = pdb

    try:
        if debug_func is None:
            # assert release_lock_signal, (
            #     'Must pass `release_lock_signal: trio.Event` if no '
            #     'trace func provided!'
            # )
            print(f"{actor.uid} ENTERING WAIT")
            task_status.started()

            # with trio.CancelScope(shield=True):
            #     await release_lock_signal.wait()

        else:
            # block here one (at the appropriate frame *up*) where
            # ``breakpoint()`` was awaited and begin handling stdio.
            log.debug("Entering the synchronous world of pdb")
            debug_func(actor, pdb)

    except bdb.BdbQuit:
        Lock.release()
        raise

    # XXX: apparently we can't do this without showing this frame
    # in the backtrace on first entry to the REPL? Seems like an odd
    # behaviour that should have been fixed by now. This is also why
    # we scrapped all the @cm approaches that were tried previously.
    # finally:
    #     __tracebackhide__ = True
    #     # frame = sys._getframe()
    #     # last_f = frame.f_back
    #     # last_f.f_globals['__tracebackhide__'] = True
    #     # signal.signal = pdbp.hideframe(signal.signal)
|
2023-10-06 19:49:23 +00:00
|
|
|
|
|
|
|
|
|
|
|
# TODO: allow pausing from sync code.
|
|
|
|
# normally by remapping python's builtin breakpoint() hook to this
|
|
|
|
# runtime aware version which takes care of all .
|
2023-06-21 20:08:18 +00:00
|
|
|
def pause_from_sync() -> None:
    '''
    Enter the debugger REPL from synchronous code.

    When an actor runtime is present and `greenback` is importable,
    `pause()` is first started (with `debug_func=None`) in the
    actor's service nursery via `greenback.await_()`. In every case,
    including "no runtime" and "no greenback", a new REPL is then
    created with `mk_mpdb()` and entered at the caller's frame.

    '''
    print("ENTER SYNC PAUSE")
    # don't raise when called outside the runtime; a falsy result
    # is handled by the `else:` below.
    actor: tractor.Actor = current_actor(
        err_on_no_runtime=False,
    )
    if actor:
        try:
            # optional dependency: imported lazily so that its
            # absence only produces the warning below.
            import greenback
            # __tracebackhide__ = True

            # task_can_release_tty_lock = trio.Event()

            # spawn bg task which will lock out the TTY, we poll
            # just below until the release event is reporting that task as
            # waiting.. not the most ideal but works for now ;)
            greenback.await_(
                actor._service_n.start(partial(
                    pause,
                    debug_func=None,
                    # release_lock_signal=task_can_release_tty_lock,
                ))
            )

        except ModuleNotFoundError:
            log.warning('NO GREENBACK FOUND')
    else:
        log.warning('Not inside actor-runtime')

    db, undo_sigint = mk_mpdb()
    Lock.local_task_in_debug = 'sync'
    # db.config.enable_hidden_frames = True

    # we entered the global ``breakpoint()`` built-in from sync
    # code?
    frame: FrameType | None = sys._getframe()
    # print(f'FRAME: {str(frame)}')
    # assert not db._is_hidden(frame)

    # step one frame up so the REPL opens in the caller, not in
    # this function.
    frame: FrameType = frame.f_back  # type: ignore
    # print(f'FRAME: {str(frame)}')
    # if not db._is_hidden(frame):
    #     pdbp.set_trace()
    # db._hidden_frames.append(
    #     (frame, frame.f_lineno)
    # )
    db.set_trace(frame=frame)
    # NOTE XXX: see the `@pdbp.hideframe` decoration
    # on `Lock.unshield_sigint()`.. I have NO CLUE why
    # the next instruction's def frame is being shown
    # in the tb but it seems to be something wonky with
    # the way `pdb` core works?
    # undo_sigint()

    # Lock.global_actor_in_debug = actor.uid
    # Lock.release()
    # task_can_release_tty_lock.set()
|
2023-06-21 20:08:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
# using the "pause" semantics instead since
|
|
|
|
# that better covers actually somewhat "pausing the runtime"
|
|
|
|
# for this particular parallel task to do debugging B)
|
2023-09-28 18:14:50 +00:00
|
|
|
# pp = pause # short-hand for "pause point"
|
2023-06-21 20:08:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
async def breakpoint(**kwargs):
    '''
    Deprecated alias for `pause()`: emit a deprecation warning and
    then forward all keyword arguments to `pause()`.

    '''
    deprecation_msg: str = (
        '`tractor.breakpoint()` is deprecated!\n'
        'Please use `tractor.pause()` instead!\n'
    )
    log.warning(deprecation_msg)
    await pause(**kwargs)
|
2020-07-23 17:23:55 +00:00
|
|
|
|
|
|
|
|
2022-02-07 11:55:38 +00:00
|
|
|
def _post_mortem(
    actor: tractor.Actor,
    pdb: MultiActorPdb,

) -> None:
    '''
    Enter the ``pdbp`` post mortem entrypoint using our custom
    debugger instance.

    Parameters:
      actor: the crashed actor, only used for the log message.
      pdb: the already constructed REPL instance to hand to
        `pdbp.xpm()`.

    '''
    crashed_uid = actor.uid
    log.pdb(f"\nAttaching to pdb in crashed actor: {crashed_uid}\n")

    # TODO: you need ``pdbpp`` master (at least this commit
    # https://github.com/pdbpp/pdbpp/commit/b757794857f98d53e3ebbe70879663d7d843a6c2)
    # to fix this and avoid the hang it causes. See issue:
    # https://github.com/pdbpp/pdbpp/issues/480
    # TODO: help with a 3.10+ major release if/when it arrives.

    def use_existing_repl() -> MultiActorPdb:
        # `xpm()` takes a factory; always return the instance we
        # were given.
        return pdb

    pdbp.xpm(Pdb=use_existing_repl)
|
2020-07-23 17:23:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Async post-mortem entrypoint: `pause()` with `_post_mortem` bound
# positionally as its first parameter (`debug_func`), so awaiting
# `post_mortem()` runs the same setup as `pause()` but calls
# `_post_mortem(actor, pdb)` instead of the default trace function.
post_mortem = partial(
    pause,
    _post_mortem,
)
|
2020-09-12 15:47:14 +00:00
|
|
|
|
|
|
|
|
|
|
|
async def _maybe_enter_pm(err: BaseException) -> bool:
    '''
    Conditionally enter the post-mortem REPL for `err`.

    Returns `True` when `post_mortem()` was attempted and `False`
    when the checks below rule it out.

    '''
    if (
        debug_mode()

        # NOTE: don't enter debug mode recursively after quitting pdb
        # Iow, don't re-enter the repl if the `quit` command was issued
        # by the user.
        and not isinstance(err, bdb.BdbQuit)

        # XXX: if the error is the likely result of runtime-wide
        # cancellation, we don't want to enter the debugger since
        # there's races between when the parent actor has killed all
        # comms and when the child tries to contact said parent to
        # acquire the tty lock.

        # Really we just want to mostly avoid catching KBIs here so there
        # might be a simpler check we can do?
        and not is_multi_cancelled(err)
    ):
        log.debug("Actor crashed, entering debug mode")
        try:
            await post_mortem()
        finally:
            # always give the debug lock back, even if the REPL
            # session raised.
            Lock.release()
            # NOTE(review): returning from inside `finally` discards
            # any exception still propagating out of `post_mortem()`
            # (e.g. `bdb.BdbQuit`) - confirm this is intended.
            return True

    else:
        return False
|
2021-10-08 22:13:55 +00:00
|
|
|
|
|
|
|
|
2021-10-14 03:32:02 +00:00
|
|
|
@acm
async def acquire_debug_lock(
    subactor_uid: tuple[str, str],
) -> AsyncGenerator[None, tuple]:
    '''
    Grab root's debug lock on entry, release on exit.

    This helper is for actors which don't actually need to acquire
    the debugger but want to wait until the lock is free in the
    process-tree root such that they don't clobber an ongoing pdb
    REPL session in some peer or child!

    When debug mode is off this is a no-op context.

    '''
    if debug_mode():
        async with trio.open_nursery() as tn:
            # `.start()` hands back the scope used to stop the
            # hijack task once the caller's block is done.
            hijack_cs = await tn.start(
                wait_for_parent_stdin_hijack,
                subactor_uid,
            )
            yield None
            hijack_cs.cancel()
    else:
        yield None
|
|
|
|
|
|
|
|
|
|
|
|
async def maybe_wait_for_debugger(
    poll_steps: int = 2,
    poll_delay: float = 0.1,
    child_in_debug: bool = False,

    header_msg: str = '',

) -> None:
    '''
    In the root process, wait for any subactor currently holding the
    debug `Lock` to release it before returning.

    Parameters:
      poll_steps: max number of polling passes before giving up.
      poll_delay: seconds slept (shielded from cancellation) between
        passes.
      child_in_debug: wait even when `debug_mode()` is off.
      header_msg: prefix prepended to the emitted log/error messages.

    Raises:
      RuntimeError: when the lock is still held after `poll_steps`
        passes.

    Does nothing when neither `debug_mode()` nor `child_in_debug`
    is set, or when not called in the root process.

    '''
    if (
        not debug_mode()
        and not child_in_debug
    ):
        return

    msg: str = header_msg
    if (
        is_root_process()
    ):
        # If we error in the root but the debugger is
        # engaged we don't want to prematurely kill (and
        # thus clobber access to) the local tty since it
        # will make the pdb repl unusable.
        # Instead try to wait for pdb to be released before
        # tearing down.
        sub_in_debug: tuple[str, str]|None = Lock.global_actor_in_debug
        debug_complete: trio.Event|None = Lock.no_remote_has_tty

        if sub_in_debug:
            msg += (
                'Debug `Lock` in use by subactor\n'
                f'|_{sub_in_debug}\n'
            )
            # TODO: could this make things more deterministic?
            # wait to see if a sub-actor task will be
            # scheduled and grab the tty lock on the next
            # tick?
            # XXX => but it doesn't seem to work..
            # await trio.testing.wait_all_tasks_blocked(cushion=0)
        else:
            log.pdb(
                msg
                +
                'Root immediately acquired debug TTY LOCK'
            )
            return

        for istep in range(poll_steps):

            # XXX: re-read the shared lock state on every pass; with
            # only the pre-loop snapshot `sub_in_debug` stays set
            # forever, the `break` below is unreachable and the loop
            # always ends in the `RuntimeError`.
            sub_in_debug = Lock.global_actor_in_debug
            debug_complete = Lock.no_remote_has_tty

            if (
                debug_complete
                and not debug_complete.is_set()
                and sub_in_debug is not None
            ):
                log.pdb(
                    msg
                    +
                    'Root is waiting on tty lock to release..\n'
                )
                await debug_complete.wait()
                log.pdb(
                    f'Child subactor released debug lock:'
                    f'|_{sub_in_debug}\n'
                )
                # the holder may have changed while we were blocked.
                sub_in_debug = Lock.global_actor_in_debug
                debug_complete = Lock.no_remote_has_tty

            # is no subactor locking debugger currently?
            if (
                sub_in_debug is None
                and (
                    debug_complete is None
                    or debug_complete.is_set()
                )
            ):
                log.pdb(
                    msg
                    +
                    'Root acquired tty lock!'
                )
                break

            else:
                # TODO: don't need this right?
                # await trio.lowlevel.checkpoint()

                log.debug(
                    'Root polling for debug:\n'
                    f'poll step: {istep}\n'
                    f'poll delya: {poll_delay}'
                )
                with trio.CancelScope(shield=True):
                    await trio.sleep(poll_delay)

        # fallthrough on failure to acquire..
        else:
            raise RuntimeError(
                msg
                +
                'Root actor failed to acquire debug lock?'
            )

    # else:
    #     # TODO: non-root call for #320?
    #     this_uid: tuple[str, str] = current_actor().uid
    #     async with acquire_debug_lock(
    #         subactor_uid=this_uid,
    #     ):
    #         pass
|
2023-09-28 18:14:50 +00:00
|
|
|
|
|
|
|
# TODO: better naming and what additionals?
|
2023-10-16 19:45:34 +00:00
|
|
|
# - [ ] optional runtime plugging?
|
|
|
|
# - [ ] detection for sync vs. async code?
|
|
|
|
# - [ ] specialized REPL entry when in distributed mode?
|
|
|
|
# - [x] allow ignoring kbi Bo
|
2023-09-28 18:14:50 +00:00
|
|
|
@cm
def open_crash_handler(
    catch: set[type[BaseException]] = {
        Exception,
        BaseException,
    },
    ignore: set[type[BaseException]] = {
        KeyboardInterrupt,
    },
):
    '''
    Generic "post mortem" crash handler using `pdbp` REPL debugger.

    We expose this as a CLI framework addon to both `click` and
    `typer` users so they can quickly wrap cmd endpoints which get
    automatically wrapped to use the runtime's `debug_mode: bool`
    AND `pdbp.pm()` around any code that is PRE-runtime entry
    - any sync code which runs BEFORE the main call to
    `trio.run()`.

    Parameters:
      catch: exception types which trigger the handler; subclasses
        match as well.
      ignore: exception types which are re-raised WITHOUT entering
        the REPL; subclasses match as well.

    The caught exception is always re-raised after the (optional)
    REPL session. The default sets are only read, never mutated.

    '''
    try:
        yield
    except tuple(catch) as err:

        # use `isinstance()` so subclasses of ignored types are also
        # skipped, mirroring how the `except` clause above matches
        # subclasses of `catch`.
        if not isinstance(err, tuple(ignore)):
            pdbp.xpm()

        raise
|
2023-10-02 22:10:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
@cm
def maybe_open_crash_handler(pdb: bool = False):
    '''
    Same as `open_crash_handler()` but with bool input flag
    to allow conditional handling.

    Normally this is used with CLI endpoints such that if the --pdb
    flag is passed the pdb REPL is engaged on any crashes B)

    '''
    # pick the real handler only when requested, otherwise a no-op
    # context manager.
    handler_factory = open_crash_handler if pdb else nullcontext
    with handler_factory():
        yield
|