forked from goodboy/tractor
Add multi-process debugging support using `pdbpp`
This is the first step in addressing #113 and the initial support of #130. Basically this allows (sub)processes to engage the `pdbpp` debug machinery, which reads/writes the root actor's tty but only in a FIFO-semaphored way such that no two processes are using it simultaneously. That means you can have multiple actors enter a trace or crash and run the debugger in a sensible way without clobbering each other's access to stdio. It required adding some "tear down hooks" to a custom `pdbpp.Pdb` type such that we release a child's lock on the parent on debugger exit (in this case when either the "continue" or the "quit" command is issued to the debugger console). There's some code left commented out in anticipation of full support for issue #130, where we'll need to actually capture and feed stdin to the target (remote) actor, which won't necessarily be running on the same host.
debug_tests
parent
e7ee0fec34
commit
8eb9a742dd
|
@ -17,12 +17,14 @@ async def main():
|
||||||
"""
|
"""
|
||||||
async with tractor.open_nursery() as n:
|
async with tractor.open_nursery() as n:
|
||||||
|
|
||||||
# portal = await n.run_in_actor('future_self', bubble)
|
portal1 = await n.run_in_actor('bubble', bubble)
|
||||||
portal = await n.run_in_actor('future_self', bail)
|
portal = await n.run_in_actor('bail', bail)
|
||||||
|
# await portal.result()
|
||||||
|
# await portal1.result()
|
||||||
|
|
||||||
# The ``async with`` will unblock here since the 'some_linguist'
|
# The ``async with`` will unblock here since the 'some_linguist'
|
||||||
# actor has completed its main task ``cellar_door``.
|
# actor has completed its main task ``cellar_door``.
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
tractor.run(main, loglevel='info', debug_mode=True)
|
tractor.run(main, loglevel='critical', debug_mode=True)
|
||||||
|
|
|
@ -126,13 +126,14 @@ async def _invoke(
|
||||||
task_status.started(cs)
|
task_status.started(cs)
|
||||||
await chan.send({'return': await coro, 'cid': cid})
|
await chan.send({'return': await coro, 'cid': cid})
|
||||||
except (Exception, trio.MultiError) as err:
|
except (Exception, trio.MultiError) as err:
|
||||||
log.exception("Actor errored:")
|
|
||||||
|
|
||||||
# NOTE: don't enter debug mode recursively after quitting pdb
|
# NOTE: don't enter debug mode recursively after quitting pdb
|
||||||
if _state.debug_mode() and not isinstance(err, bdb.BdbQuit):
|
if _state.debug_mode() and not isinstance(err, bdb.BdbQuit):
|
||||||
# Allow for pdb control in parent
|
# Allow for pdb control in parent
|
||||||
from ._debug import post_mortem
|
from ._debug import post_mortem
|
||||||
|
log.exception("Actor crashed, entering debug mode:")
|
||||||
await post_mortem()
|
await post_mortem()
|
||||||
|
else:
|
||||||
|
log.exception("Actor crashed:")
|
||||||
|
|
||||||
# always ship errors back to caller
|
# always ship errors back to caller
|
||||||
err_msg = pack_error(err)
|
err_msg = pack_error(err)
|
||||||
|
@ -182,6 +183,7 @@ class Actor:
|
||||||
|
|
||||||
# Information about `__main__` from parent
|
# Information about `__main__` from parent
|
||||||
_parent_main_data: Dict[str, str]
|
_parent_main_data: Dict[str, str]
|
||||||
|
_parent_chan_cs: Optional[trio.CancelScope] = None
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
@ -840,6 +842,8 @@ class Actor:
|
||||||
# for n in root.child_nurseries:
|
# for n in root.child_nurseries:
|
||||||
# n.cancel_scope.cancel()
|
# n.cancel_scope.cancel()
|
||||||
|
|
||||||
|
self._parent_chan_cs.cancel()
|
||||||
|
|
||||||
async def _cancel_task(self, cid, chan):
|
async def _cancel_task(self, cid, chan):
|
||||||
"""Cancel a local task by call-id / channel.
|
"""Cancel a local task by call-id / channel.
|
||||||
|
|
||||||
|
|
|
@ -1,9 +1,7 @@
|
||||||
"""
|
"""
|
||||||
Multi-core debugging for da peeps!
|
Multi-core debugging for da peeps!
|
||||||
"""
|
"""
|
||||||
import pdb
|
|
||||||
import sys
|
import sys
|
||||||
import tty
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from typing import Awaitable, Tuple
|
from typing import Awaitable, Tuple
|
||||||
|
|
||||||
|
@ -13,6 +11,16 @@ import trio
|
||||||
|
|
||||||
from .log import get_logger
|
from .log import get_logger
|
||||||
|
|
||||||
|
try:
|
||||||
|
# wtf: only exported when installed in dev mode?
|
||||||
|
import pdbpp
|
||||||
|
except ImportError:
|
||||||
|
# pdbpp is installed in regular mode...
|
||||||
|
import pdb
|
||||||
|
assert pdb.xpm, "pdbpp is not installed?"
|
||||||
|
pdbpp = pdb
|
||||||
|
|
||||||
|
|
||||||
log = get_logger(__name__)
|
log = get_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -25,56 +33,96 @@ _pdb_exit_patterns = tuple(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def subactoruid2proc(
|
_pdb_release_hook = None
|
||||||
actor: 'Actor', # noqa
|
|
||||||
uid: Tuple[str, str]
|
|
||||||
) -> trio.Process:
|
class PdbwTeardown(pdbpp.Pdb):
|
||||||
n = actor._actoruid2nursery[uid]
|
"""Add teardown hooks to the regular ``pdbpp.Pdb``.
|
||||||
_, proc, _ = n._children[uid]
|
"""
|
||||||
return proc
|
# TODO: figure out how to disallow recursive .set_trace() entry
|
||||||
|
# since that'll cause deadlock for us.
|
||||||
|
def set_continue(self):
|
||||||
|
super().set_continue()
|
||||||
|
self.config.teardown(self)
|
||||||
|
|
||||||
|
def set_quit(self):
|
||||||
|
super().set_quit()
|
||||||
|
self.config.teardown(self)
|
||||||
|
|
||||||
|
|
||||||
|
class TractorConfig(pdbpp.DefaultConfig):
|
||||||
|
"""Custom ``pdbpp`` goodness.
|
||||||
|
"""
|
||||||
|
sticky_by_default = True
|
||||||
|
|
||||||
|
def teardown(self, _pdb):
|
||||||
|
_pdb_release_hook(_pdb)
|
||||||
|
|
||||||
|
|
||||||
|
# override the pdbpp config with our coolio one
|
||||||
|
pdbpp.Pdb.DefaultConfig = TractorConfig
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: will be needed whenever we get to true remote debugging.
|
||||||
|
# XXX see https://github.com/goodboy/tractor/issues/130
|
||||||
|
|
||||||
|
# def subactoruid2proc(
|
||||||
|
# actor: 'Actor', # noqa
|
||||||
|
# uid: Tuple[str, str]
|
||||||
|
# ) -> trio.Process:
|
||||||
|
# n = actor._actoruid2nursery[uid]
|
||||||
|
# _, proc, _ = n._children[uid]
|
||||||
|
# return proc
|
||||||
|
|
||||||
|
# async def hijack_stdin():
|
||||||
|
# log.info(f"Hijacking stdin from {actor.uid}")
|
||||||
|
|
||||||
|
# trap std in and relay to subproc
|
||||||
|
# async_stdin = trio.wrap_file(sys.stdin)
|
||||||
|
|
||||||
|
# async with aclosing(async_stdin):
|
||||||
|
# async for msg in async_stdin:
|
||||||
|
# log.trace(f"Stdin input:\n{msg}")
|
||||||
|
# # encode to bytes
|
||||||
|
# bmsg = str.encode(msg)
|
||||||
|
|
||||||
|
# # relay bytes to subproc over pipe
|
||||||
|
# # await proc.stdin.send_all(bmsg)
|
||||||
|
|
||||||
|
# if bmsg in _pdb_exit_patterns:
|
||||||
|
# log.info("Closing stdin hijack")
|
||||||
|
# break
|
||||||
|
|
||||||
|
|
||||||
async def _hijack_stdin_relay_to_child(
|
async def _hijack_stdin_relay_to_child(
|
||||||
subactor_uid: Tuple[str, str]
|
subactor_uid: Tuple[str, str]
|
||||||
) -> None:
|
) -> None:
|
||||||
actor = tractor.current_actor()
|
actor = tractor.current_actor()
|
||||||
proc = subactoruid2proc(actor, subactor_uid)
|
debug_lock = actor.statespace.setdefault(
|
||||||
|
'_debug_lock', trio.StrictFIFOLock()
|
||||||
|
)
|
||||||
|
|
||||||
# nlb = []
|
log.debug(f"Actor {subactor_uid} is waiting on stdin hijack lock")
|
||||||
|
await debug_lock.acquire()
|
||||||
|
log.warning(f"Actor {subactor_uid} acquired stdin hijack lock")
|
||||||
|
|
||||||
async def hijack_stdin():
|
# TODO: when we get to true remote debugging
|
||||||
log.info(f"Hijacking stdin from {actor.uid}")
|
# this will deliver stdin data
|
||||||
# try:
|
try:
|
||||||
# # disable cooked mode
|
# indicate to child that we've locked stdio
|
||||||
# fd = sys.stdin.fileno()
|
yield 'Locked'
|
||||||
# old = tty.tcgetattr(fd)
|
|
||||||
# tty.setcbreak(fd)
|
|
||||||
|
|
||||||
# trap std in and relay to subproc
|
# wait for cancellation of stream by child
|
||||||
async_stdin = trio.wrap_file(sys.stdin)
|
await trio.sleep_forever()
|
||||||
|
|
||||||
async with aclosing(async_stdin):
|
# TODO: for remote debugging schedule hijacking in root scope
|
||||||
# while True:
|
# (see above)
|
||||||
async for msg in async_stdin:
|
# actor._root_nursery.start_soon(hijack_stdin)
|
||||||
log.trace(f"Stdin input:\n{msg}")
|
|
||||||
# nlb.append(msg)
|
|
||||||
# encode to bytes
|
|
||||||
bmsg = str.encode(msg)
|
|
||||||
|
|
||||||
# relay bytes to subproc over pipe
|
finally:
|
||||||
await proc.stdin.send_all(bmsg)
|
if debug_lock.locked():
|
||||||
|
debug_lock.release()
|
||||||
# line = str.encode(''.join(nlb))
|
log.debug(f"Actor {subactor_uid} released stdin hijack lock")
|
||||||
# print(line)
|
|
||||||
|
|
||||||
if bmsg in _pdb_exit_patterns:
|
|
||||||
log.info("Closing stdin hijack")
|
|
||||||
break
|
|
||||||
# finally:
|
|
||||||
# tty.tcsetattr(fd, tty.TCSAFLUSH, old)
|
|
||||||
|
|
||||||
# schedule hijacking in root scope
|
|
||||||
actor._root_nursery.start_soon(hijack_stdin)
|
|
||||||
|
|
||||||
|
|
||||||
# XXX: We only make this sync in case someone wants to
|
# XXX: We only make this sync in case someone wants to
|
||||||
|
@ -84,35 +132,61 @@ def _breakpoint(debug_func) -> Awaitable[None]:
|
||||||
in subactors.
|
in subactors.
|
||||||
"""
|
"""
|
||||||
actor = tractor.current_actor()
|
actor = tractor.current_actor()
|
||||||
|
do_unlock = trio.Event()
|
||||||
|
|
||||||
async def wait_for_parent_stdin_hijack():
|
async def wait_for_parent_stdin_hijack(
|
||||||
log.debug('Breakpoint engaged!')
|
task_status=trio.TASK_STATUS_IGNORED
|
||||||
|
):
|
||||||
|
|
||||||
# TODO: need a more robust check for the "root" actor
|
# TODO: need a more robust check for the "root" actor
|
||||||
if actor._parent_chan:
|
if actor._parent_chan:
|
||||||
async with tractor._portal.open_portal(
|
async with tractor._portal.open_portal(
|
||||||
actor._parent_chan,
|
actor._parent_chan,
|
||||||
start_msg_loop=False,
|
start_msg_loop=False,
|
||||||
|
shield=True,
|
||||||
) as portal:
|
) as portal:
|
||||||
# with trio.fail_after(1):
|
# with trio.fail_after(1):
|
||||||
await portal.run(
|
agen = await portal.run(
|
||||||
'tractor._debug',
|
'tractor._debug',
|
||||||
'_hijack_stdin_relay_to_child',
|
'_hijack_stdin_relay_to_child',
|
||||||
subactor_uid=actor.uid,
|
subactor_uid=actor.uid,
|
||||||
)
|
)
|
||||||
|
async with aclosing(agen):
|
||||||
|
async for val in agen:
|
||||||
|
assert val == 'Locked'
|
||||||
|
task_status.started()
|
||||||
|
with trio.CancelScope(shield=True):
|
||||||
|
await do_unlock.wait()
|
||||||
|
|
||||||
|
# trigger cancellation of remote stream
|
||||||
|
break
|
||||||
|
|
||||||
|
log.debug(f"Child {actor} released parent stdio lock")
|
||||||
|
|
||||||
|
def unlock(_pdb):
|
||||||
|
do_unlock.set()
|
||||||
|
|
||||||
|
global _pdb_release_hook
|
||||||
|
_pdb_release_hook = unlock
|
||||||
|
|
||||||
|
async def _bp():
|
||||||
|
# this must be awaited by caller
|
||||||
|
await actor._root_nursery.start(
|
||||||
|
wait_for_parent_stdin_hijack
|
||||||
|
)
|
||||||
|
|
||||||
# block here one frame up where ``breakpoint()``
|
# block here one frame up where ``breakpoint()``
|
||||||
# was awaited and begin handling stdin
|
# was awaited and begin handling stdin
|
||||||
debug_func(actor)
|
debug_func(actor)
|
||||||
|
|
||||||
# this must be awaited by caller
|
# return wait_for_parent_stdin_hijack()
|
||||||
return wait_for_parent_stdin_hijack()
|
return _bp()
|
||||||
|
|
||||||
|
|
||||||
def _set_trace(actor):
|
def _set_trace(actor):
|
||||||
pdb.set_trace(
|
log.critical(f"\nAttaching pdb to actor: {actor.uid}\n")
|
||||||
header=f"\nAttaching pdb to actor: {actor.uid}\n",
|
PdbwTeardown().set_trace(
|
||||||
# start 2 levels up
|
# start 2 levels up in user code
|
||||||
frame=sys._getframe().f_back.f_back,
|
frame=sys._getframe().f_back.f_back,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -125,7 +199,7 @@ breakpoint = partial(
|
||||||
|
|
||||||
def _post_mortem(actor):
|
def _post_mortem(actor):
|
||||||
log.error(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
|
log.error(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
|
||||||
pdb.post_mortem()
|
pdbpp.xpm(Pdb=PdbwTeardown)
|
||||||
|
|
||||||
|
|
||||||
post_mortem = partial(
|
post_mortem = partial(
|
||||||
|
|
Loading…
Reference in New Issue