Compare commits
22 Commits
Author | SHA1 | Date |
---|---|---|
Tyler Goodlet | 534277daa5 | |
Tyler Goodlet | 8d77fa91b1 | |
Tyler Goodlet | a6884e32cf | |
Tyler Goodlet | 266b0053dc | |
Tyler Goodlet | 81aa12b46d | |
Tyler Goodlet | f4af2b9fda | |
Tyler Goodlet | b8117dad2a | |
Tyler Goodlet | a80591b914 | |
Tyler Goodlet | 52ad597c20 | |
Tyler Goodlet | 4973deb55f | |
Tyler Goodlet | 872c47213a | |
Tyler Goodlet | fddaaa289c | |
Tyler Goodlet | 85aa8899b3 | |
Tyler Goodlet | eb017c8da6 | |
Tyler Goodlet | 05e41ecd85 | |
Tyler Goodlet | 8fda172ae2 | |
Tyler Goodlet | a6de29517a | |
Tyler Goodlet | 0183c5bf9b | |
Tyler Goodlet | 741dff6795 | |
Tyler Goodlet | 94dc5f9114 | |
Tyler Goodlet | cb77216110 | |
Tyler Goodlet | 28513bc601 |
|
@ -0,0 +1,50 @@
|
||||||
|
import tractor
|
||||||
|
import trio
|
||||||
|
|
||||||
|
|
||||||
|
async def gen():
|
||||||
|
yield 'yo'
|
||||||
|
await tractor.breakpoint()
|
||||||
|
yield 'yo'
|
||||||
|
await tractor.breakpoint()
|
||||||
|
|
||||||
|
|
||||||
|
@tractor.context
|
||||||
|
async def just_bp(
|
||||||
|
ctx: tractor.Context,
|
||||||
|
) -> None:
|
||||||
|
|
||||||
|
await ctx.started()
|
||||||
|
await tractor.breakpoint()
|
||||||
|
|
||||||
|
# TODO: bps and errors in this call..
|
||||||
|
async for val in gen():
|
||||||
|
print(val)
|
||||||
|
|
||||||
|
# await trio.sleep(0.5)
|
||||||
|
|
||||||
|
# prematurely destroy the connection
|
||||||
|
await ctx.chan.aclose()
|
||||||
|
|
||||||
|
# THIS CAUSES AN UNRECOVERABLE HANG
|
||||||
|
# without latest ``pdbpp``:
|
||||||
|
assert 0
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
|
||||||
|
async with tractor.open_nursery(
|
||||||
|
debug_mode=True,
|
||||||
|
) as n:
|
||||||
|
p = await n.start_actor(
|
||||||
|
'bp_boi',
|
||||||
|
enable_modules=[__name__],
|
||||||
|
)
|
||||||
|
async with p.open_context(
|
||||||
|
just_bp,
|
||||||
|
) as (ctx, first):
|
||||||
|
await trio.sleep_forever()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
trio.run(main)
|
|
@ -26,8 +26,11 @@ import importlib
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import inspect
|
import inspect
|
||||||
import uuid
|
import uuid
|
||||||
import typing
|
from typing import (
|
||||||
from typing import Any, Optional, Union
|
Any, Optional,
|
||||||
|
Union, TYPE_CHECKING,
|
||||||
|
Callable,
|
||||||
|
)
|
||||||
from types import ModuleType
|
from types import ModuleType
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
@ -57,6 +60,10 @@ from . import _state
|
||||||
from . import _mp_fixup_main
|
from . import _mp_fixup_main
|
||||||
|
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from ._supervise import ActorNursery
|
||||||
|
|
||||||
|
|
||||||
log = get_logger('tractor')
|
log = get_logger('tractor')
|
||||||
|
|
||||||
|
|
||||||
|
@ -65,7 +72,7 @@ async def _invoke(
|
||||||
actor: 'Actor',
|
actor: 'Actor',
|
||||||
cid: str,
|
cid: str,
|
||||||
chan: Channel,
|
chan: Channel,
|
||||||
func: typing.Callable,
|
func: Callable,
|
||||||
kwargs: dict[str, Any],
|
kwargs: dict[str, Any],
|
||||||
|
|
||||||
is_rpc: bool = True,
|
is_rpc: bool = True,
|
||||||
|
@ -200,7 +207,7 @@ async def _invoke(
|
||||||
ctx = actor._contexts.pop((chan.uid, cid))
|
ctx = actor._contexts.pop((chan.uid, cid))
|
||||||
if ctx:
|
if ctx:
|
||||||
log.runtime(
|
log.runtime(
|
||||||
f'Context entrypoint for {func} was terminated:\n{ctx}'
|
f'Context entrypoint {func} was terminated:\n{ctx}'
|
||||||
)
|
)
|
||||||
|
|
||||||
assert cs
|
assert cs
|
||||||
|
@ -316,7 +323,9 @@ async def try_ship_error_to_parent(
|
||||||
trio.ClosedResourceError,
|
trio.ClosedResourceError,
|
||||||
trio.BrokenResourceError,
|
trio.BrokenResourceError,
|
||||||
):
|
):
|
||||||
log.error(
|
# in SC terms this is one of the worst things that can
|
||||||
|
# happen and creates the 2-general's dilemma.
|
||||||
|
log.critical(
|
||||||
f"Failed to ship error to parent "
|
f"Failed to ship error to parent "
|
||||||
f"{channel.uid}, channel was closed"
|
f"{channel.uid}, channel was closed"
|
||||||
)
|
)
|
||||||
|
@ -424,7 +433,7 @@ class Actor:
|
||||||
# (chan, cid) -> (cancel_scope, func)
|
# (chan, cid) -> (cancel_scope, func)
|
||||||
self._rpc_tasks: dict[
|
self._rpc_tasks: dict[
|
||||||
tuple[Channel, str],
|
tuple[Channel, str],
|
||||||
tuple[trio.CancelScope, typing.Callable, trio.Event]
|
tuple[trio.CancelScope, Callable, trio.Event]
|
||||||
] = {}
|
] = {}
|
||||||
|
|
||||||
# map {actor uids -> Context}
|
# map {actor uids -> Context}
|
||||||
|
@ -513,6 +522,7 @@ class Actor:
|
||||||
self._no_more_peers = trio.Event() # unset
|
self._no_more_peers = trio.Event() # unset
|
||||||
|
|
||||||
chan = Channel.from_stream(stream)
|
chan = Channel.from_stream(stream)
|
||||||
|
uid: Optional[tuple[str, str]] = chan.uid
|
||||||
log.runtime(f"New connection to us {chan}")
|
log.runtime(f"New connection to us {chan}")
|
||||||
|
|
||||||
# send/receive initial handshake response
|
# send/receive initial handshake response
|
||||||
|
@ -560,33 +570,51 @@ class Actor:
|
||||||
# append new channel
|
# append new channel
|
||||||
self._peers[uid].append(chan)
|
self._peers[uid].append(chan)
|
||||||
|
|
||||||
|
local_nursery: Optional[ActorNursery] = None # noqa
|
||||||
|
disconnected: bool = False
|
||||||
|
|
||||||
# Begin channel management - respond to remote requests and
|
# Begin channel management - respond to remote requests and
|
||||||
# process received reponses.
|
# process received reponses.
|
||||||
try:
|
try:
|
||||||
await self._process_messages(chan)
|
disconnected = await self._process_messages(chan)
|
||||||
|
|
||||||
except trio.Cancelled:
|
except (
|
||||||
|
trio.Cancelled,
|
||||||
|
):
|
||||||
log.cancel(f"Msg loop was cancelled for {chan}")
|
log.cancel(f"Msg loop was cancelled for {chan}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
local_nursery = self._actoruid2nursery.get(uid, local_nursery)
|
||||||
|
|
||||||
# This is set in ``Portal.cancel_actor()``. So if
|
# This is set in ``Portal.cancel_actor()``. So if
|
||||||
# the peer was cancelled we try to wait for them
|
# the peer was cancelled we try to wait for them
|
||||||
# to tear down their side of the connection before
|
# to tear down their side of the connection before
|
||||||
# moving on with closing our own side.
|
# moving on with closing our own side.
|
||||||
local_nursery = self._actoruid2nursery.get(chan.uid)
|
|
||||||
if (
|
if (
|
||||||
local_nursery
|
local_nursery
|
||||||
):
|
):
|
||||||
|
if disconnected:
|
||||||
|
# if the transport died and this actor is still
|
||||||
|
# registered within a local nursery, we report that the
|
||||||
|
# IPC layer may have failed unexpectedly since it may be
|
||||||
|
# the cause of other downstream errors.
|
||||||
|
entry = local_nursery._children.get(uid)
|
||||||
|
if entry:
|
||||||
|
_, proc, _ = entry
|
||||||
|
log.error(f'Actor {uid}@{proc} IPC connection broke!?')
|
||||||
|
# if proc.poll() is not None:
|
||||||
|
# log.error('Actor {uid} proc died and IPC broke?')
|
||||||
|
|
||||||
log.cancel(f"Waiting on cancel request to peer {chan.uid}")
|
log.cancel(f"Waiting on cancel request to peer {chan.uid}")
|
||||||
# XXX: this is a soft wait on the channel (and its
|
# XXX: this is a soft wait on the channel (and its
|
||||||
# underlying transport protocol) to close from the remote
|
# underlying transport protocol) to close from the
|
||||||
# peer side since we presume that any channel which
|
# remote peer side since we presume that any channel
|
||||||
# is mapped to a sub-actor (i.e. it's managed by
|
# which is mapped to a sub-actor (i.e. it's managed by
|
||||||
# one of our local nurseries)
|
# one of our local nurseries) has a message is sent to
|
||||||
# message is sent to the peer likely by this actor which is
|
# the peer likely by this actor (which is now in
|
||||||
# now in a cancelled condition) when the local runtime here
|
# a cancelled condition) when the local runtime here is
|
||||||
# is now cancelled while (presumably) in the middle of msg
|
# now cancelled while (presumably) in the middle of msg
|
||||||
# loop processing.
|
# loop processing.
|
||||||
with trio.move_on_after(0.5) as cs:
|
with trio.move_on_after(0.5) as cs:
|
||||||
cs.shield = True
|
cs.shield = True
|
||||||
|
@ -609,6 +637,8 @@ class Actor:
|
||||||
|
|
||||||
await local_nursery.exited.wait()
|
await local_nursery.exited.wait()
|
||||||
|
|
||||||
|
# if local_nursery._children
|
||||||
|
|
||||||
# ``Channel`` teardown and closure sequence
|
# ``Channel`` teardown and closure sequence
|
||||||
|
|
||||||
# Drop ref to channel so it can be gc-ed and disconnected
|
# Drop ref to channel so it can be gc-ed and disconnected
|
||||||
|
@ -618,7 +648,7 @@ class Actor:
|
||||||
|
|
||||||
if not chans:
|
if not chans:
|
||||||
log.runtime(f"No more channels for {chan.uid}")
|
log.runtime(f"No more channels for {chan.uid}")
|
||||||
self._peers.pop(chan.uid, None)
|
self._peers.pop(uid, None)
|
||||||
|
|
||||||
# for (uid, cid) in self._contexts.copy():
|
# for (uid, cid) in self._contexts.copy():
|
||||||
# if chan.uid == uid:
|
# if chan.uid == uid:
|
||||||
|
@ -626,11 +656,13 @@ class Actor:
|
||||||
|
|
||||||
log.runtime(f"Peers is {self._peers}")
|
log.runtime(f"Peers is {self._peers}")
|
||||||
|
|
||||||
if not self._peers: # no more channels connected
|
# No more channels to other actors (at all) registered
|
||||||
|
# as connected.
|
||||||
|
if not self._peers:
|
||||||
log.runtime("Signalling no more peer channels")
|
log.runtime("Signalling no more peer channels")
|
||||||
self._no_more_peers.set()
|
self._no_more_peers.set()
|
||||||
|
|
||||||
# # XXX: is this necessary (GC should do it?)
|
# XXX: is this necessary (GC should do it)?
|
||||||
if chan.connected():
|
if chan.connected():
|
||||||
# if the channel is still connected it may mean the far
|
# if the channel is still connected it may mean the far
|
||||||
# end has not closed and we may have gotten here due to
|
# end has not closed and we may have gotten here due to
|
||||||
|
@ -665,8 +697,8 @@ class Actor:
|
||||||
ctx = self._contexts[(uid, cid)]
|
ctx = self._contexts[(uid, cid)]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
log.warning(
|
log.warning(
|
||||||
f'Ignoring msg from [no-longer/un]known context with {uid}:'
|
f'Ignoring msg from [no-longer/un]known context {uid}:'
|
||||||
f'\n{msg}')
|
f'\n{msg}')
|
||||||
return
|
return
|
||||||
|
|
||||||
send_chan = ctx._send_chan
|
send_chan = ctx._send_chan
|
||||||
|
@ -813,7 +845,7 @@ class Actor:
|
||||||
shield: bool = False,
|
shield: bool = False,
|
||||||
task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED,
|
task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED,
|
||||||
|
|
||||||
) -> None:
|
) -> bool:
|
||||||
'''
|
'''
|
||||||
Process messages for the channel async-RPC style.
|
Process messages for the channel async-RPC style.
|
||||||
|
|
||||||
|
@ -839,7 +871,7 @@ class Actor:
|
||||||
if msg is None: # loop terminate sentinel
|
if msg is None: # loop terminate sentinel
|
||||||
|
|
||||||
log.cancel(
|
log.cancel(
|
||||||
f"Channerl to {chan.uid} terminated?\n"
|
f"Channel to {chan.uid} terminated?\n"
|
||||||
"Cancelling all associated tasks..")
|
"Cancelling all associated tasks..")
|
||||||
|
|
||||||
for (channel, cid) in self._rpc_tasks.copy():
|
for (channel, cid) in self._rpc_tasks.copy():
|
||||||
|
@ -986,6 +1018,9 @@ class Actor:
|
||||||
# up.
|
# up.
|
||||||
log.runtime(f'channel from {chan.uid} closed abruptly:\n{chan}')
|
log.runtime(f'channel from {chan.uid} closed abruptly:\n{chan}')
|
||||||
|
|
||||||
|
# transport **was** disconnected
|
||||||
|
return True
|
||||||
|
|
||||||
except (Exception, trio.MultiError) as err:
|
except (Exception, trio.MultiError) as err:
|
||||||
if nursery_cancelled_before_task:
|
if nursery_cancelled_before_task:
|
||||||
sn = self._service_n
|
sn = self._service_n
|
||||||
|
@ -1010,6 +1045,9 @@ class Actor:
|
||||||
f"Exiting msg loop for {chan} from {chan.uid} "
|
f"Exiting msg loop for {chan} from {chan.uid} "
|
||||||
f"with last msg:\n{msg}")
|
f"with last msg:\n{msg}")
|
||||||
|
|
||||||
|
# transport **was not** disconnected
|
||||||
|
return False
|
||||||
|
|
||||||
async def _from_parent(
|
async def _from_parent(
|
||||||
self,
|
self,
|
||||||
parent_addr: Optional[tuple[str, int]],
|
parent_addr: Optional[tuple[str, int]],
|
||||||
|
|
|
@ -18,8 +18,10 @@
|
||||||
Multi-core debugging for da peeps!
|
Multi-core debugging for da peeps!
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
import bdb
|
import bdb
|
||||||
import sys
|
import sys
|
||||||
|
import signal
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from contextlib import asynccontextmanager as acm
|
from contextlib import asynccontextmanager as acm
|
||||||
from typing import (
|
from typing import (
|
||||||
|
@ -29,24 +31,26 @@ from typing import (
|
||||||
AsyncIterator,
|
AsyncIterator,
|
||||||
AsyncGenerator,
|
AsyncGenerator,
|
||||||
)
|
)
|
||||||
|
from types import FrameType
|
||||||
|
|
||||||
import tractor
|
import tractor
|
||||||
import trio
|
import trio
|
||||||
from trio_typing import TaskStatus
|
from trio_typing import TaskStatus
|
||||||
|
|
||||||
from .log import get_logger
|
from .log import get_logger
|
||||||
from . import _state
|
|
||||||
from ._discovery import get_root
|
from ._discovery import get_root
|
||||||
from ._state import is_root_process, debug_mode
|
from ._state import is_root_process, debug_mode
|
||||||
from ._exceptions import is_multi_cancelled
|
from ._exceptions import is_multi_cancelled
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# wtf: only exported when installed in dev mode?
|
# wtf: only exported when installed in dev mode?
|
||||||
import pdbpp
|
import pdbpp
|
||||||
except ImportError:
|
except ImportError:
|
||||||
# pdbpp is installed in regular mode...it monkey patches stuff
|
# pdbpp is installed in regular mode...it monkey patches stuff
|
||||||
import pdb
|
import pdb
|
||||||
assert pdb.xpm, "pdbpp is not installed?" # type: ignore
|
xpm = getattr(pdb, 'xpm', None)
|
||||||
|
assert xpm, "pdbpp is not installed?" # type: ignore
|
||||||
pdbpp = pdb
|
pdbpp = pdb
|
||||||
|
|
||||||
log = get_logger(__name__)
|
log = get_logger(__name__)
|
||||||
|
@ -81,11 +85,14 @@ class TractorConfig(pdbpp.DefaultConfig):
|
||||||
"""Custom ``pdbpp`` goodness.
|
"""Custom ``pdbpp`` goodness.
|
||||||
"""
|
"""
|
||||||
# sticky_by_default = True
|
# sticky_by_default = True
|
||||||
|
enable_hidden_frames = False
|
||||||
|
|
||||||
|
|
||||||
class PdbwTeardown(pdbpp.Pdb):
|
class MultiActorPdb(pdbpp.Pdb):
|
||||||
"""Add teardown hooks to the regular ``pdbpp.Pdb``.
|
'''
|
||||||
"""
|
Add teardown hooks to the regular ``pdbpp.Pdb``.
|
||||||
|
|
||||||
|
'''
|
||||||
# override the pdbpp config with our coolio one
|
# override the pdbpp config with our coolio one
|
||||||
DefaultConfig = TractorConfig
|
DefaultConfig = TractorConfig
|
||||||
|
|
||||||
|
@ -95,17 +102,19 @@ class PdbwTeardown(pdbpp.Pdb):
|
||||||
try:
|
try:
|
||||||
super().set_continue()
|
super().set_continue()
|
||||||
finally:
|
finally:
|
||||||
global _local_task_in_debug
|
global _local_task_in_debug, _pdb_release_hook
|
||||||
_local_task_in_debug = None
|
_local_task_in_debug = None
|
||||||
_pdb_release_hook()
|
if _pdb_release_hook:
|
||||||
|
_pdb_release_hook()
|
||||||
|
|
||||||
def set_quit(self):
|
def set_quit(self):
|
||||||
try:
|
try:
|
||||||
super().set_quit()
|
super().set_quit()
|
||||||
finally:
|
finally:
|
||||||
global _local_task_in_debug
|
global _local_task_in_debug, _pdb_release_hook
|
||||||
_local_task_in_debug = None
|
_local_task_in_debug = None
|
||||||
_pdb_release_hook()
|
if _pdb_release_hook:
|
||||||
|
_pdb_release_hook()
|
||||||
|
|
||||||
|
|
||||||
# TODO: will be needed whenever we get to true remote debugging.
|
# TODO: will be needed whenever we get to true remote debugging.
|
||||||
|
@ -150,7 +159,8 @@ async def _acquire_debug_lock(
|
||||||
uid: Tuple[str, str]
|
uid: Tuple[str, str]
|
||||||
|
|
||||||
) -> AsyncIterator[trio.StrictFIFOLock]:
|
) -> AsyncIterator[trio.StrictFIFOLock]:
|
||||||
'''Acquire a root-actor local FIFO lock which tracks mutex access of
|
'''
|
||||||
|
Acquire a root-actor local FIFO lock which tracks mutex access of
|
||||||
the process tree's global debugger breakpoint.
|
the process tree's global debugger breakpoint.
|
||||||
|
|
||||||
This lock avoids tty clobbering (by preventing multiple processes
|
This lock avoids tty clobbering (by preventing multiple processes
|
||||||
|
@ -162,7 +172,7 @@ async def _acquire_debug_lock(
|
||||||
|
|
||||||
task_name = trio.lowlevel.current_task().name
|
task_name = trio.lowlevel.current_task().name
|
||||||
|
|
||||||
log.debug(
|
log.runtime(
|
||||||
f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}"
|
f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -175,14 +185,14 @@ async def _acquire_debug_lock(
|
||||||
_no_remote_has_tty = trio.Event()
|
_no_remote_has_tty = trio.Event()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
log.debug(
|
log.runtime(
|
||||||
f"entering lock checkpoint, remote task: {task_name}:{uid}"
|
f"entering lock checkpoint, remote task: {task_name}:{uid}"
|
||||||
)
|
)
|
||||||
we_acquired = True
|
we_acquired = True
|
||||||
await _debug_lock.acquire()
|
await _debug_lock.acquire()
|
||||||
|
|
||||||
_global_actor_in_debug = uid
|
_global_actor_in_debug = uid
|
||||||
log.debug(f"TTY lock acquired, remote task: {task_name}:{uid}")
|
log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}")
|
||||||
|
|
||||||
# NOTE: critical section: this yield is unshielded!
|
# NOTE: critical section: this yield is unshielded!
|
||||||
|
|
||||||
|
@ -198,7 +208,10 @@ async def _acquire_debug_lock(
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# if _global_actor_in_debug == uid:
|
# if _global_actor_in_debug == uid:
|
||||||
if we_acquired and _debug_lock.locked():
|
if (
|
||||||
|
we_acquired
|
||||||
|
and _debug_lock.locked()
|
||||||
|
):
|
||||||
_debug_lock.release()
|
_debug_lock.release()
|
||||||
|
|
||||||
# IFF there are no more requesting tasks queued up fire, the
|
# IFF there are no more requesting tasks queued up fire, the
|
||||||
|
@ -210,29 +223,13 @@ async def _acquire_debug_lock(
|
||||||
if (
|
if (
|
||||||
not stats.owner
|
not stats.owner
|
||||||
):
|
):
|
||||||
log.debug(f"No more tasks waiting on tty lock! says {uid}")
|
log.runtime(f"No more tasks waiting on tty lock! says {uid}")
|
||||||
_no_remote_has_tty.set()
|
_no_remote_has_tty.set()
|
||||||
_no_remote_has_tty = None
|
_no_remote_has_tty = None
|
||||||
|
|
||||||
_global_actor_in_debug = None
|
_global_actor_in_debug = None
|
||||||
|
|
||||||
log.debug(f"TTY lock released, remote task: {task_name}:{uid}")
|
log.runtime(f"TTY lock released, remote task: {task_name}:{uid}")
|
||||||
|
|
||||||
|
|
||||||
def handler(signum, frame, *args):
|
|
||||||
"""Specialized debugger compatible SIGINT handler.
|
|
||||||
|
|
||||||
In childred we always ignore to avoid deadlocks since cancellation
|
|
||||||
should always be managed by the parent supervising actor. The root
|
|
||||||
is always cancelled on ctrl-c.
|
|
||||||
"""
|
|
||||||
if is_root_process():
|
|
||||||
tractor.current_actor().cancel_soon()
|
|
||||||
else:
|
|
||||||
print(
|
|
||||||
"tractor ignores SIGINT while in debug mode\n"
|
|
||||||
"If you have a special need for it please open an issue.\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@tractor.context
|
@tractor.context
|
||||||
|
@ -260,46 +257,62 @@ async def _hijack_stdin_for_child(
|
||||||
|
|
||||||
log.debug(f"Actor {subactor_uid} is WAITING on stdin hijack lock")
|
log.debug(f"Actor {subactor_uid} is WAITING on stdin hijack lock")
|
||||||
|
|
||||||
with trio.CancelScope(shield=True):
|
orig_handler = signal.signal(
|
||||||
|
signal.SIGINT,
|
||||||
|
shield_sigint,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with (
|
||||||
|
trio.CancelScope(shield=True),
|
||||||
|
):
|
||||||
|
try:
|
||||||
|
lock = None
|
||||||
|
async with _acquire_debug_lock(subactor_uid) as lock:
|
||||||
|
|
||||||
try:
|
# indicate to child that we've locked stdio
|
||||||
lock = None
|
await ctx.started('Locked')
|
||||||
async with _acquire_debug_lock(subactor_uid) as lock:
|
log.debug(
|
||||||
|
f"Actor {subactor_uid} acquired stdin hijack lock"
|
||||||
|
)
|
||||||
|
|
||||||
# indicate to child that we've locked stdio
|
# wait for unlock pdb by child
|
||||||
await ctx.started('Locked')
|
async with ctx.open_stream() as stream:
|
||||||
log.debug(f"Actor {subactor_uid} acquired stdin hijack lock")
|
assert await stream.receive() == 'pdb_unlock'
|
||||||
|
|
||||||
# wait for unlock pdb by child
|
except (
|
||||||
async with ctx.open_stream() as stream:
|
BaseException,
|
||||||
assert await stream.receive() == 'pdb_unlock'
|
# trio.MultiError,
|
||||||
|
# Exception,
|
||||||
|
# trio.BrokenResourceError,
|
||||||
|
# trio.Cancelled, # by local cancellation
|
||||||
|
# trio.ClosedResourceError, # by self._rx_chan
|
||||||
|
# ContextCancelled,
|
||||||
|
# ConnectionResetError,
|
||||||
|
):
|
||||||
|
# XXX: there may be a race with the portal teardown
|
||||||
|
# with the calling actor which we can safely ignore.
|
||||||
|
# The alternative would be sending an ack message
|
||||||
|
# and allowing the client to wait for us to teardown
|
||||||
|
# first?
|
||||||
|
if lock and lock.locked():
|
||||||
|
lock.release()
|
||||||
|
|
||||||
# try:
|
# if isinstance(err, trio.Cancelled):
|
||||||
# assert await stream.receive() == 'pdb_unlock'
|
|
||||||
|
|
||||||
except (
|
|
||||||
# BaseException,
|
|
||||||
trio.MultiError,
|
|
||||||
trio.BrokenResourceError,
|
|
||||||
trio.Cancelled, # by local cancellation
|
|
||||||
trio.ClosedResourceError, # by self._rx_chan
|
|
||||||
) as err:
|
|
||||||
# XXX: there may be a race with the portal teardown
|
|
||||||
# with the calling actor which we can safely ignore.
|
|
||||||
# The alternative would be sending an ack message
|
|
||||||
# and allowing the client to wait for us to teardown
|
|
||||||
# first?
|
|
||||||
if lock and lock.locked():
|
|
||||||
lock.release()
|
|
||||||
|
|
||||||
if isinstance(err, trio.Cancelled):
|
|
||||||
raise
|
raise
|
||||||
finally:
|
|
||||||
log.debug(
|
|
||||||
"TTY lock released, remote task:"
|
|
||||||
f"{task_name}:{subactor_uid}")
|
|
||||||
|
|
||||||
return "pdb_unlock_complete"
|
finally:
|
||||||
|
log.runtime(
|
||||||
|
"TTY lock released, remote task:"
|
||||||
|
f"{task_name}:{subactor_uid}"
|
||||||
|
)
|
||||||
|
|
||||||
|
return "pdb_unlock_complete"
|
||||||
|
|
||||||
|
finally:
|
||||||
|
signal.signal(
|
||||||
|
signal.SIGINT,
|
||||||
|
orig_handler
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
async def wait_for_parent_stdin_hijack(
|
async def wait_for_parent_stdin_hijack(
|
||||||
|
@ -338,20 +351,22 @@ async def wait_for_parent_stdin_hijack(
|
||||||
|
|
||||||
async with ctx.open_stream() as stream:
|
async with ctx.open_stream() as stream:
|
||||||
# unblock local caller
|
# unblock local caller
|
||||||
task_status.started(cs)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
assert _local_pdb_complete
|
assert _local_pdb_complete
|
||||||
|
task_status.started(cs)
|
||||||
await _local_pdb_complete.wait()
|
await _local_pdb_complete.wait()
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# TODO: shielding currently can cause hangs...
|
# TODO: shielding currently can cause hangs...
|
||||||
with trio.CancelScope(shield=True):
|
# with trio.CancelScope(shield=True):
|
||||||
await stream.send('pdb_unlock')
|
await stream.send('pdb_unlock')
|
||||||
|
|
||||||
# sync with callee termination
|
# sync with callee termination
|
||||||
assert await ctx.result() == "pdb_unlock_complete"
|
assert await ctx.result() == "pdb_unlock_complete"
|
||||||
|
|
||||||
|
log.pdb('unlocked context')
|
||||||
|
|
||||||
except tractor.ContextCancelled:
|
except tractor.ContextCancelled:
|
||||||
log.warning('Root actor cancelled debug lock')
|
log.warning('Root actor cancelled debug lock')
|
||||||
|
|
||||||
|
@ -362,6 +377,28 @@ async def wait_for_parent_stdin_hijack(
|
||||||
log.debug(f"Child {actor_uid} released parent stdio lock")
|
log.debug(f"Child {actor_uid} released parent stdio lock")
|
||||||
|
|
||||||
|
|
||||||
|
def mk_mpdb() -> tuple[MultiActorPdb, Callable]:
|
||||||
|
|
||||||
|
pdb = MultiActorPdb()
|
||||||
|
signal.signal = pdbpp.hideframe(signal.signal)
|
||||||
|
orig_handler = signal.signal(
|
||||||
|
signal.SIGINT,
|
||||||
|
partial(shield_sigint, pdb_obj=pdb),
|
||||||
|
)
|
||||||
|
pdb.allow_kbdint = True
|
||||||
|
pdb.nosigint = True
|
||||||
|
|
||||||
|
# TODO: add this as method on our pdb obj?
|
||||||
|
def undo_sigint():
|
||||||
|
# restore original sigint handler
|
||||||
|
signal.signal(
|
||||||
|
signal.SIGINT,
|
||||||
|
orig_handler
|
||||||
|
)
|
||||||
|
|
||||||
|
return pdb, undo_sigint
|
||||||
|
|
||||||
|
|
||||||
async def _breakpoint(
|
async def _breakpoint(
|
||||||
|
|
||||||
debug_func,
|
debug_func,
|
||||||
|
@ -370,23 +407,26 @@ async def _breakpoint(
|
||||||
# shield: bool = False
|
# shield: bool = False
|
||||||
|
|
||||||
) -> None:
|
) -> None:
|
||||||
'''``tractor`` breakpoint entry for engaging pdb machinery
|
'''
|
||||||
in the root or a subactor.
|
breakpoint entry for engaging pdb machinery in the root or
|
||||||
|
a subactor.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
# TODO: is it possible to debug a trio.Cancelled except block?
|
__tracebackhide__ = True
|
||||||
# right now it seems like we can kinda do with by shielding
|
|
||||||
# around ``tractor.breakpoint()`` but not if we move the shielded
|
|
||||||
# scope here???
|
|
||||||
# with trio.CancelScope(shield=shield):
|
|
||||||
|
|
||||||
|
pdb, undo_sigint = mk_mpdb()
|
||||||
actor = tractor.current_actor()
|
actor = tractor.current_actor()
|
||||||
task_name = trio.lowlevel.current_task().name
|
task_name = trio.lowlevel.current_task().name
|
||||||
|
|
||||||
global _local_pdb_complete, _pdb_release_hook
|
global _local_pdb_complete, _pdb_release_hook
|
||||||
global _local_task_in_debug, _global_actor_in_debug
|
global _local_task_in_debug, _global_actor_in_debug
|
||||||
|
|
||||||
await trio.lowlevel.checkpoint()
|
# TODO: is it possible to debug a trio.Cancelled except block?
|
||||||
|
# right now it seems like we can kinda do with by shielding
|
||||||
|
# around ``tractor.breakpoint()`` but not if we move the shielded
|
||||||
|
# scope here???
|
||||||
|
# with trio.CancelScope(shield=shield):
|
||||||
|
# await trio.lowlevel.checkpoint()
|
||||||
|
|
||||||
if not _local_pdb_complete or _local_pdb_complete.is_set():
|
if not _local_pdb_complete or _local_pdb_complete.is_set():
|
||||||
_local_pdb_complete = trio.Event()
|
_local_pdb_complete = trio.Event()
|
||||||
|
@ -412,8 +452,16 @@ async def _breakpoint(
|
||||||
# entries/requests to the root process
|
# entries/requests to the root process
|
||||||
_local_task_in_debug = task_name
|
_local_task_in_debug = task_name
|
||||||
|
|
||||||
|
def child_release_hook():
|
||||||
|
# _local_task_in_debug = None
|
||||||
|
_local_pdb_complete.set()
|
||||||
|
|
||||||
|
# restore original sigint handler
|
||||||
|
undo_sigint()
|
||||||
|
|
||||||
# assign unlock callback for debugger teardown hooks
|
# assign unlock callback for debugger teardown hooks
|
||||||
_pdb_release_hook = _local_pdb_complete.set
|
# _pdb_release_hook = _local_pdb_complete.set
|
||||||
|
_pdb_release_hook = child_release_hook
|
||||||
|
|
||||||
# this **must** be awaited by the caller and is done using the
|
# this **must** be awaited by the caller and is done using the
|
||||||
# root nursery so that the debugger can continue to run without
|
# root nursery so that the debugger can continue to run without
|
||||||
|
@ -464,59 +512,174 @@ async def _breakpoint(
|
||||||
global _local_pdb_complete, _debug_lock
|
global _local_pdb_complete, _debug_lock
|
||||||
global _global_actor_in_debug, _local_task_in_debug
|
global _global_actor_in_debug, _local_task_in_debug
|
||||||
|
|
||||||
_debug_lock.release()
|
try:
|
||||||
|
_debug_lock.release()
|
||||||
|
except RuntimeError:
|
||||||
|
# uhhh makes no sense but been seeing the non-owner
|
||||||
|
# release error even though this is definitely the task
|
||||||
|
# that locked?
|
||||||
|
owner = _debug_lock.statistics().owner
|
||||||
|
if owner:
|
||||||
|
raise
|
||||||
|
|
||||||
_global_actor_in_debug = None
|
_global_actor_in_debug = None
|
||||||
_local_task_in_debug = None
|
_local_task_in_debug = None
|
||||||
_local_pdb_complete.set()
|
_local_pdb_complete.set()
|
||||||
|
|
||||||
|
# restore original sigint handler
|
||||||
|
undo_sigint()
|
||||||
|
|
||||||
_pdb_release_hook = teardown
|
_pdb_release_hook = teardown
|
||||||
|
|
||||||
# block here one (at the appropriate frame *up*) where
|
# frame = sys._getframe()
|
||||||
# ``breakpoint()`` was awaited and begin handling stdio.
|
# last_f = frame.f_back
|
||||||
log.debug("Entering the synchronous world of pdb")
|
# last_f.f_globals['__tracebackhide__'] = True
|
||||||
debug_func(actor)
|
|
||||||
|
try:
|
||||||
|
# block here one (at the appropriate frame *up*) where
|
||||||
|
# ``breakpoint()`` was awaited and begin handling stdio.
|
||||||
|
log.debug("Entering the synchronous world of pdb")
|
||||||
|
debug_func(actor, pdb)
|
||||||
|
|
||||||
|
except bdb.BdbQuit:
|
||||||
|
if _pdb_release_hook:
|
||||||
|
_pdb_release_hook()
|
||||||
|
raise
|
||||||
|
|
||||||
|
# XXX: apparently we can't do this without showing this frame
|
||||||
|
# in the backtrace on first entry to the REPL? Seems like an odd
|
||||||
|
# behaviour that should have been fixed by now. This is also why
|
||||||
|
# we scrapped all the @cm approaches that were tried previously.
|
||||||
|
|
||||||
|
# finally:
|
||||||
|
# __tracebackhide__ = True
|
||||||
|
# # frame = sys._getframe()
|
||||||
|
# # last_f = frame.f_back
|
||||||
|
# # last_f.f_globals['__tracebackhide__'] = True
|
||||||
|
# # signal.signal = pdbpp.hideframe(signal.signal)
|
||||||
|
# signal.signal(
|
||||||
|
# signal.SIGINT,
|
||||||
|
# orig_handler
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
def _mk_pdb() -> PdbwTeardown:
|
def shield_sigint(
|
||||||
|
signum: int,
|
||||||
|
frame: 'frame', # type: ignore # noqa
|
||||||
|
pdb_obj: Optional[MultiActorPdb] = None,
|
||||||
|
*args,
|
||||||
|
|
||||||
# XXX: setting these flags on the pdb instance are absolutely
|
) -> None:
|
||||||
# critical to having ctrl-c work in the ``trio`` standard way! The
|
'''
|
||||||
# stdlib's pdb supports entering the current sync frame on a SIGINT,
|
Specialized debugger compatible SIGINT handler.
|
||||||
# with ``trio`` we pretty much never want this and if we did we can
|
|
||||||
# handle it in the ``tractor`` task runtime.
|
|
||||||
|
|
||||||
pdb = PdbwTeardown()
|
In childred we always ignore to avoid deadlocks since cancellation
|
||||||
pdb.allow_kbdint = True
|
should always be managed by the parent supervising actor. The root
|
||||||
pdb.nosigint = True
|
is always cancelled on ctrl-c.
|
||||||
|
|
||||||
return pdb
|
'''
|
||||||
|
__tracebackhide__ = True
|
||||||
|
|
||||||
|
global _local_task_in_debug, _global_actor_in_debug
|
||||||
|
in_debug = _global_actor_in_debug
|
||||||
|
|
||||||
|
actor = tractor.current_actor()
|
||||||
|
|
||||||
|
# root actor branch that reports whether or not a child
|
||||||
|
# has locked debugger.
|
||||||
|
if (
|
||||||
|
is_root_process()
|
||||||
|
and in_debug
|
||||||
|
):
|
||||||
|
name = in_debug[0]
|
||||||
|
if name != 'root':
|
||||||
|
log.pdb(
|
||||||
|
f"Ignoring SIGINT while child in debug mode: `{in_debug}`"
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
log.pdb(
|
||||||
|
"Ignoring SIGINT while in debug mode"
|
||||||
|
)
|
||||||
|
|
||||||
|
# child actor that has locked the debugger
|
||||||
|
elif (
|
||||||
|
not is_root_process()
|
||||||
|
):
|
||||||
|
task = _local_task_in_debug
|
||||||
|
if task:
|
||||||
|
log.pdb(
|
||||||
|
f"Ignoring SIGINT while task in debug mode: `{task}`"
|
||||||
|
)
|
||||||
|
|
||||||
|
# TODO: how to handle the case of an intermediary-child actor
|
||||||
|
# that **is not** marked in debug mode?
|
||||||
|
# elif debug_mode():
|
||||||
|
|
||||||
|
else:
|
||||||
|
log.pdb(
|
||||||
|
"Ignoring SIGINT since debug mode is enabled"
|
||||||
|
)
|
||||||
|
|
||||||
|
# noone has the debugger so raise KBI
|
||||||
|
else:
|
||||||
|
# If we haven't tried to cancel the runtime then do that instead
|
||||||
|
# of raising a KBI (which may non-gracefully destroy
|
||||||
|
# a ``trio.run()``).
|
||||||
|
if not actor._cancel_called:
|
||||||
|
actor.cancel_soon()
|
||||||
|
|
||||||
|
# If the runtime is already cancelled it likely means the user
|
||||||
|
# hit ctrl-c again because teardown didn't full take place in
|
||||||
|
# which case we do the "hard" raising of a local KBI.
|
||||||
|
else:
|
||||||
|
raise KeyboardInterrupt
|
||||||
|
|
||||||
|
# maybe redraw/print last REPL output to console
|
||||||
|
if pdb_obj:
|
||||||
|
|
||||||
|
# TODO: make this work like sticky mode where if there is output
|
||||||
|
# detected as written to the tty we redraw this part underneath
|
||||||
|
# and erase the past draw of this same bit above?
|
||||||
|
# pdb_obj.sticky = True
|
||||||
|
# pdb_obj._print_if_sticky()
|
||||||
|
|
||||||
|
# also see these links for an approach from ``ptk``:
|
||||||
|
# https://github.com/goodboy/tractor/issues/130#issuecomment-663752040
|
||||||
|
# https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
|
||||||
|
|
||||||
|
pdb_obj.do_longlist(None)
|
||||||
|
print(pdb_obj.prompt, end='', flush=True)
|
||||||
|
|
||||||
|
|
||||||
def _set_trace(actor=None):
|
def _set_trace(
|
||||||
pdb = _mk_pdb()
|
actor: Optional[tractor._actor.Actor] = None,
|
||||||
|
pdb: Optional[MultiActorPdb] = None,
|
||||||
|
):
|
||||||
|
__tracebackhide__ = True
|
||||||
|
actor = actor or tractor.current_actor()
|
||||||
|
|
||||||
if actor is not None:
|
# XXX: on latest ``pdbpp`` i guess we don't need this?
|
||||||
|
# frame = sys._getframe()
|
||||||
|
# last_f = frame.f_back
|
||||||
|
# last_f.f_globals['__tracebackhide__'] = True
|
||||||
|
|
||||||
|
# start 2 levels up in user code
|
||||||
|
frame: FrameType = sys._getframe()
|
||||||
|
if frame:
|
||||||
|
frame = frame.f_back.f_back # type: ignore
|
||||||
|
|
||||||
|
if pdb and actor is not None:
|
||||||
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
|
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
|
||||||
|
|
||||||
pdb.set_trace(
|
|
||||||
# start 2 levels up in user code
|
|
||||||
frame=sys._getframe().f_back.f_back,
|
|
||||||
)
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# we entered the global ``breakpoint()`` built-in from sync code
|
pdb, undo_sigint = mk_mpdb()
|
||||||
|
|
||||||
|
# we entered the global ``breakpoint()`` built-in from sync code?
|
||||||
global _local_task_in_debug, _pdb_release_hook
|
global _local_task_in_debug, _pdb_release_hook
|
||||||
_local_task_in_debug = 'sync'
|
_local_task_in_debug = 'sync'
|
||||||
|
|
||||||
def nuttin():
|
pdb.set_trace(frame=frame)
|
||||||
pass
|
|
||||||
|
|
||||||
_pdb_release_hook = nuttin
|
|
||||||
|
|
||||||
pdb.set_trace(
|
|
||||||
# start 2 levels up in user code
|
|
||||||
frame=sys._getframe().f_back,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
breakpoint = partial(
|
breakpoint = partial(
|
||||||
|
@ -525,11 +688,40 @@ breakpoint = partial(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _post_mortem(actor):
|
def _post_mortem(
|
||||||
log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
|
actor: tractor._actor.Actor,
|
||||||
pdb = _mk_pdb()
|
pdb: MultiActorPdb,
|
||||||
|
|
||||||
|
) -> None:
|
||||||
|
'''
|
||||||
|
Enter the ``pdbpp`` port mortem entrypoint using our custom
|
||||||
|
debugger instance.
|
||||||
|
|
||||||
|
'''
|
||||||
|
log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
|
||||||
|
|
||||||
|
# XXX: on py3.10 if you don't have latest ``pdbpp`` installed.
|
||||||
|
# The exception looks something like:
|
||||||
|
# Traceback (most recent call last):
|
||||||
|
# File ".../tractor/_debug.py", line 729, in _post_mortem
|
||||||
|
# for _ in range(100):
|
||||||
|
# File "../site-packages/pdb.py", line 1227, in xpm
|
||||||
|
# post_mortem(info[2], Pdb)
|
||||||
|
# File "../site-packages/pdb.py", line 1175, in post_mortem
|
||||||
|
# p.interaction(None, t)
|
||||||
|
# File "../site-packages/pdb.py", line 216, in interaction
|
||||||
|
# ret = self.setup(frame, traceback)
|
||||||
|
# File "../site-packages/pdb.py", line 259, in setup
|
||||||
|
# ret = super(Pdb, self).setup(frame, tb)
|
||||||
|
# File "/usr/lib/python3.10/pdb.py", line 217, in setup
|
||||||
|
# self.curframe = self.stack[self.curindex][0]
|
||||||
|
# IndexError: list index out of range
|
||||||
|
|
||||||
|
# NOTE: you need ``pdbpp`` master (at least this commit
|
||||||
|
# https://github.com/pdbpp/pdbpp/commit/b757794857f98d53e3ebbe70879663d7d843a6c2)
|
||||||
|
# to fix this and avoid the hang it causes XD.
|
||||||
|
# see also: https://github.com/pdbpp/pdbpp/issues/480
|
||||||
|
|
||||||
# custom Pdb post-mortem entry
|
|
||||||
pdbpp.xpm(Pdb=lambda: pdb)
|
pdbpp.xpm(Pdb=lambda: pdb)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,8 @@ import importlib
|
||||||
import inspect
|
import inspect
|
||||||
from typing import (
|
from typing import (
|
||||||
Any, Optional,
|
Any, Optional,
|
||||||
Callable, AsyncGenerator
|
Callable, AsyncGenerator,
|
||||||
|
Type,
|
||||||
)
|
)
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
@ -442,6 +443,10 @@ class Portal:
|
||||||
_err: Optional[BaseException] = None
|
_err: Optional[BaseException] = None
|
||||||
ctx._portal = self
|
ctx._portal = self
|
||||||
|
|
||||||
|
uid = self.channel.uid
|
||||||
|
cid = ctx.cid
|
||||||
|
etype: Optional[Type[BaseException]] = None
|
||||||
|
|
||||||
# deliver context instance and .started() msg value in open tuple.
|
# deliver context instance and .started() msg value in open tuple.
|
||||||
try:
|
try:
|
||||||
async with trio.open_nursery() as scope_nursery:
|
async with trio.open_nursery() as scope_nursery:
|
||||||
|
@ -477,13 +482,24 @@ class Portal:
|
||||||
# KeyboardInterrupt,
|
# KeyboardInterrupt,
|
||||||
|
|
||||||
) as err:
|
) as err:
|
||||||
_err = err
|
etype = type(err)
|
||||||
# the context cancels itself on any cancel
|
# the context cancels itself on any cancel
|
||||||
# causing error.
|
# causing error.
|
||||||
log.cancel(
|
|
||||||
f'Context to {self.channel.uid} sending cancel request..')
|
|
||||||
|
|
||||||
await ctx.cancel()
|
if ctx.chan.connected():
|
||||||
|
log.cancel(
|
||||||
|
'Context cancelled for task, sending cancel request..\n'
|
||||||
|
f'task:{cid}\n'
|
||||||
|
f'actor:{uid}'
|
||||||
|
)
|
||||||
|
await ctx.cancel()
|
||||||
|
else:
|
||||||
|
log.warning(
|
||||||
|
'IPC connection for context is broken?\n'
|
||||||
|
f'task:{cid}\n'
|
||||||
|
f'actor:{uid}'
|
||||||
|
)
|
||||||
|
|
||||||
raise
|
raise
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
@ -492,7 +508,13 @@ class Portal:
|
||||||
# sure we get the error the underlying feeder mem chan.
|
# sure we get the error the underlying feeder mem chan.
|
||||||
# if it's not raised here it *should* be raised from the
|
# if it's not raised here it *should* be raised from the
|
||||||
# msg loop nursery right?
|
# msg loop nursery right?
|
||||||
result = await ctx.result()
|
if ctx.chan.connected():
|
||||||
|
log.info(
|
||||||
|
'Waiting on final context-task result for\n'
|
||||||
|
f'task:{cid}\n'
|
||||||
|
f'actor:{uid}'
|
||||||
|
)
|
||||||
|
result = await ctx.result()
|
||||||
|
|
||||||
# though it should be impossible for any tasks
|
# though it should be impossible for any tasks
|
||||||
# operating *in* this scope to have survived
|
# operating *in* this scope to have survived
|
||||||
|
@ -502,14 +524,17 @@ class Portal:
|
||||||
# should we encapsulate this in the context api?
|
# should we encapsulate this in the context api?
|
||||||
await ctx._recv_chan.aclose()
|
await ctx._recv_chan.aclose()
|
||||||
|
|
||||||
if _err:
|
if etype:
|
||||||
if ctx._cancel_called:
|
if ctx._cancel_called:
|
||||||
log.cancel(
|
log.cancel(
|
||||||
f'Context {fn_name} cancelled by caller with\n{_err}'
|
f'Context {fn_name} cancelled by caller with\n{etype}'
|
||||||
)
|
)
|
||||||
elif _err is not None:
|
elif _err is not None:
|
||||||
log.cancel(
|
log.cancel(
|
||||||
f'Context {fn_name} cancelled by callee with\n{_err}'
|
f'Context for task cancelled by callee with {etype}\n'
|
||||||
|
f'target: `{fn_name}`\n'
|
||||||
|
f'task:{cid}\n'
|
||||||
|
f'actor:{uid}'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
log.runtime(
|
log.runtime(
|
||||||
|
|
|
@ -295,7 +295,7 @@ async def new_proc(
|
||||||
# the OS; it otherwise can be passed via the parent channel if
|
# the OS; it otherwise can be passed via the parent channel if
|
||||||
# we prefer in the future (for privacy).
|
# we prefer in the future (for privacy).
|
||||||
"--uid",
|
"--uid",
|
||||||
str(subactor.uid),
|
str(uid),
|
||||||
# Address the child must connect to on startup
|
# Address the child must connect to on startup
|
||||||
"--parent_addr",
|
"--parent_addr",
|
||||||
str(parent_addr)
|
str(parent_addr)
|
||||||
|
@ -321,8 +321,7 @@ async def new_proc(
|
||||||
# wait for actor to spawn and connect back to us
|
# wait for actor to spawn and connect back to us
|
||||||
# channel should have handshake completed by the
|
# channel should have handshake completed by the
|
||||||
# local actor by the time we get a ref to it
|
# local actor by the time we get a ref to it
|
||||||
event, chan = await actor_nursery._actor.wait_for_peer(
|
event, chan = await actor_nursery._actor.wait_for_peer(uid)
|
||||||
subactor.uid)
|
|
||||||
|
|
||||||
except trio.Cancelled:
|
except trio.Cancelled:
|
||||||
cancelled_during_spawn = True
|
cancelled_during_spawn = True
|
||||||
|
@ -363,10 +362,54 @@ async def new_proc(
|
||||||
task_status.started(portal)
|
task_status.started(portal)
|
||||||
|
|
||||||
# wait for ActorNursery.wait() to be called
|
# wait for ActorNursery.wait() to be called
|
||||||
|
n_exited = actor_nursery._join_procs
|
||||||
with trio.CancelScope(shield=True):
|
with trio.CancelScope(shield=True):
|
||||||
await actor_nursery._join_procs.wait()
|
await n_exited.wait()
|
||||||
|
|
||||||
async with trio.open_nursery() as nursery:
|
async with trio.open_nursery() as nursery:
|
||||||
|
|
||||||
|
async def soft_wait_and_maybe_cancel_ria_task():
|
||||||
|
# This is a "soft" (cancellable) join/reap which
|
||||||
|
# will remote cancel the actor on a ``trio.Cancelled``
|
||||||
|
# condition.
|
||||||
|
await soft_wait(
|
||||||
|
proc,
|
||||||
|
trio.Process.wait,
|
||||||
|
portal
|
||||||
|
)
|
||||||
|
|
||||||
|
if n_exited.is_set():
|
||||||
|
# cancel result waiter that may have been spawned in
|
||||||
|
# tandem if not done already
|
||||||
|
log.warning(
|
||||||
|
"Cancelling existing result waiter task for "
|
||||||
|
f"{subactor.uid}"
|
||||||
|
)
|
||||||
|
nursery.cancel_scope.cancel()
|
||||||
|
|
||||||
|
else:
|
||||||
|
log.warning(
|
||||||
|
f'Process for actor {uid} terminated before'
|
||||||
|
'nursery exit. ' 'This may mean an IPC'
|
||||||
|
'connection failed!'
|
||||||
|
)
|
||||||
|
|
||||||
|
nursery.start_soon(soft_wait_and_maybe_cancel_ria_task)
|
||||||
|
|
||||||
|
# TODO: when we finally remove the `.run_in_actor()` api
|
||||||
|
# we should be able to entirely drop these 2 blocking calls:
|
||||||
|
# - we don't need to wait on nursery exit to capture
|
||||||
|
# process-spawn-machinery level errors (and propagate them).
|
||||||
|
# - we don't need to wait on final results from ria portals
|
||||||
|
# since this will be done in some higher level wrapper API.
|
||||||
|
|
||||||
|
# XXX: interestingly we can't put this here bc it causes
|
||||||
|
# the pub-sub tests to fail? wth.. should probably drop
|
||||||
|
# those XD
|
||||||
|
# wait for ActorNursery.wait() to be called
|
||||||
|
# with trio.CancelScope(shield=True):
|
||||||
|
# await n_exited.wait()
|
||||||
|
|
||||||
if portal in actor_nursery._cancel_after_result_on_exit:
|
if portal in actor_nursery._cancel_after_result_on_exit:
|
||||||
nursery.start_soon(
|
nursery.start_soon(
|
||||||
cancel_on_completion,
|
cancel_on_completion,
|
||||||
|
@ -375,22 +418,6 @@ async def new_proc(
|
||||||
errors
|
errors
|
||||||
)
|
)
|
||||||
|
|
||||||
# This is a "soft" (cancellable) join/reap which
|
|
||||||
# will remote cancel the actor on a ``trio.Cancelled``
|
|
||||||
# condition.
|
|
||||||
await soft_wait(
|
|
||||||
proc,
|
|
||||||
trio.Process.wait,
|
|
||||||
portal
|
|
||||||
)
|
|
||||||
|
|
||||||
# cancel result waiter that may have been spawned in
|
|
||||||
# tandem if not done already
|
|
||||||
log.warning(
|
|
||||||
"Cancelling existing result waiter task for "
|
|
||||||
f"{subactor.uid}")
|
|
||||||
nursery.cancel_scope.cancel()
|
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# The "hard" reap since no actor zombies are allowed!
|
# The "hard" reap since no actor zombies are allowed!
|
||||||
# XXX: do this **after** cancellation/tearfown to avoid
|
# XXX: do this **after** cancellation/tearfown to avoid
|
||||||
|
@ -407,8 +434,10 @@ async def new_proc(
|
||||||
await proc.wait()
|
await proc.wait()
|
||||||
|
|
||||||
if is_root_process():
|
if is_root_process():
|
||||||
|
|
||||||
await maybe_wait_for_debugger(
|
await maybe_wait_for_debugger(
|
||||||
child_in_debug=_runtime_vars.get('_debug_mode', False),
|
child_in_debug=_runtime_vars.get(
|
||||||
|
'_debug_mode', False)
|
||||||
)
|
)
|
||||||
|
|
||||||
if proc.poll() is None:
|
if proc.poll() is None:
|
||||||
|
|
|
@ -604,7 +604,8 @@ class Context:
|
||||||
self._portal._streams.remove(rchan)
|
self._portal._streams.remove(rchan)
|
||||||
|
|
||||||
async def result(self) -> Any:
|
async def result(self) -> Any:
|
||||||
'''From a caller side, wait for and return the final result from
|
'''
|
||||||
|
From a caller side, wait for and return the final result from
|
||||||
the callee side task.
|
the callee side task.
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
|
Loading…
Reference in New Issue