forked from goodboy/tractor
Avoid `ctx.cancel()` after ctxc rxed in `.open_context()`
In the case where the callee side delivers us a ctxc with `.canceller` set we can presume that remote cancellation already has taken place and thus we don't need to do the normal call-`Context.cancel()`-on-error step. Further, in the case where we do call it also handle any `trio.CloseResourceError` gracefully with a `.warning()`. Also, originally I had added a post-`yield`-maybe-raise to attempt handling any remote ctxc the same as for the local case (i.e. raised from `yield` line) wherein if we get a remote ctxc the same handler branch-path would trigger, thus avoiding different behaviour in that case. I ended up masking it out (but can't member why.. ) as it seems the normal `.result()` call and its internal handling gets the same behaviour? I've left in the heavily commented code in case it ends up being the better way to go; likely making the move to having a single code in both cases is better even if it is just a matter of deciding whether to swallow the ctxc or not in the `.cancel_acked` case. Further teensie improvements: - obvi improve/simplify log msg contents as in prior patches. - use the new `maybe_wait_for_debugger(header_msg: str)` if/when waiting to exit in debug mode. - another `hide_tb: bool` frame hider flag. - rando type-annot updates of course :)modden_spawn_from_client_req
parent
299429a278
commit
ed10632d97
|
@ -27,8 +27,9 @@ from __future__ import annotations
|
|||
import importlib
|
||||
import inspect
|
||||
from typing import (
|
||||
Any, Optional,
|
||||
Callable, AsyncGenerator,
|
||||
Any,
|
||||
Callable,
|
||||
AsyncGenerator,
|
||||
Type,
|
||||
)
|
||||
from functools import partial
|
||||
|
@ -52,6 +53,7 @@ from ._ipc import Channel
|
|||
from .log import get_logger
|
||||
from .msg import NamespacePath
|
||||
from ._exceptions import (
|
||||
InternalError,
|
||||
_raise_from_no_key_in_msg,
|
||||
unpack_error,
|
||||
NoResult,
|
||||
|
@ -126,7 +128,7 @@ class Portal:
|
|||
def __init__(self, channel: Channel) -> None:
|
||||
self.chan = channel
|
||||
# during the portal's lifetime
|
||||
self._result_msg: Optional[dict] = None
|
||||
self._result_msg: dict|None = None
|
||||
|
||||
# When set to a ``Context`` (when _submit_for_result is called)
|
||||
# it is expected that ``result()`` will be awaited at some
|
||||
|
@ -171,7 +173,7 @@ class Portal:
|
|||
) -> dict[str, Any]:
|
||||
|
||||
assert ctx._remote_func_type == 'asyncfunc' # single response
|
||||
msg = await ctx._recv_chan.receive()
|
||||
msg: dict = await ctx._recv_chan.receive()
|
||||
return msg
|
||||
|
||||
async def result(self) -> Any:
|
||||
|
@ -255,11 +257,11 @@ class Portal:
|
|||
return False
|
||||
|
||||
reminfo: str = (
|
||||
f'uid: {self.channel.uid}\n'
|
||||
f'{self.channel.uid}\n'
|
||||
f' |_{chan}\n'
|
||||
)
|
||||
log.cancel(
|
||||
f'Sending actor cancel request to peer\n'
|
||||
f'Sending runtime `.cancel()` request to peer\n\n'
|
||||
f'{reminfo}'
|
||||
)
|
||||
|
||||
|
@ -281,7 +283,9 @@ class Portal:
|
|||
return True
|
||||
|
||||
if cs.cancelled_caught:
|
||||
log.cancel(
|
||||
# may timeout and we never get an ack (obvi racy)
|
||||
# but that doesn't mean it wasn't cancelled.
|
||||
log.debug(
|
||||
'May have failed to cancel peer?\n'
|
||||
f'{reminfo}'
|
||||
)
|
||||
|
@ -293,9 +297,11 @@ class Portal:
|
|||
trio.ClosedResourceError,
|
||||
trio.BrokenResourceError,
|
||||
):
|
||||
log.cancel(
|
||||
f"{self.channel} for {self.channel.uid} was already "
|
||||
"closed or broken?")
|
||||
log.debug(
|
||||
'IPC chan for actor already closed or broken?\n\n'
|
||||
f'{self.channel.uid}\n'
|
||||
f' |_{self.channel}\n'
|
||||
)
|
||||
return False
|
||||
|
||||
async def run_from_ns(
|
||||
|
@ -416,7 +422,8 @@ class Portal:
|
|||
try:
|
||||
# deliver receive only stream
|
||||
async with MsgStream(
|
||||
ctx, ctx._recv_chan,
|
||||
ctx=ctx,
|
||||
rx_chan=ctx._recv_chan,
|
||||
) as rchan:
|
||||
self._streams.add(rchan)
|
||||
yield rchan
|
||||
|
@ -443,6 +450,11 @@ class Portal:
|
|||
# await recv_chan.aclose()
|
||||
self._streams.remove(rchan)
|
||||
|
||||
# TODO: move this impl to `._context` mod and
|
||||
# instead just bind it here as a method so that the logic
|
||||
# for ctx stuff stays all in one place (instead of frickin
|
||||
# having to open this file in tandem every gd time!!! XD)
|
||||
#
|
||||
@asynccontextmanager
|
||||
async def open_context(
|
||||
|
||||
|
@ -451,6 +463,11 @@ class Portal:
|
|||
|
||||
allow_overruns: bool = False,
|
||||
|
||||
# TODO: if we set this the wrapping `@acm` body will
|
||||
# still be shown (awkwardly) on pdb REPL entry. Ideally
|
||||
# we can similarly annotate that frame to NOT show?
|
||||
hide_tb: bool = False,
|
||||
|
||||
# proxied to RPC
|
||||
**kwargs,
|
||||
|
||||
|
@ -484,6 +501,8 @@ class Portal:
|
|||
collection. See ``tractor.Context`` for more details.
|
||||
|
||||
'''
|
||||
__tracebackhide__: bool = hide_tb
|
||||
|
||||
# conduct target func method structural checks
|
||||
if not inspect.iscoroutinefunction(func) and (
|
||||
getattr(func, '_tractor_contex_function', False)
|
||||
|
@ -537,8 +556,11 @@ class Portal:
|
|||
# placeholder for any exception raised in the runtime
|
||||
# or by user tasks which cause this context's closure.
|
||||
scope_err: BaseException|None = None
|
||||
ctxc_from_callee: ContextCancelled|None = None
|
||||
try:
|
||||
async with trio.open_nursery() as nurse:
|
||||
|
||||
# NOTE: used to start overrun queuing tasks
|
||||
ctx._scope_nursery: trio.Nursery = nurse
|
||||
ctx._scope: trio.CancelScope = nurse.cancel_scope
|
||||
|
||||
|
@ -546,14 +568,26 @@ class Portal:
|
|||
# in enter tuple.
|
||||
yield ctx, first
|
||||
|
||||
# between the caller exiting and arriving here the
|
||||
# far end may have sent a ctxc-msg or other error,
|
||||
# so check for it here immediately and maybe raise
|
||||
# so as to engage the ctxc handling block below!
|
||||
# ??TODO??: do we still want to consider this or is
|
||||
# the `else:` block handling via a `.result()`
|
||||
# call below enough??
|
||||
# -[ ] pretty sure `.result()` internals do the
|
||||
# same as our ctxc handler below so it ended up
|
||||
# being same (repeated?) behaviour, but ideally we
|
||||
# wouldn't have that duplication either by somehow
|
||||
# factoring the `.result()` handler impl in a way
|
||||
# that we can re-use it around the `yield` ^ here
|
||||
# or vice versa?
|
||||
#
|
||||
# NOTE: between the caller exiting and arriving
|
||||
# here the far end may have sent a ctxc-msg or
|
||||
# other error, so check for it here immediately
|
||||
# and maybe raise so as to engage the ctxc
|
||||
# handling block below!
|
||||
#
|
||||
# if re := ctx._remote_error:
|
||||
# maybe_ctxc: ContextCancelled|None = ctx._maybe_raise_remote_err(
|
||||
# re,
|
||||
|
||||
# # TODO: do we want this to always raise?
|
||||
# # - means that on self-ctxc, if/when the
|
||||
# # block is exited before the msg arrives
|
||||
|
@ -571,7 +605,7 @@ class Portal:
|
|||
# # block?
|
||||
# raise_ctxc_from_self_call=True,
|
||||
# )
|
||||
# assert maybe_ctxc
|
||||
# ctxc_from_callee = maybe_ctxc
|
||||
|
||||
# when in allow_overruns mode there may be
|
||||
# lingering overflow sender tasks remaining?
|
||||
|
@ -583,13 +617,18 @@ class Portal:
|
|||
not ctx._allow_overruns
|
||||
or len(nurse.child_tasks) > 1
|
||||
):
|
||||
raise RuntimeError(
|
||||
raise InternalError(
|
||||
'Context has sub-tasks but is '
|
||||
'not in `allow_overruns=True` mode!?'
|
||||
)
|
||||
|
||||
# ensure cancel of all overflow sender tasks
|
||||
# started in the ctx nursery.
|
||||
# ensure we cancel all overflow sender
|
||||
# tasks started in the nursery when
|
||||
# `._allow_overruns == True`.
|
||||
#
|
||||
# NOTE: this means `._scope.cancelled_caught`
|
||||
# will prolly be set! not sure if that's
|
||||
# non-ideal or not ???
|
||||
ctx._scope.cancel()
|
||||
|
||||
# XXX NOTE XXX: maybe shield against
|
||||
|
@ -602,14 +641,15 @@ class Portal:
|
|||
# of a `Context`. In both cases any `ContextCancelled`
|
||||
# raised in this scope-block came from a transport msg
|
||||
# relayed from some remote-actor-task which our runtime set
|
||||
# as a `Context._remote_error`
|
||||
# as to `Context._remote_error`
|
||||
#
|
||||
# the CASES:
|
||||
#
|
||||
# - if that context IS THE SAME ONE that called
|
||||
# `Context.cancel()`, we want to absorb the error
|
||||
# silently and let this `.open_context()` block to exit
|
||||
# without raising.
|
||||
# without raising, ideally eventually receiving the ctxc
|
||||
# ack msg thus resulting in `ctx.cancel_acked == True`.
|
||||
#
|
||||
# - if it is from some OTHER context (we did NOT call
|
||||
# `.cancel()`), we want to re-RAISE IT whilst also
|
||||
|
@ -633,6 +673,7 @@ class Portal:
|
|||
# `Nursery.cancel_scope.cancel()`)
|
||||
except ContextCancelled as ctxc:
|
||||
scope_err = ctxc
|
||||
ctxc_from_callee = ctxc
|
||||
|
||||
# XXX TODO XXX: FIX THIS debug_mode BUGGGG!!!
|
||||
# using this code and then resuming the REPL will
|
||||
|
@ -642,6 +683,7 @@ class Portal:
|
|||
# documenting it as a definittive example of
|
||||
# debugging the tractor-runtime itself using it's
|
||||
# own `.devx.` tooling!
|
||||
#
|
||||
# await pause()
|
||||
|
||||
# CASE 2: context was cancelled by local task calling
|
||||
|
@ -649,15 +691,10 @@ class Portal:
|
|||
# exit silently.
|
||||
if (
|
||||
ctx._cancel_called
|
||||
and (
|
||||
and
|
||||
ctxc is ctx._remote_error
|
||||
# ctxc.msgdata == ctx._remote_error.msgdata
|
||||
|
||||
# TODO: uhh `Portal.canceller` ain't a thangg
|
||||
# dawg? (was `self.canceller` before?!?)
|
||||
and
|
||||
ctxc.canceller == self.actor.uid
|
||||
)
|
||||
):
|
||||
log.cancel(
|
||||
f'Context (cid=[{ctx.cid[-6:]}..] cancelled gracefully with:\n'
|
||||
|
@ -665,9 +702,9 @@ class Portal:
|
|||
)
|
||||
# CASE 1: this context was never cancelled via a local
|
||||
# task (tree) having called `Context.cancel()`, raise
|
||||
# the error since it was caused by someone else!
|
||||
# the error since it was caused by someone else
|
||||
# -> probably a remote peer!
|
||||
else:
|
||||
# await pause()
|
||||
raise
|
||||
|
||||
# the above `._scope` can be cancelled due to:
|
||||
|
@ -680,19 +717,29 @@ class Portal:
|
|||
# CASE 3: standard local error in this caller/yieldee
|
||||
Exception,
|
||||
|
||||
# CASES 1 & 2: normally manifested as
|
||||
# a `Context._scope_nursery` raised
|
||||
# CASES 1 & 2: can manifest as a `ctx._scope_nursery`
|
||||
# exception-group of,
|
||||
#
|
||||
# 1.-`trio.Cancelled`s, since
|
||||
# `._scope.cancel()` will have been called and any
|
||||
# `ContextCancelled` absorbed and thus NOT RAISED in
|
||||
# any `Context._maybe_raise_remote_err()`,
|
||||
# `._scope.cancel()` will have been called
|
||||
# (transitively by the runtime calling
|
||||
# `._deliver_msg()`) and any `ContextCancelled`
|
||||
# eventually absorbed and thus absorbed/supressed in
|
||||
# any `Context._maybe_raise_remote_err()` call.
|
||||
#
|
||||
# 2.-`BaseExceptionGroup[ContextCancelled | RemoteActorError]`
|
||||
# from any error raised in the "callee" side with
|
||||
# a group only raised if there was any more then one
|
||||
# task started here in the "caller" in the
|
||||
# `yield`-ed to task.
|
||||
BaseExceptionGroup, # since overrun handler tasks may have been spawned
|
||||
# from any error delivered from the "callee" side
|
||||
# AND a group-exc is only raised if there was > 1
|
||||
# tasks started *here* in the "caller" / opener
|
||||
# block. If any one of those tasks calls
|
||||
# `.result()` or `MsgStream.receive()`
|
||||
# `._maybe_raise_remote_err()` will be transitively
|
||||
# called and the remote error raised causing all
|
||||
# tasks to be cancelled.
|
||||
# NOTE: ^ this case always can happen if any
|
||||
# overrun handler tasks were spawned!
|
||||
BaseExceptionGroup,
|
||||
|
||||
trio.Cancelled, # NOTE: NOT from inside the ctx._scope
|
||||
KeyboardInterrupt,
|
||||
|
||||
|
@ -702,32 +749,38 @@ class Portal:
|
|||
# XXX: ALWAYS request the context to CANCEL ON any ERROR.
|
||||
# NOTE: `Context.cancel()` is conversely NEVER CALLED in
|
||||
# the `ContextCancelled` "self cancellation absorbed" case
|
||||
# handled in the block above!
|
||||
# handled in the block above ^^^ !!
|
||||
log.cancel(
|
||||
'Context cancelled for task due to\n'
|
||||
'Context terminated due to\n\n'
|
||||
f'{caller_err}\n'
|
||||
'Sending cancel request..\n'
|
||||
f'task:{cid}\n'
|
||||
f'actor:{uid}'
|
||||
)
|
||||
|
||||
if debug_mode():
|
||||
log.pdb(
|
||||
'Delaying `ctx.cancel()` until debug lock '
|
||||
'acquired..'
|
||||
)
|
||||
# async with acquire_debug_lock(self.actor.uid):
|
||||
# pass
|
||||
# TODO: factor ^ into below for non-root cases?
|
||||
await maybe_wait_for_debugger()
|
||||
was_acquired: bool = await maybe_wait_for_debugger(
|
||||
header_msg=(
|
||||
'Delaying `ctx.cancel()` until debug lock '
|
||||
'acquired..\n'
|
||||
),
|
||||
)
|
||||
if was_acquired:
|
||||
log.pdb(
|
||||
'Acquired debug lock! '
|
||||
'Calling `ctx.cancel()`!'
|
||||
'Calling `ctx.cancel()`!\n'
|
||||
)
|
||||
|
||||
|
||||
# we don't need to cancel the callee if it already
|
||||
# told us it's cancelled ;p
|
||||
if ctxc_from_callee is None:
|
||||
try:
|
||||
await ctx.cancel()
|
||||
except trio.BrokenResourceError:
|
||||
except (
|
||||
trio.BrokenResourceError,
|
||||
trio.ClosedResourceError,
|
||||
):
|
||||
log.warning(
|
||||
'IPC connection for context is broken?\n'
|
||||
f'task:{cid}\n'
|
||||
|
@ -738,33 +791,6 @@ class Portal:
|
|||
|
||||
# no local scope error, the "clean exit with a result" case.
|
||||
else:
|
||||
# between the caller exiting and arriving here the
|
||||
# far end may have sent a ctxc-msg or other error,
|
||||
# so check for it here immediately and maybe raise
|
||||
# so as to engage the ctxc handling block below!
|
||||
# if re := ctx._remote_error:
|
||||
# maybe_ctxc: ContextCancelled|None = ctx._maybe_raise_remote_err(
|
||||
# re,
|
||||
|
||||
# # TODO: do we want this to always raise?
|
||||
# # - means that on self-ctxc, if/when the
|
||||
# # block is exited before the msg arrives
|
||||
# # but then the msg during __exit__
|
||||
# # calling we may not activate the
|
||||
# # ctxc-handler block below? should we
|
||||
# # be?
|
||||
# # - if there's a remote error that arrives
|
||||
# # after the child has exited, we won't
|
||||
# # handle until the `finally:` block
|
||||
# # where `.result()` is always called,
|
||||
# # again in which case we handle it
|
||||
# # differently then in the handler block
|
||||
# # that would normally engage from THIS
|
||||
# # block?
|
||||
# raise_ctxc_from_self_call=True,
|
||||
# )
|
||||
# assert maybe_ctxc
|
||||
|
||||
if ctx.chan.connected():
|
||||
log.runtime(
|
||||
'Waiting on final context result for\n'
|
||||
|
@ -794,16 +820,18 @@ class Portal:
|
|||
scope_err = berr
|
||||
raise
|
||||
|
||||
# yes! this worx Bp
|
||||
# from .devx import _debug
|
||||
# await _debug.pause()
|
||||
|
||||
# an exception type boxed in a `RemoteActorError`
|
||||
# is returned (meaning it was obvi not raised).
|
||||
# is returned (meaning it was obvi not raised)
|
||||
# that we want to log-report on.
|
||||
msgdata: str|None = getattr(
|
||||
result_or_err,
|
||||
'msgdata',
|
||||
None
|
||||
)
|
||||
# yes! this worx Bp
|
||||
# from .devx import _debug
|
||||
# await _debug.pause()
|
||||
match (msgdata, result_or_err):
|
||||
case (
|
||||
{'tb_str': tbstr},
|
||||
|
@ -846,9 +874,8 @@ class Portal:
|
|||
# operating *in* this scope to have survived
|
||||
# we tear down the runtime feeder chan last
|
||||
# to avoid premature stream clobbers.
|
||||
rxchan: trio.ReceiveChannel = ctx._recv_chan
|
||||
if (
|
||||
rxchan
|
||||
(rxchan := ctx._recv_chan)
|
||||
|
||||
# maybe TODO: yes i know the below check is
|
||||
# touching `trio` memchan internals..BUT, there are
|
||||
|
@ -904,22 +931,27 @@ class Portal:
|
|||
etype: Type[BaseException] = type(scope_err)
|
||||
|
||||
# CASE 2
|
||||
if ctx._cancel_called:
|
||||
if (
|
||||
ctx._cancel_called
|
||||
and ctx.cancel_acked
|
||||
):
|
||||
log.cancel(
|
||||
'Context cancelled by caller task\n'
|
||||
f'|_{ctx._task}\n\n'
|
||||
|
||||
f'{etype}'
|
||||
f'{repr(scope_err)}\n'
|
||||
)
|
||||
|
||||
# TODO: should we add a `._cancel_req_received`
|
||||
# flag to determine if the callee manually called
|
||||
# `ctx.cancel()`?
|
||||
# -[ ] going to need a cid check no?
|
||||
|
||||
# CASE 1
|
||||
else:
|
||||
log.cancel(
|
||||
f'Context cancelled by remote callee task\n'
|
||||
f'peer: {uid}\n'
|
||||
f'|_ {nsf}()\n\n'
|
||||
|
||||
f'{etype}\n'
|
||||
f'Context terminated due to local scope error:\n'
|
||||
f'{etype.__name__}\n'
|
||||
)
|
||||
|
||||
# FINALLY, remove the context from runtime tracking and
|
||||
|
@ -967,7 +999,7 @@ class LocalPortal:
|
|||
async def open_portal(
|
||||
|
||||
channel: Channel,
|
||||
nursery: Optional[trio.Nursery] = None,
|
||||
nursery: trio.Nursery|None = None,
|
||||
start_msg_loop: bool = True,
|
||||
shield: bool = False,
|
||||
|
||||
|
@ -992,7 +1024,7 @@ async def open_portal(
|
|||
if channel.uid is None:
|
||||
await actor._do_handshake(channel)
|
||||
|
||||
msg_loop_cs: Optional[trio.CancelScope] = None
|
||||
msg_loop_cs: trio.CancelScope|None = None
|
||||
if start_msg_loop:
|
||||
from ._runtime import process_messages
|
||||
msg_loop_cs = await nursery.start(
|
||||
|
|
Loading…
Reference in New Issue