Use `._entry` proto-ed "lifetime ops" in logging

As per a WIP scribbled out TODO in `._entry.nest_from_op()`, change
a bunch of "supervisor/lifetime mgmt ops" related log messages to
contain some supervisor-annotation "headers" in an effort to give
a terser "visual indication" of how some execution/scope/storage
primitive entity (like an actor/task/ctx/connection) is being operated
on (like, opening/started/closed/cancelled/erroring) from a "supervisor
action" POV.

Also tweak a bunch more emissions to lower levels to reduce noise around
normal inter-actor operations like process and IPC ctx supervision.
multihost_exs
Tyler Goodlet 2024-07-02 16:31:58 -04:00
parent 02812b9f51
commit b46400a86f
4 changed files with 62 additions and 33 deletions

View File

@ -933,11 +933,12 @@ class Context:
self.cancel_called = True self.cancel_called = True
header: str = ( header: str = (
f'Cancelling ctx with peer from {side.upper()} side\n\n' f'Cancelling ctx from {side.upper()}-side\n'
) )
reminfo: str = ( reminfo: str = (
# ' =>\n' # ' =>\n'
f'Context.cancel() => {self.chan.uid}\n' # f'Context.cancel() => {self.chan.uid}\n'
f'c)=> {self.chan.uid}\n'
# f'{self.chan.uid}\n' # f'{self.chan.uid}\n'
f' |_ @{self.dst_maddr}\n' f' |_ @{self.dst_maddr}\n'
f' >> {self.repr_rpc}\n' f' >> {self.repr_rpc}\n'
@ -1267,6 +1268,12 @@ class Context:
@property @property
def maybe_error(self) -> BaseException|None: def maybe_error(self) -> BaseException|None:
'''
Return the (remote) error as outcome or `None`.
Remote errors take precedence over local ones.
'''
le: BaseException|None = self._local_error le: BaseException|None = self._local_error
re: RemoteActorError|ContextCancelled|None = self._remote_error re: RemoteActorError|ContextCancelled|None = self._remote_error
@ -2182,9 +2189,16 @@ async def open_context_from_portal(
# handled in the block above ^^^ !! # handled in the block above ^^^ !!
# await _debug.pause() # await _debug.pause()
# log.cancel( # log.cancel(
log.exception( match scope_err:
f'{ctx.side}-side of `Context` terminated with ' case trio.Cancelled:
f'.outcome => {ctx.repr_outcome()}\n' logmeth = log.cancel
# XXX explicitly report on any non-graceful-taskc cases
case _:
logmeth = log.exception
logmeth(
f'ctx {ctx.side!r}-side exited with {ctx.repr_outcome()}\n'
) )
if debug_mode(): if debug_mode():

View File

@ -265,7 +265,7 @@ def _trio_main(
except BaseException as err: except BaseException as err:
logmeth = log.error logmeth = log.error
exit_status: str = ( exit_status: str = (
'Main actor task crashed during exit?\n' 'Main actor task exited due to crash?\n'
+ +
nest_from_op( nest_from_op(
input_op='x)>', # closed by error input_op='x)>', # closed by error

View File

@ -66,10 +66,11 @@ from trio import (
) )
from tractor.msg import ( from tractor.msg import (
pretty_struct,
NamespacePath,
types as msgtypes,
MsgType, MsgType,
NamespacePath,
Stop,
pretty_struct,
types as msgtypes,
) )
from ._ipc import Channel from ._ipc import Channel
from ._context import ( from ._context import (
@ -545,7 +546,8 @@ class Actor:
): ):
log.cancel( log.cancel(
'Waiting on cancel request to peer\n' 'Waiting on cancel request to peer\n'
f'`Portal.cancel_actor()` => {chan.uid}\n' f'c)=>\n'
f' |_{chan.uid}\n'
) )
# XXX: this is a soft wait on the channel (and its # XXX: this is a soft wait on the channel (and its
@ -642,12 +644,14 @@ class Actor:
# and # and
an_exit_cs.cancelled_caught an_exit_cs.cancelled_caught
): ):
log.warning( report: str = (
'Timed out waiting on local actor-nursery to exit?\n' 'Timed out waiting on local actor-nursery to exit?\n'
f'{local_nursery}\n' f'{local_nursery}\n'
f' |_{pformat(local_nursery._children)}\n'
) )
# await _debug.pause() if children := local_nursery._children:
report += f' |_{pformat(children)}\n'
log.warning(report)
if disconnected: if disconnected:
# if the transport died and this actor is still # if the transport died and this actor is still
@ -819,14 +823,17 @@ class Actor:
# side, # side,
)] )]
except KeyError: except KeyError:
log.warning( report: str = (
'Ignoring invalid IPC ctx msg!\n\n' 'Ignoring invalid IPC ctx msg!\n\n'
f'<= sender: {uid}\n\n' f'<=? {uid}\n\n'
# XXX don't need right since it's always in msg? f' |_{pretty_struct.pformat(msg)}\n'
# f'=> cid: {cid}\n\n'
f'{pretty_struct.pformat(msg)}\n'
) )
match msg:
case Stop():
log.runtime(report)
case _:
log.warning(report)
return return
# if isinstance(msg, MsgTypeError): # if isinstance(msg, MsgTypeError):
@ -1338,10 +1345,11 @@ class Actor:
return True return True
log.cancel( log.cancel(
'Cancel request for RPC task\n\n' 'Rxed cancel request for RPC task\n'
f'<= Actor._cancel_task(): {requesting_uid}\n\n' f'<=c) {requesting_uid}\n'
f'=> {ctx._task}\n' f' |_{ctx._task}\n'
f' |_ >> {ctx.repr_rpc}\n' f' >> {ctx.repr_rpc}\n'
# f'=> {ctx._task}\n'
# f' >> Actor._cancel_task() => {ctx._task}\n' # f' >> Actor._cancel_task() => {ctx._task}\n'
# f' |_ {ctx._task}\n\n' # f' |_ {ctx._task}\n\n'

View File

@ -246,7 +246,8 @@ async def hard_kill(
''' '''
log.cancel( log.cancel(
'Terminating sub-proc:\n' 'Terminating sub-proc\n'
f'>x)\n'
f' |_{proc}\n' f' |_{proc}\n'
) )
# NOTE: this timeout used to do nothing since we were shielding # NOTE: this timeout used to do nothing since we were shielding
@ -293,7 +294,7 @@ async def hard_kill(
log.critical( log.critical(
# 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n' # 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n'
'#T-800 deployed to collect zombie B0\n' '#T-800 deployed to collect zombie B0\n'
f'|\n' f'>x)\n'
f' |_{proc}\n' f' |_{proc}\n'
) )
proc.kill() proc.kill()
@ -322,7 +323,8 @@ async def soft_kill(
uid: tuple[str, str] = portal.channel.uid uid: tuple[str, str] = portal.channel.uid
try: try:
log.cancel( log.cancel(
'Soft killing sub-actor via `Portal.cancel_actor()`\n' 'Soft killing sub-actor via portal request\n'
f'c)> {portal.chan.uid}\n'
f' |_{proc}\n' f' |_{proc}\n'
) )
# wait on sub-proc to signal termination # wait on sub-proc to signal termination
@ -552,8 +554,9 @@ async def trio_proc(
# cancel result waiter that may have been spawned in # cancel result waiter that may have been spawned in
# tandem if not done already # tandem if not done already
log.cancel( log.cancel(
'Cancelling existing result waiter task for ' 'Cancelling portal result reaper task\n'
f'{subactor.uid}' f'>c)\n'
f' |_{subactor.uid}\n'
) )
nursery.cancel_scope.cancel() nursery.cancel_scope.cancel()
@ -562,7 +565,11 @@ async def trio_proc(
# allowed! Do this **after** cancellation/teardown to avoid # allowed! Do this **after** cancellation/teardown to avoid
# killing the process too early. # killing the process too early.
if proc: if proc:
log.cancel(f'Hard reap sequence starting for {subactor.uid}') log.cancel(
f'Hard reap sequence starting for subactor\n'
f'>x)\n'
f' |_{subactor}@{subactor.uid}\n'
)
with trio.CancelScope(shield=True): with trio.CancelScope(shield=True):
# don't clobber an ongoing pdb # don't clobber an ongoing pdb