Use `._entry` proto-ed "lifetime ops" in logging

As per a WIP scribbled out TODO in `._entry.nest_from_op()`, change
a bunch of "supervisor/lifetime mgmt ops" related log messages to
contain some supervisor-annotation "headers" in an effort to give
a terser "visual indication" of how some execution/scope/storage
primitive entity (like an actor/task/ctx/connection) is being operated
on (like, opening/started/closed/cancelled/erroring) from a "supervisor
action" POV.

Also tweak a bunch more emissions to lower levels to reduce noise around
normal inter-actor operations like process and IPC ctx supervision.
multihost_exs
Tyler Goodlet 2024-07-02 16:31:58 -04:00
parent 02812b9f51
commit b46400a86f
4 changed files with 62 additions and 33 deletions

View File

@ -933,11 +933,12 @@ class Context:
self.cancel_called = True
header: str = (
f'Cancelling ctx with peer from {side.upper()} side\n\n'
f'Cancelling ctx from {side.upper()}-side\n'
)
reminfo: str = (
# ' =>\n'
f'Context.cancel() => {self.chan.uid}\n'
# f'Context.cancel() => {self.chan.uid}\n'
f'c)=> {self.chan.uid}\n'
# f'{self.chan.uid}\n'
f' |_ @{self.dst_maddr}\n'
f' >> {self.repr_rpc}\n'
@ -1267,6 +1268,12 @@ class Context:
@property
def maybe_error(self) -> BaseException|None:
'''
Return the (remote) error as outcome or `None`.
Remote errors take precedence over local ones.
'''
le: BaseException|None = self._local_error
re: RemoteActorError|ContextCancelled|None = self._remote_error
@ -2182,9 +2189,16 @@ async def open_context_from_portal(
# handled in the block above ^^^ !!
# await _debug.pause()
# log.cancel(
log.exception(
f'{ctx.side}-side of `Context` terminated with '
f'.outcome => {ctx.repr_outcome()}\n'
match scope_err:
case trio.Cancelled:
logmeth = log.cancel
# XXX explicitly report on any non-graceful-taskc cases
case _:
logmeth = log.exception
logmeth(
f'ctx {ctx.side!r}-side exited with {ctx.repr_outcome()}\n'
)
if debug_mode():

View File

@ -265,7 +265,7 @@ def _trio_main(
except BaseException as err:
logmeth = log.error
exit_status: str = (
'Main actor task crashed during exit?\n'
'Main actor task exited due to crash?\n'
+
nest_from_op(
input_op='x)>', # closed by error

View File

@ -66,10 +66,11 @@ from trio import (
)
from tractor.msg import (
pretty_struct,
NamespacePath,
types as msgtypes,
MsgType,
NamespacePath,
Stop,
pretty_struct,
types as msgtypes,
)
from ._ipc import Channel
from ._context import (
@ -545,7 +546,8 @@ class Actor:
):
log.cancel(
'Waiting on cancel request to peer\n'
f'`Portal.cancel_actor()` => {chan.uid}\n'
f'c)=>\n'
f' |_{chan.uid}\n'
)
# XXX: this is a soft wait on the channel (and its
@ -642,12 +644,14 @@ class Actor:
# and
an_exit_cs.cancelled_caught
):
log.warning(
report: str = (
'Timed out waiting on local actor-nursery to exit?\n'
f'{local_nursery}\n'
f' |_{pformat(local_nursery._children)}\n'
)
# await _debug.pause()
if children := local_nursery._children:
report += f' |_{pformat(children)}\n'
log.warning(report)
if disconnected:
# if the transport died and this actor is still
@ -819,14 +823,17 @@ class Actor:
# side,
)]
except KeyError:
log.warning(
report: str = (
'Ignoring invalid IPC ctx msg!\n\n'
f'<= sender: {uid}\n\n'
# XXX don't need right since it's always in msg?
# f'=> cid: {cid}\n\n'
f'{pretty_struct.pformat(msg)}\n'
f'<=? {uid}\n\n'
f' |_{pretty_struct.pformat(msg)}\n'
)
match msg:
case Stop():
log.runtime(report)
case _:
log.warning(report)
return
# if isinstance(msg, MsgTypeError):
@ -1338,10 +1345,11 @@ class Actor:
return True
log.cancel(
'Cancel request for RPC task\n\n'
f'<= Actor._cancel_task(): {requesting_uid}\n\n'
f'=> {ctx._task}\n'
f' |_ >> {ctx.repr_rpc}\n'
'Rxed cancel request for RPC task\n'
f'<=c) {requesting_uid}\n'
f' |_{ctx._task}\n'
f' >> {ctx.repr_rpc}\n'
# f'=> {ctx._task}\n'
# f' >> Actor._cancel_task() => {ctx._task}\n'
# f' |_ {ctx._task}\n\n'

View File

@ -246,7 +246,8 @@ async def hard_kill(
'''
log.cancel(
'Terminating sub-proc:\n'
'Terminating sub-proc\n'
f'>x)\n'
f' |_{proc}\n'
)
# NOTE: this timeout used to do nothing since we were shielding
@ -293,7 +294,7 @@ async def hard_kill(
log.critical(
# 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n'
'#T-800 deployed to collect zombie B0\n'
f'|\n'
f'>x)\n'
f' |_{proc}\n'
)
proc.kill()
@ -322,7 +323,8 @@ async def soft_kill(
uid: tuple[str, str] = portal.channel.uid
try:
log.cancel(
'Soft killing sub-actor via `Portal.cancel_actor()`\n'
'Soft killing sub-actor via portal request\n'
f'c)> {portal.chan.uid}\n'
f' |_{proc}\n'
)
# wait on sub-proc to signal termination
@ -552,8 +554,9 @@ async def trio_proc(
# cancel result waiter that may have been spawned in
# tandem if not done already
log.cancel(
'Cancelling existing result waiter task for '
f'{subactor.uid}'
'Cancelling portal result reaper task\n'
f'>c)\n'
f' |_{subactor.uid}\n'
)
nursery.cancel_scope.cancel()
@ -562,7 +565,11 @@ async def trio_proc(
# allowed! Do this **after** cancellation/teardown to avoid
# killing the process too early.
if proc:
log.cancel(f'Hard reap sequence starting for {subactor.uid}')
log.cancel(
f'Hard reap sequence starting for subactor\n'
f'>x)\n'
f' |_{subactor}@{subactor.uid}\n'
)
with trio.CancelScope(shield=True):
# don't clobber an ongoing pdb