From 9be821a5cfa4bf4bb7edd44ec1a14da35fd15bd5 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Jul 2024 17:06:50 -0400
Subject: [PATCH] More failed REPL-lock-request refinements

In `lock_stdio_for_peer()`, better internal-error handling/reporting:
- only `Lock._blocked.remove(ctx.cid)` if that same cid was added on
  entry, to avoid needless key-errors.
- drop all `Lock.release(force: bool)` usage remnants.
- if `req_ctx.cancel()` fails, mention it with `ctx_err.add_note()`.
- add more explicit internal-failed-request log messaging via a new
  `fail_reason: str`.
- use the new `x)<=\n|_` annots in any failure logging.

Other cleanups/niceties:
- drop the `force: bool` flag entirely from `Lock.release()`.
- use more supervisor-op annots in `.pdb()` logging with both
  `_pause/crash_msg: str` instead of double '|' lines when
  `.pdb()`-reported from `._set_trace()`/`._post_mortem()`.
---
 tractor/devx/_debug.py | 96 +++++++++++++++++++++++-------------------
 1 file changed, 53 insertions(+), 43 deletions(-)

diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 1135932..113371d 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -299,7 +299,6 @@ class Lock:
     @pdbp.hideframe
     def release(
         cls,
-        force: bool = False,
         raise_on_thread: bool = True,

     ) -> bool:
@@ -347,12 +346,9 @@ class Lock:
         lock: trio.StrictFIFOLock = cls._debug_lock
         owner: Task = lock.statistics().owner
         if (
-            (lock.locked() or force)
-            # ^-TODO-NOTE-^ should we just remove this, since the
-            # RTE case above will always happen when you force
-            # from the wrong task?
-
-            and (owner is task)
+            lock.locked()
+            and
+            (owner is task)
             # ^-NOTE-^ if we do NOT ensure this, `trio` will
             # raise a RTE when a non-owner tries to releasee the
             # lock.
@@ -553,6 +549,7 @@ async def lock_stdio_for_peer(
     # can try to avoid clobbering any connection from a child
     # that's currently relying on it.
     we_finished = Lock.req_handler_finished = trio.Event()
+    lock_blocked: bool = False
     try:
         if ctx.cid in Lock._blocked:
             raise RuntimeError(
                 'Consider that an internal bug exists given the TTY '
                 '`Lock`ing IPC dialog..\n'
             )
-
+        Lock._blocked.add(ctx.cid)
+        lock_blocked = True
         root_task_name: str = current_task().name
         if tuple(subactor_uid) in Lock._blocked:
             log.warning(
@@ -575,7 +573,11 @@ async def lock_stdio_for_peer(
             )
             ctx._enter_debugger_on_cancel: bool = False
             message: str = (
-                f'Debug lock blocked for {subactor_uid}\n'
+                f'Debug lock blocked for subactor\n\n'
+                f'x)<= {subactor_uid}\n\n'
+
+                f'Likely because the root actor already started shutdown and is '
+                'closing IPC connections for this child!\n\n'
                 'Cancelling debug request!\n'
             )
             log.cancel(message)
@@ -589,7 +591,6 @@ async def lock_stdio_for_peer(
             f'remote task: {subactor_task_uid}\n'
         )
     DebugStatus.shield_sigint()
-    Lock._blocked.add(ctx.cid)

     # NOTE: we use the IPC ctx's cancel scope directly in order to
     # ensure that on any transport failure, or cancellation request
@@ -648,31 +649,34 @@ async def lock_stdio_for_peer(
        )

    except BaseException as req_err:
-        fail_reason: str = (
-            f'On behalf of remote peer {subactor_task_uid!r}@{ctx.chan.uid!r}\n\n'
-            'Forcing `Lock.release()` for req-ctx since likely an '
-            'internal error!\n\n'
-            f'{ctx}'
+        fail_reason: str = (
+            f'on behalf of peer\n\n'
+            f'x)<=\n'
+            f' |_{subactor_task_uid!r}@{ctx.chan.uid!r}\n\n'
+
+            'Forcing `Lock.release()` due to acquire failure!\n\n'
+            f'x)=> {ctx}\n'
        )
        if isinstance(req_err, trio.Cancelled):
-            message = (
-                'Cancelled during root TTY-lock dialog\n'
+            fail_reason = (
+                'Cancelled during stdio-mutex request '
                +
-                message
+                fail_reason
            )
        else:
-            message = (
-                'Errored during root TTY-lock dialog\n'
+            fail_reason = (
+                'Failed to deliver stdio-mutex request '
                +
-                message
+                fail_reason
            )

-        log.exception(message)
-        Lock.release() #force=True)
+        log.exception(fail_reason)
+        Lock.release()
        raise

    finally:
-        Lock._blocked.remove(ctx.cid)
+        if lock_blocked:
+            Lock._blocked.remove(ctx.cid)

        # wakeup any waiters since the lock was (presumably)
        # released, possibly only temporarily.
@@ -1167,7 +1171,7 @@ async def request_root_stdio_lock(
    ):
        log.cancel(
            'Debug lock request was CANCELLED?\n\n'
-            f'{req_ctx}\n'
+            f'<=c) {req_ctx}\n'
            # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n'
            # f'{pformat_cs(req_ctx._scope, var_name="req_ctx._scope")}\n\n'
        )
@@ -1179,22 +1183,26 @@ async def request_root_stdio_lock(
        message: str = (
            'Failed during debug request dialog with root actor?\n\n'
        )
-
-        if req_ctx:
+        if (req_ctx := DebugStatus.req_ctx):
            message += (
-                f'{req_ctx}\n'
+                f'<=x) {req_ctx}\n\n'
                f'Cancelling IPC ctx!\n'
            )
-            await req_ctx.cancel()
+            try:
+                await req_ctx.cancel()
+            except trio.ClosedResourceError as terr:
+                ctx_err.add_note(
+                    # f'Failed with {type(terr)!r} x)> `req_ctx.cancel()` '
+                    f'Failed with `req_ctx.cancel()` (\n'
+                    f' |_ {task} @ {actor.uid}\n'
+
            # ^-TODO-^ more compact pformating?
            # -[ ] make an `Actor.__repr()__`
            # -[ ] should we use `log.pformat_task_uid()`?
-            f'|_ {task} @ {actor.uid}\n'
        )
    # presuming the caller passed in the "api frame"
    # (the last frame before user code - like `.pause()`)
@@ -2541,9 +2551,9 @@ def _post_mortem(
    # here! Bo
    log.pdb(
        f'{_crash_msg}\n'
-        '|\n'
-        # f'|_ {current_task()}\n'
-        f'|_ {current_task()} @ {actor.uid}\n'
+        # '|\n'
+        f'x>(\n'
+        f' |_ {current_task()} @ {actor.uid}\n'
        # f'|_ @{actor.uid}\n'

        # TODO: make an `Actor.__repr()__`
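
Editor's note: the core idea in the `lock_stdio_for_peer()` change above is to only undo side effects that this call actually performed. Below is a minimal standalone sketch of that guarded-cleanup pattern; the `blocked` set, `cid` argument, and `serve_request()` function are illustrative placeholders (not tractor APIs), playing the roles of `Lock._blocked`, `ctx.cid`, and the request handler.

# Hypothetical sketch of the register-then-guarded-unregister pattern;
# names are placeholders, not part of `tractor`.
blocked: set[str] = set()

def serve_request(cid: str) -> None:
    added: bool = False  # plays the role of `lock_blocked` in the patch
    try:
        if cid in blocked:
            raise RuntimeError(f'Duplicate lock request for {cid!r}')

        blocked.add(cid)
        added = True
        # ... do the actual lock-granting work here ...

    finally:
        # only remove the cid if *this* call added it; otherwise an error
        # raised before registration would trigger a needless KeyError.
        if added:
            blocked.remove(cid)

With this guard, the duplicate-request error path leaves the original registration untouched instead of failing a second time during cleanup.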