More refinements to failed-REPL-lock-request handling

In `lock_stdio_for_peer()` better internal-error handling/reporting:
- only `Lock._blocked.remove(ctx.cid)` if that same cid was added on
  entry, to avoid needless `KeyError`s (see the sketch after this
  list).
- drop all `Lock.release(force: bool)` usage remnants.
- if `req_ctx.cancel()` fails, mention it via `ctx_err.add_note()`.
- add more explicit internal-failed-request log messaging via a new
  `fail_reason: str`.
- use the new `x)<=\n|_` annots in any failure logging.
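
For context, the guarded add/remove and `fail_reason` bits boil down to
the following standalone sketch; `Lock`, `ctx` and `log` here are
simplified stand-ins for tractor's internals, not the real API:

import logging

log = logging.getLogger('sketch')

class Lock:
    # set of conn-ids (cids) with an in-flight lock request
    _blocked: set[str] = set()

    @classmethod
    def release(cls) -> None:  # NB: no more `force: bool` param
        ...

async def lock_stdio_for_peer(ctx) -> None:
    lock_blocked: bool = False
    try:
        if ctx.cid in Lock._blocked:
            raise RuntimeError('Dupe stdio-lock request!')

        Lock._blocked.add(ctx.cid)
        lock_blocked = True  # only set once the cid was really added
        ...  # acquire the TTY lock + run the REPL dialog for the peer

    except BaseException:
        # new, more explicit internal-failure messaging
        fail_reason: str = (
            'on behalf of peer\n\n'
            'x)<=\n'
            f' |_{ctx.cid!r}\n'
        )
        log.exception(fail_reason)
        Lock.release()
        raise

    finally:
        # only discard the cid *we* added, avoiding a needless
        # `KeyError` when entry failed before the `.add()`.
        if lock_blocked:
            Lock._blocked.remove(ctx.cid)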

Other cleanups/niceties:
- drop the `force: bool` flag entirely from `Lock.release()`.
- use more supervisor-op annots in `.pdb()` logging, with both
  `_pause_msg`/`_crash_msg: str`, instead of the double `'|'` lines
  when reported from `._set_trace()`/`._post_mortem()` (see the
  sketch after this list).
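
Illustrative only: roughly how the new annot-style `.pdb()` messages
render. The `_pause_msg`/`_crash_msg` values below are placeholders,
not tractor's actual strings; the real formatting is in the
`._set_trace()`/`._post_mortem()` hunks further down:

# placeholder msg values, NOT tractor's actual strings
_pause_msg: str = 'Attaching to pdb REPL in actor'
_crash_msg: str = 'Opening a pdb REPL in crashed actor'

def render_pause(task: str, uid: tuple[str, str]) -> str:
    # `>(` pause-op annot replaces the old bare '|' line
    return (
        f'{_pause_msg}\n'
        f'>(\n'
        f' |_ {task} @ {uid}\n'
    )

def render_crash(task: str, uid: tuple[str, str]) -> str:
    # crashes get the `x>(` variant
    return (
        f'{_crash_msg}\n'
        f'x>(\n'
        f' |_ {task} @ {uid}\n'
    )

print(render_pause('<Task pause_point>', ('child', 'some-uuid')))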
branch: aio_abandons
Tyler Goodlet 2024-07-02 17:06:50 -04:00
parent b46400a86f
commit 9be821a5cf
1 changed file with 53 additions and 43 deletions

@@ -299,7 +299,6 @@ class Lock:
     @pdbp.hideframe
     def release(
         cls,
-        force: bool = False,
         raise_on_thread: bool = True,
     ) -> bool:
@@ -347,12 +346,9 @@
         lock: trio.StrictFIFOLock = cls._debug_lock
         owner: Task = lock.statistics().owner
         if (
-            (lock.locked() or force)
-            # ^-TODO-NOTE-^ should we just remove this, since the
-            # RTE case above will always happen when you force
-            # from the wrong task?
-            and (owner is task)
+            lock.locked()
+            and
+            (owner is task)
             # ^-NOTE-^ if we do NOT ensure this, `trio` will
             # raise a RTE when a non-owner tries to releasee the
             # lock.
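
As a quick sanity check on the `^-NOTE-^` above (not part of this
commit): a minimal standalone `trio` program showing the
`RuntimeError` raised on a non-owner `.release()`:

import trio

async def main() -> None:
    lock = trio.StrictFIFOLock()

    async def holder(task_status=trio.TASK_STATUS_IGNORED):
        async with lock:  # acquire and hold the lock
            task_status.started()
            await trio.sleep(0.1)

    async with trio.open_nursery() as n:
        await n.start(holder)  # returns once `holder` owns the lock
        assert lock.statistics().owner is not trio.lowlevel.current_task()
        try:
            lock.release()  # non-owner release attempt
        except RuntimeError as rte:
            print(f'trio raised: {rte!r}')

trio.run(main)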
@@ -553,6 +549,7 @@ async def lock_stdio_for_peer(
     # can try to avoid clobbering any connection from a child
     # that's currently relying on it.
     we_finished = Lock.req_handler_finished = trio.Event()
+    lock_blocked: bool = False
     try:
         if ctx.cid in Lock._blocked:
             raise RuntimeError(
@@ -565,7 +562,8 @@
                 'Consider that an internal bug exists given the TTY '
                 '`Lock`ing IPC dialog..\n'
             )
         Lock._blocked.add(ctx.cid)
+        lock_blocked = True
         root_task_name: str = current_task().name
         if tuple(subactor_uid) in Lock._blocked:
             log.warning(
@@ -575,7 +573,11 @@
             )
             ctx._enter_debugger_on_cancel: bool = False
             message: str = (
-                f'Debug lock blocked for {subactor_uid}\n'
+                f'Debug lock blocked for subactor\n\n'
+                f'x)<= {subactor_uid}\n\n'
+                f'Likely because the root actor already started shutdown and is '
+                'closing IPC connections for this child!\n\n'
                 'Cancelling debug request!\n'
             )
             log.cancel(message)
@@ -589,7 +591,6 @@
             f'remote task: {subactor_task_uid}\n'
         )
         DebugStatus.shield_sigint()
-        Lock._blocked.add(ctx.cid)

         # NOTE: we use the IPC ctx's cancel scope directly in order to
         # ensure that on any transport failure, or cancellation request
@@ -648,30 +649,33 @@
         )
     except BaseException as req_err:
-        message: str = (
-            f'On behalf of remote peer {subactor_task_uid!r}@{ctx.chan.uid!r}\n\n'
-            'Forcing `Lock.release()` for req-ctx since likely an '
-            'internal error!\n\n'
-            f'{ctx}'
+        fail_reason: str = (
+            f'on behalf of peer\n\n'
+            f'x)<=\n'
+            f' |_{subactor_task_uid!r}@{ctx.chan.uid!r}\n\n'
+            'Forcing `Lock.release()` due to acquire failure!\n\n'
+            f'x)=> {ctx}\n'
         )
         if isinstance(req_err, trio.Cancelled):
-            message = (
-                'Cancelled during root TTY-lock dialog\n'
+            fail_reason = (
+                'Cancelled during stdio-mutex request '
                 +
-                message
+                fail_reason
             )
         else:
-            message = (
-                'Errored during root TTY-lock dialog\n'
+            fail_reason = (
+                'Failed to deliver stdio-mutex request '
                 +
-                message
+                fail_reason
             )

-        log.exception(message)
-        Lock.release() #force=True)
+        log.exception(fail_reason)
+        Lock.release()
         raise

     finally:
-        Lock._blocked.remove(ctx.cid)
+        if lock_blocked:
+            Lock._blocked.remove(ctx.cid)

         # wakeup any waiters since the lock was (presumably)
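
Distilled from the hunk above, the cancelled-vs-errored prefixing of
`fail_reason` is just (standalone sketch, names simplified):

import trio

def prefix_fail_reason(
    req_err: BaseException,
    fail_reason: str,
) -> str:
    # cancellation is reported distinctly from delivery failure
    if isinstance(req_err, trio.Cancelled):
        return 'Cancelled during stdio-mutex request ' + fail_reason
    return 'Failed to deliver stdio-mutex request ' + fail_reason

Either way the composed string is passed to `log.exception()` before
the `Lock.release()` and re-`raise`.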
@@ -1167,7 +1171,7 @@ async def request_root_stdio_lock(
         ):
             log.cancel(
                 'Debug lock request was CANCELLED?\n\n'
-                f'{req_ctx}\n'
+                f'<=c) {req_ctx}\n'
                 # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n'
                 # f'{pformat_cs(req_ctx._scope, var_name="req_ctx._scope")}\n\n'
             )
@@ -1179,22 +1183,26 @@
             message: str = (
                 'Failed during debug request dialog with root actor?\n\n'
             )
-            if req_ctx:
+            if (req_ctx := DebugStatus.req_ctx):
                 message += (
-                    f'{req_ctx}\n'
+                    f'<=x) {req_ctx}\n\n'
+                    f'Cancelling IPC ctx!\n'
                 )
-                await req_ctx.cancel()
+                try:
+                    await req_ctx.cancel()
+                except trio.ClosedResourceError as terr:
+                    ctx_err.add_note(
+                        # f'Failed with {type(terr)!r} x)> `req_ctx.cancel()` '
+                        f'Failed with `req_ctx.cancel()` <x) {type(terr)!r} '
+                    )
             else:
-                message += 'Failed during `Portal.open_context()` ?\n'
+                message += 'Failed in `Portal.open_context()` call ??\n'

             log.exception(message)
             ctx_err.add_note(message)
             raise ctx_err

     except (
         tractor.ContextCancelled,
         trio.Cancelled,
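
A standalone sketch of the `ctx_err.add_note()` pattern above;
requires Python 3.11+ for `BaseException.add_note()`, and `FakeCtx`
is a hypothetical stand-in for the IPC `Context`:

import trio

class FakeCtx:
    # hypothetical stand-in; only raises like a dead transport would
    async def cancel(self) -> None:
        raise trio.ClosedResourceError('transport already closed')

async def main() -> None:
    req_ctx = FakeCtx()
    try:
        try:
            raise RuntimeError('debug request dialog failed')
        except BaseException as ctx_err:
            try:
                await req_ctx.cancel()
            except trio.ClosedResourceError as terr:
                # record the secondary failure on the original error
                # instead of masking (or losing) it
                ctx_err.add_note(
                    f'Failed with `req_ctx.cancel()` <x) {type(terr)!r} '
                )
            raise
    except RuntimeError as err:
        print(err.__notes__)

trio.run(main)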
@@ -1218,9 +1226,10 @@
         # -[ ]FURTHER, after we 'continue', we should be able to
         #     ctl-c out of the currently hanging task!
         raise DebugRequestError(
-            'Failed to lock stdio from subactor IPC ctx!\n\n'
-            f'req_ctx: {DebugStatus.req_ctx}\n'
+            'Failed during stdio-locking dialog from root actor\n\n'
+            f'<=x)\n'
+            f'|_{DebugStatus.req_ctx}\n'
         ) from req_err

     finally:
@@ -1998,10 +2007,10 @@ async def _pause(
             # sanity, for when hackin on all this?
             if not isinstance(pause_err, trio.Cancelled):
                 req_ctx: Context = DebugStatus.req_ctx
-                if req_ctx:
-                    # XXX, bc the child-task in root might cancel it?
-                    # assert req_ctx._scope.cancel_called
-                    assert req_ctx.maybe_error
+                # if req_ctx:
+                #     # XXX, bc the child-task in root might cancel it?
+                #     # assert req_ctx._scope.cancel_called
+                #     assert req_ctx.maybe_error

             raise
@@ -2041,11 +2050,12 @@ def _set_trace(
     # root here? Bo
     log.pdb(
         f'{_pause_msg}\n'
-        '|\n'
-        # TODO: more compact pformating?
+        # '|\n'
+        f'>(\n'
+        f' |_ {task} @ {actor.uid}\n'
+        # ^-TODO-^ more compact pformating?
         # -[ ] make an `Actor.__repr()__`
        # -[ ] should we use `log.pformat_task_uid()`?
-        f'|_ {task} @ {actor.uid}\n'
     )
     # presuming the caller passed in the "api frame"
     # (the last frame before user code - like `.pause()`)
@@ -2541,8 +2551,8 @@ def _post_mortem(
     # here! Bo
     log.pdb(
         f'{_crash_msg}\n'
-        '|\n'
-        # f'|_ {current_task()}\n'
+        # '|\n'
+        f'x>(\n'
+        f' |_ {current_task()} @ {actor.uid}\n'
         # f'|_ @{actor.uid}\n'