Compare commits

..

10 Commits

Author SHA1 Message Date
Tyler Goodlet 6a280da597 Add basic ctl-c testing cases to suite 2022-06-27 16:22:45 -04:00
Tyler Goodlet f984fa8daa Hack around double long list print issue..
See https://github.com/pdbpp/pdbpp/issues/496
2022-06-27 16:21:38 -04:00
Tyler Goodlet cc18c84389 Decode bytes prior to log msg 2022-06-26 16:23:38 -04:00
Tyler Goodlet af205c08f2 Show full KBI trace for help with CI hangs 2022-06-26 16:00:14 -04:00
Tyler Goodlet ab557fae21 Move pydantic-click hang example to new dir, skip in test suite 2022-06-26 15:34:33 -04:00
Tyler Goodlet 0b1c1ac568 Drop asyncio-cancelled-itself msg for now, report task names 2022-06-26 15:06:00 -04:00
Tyler Goodlet 9e37bb22e1 Add spaces before values in log msg 2022-06-26 15:06:00 -04:00
Tyler Goodlet 01dea6fe32 Add runtime level msg around channel draining 2022-06-26 15:06:00 -04:00
Tyler Goodlet 79faffd577 Always undo SIGINT overrides , cancel detached children
Ensure that even when `pdb` resumption methods are called during a crash
where `trio`'s runtime has already terminated (eg. `Event.set()` will
raise) we always revert our sigint handler to the original. Further
inside the handler if we hit a case where a child is in debug and
(thinks it) has the global pdb lock, if it has no IPC connection to
a parent, simply presume tty sync-coordination is now lost and cancel
the child immediately.
2022-06-26 15:06:00 -04:00
Tyler Goodlet b3fd5da1be Allow up to 4 `msgpsec` decode failures 2022-06-26 15:06:00 -04:00
9 changed files with 158 additions and 58 deletions

View File

@ -120,4 +120,4 @@ jobs:
run: pip install -U . -r requirements-test.txt -r requirements-docs.txt --upgrade-strategy eager
- name: Run tests
run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rs
run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rs --full-trace

View File

@ -1,5 +1,5 @@
"""
That native debug better work!
That "native" debug mode better work!
All these tests can be understood (somewhat) by running the equivalent
`examples/debugging/` scripts manually.
@ -13,6 +13,7 @@ TODO:
import time
from os import path
import platform
from typing import Optional
import pytest
import pexpect
@ -73,6 +74,14 @@ def spawn(
return _spawn
@pytest.fixture(
params=[False, True],
ids='ctl-c={}'.format,
)
def ctlc(request) -> bool:
yield request.param
@pytest.mark.parametrize(
'user_in_out',
[
@ -137,20 +146,50 @@ def test_root_actor_bp(spawn, user_in_out):
assert expect_err_str in str(child.before)
def test_root_actor_bp_forever(spawn):
def do_ctlc(
child,
count: int = 3,
patt: Optional[str] = None,
) -> None:
# make sure ctl-c sends don't do anything but repeat output
for _ in range(count):
child.sendcontrol('c')
child.expect(r"\(Pdb\+\+\)")
if patt:
# should see the last line on console
before = str(child.before.decode())
assert patt in before
def test_root_actor_bp_forever(
spawn,
ctlc: bool,
):
"Re-enter a breakpoint from the root actor-task."
child = spawn('root_actor_breakpoint_forever')
# do some "next" commands to demonstrate recurrent breakpoint
# entries
for _ in range(10):
child.sendline('next')
child.expect(r"\(Pdb\+\+\)")
# do one continue which should trigger a new task to lock the tty
if ctlc:
do_ctlc(child)
child.sendline('next')
# do one continue which should trigger a
# new task to lock the tty
child.sendline('continue')
child.expect(r"\(Pdb\+\+\)")
if ctlc:
do_ctlc(child)
# XXX: this previously caused a bug!
child.sendline('n')
child.expect(r"\(Pdb\+\+\)")
@ -158,8 +197,15 @@ def test_root_actor_bp_forever(spawn):
child.sendline('n')
child.expect(r"\(Pdb\+\+\)")
# quit out of the loop
child.sendline('q')
child.expect(pexpect.EOF)
def test_subactor_error(spawn):
def test_subactor_error(
spawn,
ctlc: bool,
):
"Single subactor raising an error"
child = spawn('subactor_error')
@ -170,23 +216,29 @@ def test_subactor_error(spawn):
before = str(child.before.decode())
assert "Attaching to pdb in crashed actor: ('name_error'" in before
# send user command
# (in this case it's the same for 'continue' vs. 'quit')
child.sendline('continue')
# make sure ctl-c sends don't do anything but repeat output
if ctlc:
do_ctlc(
child,
patt='(doggypants)',
)
# the debugger should enter a second time in the nursery
# send user command and (in this case it's the same for 'continue'
# vs. 'quit') the debugger should enter a second time in the nursery
# creating actor
child.sendline('continue')
child.expect(r"\(Pdb\+\+\)")
before = str(child.before.decode())
# root actor gets debugger engaged
assert "Attaching to pdb in crashed actor: ('root'" in before
# error is a remote error propagated from the subactor
assert "RemoteActorError: ('name_error'" in before
# another round
if ctlc:
do_ctlc(child)
child.sendline('c')
child.expect('\r\n')

View File

@ -81,11 +81,14 @@ def run_example_in_subproc(loglevel, testdir, arb_addr):
'example_script',
# walk yields: (dirpath, dirnames, filenames)
[(p[0], f) for p in os.walk(examples_dir()) for f in p[2]
[
(p[0], f) for p in os.walk(examples_dir()) for f in p[2]
if '__' not in f
and f[0] != '_'
and 'debugging' not in p[0]],
and 'debugging' not in p[0]
and 'integration' not in p[0]
],
ids=lambda t: t[1],
)

View File

@ -611,7 +611,8 @@ class Actor:
entry = local_nursery._children.get(uid)
if entry:
_, proc, _ = entry
log.warning(f'Actor {uid}@{proc} IPC connection broke!?')
log.warning(
f'Actor {uid}@{proc} IPC connection broke!?')
# if proc.poll() is not None:
# log.error('Actor {uid} proc died and IPC broke?')
@ -630,6 +631,11 @@ class Actor:
# Attempt to wait for the far end to close the channel
# and bail after timeout (2-generals on closure).
assert chan.msgstream
log.runtime(
f'Draining lingering msgs from stream {chan.msgstream}'
)
async for msg in chan.msgstream.drain():
# try to deliver any lingering msgs
# before we destroy the channel.

View File

@ -41,6 +41,7 @@ from .log import get_logger
from ._discovery import get_root
from ._state import is_root_process, debug_mode
from ._exceptions import is_multi_cancelled
from ._ipc import Channel
try:
@ -396,6 +397,10 @@ def mk_mpdb() -> tuple[MultiActorPdb, Callable]:
signal.SIGINT,
partial(shield_sigint, pdb_obj=pdb),
)
# XXX: These are the important flags mentioned in
# https://github.com/python-trio/trio/issues/1155
# which resolve the traceback spews to console.
pdb.allow_kbdint = True
pdb.nosigint = True
@ -464,11 +469,15 @@ async def _breakpoint(
_local_task_in_debug = task_name
def child_release_hook():
# _local_task_in_debug = None
try:
# sometimes the ``trio`` might already be termianated in
# which case this call will raise.
_local_pdb_complete.set()
finally:
# restore original sigint handler
undo_sigint()
# should always be cleared in the hijack hook aboved right?
# _local_task_in_debug = None
# assign unlock callback for debugger teardown hooks
# _pdb_release_hook = _local_pdb_complete.set
@ -539,8 +548,12 @@ async def _breakpoint(
_global_actor_in_debug = None
_local_task_in_debug = None
_local_pdb_complete.set()
try:
# sometimes the ``trio`` might already be termianated in
# which case this call will raise.
_local_pdb_complete.set()
finally:
# restore original sigint handler
undo_sigint()
@ -600,7 +613,21 @@ def shield_sigint(
actor = tractor.current_actor()
def do_cancel():
# If we haven't tried to cancel the runtime then do that instead
# of raising a KBI (which may non-gracefully destroy
# a ``trio.run()``).
if not actor._cancel_called:
actor.cancel_soon()
# If the runtime is already cancelled it likely means the user
# hit ctrl-c again because teardown didn't full take place in
# which case we do the "hard" raising of a local KBI.
else:
raise KeyboardInterrupt
any_connected = False
if uid_in_debug is not None:
# try to see if the supposed (sub)actor in debug still
# has an active connection to *this* actor, and if not
@ -616,6 +643,7 @@ def shield_sigint(
f'{uid_in_debug}\n'
'Allowing SIGINT propagation..'
)
return do_cancel()
# root actor branch that reports whether or not a child
# has locked debugger.
@ -644,6 +672,16 @@ def shield_sigint(
elif (
not is_root_process()
):
chan: Channel = actor._parent_chan
if not chan or not chan.connected():
log.warning(
'A global actor reported to be in debug '
'but no connection exists for its parent:\n'
f'{uid_in_debug}\n'
'Allowing SIGINT propagation..'
)
return do_cancel()
task = _local_task_in_debug
if task:
log.pdb(
@ -659,20 +697,6 @@ def shield_sigint(
"Ignoring SIGINT since debug mode is enabled"
)
# noone has the debugger so raise KBI
else:
# If we haven't tried to cancel the runtime then do that instead
# of raising a KBI (which may non-gracefully destroy
# a ``trio.run()``).
if not actor._cancel_called:
actor.cancel_soon()
# If the runtime is already cancelled it likely means the user
# hit ctrl-c again because teardown didn't full take place in
# which case we do the "hard" raising of a local KBI.
else:
raise KeyboardInterrupt
# maybe redraw/print last REPL output to console
if pdb_obj:
@ -687,8 +711,13 @@ def shield_sigint(
# https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
try:
pdb_obj.do_longlist(None)
print(pdb_obj.prompt, end='', flush=True)
# XXX: lol, see ``pdbpp`` issue:
# https://github.com/pdbpp/pdbpp/issues/496
# pdb_obj.do_longlist(None)
# pdb_obj.lastcmd = 'longlist'
pdb_obj._printlonglist(max_lines=False)
# print(pdb_obj.prompt, end='', flush=True)
except AttributeError:
log.exception('pdbpp longlist failed...')
raise KeyboardInterrupt
@ -709,7 +738,7 @@ def _set_trace(
# start 2 levels up in user code
frame: FrameType = sys._getframe()
if frame:
frame = frame.f_back.f_back # type: ignore
frame = frame.f_back # type: ignore
if pdb and actor is not None:
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")

View File

@ -231,7 +231,7 @@ class MsgspecTCPStream(MsgpackTCPStream):
'''
import msgspec # noqa
last_decode_failed: bool = False
decodes_failed: int = 0
while True:
try:
@ -268,12 +268,17 @@ class MsgspecTCPStream(MsgpackTCPStream):
msgspec.DecodeError,
UnicodeDecodeError,
):
if not last_decode_failed:
if decodes_failed < 4:
# ignore decoding errors for now and assume they have to
# do with a channel drop - hope that receiving from the
# channel will raise an expected error and bubble up.
log.error('`msgspec` failed to decode!?')
last_decode_failed = True
decoded_bytes = msg_bytes.decode()
log.error(
'`msgspec` failed to decode!?\n'
'dumping bytes:\n'
f'{decoded_bytes}'
)
decodes_failed += 1
else:
raise

View File

@ -511,8 +511,8 @@ class Portal:
if ctx.chan.connected():
log.info(
'Waiting on final context-task result for\n'
f'task:{cid}\n'
f'actor:{uid}'
f'task: {cid}\n'
f'actor: {uid}'
)
result = await ctx.result()

View File

@ -259,6 +259,7 @@ def _run_asyncio_task(
nonlocal chan
aio_err = chan._aio_err
task_err: Optional[BaseException] = None
tname = task.get_name()
# only to avoid ``asyncio`` complaining about uncaptured
# task exceptions
@ -268,9 +269,10 @@ def _run_asyncio_task(
task_err = terr
if isinstance(terr, CancelledError):
log.cancel(f'`asyncio` task cancelled: {task.get_name()}')
log.cancel(
f'infected `asyncio` task cancelled: {tname}')
else:
log.exception(f'`asyncio` task: {task.get_name()} errored')
log.exception(f'`asyncio` task: {tname} errored')
assert type(terr) is type(aio_err), 'Asyncio task error mismatch?'
@ -285,21 +287,24 @@ def _run_asyncio_task(
# We might want to change this in the future though.
from_aio.close()
if type(aio_err) is CancelledError:
log.cancel("infected task was cancelled")
# if type(aio_err) is CancelledError:
# if not task_err:
# log.cancel(
# f"infected task {tname} cancelled itself, was not ``trio``"
# )
# TODO: show that the cancellation originated
# from the ``trio`` side? right?
# if cancel_scope.cancelled:
# raise aio_err from err
elif task_err is None:
if task_err is None:
assert aio_err
aio_err.with_traceback(aio_err.__traceback__)
msg = ''.join(traceback.format_exception(type(aio_err)))
log.error(
f'infected task errorred:\n{msg}'
)
# msg = ''.join(traceback.format_exception(type(aio_err)))
# log.error(
# f'infected task errorred:\n{msg}'
# )
# raise any ``asyncio`` side error.
raise aio_err
@ -392,8 +397,8 @@ async def run_task(
) -> Any:
'''
Run an ``asyncio`` async function or generator in a task, return
or stream the result back to ``trio``.
Run an ``asyncio`` async function or generator in a new task, block
and return the result back to ``trio``.
'''
# simple async func