2023-01-27 21:27:25 +00:00
|
|
|
'''
Complex edge case where during real-time streaming the IPC transport
channels are wiped out (purposely in this example though it could have
been an outage) and we want to ensure that despite being in debug mode
(or not) the user can send SIGINT once they notice the hang and the
actor tree will eventually be cancelled without leaving any zombies.

'''
|
2024-03-19 23:33:06 +00:00
|
|
|
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager as acm
from functools import partial

from tractor import (
    open_nursery,
    context,
    Context,
    ContextCancelled,
    MsgStream,
    _testing,
)
import trio
import pytest
|
2024-03-12 19:48:20 +00:00
|
|
|
|
|
|
|
|
|
|
|
async def break_ipc(
    stream: MsgStream,
    method: str|None = None,
    pre_close: bool = False,

    def_method: str = 'eof',

) -> None:
    '''
    Deliberately break the IPC transport underneath `stream`.

    When `pre_close` is set the stream is first shut down via
    `stream.aclose()`, i.e. before any transport-level breakage.
    The break itself is then selected by `method` (falling back to
    `def_method` when `method` is unset/falsy):

    - 'trans_aclose': `.aclose()` the channel's transport stream,
    - 'eof': `.send_eof()` on the channel's transport stream,
    - 'msg': send a `None` msg over the channel.

    Any other value raises a `RuntimeError`.

    The purpose is to make sure the parent doesn't get stuck in
    debug or hang on the connection join; this more or less
    simulates an infinite msg-receive hang on the other end.

    '''
    if pre_close:
        # msg-protocol level close comes before any transport
        # breakage.
        await stream.aclose()

    method = method or def_method
    banner: str = (
        '#################################\n'
        'Simulating CHILD-side IPC BREAK!\n'
        f'method: {method}\n'
        f'pre `.aclose()`: {pre_close}\n'
        '#################################\n'
    )
    print(banner)

    if method == 'trans_aclose':
        await stream._ctx.chan.transport.stream.aclose()

    elif method == 'eof':
        await stream._ctx.chan.transport.stream.send_eof()

    elif method == 'msg':
        await stream._ctx.chan.send(None)

    # TODO: the actual real-world simulated cases like
    # transport layer hangs and/or lower layer 2-gens type
    # scenarios..
    #
    # -[ ] already have some issues for this general testing
    # area:
    #  - https://github.com/goodboy/tractor/issues/97
    #  - https://github.com/goodboy/tractor/issues/124
    #   - PR from @guille:
    #     https://github.com/goodboy/tractor/pull/149
    # elif method == 'hang':
    # TODO: framework research:
    #
    # - https://github.com/GuoTengda1993/pynetem
    # - https://github.com/shopify/toxiproxy
    # - https://manpages.ubuntu.com/manpages/trusty/man1/wirefilter.1.html

    else:
        raise RuntimeError(
            f'IPC break method unsupported: {method}'
        )
|
|
|
|
|
|
|
|
|
|
|
|
async def break_ipc_then_error(
    stream: MsgStream,
    break_ipc_with: str|None = None,
    pre_close: bool = False,
):
    '''
    Break the IPC transport under `stream` (see `break_ipc()`),
    then keep echoing whatever can still be received; if the stream
    iteration ever finishes, error out with an `AssertionError`.

    '''
    await break_ipc(
        stream,
        method=break_ipc_with,
        pre_close=pre_close,
    )

    # echo back anything still arriving after the break
    async for echoed in stream:
        await stream.send(echoed)

    # unconditional failure once the stream is exhausted
    assert 0
|
2023-01-26 22:48:08 +00:00
|
|
|
|
|
|
|
|
2024-03-12 19:48:20 +00:00
|
|
|
async def iter_ipc_stream(
    stream: MsgStream,
    break_ipc_with: str|None = None,
    pre_close: bool = False,
):
    '''
    Echo every msg received on `stream` straight back over it until
    iteration ends.

    `break_ipc_with` and `pre_close` are accepted but not used by
    this task.

    '''
    async for received in stream:
        await stream.send(received)
|
|
|
|
|
|
|
|
|
|
|
|
@context
async def recv_and_spawn_net_killers(

    ctx: Context,
    break_ipc_after: bool|int = False,
    pre_close: bool = False,

) -> None:
    '''
    Child-side endpoint: echo each streamed msg back to the caller
    and, once the msg value reaches `break_ipc_after` (when that is
    truthy), spawn the IPC-killer tasks mid-stream.

    For every msg at or past the threshold two tasks are started in
    a local `trio` nursery: a plain echo loop (`iter_ipc_stream()`)
    and `break_ipc_then_error()`, which receives `pre_close`.

    '''
    await ctx.started()
    async with (
        ctx.open_stream() as stream,
        trio.open_nursery() as tn,
    ):
        async for i in stream:
            print(f'child echoing {i}')
            await stream.send(i)

            # nothing to break (yet): keep echoing
            if not (break_ipc_after and i >= break_ipc_after):
                continue

            tn.start_soon(iter_ipc_stream, stream)

            killer = partial(
                break_ipc_then_error,
                stream=stream,
                pre_close=pre_close,
            )
            tn.start_soon(killer)
|
2024-03-19 23:33:06 +00:00
|
|
|
|
|
|
|
|
|
|
|
@acm
async def stuff_hangin_ctlc(timeout: float = 1) -> AsyncIterator[float]:
    '''
    Simulate an impatient user: run the wrapped block under a
    `timeout` second deadline and, if that deadline expires, raise
    `KeyboardInterrupt` as though ctl-c had been hit.

    Yields the `timeout` value to the `async with ... as` target.
    If the wrapped block finishes (or errors) before the deadline
    nothing extra is raised here.

    '''
    with trio.move_on_after(timeout) as cs:
        yield timeout

    # NOTE: `trio` only sets `.cancelled_caught` once the cancel scope
    # has been exited, so the timeout check has to run after the
    # `with` block; inside it the flag could never be observed as set.
    if cs.cancelled_caught:
        # pretend to be a user seeing no streaming action
        # thinking it's a hang, and then hitting ctl-c..
        print(
            f"i'm a user on the PARENT side and thingz hangin "
            f'after timeout={timeout} ???\n\n'
            'MASHING CTlR-C..!?\n'
        )
        raise KeyboardInterrupt
|
2023-01-26 22:48:08 +00:00
|
|
|
|
|
|
|
|
2023-01-27 21:27:25 +00:00
|
|
|
async def main(
    debug_mode: bool = False,
    start_method: str = 'trio',
    loglevel: str = 'cancel',

    # by default we break the parent IPC first (if configured to break
    # at all), but this can be changed so the child does first (even if
    # both are set to break).
    break_parent_ipc_after: int|bool = False,
    break_child_ipc_after: int|bool = False,
    pre_close: bool = False,

) -> None:
    '''
    Stream ints to an echoing subactor, optionally breaking the IPC
    transport from the parent side mid-stream, and finish by
    simulating a user ctl-c via `KeyboardInterrupt`.

    - `debug_mode`, `start_method`, `loglevel`: passed straight
      through to `open_nursery()`.
    - `break_parent_ipc_after`: when truthy, the parent closes the
      channel's underlying transport stream (at most once) as soon
      as the loop index exceeds this value.
    - `break_child_ipc_after`: forwarded to the child task as its
      `break_ipc_after` input.
    - `pre_close`: forwarded to the child task unchanged.

    Unless some other error propagates out first, the last statement
    executed is an unconditional `raise KeyboardInterrupt`.

    '''
    async with (
        open_nursery(
            start_method=start_method,

            # NOTE: even if the debugger is used we shouldn't get
            # a hang since it never engages due to broken IPC
            debug_mode=debug_mode,
            loglevel=loglevel,

        ) as an,
    ):
        sub_name: str = 'chitty_hijo'
        portal = await an.start_actor(
            sub_name,
            enable_modules=[__name__],
        )

        async with (
            # overall "user patience" deadline for the whole exchange
            stuff_hangin_ctlc(timeout=2) as timeout,

            # NOTE(review): presumably asserts that a
            # `ContextCancelled` is raised whenever either side is
            # configured to break IPC - confirm against
            # `tractor._testing.expect_ctxc()`.
            _testing.expect_ctxc(
                yay=(
                    break_parent_ipc_after
                    or break_child_ipc_after
                ),
                # TODO: we CAN'T remove this right?
                # since we need the ctxc to bubble up from either
                # the stream API after the `None` msg is sent
                # (which actually implicitly cancels all remote
                # tasks in the hijo) or from simulated
                # KBI-mash-from-user
                # or should we expect that a KBI triggers the ctxc
                # and KBI in an eg?
                reraise=True,
            ),

            portal.open_context(
                recv_and_spawn_net_killers,
                break_ipc_after=break_child_ipc_after,
                pre_close=pre_close,
            ) as (ctx, sent),
        ):
            # set once `stream.receive()` raises `trio.EndOfChannel`
            rx_eoc: bool = False
            # guards the parent-side break so it happens only once
            ipc_break_sent: bool = False
            async with ctx.open_stream() as stream:
                for i in range(1000):

                    if (
                        break_parent_ipc_after
                        and
                        i > break_parent_ipc_after
                        and
                        not ipc_break_sent
                    ):
                        print(
                            '#################################\n'
                            'Simulating PARENT-side IPC BREAK!\n'
                            '#################################\n'
                        )

                        # TODO: other methods? see break func above.
                        # await stream._ctx.chan.send(None)
                        # await stream._ctx.chan.transport.stream.send_eof()
                        await stream._ctx.chan.transport.stream.aclose()

                        ipc_break_sent = True

                    # it actually breaks right here in the
                    # mp_spawn/forkserver backends and thus the zombie
                    # reaper never even kicks in?
                    print(f'parent sending {i}')
                    try:
                        await stream.send(i)
                    except ContextCancelled as ctxc:
                        print(
                            'parent received ctxc on `stream.send()`\n'
                            f'{ctxc}\n'
                        )
                        assert 'root' in ctxc.canceller
                        assert sub_name in ctx.canceller

                        # TODO: is this needed or no?
                        raise

                    # timeout: int = 1
                    # with trio.move_on_after(timeout) as cs:
                    #
                    # per-receive deadline (default timeout); note this
                    # rebinds the outer `timeout` name.
                    async with stuff_hangin_ctlc() as timeout:
                        print(
                            f'PARENT `stream.receive()` with timeout={timeout}\n'
                        )
                        # NOTE: in the parent side IPC failure case this
                        # will raise an ``EndOfChannel`` after the child
                        # is killed and sends a stop msg back to its
                        # caller/this-parent.
                        try:
                            rx = await stream.receive()
                            print(
                                "I'm a happy PARENT user and echoed to me is\n"
                                f'{rx}\n'
                            )
                        except trio.EndOfChannel:
                            # remember the EoC for the post-stream
                            # checks below before re-raising it.
                            rx_eoc: bool = True
                            print('MsgStream got EoC for PARENT')
                            raise

            print(
                'Streaming finished and we got Eoc.\n'
                'Canceling `.open_context()` in root with\n'
                'CTlR-C..'
            )
            if rx_eoc:
                assert stream.closed
                try:
                    # sending on the finished stream is expected to
                    # raise; getting past it means it is still open.
                    await stream.send(i)
                    pytest.fail('stream not closed?')
                except (
                    trio.ClosedResourceError,
                    trio.EndOfChannel,
                ) as send_err:
                    # NOTE(review): `rx_eoc` is already known to be
                    # true in this branch so the `else:` below looks
                    # unreachable - confirm intended nesting.
                    if rx_eoc:
                        assert send_err is stream._eoc
                    else:
                        assert send_err is stream._closed

            # simulated user ctl-c to cancel the open context from root
            raise KeyboardInterrupt
|
2023-01-26 22:48:08 +00:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # run the example with `main()`'s defaults, i.e. with no IPC
    # break configured on either side.
    trio.run(main)
|