Hack asyncio to not abandon a guest-mode run? #2

Open
goodboy wants to merge 42 commits from aio_abandons into runtime_to_msgspec
41 changed files with 3122 additions and 1163 deletions

View File

@ -2,7 +2,10 @@ import asyncio
import trio
import tractor
from tractor import to_asyncio
from tractor import (
to_asyncio,
Portal,
)
async def aio_sleep_forever():
@ -43,7 +46,7 @@ async def bp_then_error(
@tractor.context
async def trio_ctx(
ctx: tractor.Context,
bp_before_started: bool = False,
bp_before_started: bool = True,
):
# this will block until the ``asyncio`` task sends a "first"
@ -57,7 +60,6 @@ async def trio_ctx(
trio.open_nursery() as n,
):
assert first == 'start'
if bp_before_started:
@ -73,23 +75,24 @@ async def trio_ctx(
async def main(
bps_all_over: bool = False,
bps_all_over: bool = True,
) -> None:
async with tractor.open_nursery(
# debug_mode=True,
debug_mode=True,
maybe_enable_greenback=True,
# loglevel='devx',
) as n:
p = await n.start_actor(
ptl: Portal = await n.start_actor(
'aio_daemon',
enable_modules=[__name__],
infect_asyncio=True,
debug_mode=True,
loglevel='cancel',
# loglevel='cancel',
)
async with p.open_context(
async with ptl.open_context(
trio_ctx,
bp_before_started=bps_all_over,
) as (ctx, first):
@ -105,7 +108,7 @@ async def main(
# TODO: case where we cancel from trio-side while asyncio task
# has debugger lock?
# await p.cancel_actor()
# await ptl.cancel_actor()
if __name__ == '__main__':

View File

@ -25,7 +25,8 @@ async def main():
"""
async with tractor.open_nursery(
debug_mode=True,
loglevel='cancel',
# loglevel='cancel',
# loglevel='devx',
) as n:
p0 = await n.start_actor('bp_forever', enable_modules=[__name__])

View File

@ -0,0 +1,81 @@
'''
Verify we can dump a `stackscope` tree on a hang.
'''
import os
import signal
import trio
import tractor
@tractor.context
async def start_n_shield_hang(
ctx: tractor.Context,
):
# actor: tractor.Actor = tractor.current_actor()
# sync to parent-side task
await ctx.started(os.getpid())
print('Entering shield sleep..')
with trio.CancelScope(shield=True):
await trio.sleep_forever() # in subactor
# XXX NOTE ^^^ since this shields, we expect
# the zombie reaper (aka T800) to engage on
# SIGINT from the user and eventually hard-kill
# this subprocess!
async def main(
from_test: bool = False,
) -> None:
async with (
tractor.open_nursery(
debug_mode=True,
enable_stack_on_sig=True,
# maybe_enable_greenback=False,
loglevel='devx',
) as an,
):
ptl: tractor.Portal = await an.start_actor(
'hanger',
enable_modules=[__name__],
debug_mode=True,
)
async with ptl.open_context(
start_n_shield_hang,
) as (ctx, cpid):
_, proc, _ = an._children[ptl.chan.uid]
assert cpid == proc.pid
print(
'Yo my child hanging..?\n'
'Sending SIGUSR1 to see a tree-trace!\n'
)
# XXX simulate the wrapping test's "user actions"
# (i.e. if a human didn't run this manually but wants to
# know what they should do to reproduce test behaviour)
if from_test:
os.kill(
cpid,
signal.SIGUSR1,
)
# simulate user cancelling program
await trio.sleep(0.5)
os.kill(
os.getpid(),
signal.SIGINT,
)
else:
# actually let user send the ctl-c
await trio.sleep_forever() # in root
if __name__ == '__main__':
trio.run(main)

View File

@ -4,6 +4,13 @@ import time
import trio
import tractor
# TODO: only import these when not running from test harness?
# can we detect `pexpect` usage maybe?
# from tractor.devx._debug import (
# get_lock,
# get_debug_req,
# )
def sync_pause(
use_builtin: bool = False,
@ -18,7 +25,13 @@ def sync_pause(
breakpoint(hide_tb=hide_tb)
else:
# TODO: maybe for testing some kind of cm style interface
# where the `._set_trace()` call doesn't happen until block
# exit?
# assert get_lock().ctx_in_debug is None
# assert get_debug_req().repl is None
tractor.pause_from_sync()
# assert get_debug_req().repl is None
if error:
raise RuntimeError('yoyo sync code error')
@ -41,10 +54,11 @@ async def start_n_sync_pause(
async def main() -> None:
async with (
tractor.open_nursery(
# NOTE: required for pausing from sync funcs
maybe_enable_greenback=True,
debug_mode=True,
# loglevel='cancel',
maybe_enable_greenback=True,
enable_stack_on_sig=True,
# loglevel='warning',
# loglevel='devx',
) as an,
trio.open_nursery() as tn,
):
@ -138,7 +152,9 @@ async def main() -> None:
# the case 2. from above still exists!
use_builtin=True,
),
abandon_on_cancel=False,
# TODO: with this `False` we can hang!??!
# abandon_on_cancel=False,
abandon_on_cancel=True,
thread_name='inline_root_bg_thread',
)

View File

@ -9,7 +9,7 @@ async def main(service_name):
async with tractor.open_nursery() as an:
await an.start_actor(service_name)
async with tractor.get_arbiter('127.0.0.1', 1616) as portal:
async with tractor.get_registry('127.0.0.1', 1616) as portal:
print(f"Arbiter is listening on {portal.channel}")
async with tractor.wait_for_actor(service_name) as sockaddr:

View File

View File

@ -0,0 +1,168 @@
'''
`tractor.devx.*` tooling sub-pkg test space.
'''
from typing import (
Callable,
)
import pytest
from pexpect.exceptions import (
TIMEOUT,
)
from pexpect.spawnbase import SpawnBase
from tractor._testing import (
mk_cmd,
)
@pytest.fixture
def spawn(
start_method,
testdir: pytest.Testdir,
reg_addr: tuple[str, int],
) -> Callable[[str], None]:
'''
Use the `pexpect` module shipped via `testdir.spawn()` to
run an `./examples/..` script by name.
'''
if start_method != 'trio':
pytest.skip(
'`pexpect` based tests only supported on `trio` backend'
)
def _spawn(
cmd: str,
**mkcmd_kwargs,
):
return testdir.spawn(
cmd=mk_cmd(
cmd,
**mkcmd_kwargs,
),
expect_timeout=3,
)
# such that test-dep can pass input script name.
return _spawn
@pytest.fixture(
params=[False, True],
ids='ctl-c={}'.format,
)
def ctlc(
request,
ci_env: bool,
) -> bool:
use_ctlc = request.param
node = request.node
markers = node.own_markers
for mark in markers:
if mark.name == 'has_nested_actors':
pytest.skip(
f'Test {node} has nested actors and fails with Ctrl-C.\n'
f'The test can sometimes run fine locally but until'
' we solve' 'this issue this CI test will be xfail:\n'
'https://github.com/goodboy/tractor/issues/320'
)
if use_ctlc:
# XXX: disable pygments highlighting for auto-tests
# since some envs (like actions CI) will struggle
# the the added color-char encoding..
from tractor.devx._debug import TractorConfig
TractorConfig.use_pygements = False
yield use_ctlc
def expect(
child,
# normally a `pdb` prompt by default
patt: str,
**kwargs,
) -> None:
'''
Expect wrapper that prints last seen console
data before failing.
'''
try:
child.expect(
patt,
**kwargs,
)
except TIMEOUT:
before = str(child.before.decode())
print(before)
raise
def in_prompt_msg(
child: SpawnBase,
parts: list[str],
pause_on_false: bool = False,
err_on_false: bool = False,
print_prompt_on_false: bool = True,
) -> bool:
'''
Predicate check if (the prompt's) std-streams output has all
`str`-parts in it.
Can be used in test asserts for bulk matching expected
log/REPL output for a given `pdb` interact point.
'''
__tracebackhide__: bool = False
before: str = str(child.before.decode())
for part in parts:
if part not in before:
if pause_on_false:
import pdbp
pdbp.set_trace()
if print_prompt_on_false:
print(before)
if err_on_false:
raise ValueError(
f'Could not find pattern in `before` output?\n'
f'part: {part!r}\n'
)
return False
return True
# TODO: todo support terminal color-chars stripping so we can match
# against call stack frame output from the the 'll' command the like!
# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789
def assert_before(
child: SpawnBase,
patts: list[str],
**kwargs,
) -> None:
__tracebackhide__: bool = False
assert in_prompt_msg(
child=child,
parts=patts,
# since this is an "assert" helper ;)
err_on_false=True,
**kwargs
)

View File

@ -13,11 +13,9 @@ TODO:
from functools import partial
import itertools
import platform
import pathlib
import time
import pytest
import pexpect
from pexpect.exceptions import (
TIMEOUT,
EOF,
@ -28,12 +26,14 @@ from tractor.devx._debug import (
_crash_msg,
_repl_fail_msg,
)
from tractor._testing import (
examples_dir,
)
from conftest import (
_ci_env,
)
from .conftest import (
expect,
in_prompt_msg,
assert_before,
)
# TODO: The next great debugger audit could be done by you!
# - recurrent entry to breakpoint() from single actor *after* and an
@ -52,15 +52,6 @@ if platform.system() == 'Windows':
)
def mk_cmd(ex_name: str) -> str:
'''
Generate a command suitable to pass to ``pexpect.spawn()``.
'''
script_path: pathlib.Path = examples_dir() / 'debugging' / f'{ex_name}.py'
return ' '.join(['python', str(script_path)])
# TODO: was trying to this xfail style but some weird bug i see in CI
# that's happening at collect time.. pretty soon gonna dump actions i'm
# thinkin...
@ -79,142 +70,9 @@ has_nested_actors = pytest.mark.has_nested_actors
# )
@pytest.fixture
def spawn(
start_method,
testdir,
reg_addr,
) -> 'pexpect.spawn':
if start_method != 'trio':
pytest.skip(
"Debugger tests are only supported on the trio backend"
)
def _spawn(cmd):
return testdir.spawn(
cmd=mk_cmd(cmd),
expect_timeout=3,
)
return _spawn
PROMPT = r"\(Pdb\+\)"
def expect(
child,
# prompt by default
patt: str = PROMPT,
**kwargs,
) -> None:
'''
Expect wrapper that prints last seen console
data before failing.
'''
try:
child.expect(
patt,
**kwargs,
)
except TIMEOUT:
before = str(child.before.decode())
print(before)
raise
def in_prompt_msg(
prompt: str,
parts: list[str],
pause_on_false: bool = False,
print_prompt_on_false: bool = True,
) -> bool:
'''
Predicate check if (the prompt's) std-streams output has all
`str`-parts in it.
Can be used in test asserts for bulk matching expected
log/REPL output for a given `pdb` interact point.
'''
__tracebackhide__: bool = False
for part in parts:
if part not in prompt:
if pause_on_false:
import pdbp
pdbp.set_trace()
if print_prompt_on_false:
print(prompt)
return False
return True
# TODO: todo support terminal color-chars stripping so we can match
# against call stack frame output from the the 'll' command the like!
# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789
def assert_before(
child,
patts: list[str],
**kwargs,
) -> None:
__tracebackhide__: bool = False
# as in before the prompt end
before: str = str(child.before.decode())
assert in_prompt_msg(
prompt=before,
parts=patts,
**kwargs
)
@pytest.fixture(
params=[False, True],
ids='ctl-c={}'.format,
)
def ctlc(
request,
ci_env: bool,
) -> bool:
use_ctlc = request.param
node = request.node
markers = node.own_markers
for mark in markers:
if mark.name == 'has_nested_actors':
pytest.skip(
f'Test {node} has nested actors and fails with Ctrl-C.\n'
f'The test can sometimes run fine locally but until'
' we solve' 'this issue this CI test will be xfail:\n'
'https://github.com/goodboy/tractor/issues/320'
)
if use_ctlc:
# XXX: disable pygments highlighting for auto-tests
# since some envs (like actions CI) will struggle
# the the added color-char encoding..
from tractor.devx._debug import TractorConfig
TractorConfig.use_pygements = False
yield use_ctlc
@pytest.mark.parametrize(
'user_in_out',
[
@ -238,14 +96,15 @@ def test_root_actor_error(
# scan for the prompt
expect(child, PROMPT)
before = str(child.before.decode())
# make sure expected logging and error arrives
assert in_prompt_msg(
before,
[_crash_msg, "('root'"]
child,
[
_crash_msg,
"('root'",
'AssertionError',
]
)
assert 'AssertionError' in before
# send user command
child.sendline(user_input)
@ -279,7 +138,7 @@ def test_root_actor_bp(spawn, user_in_out):
child.expect('\r\n')
# process should exit
child.expect(pexpect.EOF)
child.expect(EOF)
if expect_err_str is None:
assert 'Error' not in str(child.before)
@ -299,7 +158,9 @@ def do_ctlc(
# needs some further investigation potentially...
expect_prompt: bool = not _ci_env,
) -> None:
) -> str|None:
before: str|None = None
# make sure ctl-c sends don't do anything but repeat output
for _ in range(count):
@ -309,15 +170,18 @@ def do_ctlc(
# TODO: figure out why this makes CI fail..
# if you run this test manually it works just fine..
if expect_prompt:
before = str(child.before.decode())
time.sleep(delay)
child.expect(PROMPT)
before = str(child.before.decode())
time.sleep(delay)
if patt:
# should see the last line on console
assert patt in before
# return the console content up to the final prompt
return before
def test_root_actor_bp_forever(
spawn,
@ -358,7 +222,7 @@ def test_root_actor_bp_forever(
# quit out of the loop
child.sendline('q')
child.expect(pexpect.EOF)
child.expect(EOF)
@pytest.mark.parametrize(
@ -380,10 +244,12 @@ def test_subactor_error(
# scan for the prompt
child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg(
before,
[_crash_msg, "('name_error'"]
child,
[
_crash_msg,
"('name_error'",
]
)
if do_next:
@ -402,17 +268,15 @@ def test_subactor_error(
child.sendline('continue')
child.expect(PROMPT)
before = str(child.before.decode())
# root actor gets debugger engaged
assert in_prompt_msg(
before,
[_crash_msg, "('root'"]
)
# error is a remote error propagated from the subactor
assert in_prompt_msg(
before,
[_crash_msg, "('name_error'"]
child,
[
_crash_msg,
# root actor gets debugger engaged
"('root'",
# error is a remote error propagated from the subactor
"('name_error'",
]
)
# another round
@ -423,7 +287,7 @@ def test_subactor_error(
child.expect('\r\n')
# process should exit
child.expect(pexpect.EOF)
child.expect(EOF)
def test_subactor_breakpoint(
@ -433,14 +297,11 @@ def test_subactor_breakpoint(
"Single subactor with an infinite breakpoint loop"
child = spawn('subactor_breakpoint')
# scan for the prompt
child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg(
before,
[_pause_msg, "('breakpoint_forever'"]
child,
[_pause_msg,
"('breakpoint_forever'",]
)
# do some "next" commands to demonstrate recurrent breakpoint
@ -456,9 +317,8 @@ def test_subactor_breakpoint(
for _ in range(5):
child.sendline('continue')
child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg(
before,
child,
[_pause_msg, "('breakpoint_forever'"]
)
@ -471,9 +331,8 @@ def test_subactor_breakpoint(
# child process should exit but parent will capture pdb.BdbQuit
child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg(
before,
child,
['RemoteActorError:',
"('breakpoint_forever'",
'bdb.BdbQuit',]
@ -486,11 +345,10 @@ def test_subactor_breakpoint(
child.sendline('c')
# process should exit
child.expect(pexpect.EOF)
child.expect(EOF)
before = str(child.before.decode())
assert in_prompt_msg(
before,
child,
['RemoteActorError:',
"('breakpoint_forever'",
'bdb.BdbQuit',]
@ -514,7 +372,7 @@ def test_multi_subactors(
before = str(child.before.decode())
assert in_prompt_msg(
before,
child,
[_pause_msg, "('breakpoint_forever'"]
)
@ -535,12 +393,14 @@ def test_multi_subactors(
# first name_error failure
child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg(
before,
[_crash_msg, "('name_error'"]
child,
[
_crash_msg,
"('name_error'",
"NameError",
]
)
assert "NameError" in before
if ctlc:
do_ctlc(child)
@ -564,9 +424,8 @@ def test_multi_subactors(
# breakpoint loop should re-engage
child.sendline('c')
child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg(
before,
child,
[_pause_msg, "('breakpoint_forever'"]
)
@ -629,7 +488,7 @@ def test_multi_subactors(
# process should exit
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
# repeat of previous multierror for final output
assert_before(child, [
@ -659,25 +518,28 @@ def test_multi_daemon_subactors(
# the root's tty lock first so anticipate either crash
# message on the first entry.
bp_forev_parts = [_pause_msg, "('bp_forever'"]
bp_forev_parts = [
_pause_msg,
"('bp_forever'",
]
bp_forev_in_msg = partial(
in_prompt_msg,
parts=bp_forev_parts,
)
name_error_msg = "NameError: name 'doggypants' is not defined"
name_error_parts = [name_error_msg]
name_error_msg: str = "NameError: name 'doggypants' is not defined"
name_error_parts: list[str] = [name_error_msg]
before = str(child.before.decode())
if bp_forev_in_msg(prompt=before):
if bp_forev_in_msg(child=child):
next_parts = name_error_parts
elif name_error_msg in before:
next_parts = bp_forev_parts
else:
raise ValueError("Neither log msg was found !?")
raise ValueError('Neither log msg was found !?')
if ctlc:
do_ctlc(child)
@ -746,14 +608,12 @@ def test_multi_daemon_subactors(
# wait for final error in root
# where it crashs with boxed error
while True:
try:
child.sendline('c')
child.expect(PROMPT)
assert_before(
child,
bp_forev_parts
)
except AssertionError:
child.sendline('c')
child.expect(PROMPT)
if not in_prompt_msg(
child,
bp_forev_parts
):
break
assert_before(
@ -769,7 +629,7 @@ def test_multi_daemon_subactors(
)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
@has_nested_actors
@ -845,7 +705,7 @@ def test_multi_subactors_root_errors(
])
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
assert_before(child, [
# "Attaching to pdb in crashed actor: ('root'",
@ -934,10 +794,13 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
child = spawn('root_cancelled_but_child_is_in_tty_lock')
child.expect(PROMPT)
before = str(child.before.decode())
assert "NameError: name 'doggypants' is not defined" in before
assert "tractor._exceptions.RemoteActorError: ('name_error'" not in before
assert_before(
child,
[
"NameError: name 'doggypants' is not defined",
"tractor._exceptions.RemoteActorError: ('name_error'",
],
)
time.sleep(0.5)
if ctlc:
@ -975,7 +838,7 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
for i in range(3):
try:
child.expect(pexpect.EOF, timeout=0.5)
child.expect(EOF, timeout=0.5)
break
except TIMEOUT:
child.sendline('c')
@ -1017,7 +880,7 @@ def test_root_cancels_child_context_during_startup(
do_ctlc(child)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
def test_different_debug_mode_per_actor(
@ -1028,9 +891,8 @@ def test_different_debug_mode_per_actor(
child.expect(PROMPT)
# only one actor should enter the debugger
before = str(child.before.decode())
assert in_prompt_msg(
before,
child,
[_crash_msg, "('debugged_boi'", "RuntimeError"],
)
@ -1038,9 +900,7 @@ def test_different_debug_mode_per_actor(
do_ctlc(child)
child.sendline('c')
child.expect(pexpect.EOF)
before = str(child.before.decode())
child.expect(EOF)
# NOTE: this debugged actor error currently WON'T show up since the
# root will actually cancel and terminate the nursery before the error
@ -1085,17 +945,16 @@ def test_pause_from_sync(
)
if ctlc:
do_ctlc(child)
# ^NOTE^ subactor not spawned yet; don't need extra delay.
child.sendline('c')
# first `await tractor.pause()` inside `p.open_context()` body
child.expect(PROMPT)
# XXX shouldn't see gb loaded message with PDB loglevel!
before = str(child.before.decode())
assert not in_prompt_msg(
before,
child,
['`greenback` portal opened!'],
)
# should be same root task
@ -1109,7 +968,27 @@ def test_pause_from_sync(
)
if ctlc:
do_ctlc(child)
do_ctlc(
child,
# NOTE: setting this to 0 (or some other sufficient
# small val) can cause the test to fail since the
# `subactor` suffers a race where the root/parent
# sends an actor-cancel prior to it hitting its pause
# point; by def the value is 0.1
delay=0.4,
)
# XXX, fwiw without a brief sleep here the SIGINT might actually
# trigger "subactor" cancellation by its parent before the
# shield-handler is engaged.
#
# => similar to the `delay` input to `do_ctlc()` below, setting
# this too low can cause the test to fail since the `subactor`
# suffers a race where the root/parent sends an actor-cancel
# prior to the context task hitting its pause point (and thus
# engaging the `sigint_shield()` handler in time); this value
# seems be good enuf?
time.sleep(0.6)
# one of the bg thread or subactor should have
# `Lock.acquire()`-ed
@ -1128,32 +1007,48 @@ def test_pause_from_sync(
"('root'",
],
}
conts: int = 0 # for debugging below matching logic on failure
while attach_patts:
child.sendline('c')
conts += 1
child.expect(PROMPT)
before = str(child.before.decode())
for key in attach_patts.copy():
for key in attach_patts:
if key in before:
attach_key: str = key
expected_patts: str = attach_patts.pop(key)
assert_before(
child,
[_pause_msg] + expected_patts
[_pause_msg]
+
expected_patts
)
break
else:
pytest.fail(
f'No keys found?\n\n'
f'{attach_patts.keys()}\n\n'
f'{before}\n'
)
# ensure no other task/threads engaged a REPL
# at the same time as the one that was detected above.
for key, other_patts in attach_patts.items():
for key, other_patts in attach_patts.copy().items():
assert not in_prompt_msg(
before,
child,
other_patts,
)
if ctlc:
do_ctlc(child)
do_ctlc(
child,
patt=attach_key,
# NOTE same as comment above
delay=0.4,
)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
def test_post_mortem_api(
@ -1258,7 +1153,7 @@ def test_post_mortem_api(
# )
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
def test_shield_pause(
@ -1333,9 +1228,26 @@ def test_shield_pause(
]
)
child.sendline('c')
child.expect(pexpect.EOF)
child.expect(EOF)
# TODO: better error for "non-ideal" usage from the root actor.
# -[ ] if called from an async scope emit a message that suggests
# using `await tractor.pause()` instead since it's less overhead
# (in terms of `greenback` and/or extra threads) and if it's from
# a sync scope suggest that usage must first call
# `ensure_portal()` in the (eventual parent) async calling scope?
def test_sync_pause_from_bg_task_in_root_actor_():
'''
When used from the root actor, normally we can only implicitly
support `.pause_from_sync()` from the main-parent-task (that
opens the runtime via `open_root_actor()`) since `greenback`
requires a `.ensure_portal()` call per `trio.Task` where it is
used.
'''
...
# TODO: needs ANSI code stripping tho, see `assert_before()` # above!
def test_correct_frames_below_hidden():
'''

View File

@ -0,0 +1,120 @@
'''
That "native" runtime-hackin toolset better be dang useful!
Verify the funtion of a variety of "developer-experience" tools we
offer from the `.devx` sub-pkg:
- use of the lovely `stackscope` for dumping actor `trio`-task trees
during operation and hangs.
TODO:
- demonstration of `CallerInfo` call stack frame filtering such that
for logging and REPL purposes a user sees exactly the layers needed
when debugging a problem inside the stack vs. in their app.
'''
import os
import signal
from .conftest import (
expect,
assert_before,
# in_prompt_msg,
)
def test_shield_pause(
spawn,
):
'''
Verify the `tractor.pause()/.post_mortem()` API works inside an
already cancelled `trio.CancelScope` and that you can step to the
next checkpoint wherein the cancelled will get raised.
'''
child = spawn(
'shield_hang_in_sub'
)
expect(
child,
'Yo my child hanging..?',
)
assert_before(
child,
[
'Entering shield sleep..',
'Enabling trace-trees on `SIGUSR1` since `stackscope` is installed @',
]
)
print(
'Sending SIGUSR1 to see a tree-trace!',
)
os.kill(
child.pid,
signal.SIGUSR1,
)
expect(
child,
# end-of-tree delimiter
"------ \('root', ",
)
assert_before(
child,
[
'Trying to dump `stackscope` tree..',
'Dumping `stackscope` tree for actor',
"('root'", # uid line
# parent block point (non-shielded)
'await trio.sleep_forever() # in root',
]
)
# expect(
# child,
# # relay to the sub should be reported
# 'Relaying `SIGUSR1`[10] to sub-actor',
# )
expect(
child,
# end-of-tree delimiter
"------ \('hanger', ",
)
assert_before(
child,
[
# relay to the sub should be reported
'Relaying `SIGUSR1`[10] to sub-actor',
"('hanger'", # uid line
# hanger LOC where it's shield-halted
'await trio.sleep_forever() # in subactor',
]
)
# breakpoint()
# simulate the user sending a ctl-c to the hanging program.
# this should result in the terminator kicking in since
# the sub is shield blocking and can't respond to SIGINT.
os.kill(
child.pid,
signal.SIGINT,
)
expect(
child,
'Shutting down actor runtime',
timeout=6,
)
assert_before(
child,
[
'raise KeyboardInterrupt',
# 'Shutting down actor runtime',
'#T-800 deployed to collect zombie B0',
"'--uid', \"('hanger',",
]
)

View File

@ -91,7 +91,8 @@ def test_ipc_channel_break_during_stream(
# non-`trio` spawners should never hit the hang condition that
# requires the user to do ctl-c to cancel the actor tree.
expect_final_exc = trio.ClosedResourceError
# expect_final_exc = trio.ClosedResourceError
expect_final_exc = tractor.TransportClosed
mod: ModuleType = import_path(
examples_dir() / 'advanced_faults'
@ -157,7 +158,7 @@ def test_ipc_channel_break_during_stream(
if pre_aclose_msgstream:
expect_final_exc = KeyboardInterrupt
# NOTE when the parent IPC side dies (even if the child's does as well
# NOTE when the parent IPC side dies (even if the child does as well
# but the child fails BEFORE the parent) we always expect the
# IPC layer to raise a closed-resource, NEVER do we expect
# a stop msg since the parent-side ctx apis will error out
@ -169,7 +170,8 @@ def test_ipc_channel_break_during_stream(
and
ipc_break['break_child_ipc_after'] is False
):
expect_final_exc = trio.ClosedResourceError
# expect_final_exc = trio.ClosedResourceError
expect_final_exc = tractor.TransportClosed
# BOTH but, PARENT breaks FIRST
elif (
@ -180,7 +182,8 @@ def test_ipc_channel_break_during_stream(
ipc_break['break_parent_ipc_after']
)
):
expect_final_exc = trio.ClosedResourceError
# expect_final_exc = trio.ClosedResourceError
expect_final_exc = tractor.TransportClosed
with pytest.raises(
expected_exception=(
@ -199,8 +202,8 @@ def test_ipc_channel_break_during_stream(
**ipc_break,
)
)
except KeyboardInterrupt as kbi:
_err = kbi
except KeyboardInterrupt as _kbi:
kbi = _kbi
if expect_final_exc is not KeyboardInterrupt:
pytest.fail(
'Rxed unexpected KBI !?\n'
@ -209,6 +212,21 @@ def test_ipc_channel_break_during_stream(
raise
except tractor.TransportClosed as _tc:
tc = _tc
if expect_final_exc is KeyboardInterrupt:
pytest.fail(
'Unexpected transport failure !?\n'
f'{repr(tc)}'
)
cause: Exception = tc.__cause__
assert (
type(cause) is trio.ClosedResourceError
and
cause.args[0] == 'another task closed this fd'
)
raise
# get raw instance from pytest wrapper
value = excinfo.value
if isinstance(value, ExceptionGroup):

View File

@ -11,9 +11,6 @@ from typing import (
Type,
Union,
)
from contextvars import (
Context,
)
from msgspec import (
structs,
@ -27,6 +24,7 @@ import tractor
from tractor import (
_state,
MsgTypeError,
Context,
)
from tractor.msg import (
_codec,
@ -41,7 +39,7 @@ from tractor.msg import (
from tractor.msg.types import (
_payload_msgs,
log,
Msg,
PayloadMsg,
Started,
mk_msg_spec,
)
@ -61,7 +59,7 @@ def mk_custom_codec(
uid: tuple[str, str] = tractor.current_actor().uid
# XXX NOTE XXX: despite defining `NamespacePath` as a type
# field on our `Msg.pld`, we still need a enc/dec_hook() pair
# field on our `PayloadMsg.pld`, we still need a enc/dec_hook() pair
# to cast to/from that type on the wire. See the docs:
# https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types
@ -321,12 +319,12 @@ def dec_type_union(
import importlib
types: list[Type] = []
for type_name in type_names:
for ns in [
for mod in [
typing,
importlib.import_module(__name__),
]:
if type_ref := getattr(
ns,
mod,
type_name,
False,
):
@ -744,7 +742,7 @@ def chk_pld_type(
# 'Error', .pld: ErrorData
codec: MsgCodec = mk_codec(
# NOTE: this ONLY accepts `Msg.pld` fields of a specified
# NOTE: this ONLY accepts `PayloadMsg.pld` fields of a specified
# type union.
ipc_pld_spec=payload_spec,
)
@ -752,7 +750,7 @@ def chk_pld_type(
# make a one-off dec to compare with our `MsgCodec` instance
# which does the below `mk_msg_spec()` call internally
ipc_msg_spec: Union[Type[Struct]]
msg_types: list[Msg[payload_spec]]
msg_types: list[PayloadMsg[payload_spec]]
(
ipc_msg_spec,
msg_types,
@ -761,7 +759,7 @@ def chk_pld_type(
)
_enc = msgpack.Encoder()
_dec = msgpack.Decoder(
type=ipc_msg_spec or Any, # like `Msg[Any]`
type=ipc_msg_spec or Any, # like `PayloadMsg[Any]`
)
assert (
@ -806,7 +804,7 @@ def chk_pld_type(
'cid': '666',
'pld': pld,
}
enc_msg: Msg = typedef(**kwargs)
enc_msg: PayloadMsg = typedef(**kwargs)
_wire_bytes: bytes = _enc.encode(enc_msg)
wire_bytes: bytes = codec.enc.encode(enc_msg)
@ -883,25 +881,16 @@ def test_limit_msgspec():
debug_mode=True
):
# ensure we can round-trip a boxing `Msg`
# ensure we can round-trip a boxing `PayloadMsg`
assert chk_pld_type(
# Msg,
Any,
None,
payload_spec=Any,
pld=None,
expect_roundtrip=True,
)
# TODO: don't need this any more right since
# `msgspec>=0.15` has the nice generics stuff yah??
#
# manually override the type annot of the payload
# field and ensure it propagates to all msg-subtypes.
# Msg.__annotations__['pld'] = Any
# verify that a mis-typed payload value won't decode
assert not chk_pld_type(
# Msg,
int,
payload_spec=int,
pld='doggy',
)
@ -913,18 +902,16 @@ def test_limit_msgspec():
value: Any
assert not chk_pld_type(
# Msg,
CustomPayload,
payload_spec=CustomPayload,
pld='doggy',
)
assert chk_pld_type(
# Msg,
CustomPayload,
payload_spec=CustomPayload,
pld=CustomPayload(name='doggy', value='urmom')
)
# uhh bc we can `.pause_from_sync()` now! :surfer:
# yah, we can `.pause_from_sync()` now!
# breakpoint()
trio.run(main)

View File

@ -26,7 +26,7 @@ async def test_reg_then_unreg(reg_addr):
portal = await n.start_actor('actor', enable_modules=[__name__])
uid = portal.channel.uid
async with tractor.get_arbiter(*reg_addr) as aportal:
async with tractor.get_registry(*reg_addr) as aportal:
# this local actor should be the arbiter
assert actor is aportal.actor
@ -160,7 +160,7 @@ async def spawn_and_check_registry(
async with tractor.open_root_actor(
registry_addrs=[reg_addr],
):
async with tractor.get_arbiter(*reg_addr) as portal:
async with tractor.get_registry(*reg_addr) as portal:
# runtime needs to be up to call this
actor = tractor.current_actor()
@ -298,7 +298,7 @@ async def close_chans_before_nursery(
async with tractor.open_root_actor(
registry_addrs=[reg_addr],
):
async with tractor.get_arbiter(*reg_addr) as aportal:
async with tractor.get_registry(*reg_addr) as aportal:
try:
get_reg = partial(unpack_reg, aportal)

View File

@ -19,7 +19,7 @@ from tractor._testing import (
@pytest.fixture
def run_example_in_subproc(
loglevel: str,
testdir,
testdir: pytest.Testdir,
reg_addr: tuple[str, int],
):

View File

@ -2,19 +2,30 @@
The hipster way to force SC onto the stdlib's "async": 'infection mode'.
'''
from typing import Optional, Iterable, Union
import asyncio
import builtins
from contextlib import ExitStack
import itertools
import importlib
import os
from pathlib import Path
import signal
from typing import (
Callable,
Iterable,
Union,
)
import pytest
import trio
import tractor
from tractor import (
current_actor,
Actor,
to_asyncio,
RemoteActorError,
ContextCancelled,
_state,
)
from tractor.trionics import BroadcastReceiver
from tractor._testing import expect_ctxc
@ -25,8 +36,8 @@ async def sleep_and_err(
# just signature placeholders for compat with
# ``to_asyncio.open_channel_from()``
to_trio: Optional[trio.MemorySendChannel] = None,
from_trio: Optional[asyncio.Queue] = None,
to_trio: trio.MemorySendChannel|None = None,
from_trio: asyncio.Queue|None = None,
):
if to_trio:
@ -36,7 +47,7 @@ async def sleep_and_err(
assert 0
async def sleep_forever():
async def aio_sleep_forever():
await asyncio.sleep(float('inf'))
@ -44,7 +55,7 @@ async def trio_cancels_single_aio_task():
# spawn an ``asyncio`` task to run a func and return result
with trio.move_on_after(.2):
await tractor.to_asyncio.run_task(sleep_forever)
await tractor.to_asyncio.run_task(aio_sleep_forever)
def test_trio_cancels_aio_on_actor_side(reg_addr):
@ -66,14 +77,22 @@ def test_trio_cancels_aio_on_actor_side(reg_addr):
async def asyncio_actor(
target: str,
expect_err: Exception|None = None
) -> None:
assert tractor.current_actor().is_infected_aio()
target = globals()[target]
# ensure internal runtime state is consistent
actor: Actor = tractor.current_actor()
assert (
actor.is_infected_aio()
and
actor._infected_aio
and
_state._runtime_vars['_is_infected_aio']
)
target: Callable = globals()[target]
if '.' in expect_err:
modpath, _, name = expect_err.rpartition('.')
@ -128,7 +147,7 @@ def test_aio_simple_error(reg_addr):
assert err
assert isinstance(err, RemoteActorError)
assert err.boxed_type == AssertionError
assert err.boxed_type is AssertionError
def test_tractor_cancels_aio(reg_addr):
@ -140,7 +159,7 @@ def test_tractor_cancels_aio(reg_addr):
async with tractor.open_nursery() as n:
portal = await n.run_in_actor(
asyncio_actor,
target='sleep_forever',
target='aio_sleep_forever',
expect_err='trio.Cancelled',
infect_asyncio=True,
)
@ -164,7 +183,7 @@ def test_trio_cancels_aio(reg_addr):
async with tractor.open_nursery() as n:
await n.run_in_actor(
asyncio_actor,
target='sleep_forever',
target='aio_sleep_forever',
expect_err='trio.Cancelled',
infect_asyncio=True,
)
@ -195,7 +214,7 @@ async def trio_ctx(
# spawn another asyncio task for the cuck of it.
n.start_soon(
tractor.to_asyncio.run_task,
sleep_forever,
aio_sleep_forever,
)
await trio.sleep_forever()
@ -272,7 +291,7 @@ def test_context_spawns_aio_task_that_errors(
err = excinfo.value
assert isinstance(err, expect)
assert err.boxed_type == AssertionError
assert err.boxed_type is AssertionError
async def aio_cancel():
@ -281,23 +300,35 @@ async def aio_cancel():
'''
await asyncio.sleep(0.5)
task = asyncio.current_task()
# cancel and enter sleep
task = asyncio.current_task()
task.cancel()
await sleep_forever()
await aio_sleep_forever()
def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr):
'''
When the `asyncio.Task` cancels itself the `trio` side cshould
also cancel and teardown and relay the cancellation cross-process
to the caller (parent).
'''
async def main():
async with tractor.open_nursery() as n:
await n.run_in_actor(
an: tractor.ActorNursery
async with tractor.open_nursery() as an:
p: tractor.Portal = await an.run_in_actor(
asyncio_actor,
target='aio_cancel',
expect_err='tractor.to_asyncio.AsyncioCancelled',
infect_asyncio=True,
)
# NOTE: normally the `an.__aexit__()` waits on the
# portal's result but we do it explicitly here
# to avoid indent levels.
with trio.fail_after(1):
await p.wait_for_result()
with pytest.raises(
expected_exception=(RemoteActorError, ExceptionGroup),
@ -305,7 +336,7 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr):
trio.run(main)
# might get multiple `trio.Cancelled`s as well inside an inception
err = excinfo.value
err: RemoteActorError|ExceptionGroup = excinfo.value
if isinstance(err, ExceptionGroup):
err = next(itertools.dropwhile(
lambda exc: not isinstance(exc, tractor.RemoteActorError),
@ -313,7 +344,8 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr):
))
assert err
# ensure boxed error is correct
# relayed boxed error should be our `trio`-task's
# cancel-signal-proxy-equivalent of `asyncio.CancelledError`.
assert err.boxed_type == to_asyncio.AsyncioCancelled
@ -355,7 +387,6 @@ async def push_from_aio_task(
async def stream_from_aio(
exit_early: bool = False,
raise_err: bool = False,
aio_raise_err: bool = False,
@ -466,7 +497,7 @@ def test_trio_error_cancels_intertask_chan(reg_addr):
trio.run(main)
# ensure boxed error type
excinfo.value.boxed_type == Exception
excinfo.value.boxed_type is Exception
def test_trio_closes_early_and_channel_exits(reg_addr):
@ -502,7 +533,7 @@ def test_aio_errors_and_channel_propagates_and_closes(reg_addr):
) as excinfo:
trio.run(main)
excinfo.value.boxed_type == Exception
excinfo.value.boxed_type is Exception
@tractor.context
@ -618,6 +649,242 @@ def test_echoserver_detailed_mechanics(
trio.run(main)
@tractor.context
async def manage_file(
ctx: tractor.Context,
tmp_path_str: str,
send_sigint_to: str,
trio_side_is_shielded: bool = True,
bg_aio_task: bool = False,
):
'''
Start an `asyncio` task that just sleeps after registering a context
with `Actor.lifetime_stack`. Trigger a SIGINT to kill the actor tree
and ensure the stack is closed in the infected mode child.
To verify the teardown state just write a tmpfile to the `testdir`
and delete it on actor close.
'''
tmp_path: Path = Path(tmp_path_str)
tmp_file: Path = tmp_path / f'{" ".join(ctx._actor.uid)}.file'
# create a the tmp file and tell the parent where it's at
assert not tmp_file.is_file()
tmp_file.touch()
stack: ExitStack = current_actor().lifetime_stack
stack.callback(tmp_file.unlink)
await ctx.started((
str(tmp_file),
os.getpid(),
))
# expect to be cancelled from here!
try:
# NOTE: turns out you don't even need to sched an aio task
# since the original issue, even though seemingly was due to
# the guest-run being abandoned + a `._debug.pause()` inside
# `._runtime._async_main()` (which was originally trying to
# debug the `.lifetime_stack` not closing), IS NOT actually
# the core issue?
#
# further notes:
#
# - `trio` only issues the " RuntimeWarning: Trio guest run
# got abandoned without properly finishing... weird stuff
# might happen" IFF you DO run a asyncio task here, BUT
# - the original issue of the `.lifetime_stack` not closing
# will still happen even if you don't run an `asyncio` task
# here even though the "abandon" messgage won't be shown..
#
# => ????? honestly i'm lost but it seems to be some issue
# with `asyncio` and SIGINT..
#
# honestly, this REALLY reminds me why i haven't used
# `asyncio` by choice in years.. XD
#
async with trio.open_nursery() as tn:
if bg_aio_task:
tn.start_soon(
tractor.to_asyncio.run_task,
aio_sleep_forever,
)
# XXX don't-need/doesn't-make-a-diff right
# since we're already doing it from parent?
# if send_sigint_to == 'child':
# os.kill(
# os.getpid(),
# signal.SIGINT,
# )
# XXX spend a half sec doing shielded checkpointing to
# ensure that despite the `trio`-side task ignoring the
# SIGINT, the `asyncio` side won't abandon the guest-run!
if trio_side_is_shielded:
with trio.CancelScope(shield=True):
for i in range(5):
await trio.sleep(0.1)
await trio.sleep_forever()
# signalled manually at the OS level (aka KBI) by the parent actor.
except KeyboardInterrupt:
print('child raised KBI..')
assert tmp_file.exists()
raise
raise RuntimeError('shoulda received a KBI?')
@pytest.mark.parametrize(
'trio_side_is_shielded',
[
False,
True,
],
ids=[
'trio_side_no_shielding',
'trio_side_does_shielded_work',
],
)
@pytest.mark.parametrize(
'send_sigint_to',
[
'child',
'parent',
],
ids='send_SIGINT_to={}'.format,
)
@pytest.mark.parametrize(
'bg_aio_task',
[
False,
# NOTE: (and see notes in `manage_file()` above as well) if
# we FOR SURE SPAWN AN AIO TASK in the child it seems the
# "silent-abandon" case (as is described in detail in
# `to_asyncio.run_as_asyncio_guest()`) does not happen and
# `asyncio`'s loop will at least abandon the `trio` side
# loudly? .. prolly the state-spot to start looking for
# a soln that results in NO ABANDONMENT.. XD
True,
],
ids=[
'bg_aio_task',
'just_trio_slee',
],
)
@pytest.mark.parametrize(
'wait_for_ctx',
[
False,
True,
],
ids=[
'raise_KBI_in_rent',
'wait_for_ctx',
],
)
def test_sigint_closes_lifetime_stack(
tmp_path: Path,
wait_for_ctx: bool,
bg_aio_task: bool,
trio_side_is_shielded: bool,
debug_mode: bool,
send_sigint_to: str,
):
'''
Ensure that an infected child can use the `Actor.lifetime_stack`
to make a file on boot and it's automatically cleaned up by the
actor-lifetime-linked exit stack closure.
'''
async def main():
try:
an: tractor.ActorNursery
async with tractor.open_nursery(
debug_mode=debug_mode,
) as an:
p: tractor.Portal = await an.start_actor(
'file_mngr',
enable_modules=[__name__],
infect_asyncio=True,
)
async with p.open_context(
manage_file,
tmp_path_str=str(tmp_path),
send_sigint_to=send_sigint_to,
bg_aio_task=bg_aio_task,
trio_side_is_shielded=trio_side_is_shielded,
) as (ctx, first):
path_str, cpid = first
tmp_file: Path = Path(path_str)
assert tmp_file.exists()
# XXX originally to simulate what (hopefully)
# the below now triggers.. had to manually
# trigger a SIGINT from a ctl-c in the root.
# await trio.sleep_forever()
# XXX NOTE XXX signal infected-`asyncio` child to
# OS-cancel with SIGINT; this should trigger the
# bad `asyncio` cancel behaviour that can cause
# a guest-run abandon as was seen causing
# shm-buffer leaks in `piker`'s live quote stream
# susbys!
#
await trio.sleep(.2)
pid: int = (
cpid if send_sigint_to == 'child'
else os.getpid()
)
os.kill(
pid,
signal.SIGINT,
)
# XXX CASE 1: without the bug fixed, in
# the non-KBI-raised-in-parent case, this
# timeout should trigger!
if wait_for_ctx:
print('waiting for ctx outcome in parent..')
try:
with trio.fail_after(1):
await ctx.wait_for_result()
except tractor.ContextCancelled as ctxc:
assert ctxc.canceller == ctx.chan.uid
raise
# XXX CASE 2: this seems to be the source of the
# original issue which exhibited BEFORE we put
# a `Actor.cancel_soon()` inside
# `run_as_asyncio_guest()`..
else:
raise KeyboardInterrupt
pytest.fail('should have raised some kinda error?!?')
except (
KeyboardInterrupt,
ContextCancelled,
):
# XXX CASE 2: without the bug fixed, in the
# KBI-raised-in-parent case, the actor teardown should
# never get run (silently abaondoned by `asyncio`..) and
# thus the file should leak!
assert not tmp_file.exists()
assert ctx.maybe_error
trio.run(main)
# TODO: debug_mode tests once we get support for `asyncio`!
#
# -[ ] need tests to wrap both scripts:

View File

@ -38,7 +38,7 @@ async def test_self_is_registered_localportal(reg_addr):
"Verify waiting on the arbiter to register itself using a local portal."
actor = tractor.current_actor()
assert actor.is_arbiter
async with tractor.get_arbiter(*reg_addr) as portal:
async with tractor.get_registry(*reg_addr) as portal:
assert isinstance(portal, tractor._portal.LocalPortal)
with trio.fail_after(0.2):

View File

@ -32,7 +32,7 @@ def test_abort_on_sigint(daemon):
@tractor_test
async def test_cancel_remote_arbiter(daemon, reg_addr):
assert not tractor.current_actor().is_arbiter
async with tractor.get_arbiter(*reg_addr) as portal:
async with tractor.get_registry(*reg_addr) as portal:
await portal.cancel_actor()
time.sleep(0.1)
@ -41,7 +41,7 @@ async def test_cancel_remote_arbiter(daemon, reg_addr):
# no arbiter socket should exist
with pytest.raises(OSError):
async with tractor.get_arbiter(*reg_addr) as portal:
async with tractor.get_registry(*reg_addr) as portal:
pass

View File

@ -285,14 +285,14 @@ def test_basic_payload_spec(
if invalid_started:
msg_type_str: str = 'Started'
bad_value_str: str = '10'
bad_value: int = 10
elif invalid_return:
msg_type_str: str = 'Return'
bad_value_str: str = "'yo'"
bad_value: str = 'yo'
else:
# XXX but should never be used below then..
msg_type_str: str = ''
bad_value_str: str = ''
bad_value: str = ''
maybe_mte: MsgTypeError|None = None
should_raise: Exception|None = (
@ -307,8 +307,10 @@ def test_basic_payload_spec(
raises=should_raise,
ensure_in_message=[
f"invalid `{msg_type_str}` msg payload",
f"value: `{bad_value_str}` does not "
f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`",
f'{bad_value}',
f'has type {type(bad_value)!r}',
'not match type-spec',
f'`{msg_type_str}.pld: PldMsg|NoneType`',
],
# only for debug
# post_mortem=True,

View File

@ -30,7 +30,7 @@ from ._streaming import (
stream as stream,
)
from ._discovery import (
get_arbiter as get_arbiter,
get_registry as get_registry,
find_actor as find_actor,
wait_for_actor as wait_for_actor,
query_actor as query_actor,
@ -49,6 +49,7 @@ from ._exceptions import (
ModuleNotExposed as ModuleNotExposed,
MsgTypeError as MsgTypeError,
RemoteActorError as RemoteActorError,
TransportClosed as TransportClosed,
)
from .devx import (
breakpoint as breakpoint,

View File

@ -38,6 +38,7 @@ from collections import deque
from contextlib import (
asynccontextmanager as acm,
)
from contextvars import Token
from dataclasses import (
dataclass,
field,
@ -45,6 +46,7 @@ from dataclasses import (
from functools import partial
import inspect
from pprint import pformat
import textwrap
from typing import (
Any,
AsyncGenerator,
@ -121,10 +123,19 @@ class Unresolved:
@dataclass
class Context:
'''
An inter-actor, SC transitive, `Task` communication context.
An inter-actor, SC transitive, `trio.Task` (pair)
communication context.
NB: This class should **never be instatiated directly**, it is allocated
by the runtime in 2 ways:
(We've also considered other names and ideas:
- "communicating tasks scope": cts
- "distributed task scope": dts
- "communicating tasks context": ctc
**Got a better idea for naming? Make an issue dawg!**
)
NB: This class should **never be instatiated directly**, it is
allocated by the runtime in 2 ways:
- by entering `Portal.open_context()` which is the primary
public API for any "parent" task or,
- by the RPC machinery's `._rpc._invoke()` as a `ctx` arg
@ -210,6 +221,16 @@ class Context:
# more the the `Context` is needed?
_portal: Portal | None = None
@property
def portal(self) -> Portal|None:
'''
Return any wrapping memory-`Portal` if this is
a 'parent'-side task which called `Portal.open_context()`,
otherwise `None`.
'''
return self._portal
# NOTE: each side of the context has its own cancel scope
# which is exactly the primitive that allows for
# cross-actor-task-supervision and thus SC.
@ -299,6 +320,8 @@ class Context:
# boxed exception. NOW, it's used for spawning overrun queuing
# tasks when `.allow_overruns == True` !!!
_scope_nursery: trio.Nursery|None = None
# ^-TODO-^ change name?
# -> `._scope_tn` "scope task nursery"
# streaming overrun state tracking
_in_overrun: bool = False
@ -313,6 +336,7 @@ class Context:
extra_fields: dict[str, Any]|None = None,
# ^-TODO-^ some built-in extra state fields
# we'll want in some devx specific cases?
indent: str|None = None,
) -> str:
ds: str = '='
@ -332,7 +356,6 @@ class Context:
show_error_fields=True
)
fmtstr: str = (
f'<Context(\n'
# f'\n'
# f' ---\n'
f' |_ipc: {self.dst_maddr}\n'
@ -379,11 +402,20 @@ class Context:
f' {key}{ds}{val!r}\n'
)
if indent:
fmtstr = textwrap.indent(
fmtstr,
prefix=indent,
)
return (
'<Context(\n'
+
fmtstr
+
')>\n'
f'{indent})>\n'
)
# NOTE: making this return a value that can be passed to
# `eval()` is entirely **optional** dawggg B)
# https://docs.python.org/3/library/functions.html#repr
@ -408,10 +440,23 @@ class Context:
'''
return self._cancel_called
@cancel_called.setter
def cancel_called(self, val: bool) -> None:
'''
Set the self-cancelled request `bool` value.
'''
# to debug who frickin sets it..
# if val:
# from .devx import pause_from_sync
# pause_from_sync()
self._cancel_called = val
@property
def canceller(self) -> tuple[str, str]|None:
'''
``Actor.uid: tuple[str, str]`` of the (remote)
`Actor.uid: tuple[str, str]` of the (remote)
actor-process who's task was cancelled thus causing this
(side of the) context to also be cancelled.
@ -515,7 +560,7 @@ class Context:
# the local scope was never cancelled
# and instead likely we received a remote side
# # cancellation that was raised inside `.result()`
# # cancellation that was raised inside `.wait_for_result()`
# or (
# (se := self._local_error)
# and se is re
@ -585,6 +630,10 @@ class Context:
self,
error: BaseException,
# TODO: manual toggle for cases where we wouldn't normally
# mark ourselves cancelled but want to?
# set_cancel_called: bool = False,
) -> None:
'''
(Maybe) cancel this local scope due to a received remote
@ -603,7 +652,7 @@ class Context:
- `Portal.open_context()`
- `Portal.result()`
- `Context.open_stream()`
- `Context.result()`
- `Context.wait_for_result()`
when called/closed by actor local task(s).
@ -729,7 +778,7 @@ class Context:
# Cancel the local `._scope`, catch that
# `._scope.cancelled_caught` and re-raise any remote error
# once exiting (or manually calling `.result()`) the
# once exiting (or manually calling `.wait_for_result()`) the
# `.open_context()` block.
cs: trio.CancelScope = self._scope
if (
@ -764,8 +813,9 @@ class Context:
# `trio.Cancelled` subtype here ;)
# https://github.com/goodboy/tractor/issues/368
message: str = 'Cancelling `Context._scope` !\n\n'
# from .devx import pause_from_sync
# pause_from_sync()
self._scope.cancel()
else:
message: str = 'NOT cancelling `Context._scope` !\n\n'
# from .devx import mk_pdb
@ -845,15 +895,15 @@ class Context:
@property
def repr_api(self) -> str:
return 'Portal.open_context()'
# TODO: use `.dev._frame_stack` scanning to find caller!
# ci: CallerInfo|None = self._caller_info
# if ci:
# return (
# f'{ci.api_nsp}()\n'
# )
# TODO: use `.dev._frame_stack` scanning to find caller!
return 'Portal.open_context()'
async def cancel(
self,
timeout: float = 0.616,
@ -889,16 +939,18 @@ class Context:
'''
side: str = self.side
self._cancel_called: bool = True
# XXX for debug via the `@.setter`
self.cancel_called = True
header: str = (
f'Cancelling ctx with peer from {side.upper()} side\n\n'
f'Cancelling ctx from {side.upper()}-side\n'
)
reminfo: str = (
# ' =>\n'
f'Context.cancel() => {self.chan.uid}\n'
# f'Context.cancel() => {self.chan.uid}\n'
f'c)=> {self.chan.uid}\n'
# f'{self.chan.uid}\n'
f' |_ @{self.dst_maddr}\n'
f' |_ @{self.dst_maddr}\n'
f' >> {self.repr_rpc}\n'
# f' >> {self._nsf}() -> {codec}[dict]:\n\n'
# TODO: pull msg-type from spec re #320
@ -912,7 +964,7 @@ class Context:
# `._scope.cancel()` since we expect the eventual
# `ContextCancelled` from the other side to trigger this
# when the runtime finally receives it during teardown
# (normally in `.result()` called from
# (normally in `.wait_for_result()` called from
# `Portal.open_context().__aexit__()`)
if side == 'parent':
if not self._portal:
@ -1025,10 +1077,10 @@ class Context:
'''
__tracebackhide__: bool = hide_tb
our_uid: tuple = self.chan.uid
peer_uid: tuple = self.chan.uid
# XXX NOTE XXX: `ContextCancelled`/`StreamOverrun` absorption
# for "graceful cancellation" case:
# for "graceful cancellation" case(s):
#
# Whenever a "side" of a context (a `Task` running in
# an actor) **is** the side which requested ctx
@ -1045,9 +1097,11 @@ class Context:
# set to the `Actor.uid` of THIS task (i.e. the
# cancellation requesting task's actor is the actor
# checking whether it should absorb the ctxc).
self_ctxc: bool = self._is_self_cancelled(remote_error)
if (
self_ctxc
and
not raise_ctxc_from_self_call
and self._is_self_cancelled(remote_error)
# TODO: ?potentially it is useful to emit certain
# warning/cancel logs for the cases where the
@ -1077,8 +1131,8 @@ class Context:
and isinstance(remote_error, RemoteActorError)
and remote_error.boxed_type is StreamOverrun
# and tuple(remote_error.msgdata['sender']) == our_uid
and tuple(remote_error.sender) == our_uid
# and tuple(remote_error.msgdata['sender']) == peer_uid
and tuple(remote_error.sender) == peer_uid
):
# NOTE: we set the local scope error to any "self
# cancellation" error-response thus "absorbing"
@ -1140,9 +1194,9 @@ class Context:
of the remote cancellation.
'''
__tracebackhide__ = hide_tb
__tracebackhide__: bool = hide_tb
assert self._portal, (
"Context.result() can not be called from callee side!"
'`Context.wait_for_result()` can not be called from callee side!'
)
if self._final_result_is_set():
return self._result
@ -1197,10 +1251,11 @@ class Context:
# raising something we know might happen
# during cancellation ;)
(not self._cancel_called)
)
),
hide_tb=hide_tb,
)
# TODO: eventually make `.outcome: Outcome` and thus return
# `self.outcome.unwrap()` here!
# `self.outcome.unwrap()` here?
return self.outcome
# TODO: switch this with above!
@ -1223,6 +1278,12 @@ class Context:
@property
def maybe_error(self) -> BaseException|None:
'''
Return the (remote) error as outcome or `None`.
Remote errors take precedence over local ones.
'''
le: BaseException|None = self._local_error
re: RemoteActorError|ContextCancelled|None = self._remote_error
@ -1284,17 +1345,24 @@ class Context:
Any|
RemoteActorError|
ContextCancelled
# TODO: make this a `outcome.Outcome`!
):
'''
The final "outcome" from an IPC context which can either be
some Value returned from the target `@context`-decorated
remote task-as-func, or an `Error` wrapping an exception
raised from an RPC task fault or cancellation.
Return the "final outcome" (state) of the far end peer task
non-blocking. If the remote task has not completed then this
field always resolves to the module defined `Unresolved`
handle.
Note that if the remote task has not terminated then this
field always resolves to the module defined `Unresolved` handle.
------ - ------
TODO->( this is doc-driven-dev content not yet actual ;P )
TODO: implement this using `outcome.Outcome` types?
The final "outcome" from an IPC context which can be any of:
- some `outcome.Value` which boxes the returned output from the peer task's
`@context`-decorated remote task-as-func, or
- an `outcome.Error` wrapping an exception raised that same RPC task
after a fault or cancellation, or
- an unresolved `outcome.Outcome` when the peer task is still
executing and has not yet completed.
'''
return (
@ -1583,7 +1651,7 @@ class Context:
- NEVER `return` early before delivering the msg!
bc if the error is a ctxc and there is a task waiting on
`.result()` we need the msg to be
`.wait_for_result()` we need the msg to be
`send_chan.send_nowait()`-ed over the `._rx_chan` so
that the error is relayed to that waiter task and thus
raised in user code!
@ -1828,7 +1896,7 @@ async def open_context_from_portal(
When the "callee" (side that is "called"/started by a call
to *this* method) returns, the caller side (this) unblocks
and any final value delivered from the other end can be
retrieved using the `Contex.result()` api.
retrieved using the `Contex.wait_for_result()` api.
The yielded ``Context`` instance further allows for opening
bidirectional streams, explicit cancellation and
@ -1893,7 +1961,7 @@ async def open_context_from_portal(
)
assert ctx._remote_func_type == 'context'
assert ctx._caller_info
_ctxvar_Context.set(ctx)
prior_ctx_tok: Token = _ctxvar_Context.set(ctx)
# placeholder for any exception raised in the runtime
# or by user tasks which cause this context's closure.
@ -1965,14 +2033,14 @@ async def open_context_from_portal(
yield ctx, first
# ??TODO??: do we still want to consider this or is
# the `else:` block handling via a `.result()`
# the `else:` block handling via a `.wait_for_result()`
# call below enough??
#
# -[ ] pretty sure `.result()` internals do the
# -[ ] pretty sure `.wait_for_result()` internals do the
# same as our ctxc handler below so it ended up
# being same (repeated?) behaviour, but ideally we
# wouldn't have that duplication either by somehow
# factoring the `.result()` handler impl in a way
# factoring the `.wait_for_result()` handler impl in a way
# that we can re-use it around the `yield` ^ here
# or vice versa?
#
@ -2110,7 +2178,7 @@ async def open_context_from_portal(
# AND a group-exc is only raised if there was > 1
# tasks started *here* in the "caller" / opener
# block. If any one of those tasks calls
# `.result()` or `MsgStream.receive()`
# `.wait_for_result()` or `MsgStream.receive()`
# `._maybe_raise_remote_err()` will be transitively
# called and the remote error raised causing all
# tasks to be cancelled.
@ -2131,9 +2199,16 @@ async def open_context_from_portal(
# handled in the block above ^^^ !!
# await _debug.pause()
# log.cancel(
log.exception(
f'{ctx.side}-side of `Context` terminated with '
f'.outcome => {ctx.repr_outcome()}\n'
match scope_err:
case trio.Cancelled:
logmeth = log.cancel
# XXX explicitly report on any non-graceful-taskc cases
case _:
logmeth = log.exception
logmeth(
f'ctx {ctx.side!r}-side exited with {ctx.repr_outcome()}\n'
)
if debug_mode():
@ -2180,7 +2255,7 @@ async def open_context_from_portal(
f'|_{ctx._task}\n'
)
# XXX NOTE XXX: the below call to
# `Context.result()` will ALWAYS raise
# `Context.wait_for_result()` will ALWAYS raise
# a `ContextCancelled` (via an embedded call to
# `Context._maybe_raise_remote_err()`) IFF
# a `Context._remote_error` was set by the runtime
@ -2190,10 +2265,10 @@ async def open_context_from_portal(
# ALWAYS SET any time "callee" side fails and causes "caller
# side" cancellation via a `ContextCancelled` here.
try:
result_or_err: Exception|Any = await ctx.result()
result_or_err: Exception|Any = await ctx.wait_for_result()
except BaseException as berr:
# on normal teardown, if we get some error
# raised in `Context.result()` we still want to
# raised in `Context.wait_for_result()` we still want to
# save that error on the ctx's state to
# determine things like `.cancelled_caught` for
# cases where there was remote cancellation but
@ -2311,8 +2386,9 @@ async def open_context_from_portal(
and ctx.cancel_acked
):
log.cancel(
f'Context cancelled by {ctx.side!r}-side task\n'
f'|_{ctx._task}\n\n'
f'Context cancelled by local {ctx.side!r}-side task\n'
f'c)>\n'
f' |_{ctx._task}\n\n'
f'{repr(scope_err)}\n'
)
@ -2328,8 +2404,10 @@ async def open_context_from_portal(
# type_only=True,
)
log.cancel(
f'Context terminated due to local {ctx.side!r}-side error:\n\n'
f'{ctx.chan.uid} => {outcome_str}\n'
f'Context terminated due to {ctx.side!r}-side\n\n'
# TODO: do an x)> on err and c)> only for ctxc?
f'c)> {outcome_str}\n'
f' |_{ctx.repr_rpc}\n'
)
# FINALLY, remove the context from runtime tracking and
@ -2344,6 +2422,9 @@ async def open_context_from_portal(
None,
)
# XXX revert to prior IPC-task-ctx scope
_ctxvar_Context.reset(prior_ctx_tok)
def mk_context(
chan: Channel,

View File

@ -26,8 +26,8 @@ from typing import (
TYPE_CHECKING,
)
from contextlib import asynccontextmanager as acm
import warnings
from tractor.log import get_logger
from .trionics import gather_contexts
from ._ipc import _connect_chan, Channel
from ._portal import (
@ -40,11 +40,13 @@ from ._state import (
_runtime_vars,
)
if TYPE_CHECKING:
from ._runtime import Actor
log = get_logger(__name__)
@acm
async def get_registry(
host: str,
@ -56,14 +58,12 @@ async def get_registry(
]:
'''
Return a portal instance connected to a local or remote
arbiter.
registry-service actor; if a connection already exists re-use it
(presumably to call a `.register_actor()` registry runtime RPC
ep).
'''
actor = current_actor()
if not actor:
raise RuntimeError("No actor instance has been defined yet?")
actor: Actor = current_actor()
if actor.is_registrar:
# we're already the arbiter
# (likely a re-entrant call from the arbiter actor)
@ -72,6 +72,8 @@ async def get_registry(
Channel((host, port))
)
else:
# TODO: try to look pre-existing connection from
# `Actor._peers` and use it instead?
async with (
_connect_chan(host, port) as chan,
open_portal(chan) as regstr_ptl,
@ -80,19 +82,6 @@ async def get_registry(
# TODO: deprecate and this remove _arbiter form!
@acm
async def get_arbiter(*args, **kwargs):
warnings.warn(
'`tractor.get_arbiter()` is now deprecated!\n'
'Use `.get_registry()` instead!',
DeprecationWarning,
stacklevel=2,
)
async with get_registry(*args, **kwargs) as to_yield:
yield to_yield
@acm
async def get_root(
**kwargs,
@ -110,22 +99,53 @@ async def get_root(
yield portal
def get_peer_by_name(
name: str,
# uuid: str|None = None,
) -> list[Channel]|None: # at least 1
'''
Scan for an existing connection (set) to a named actor
and return any channels from `Actor._peers`.
This is an optimization method over querying the registrar for
the same info.
'''
actor: Actor = current_actor()
to_scan: dict[tuple, list[Channel]] = actor._peers.copy()
pchan: Channel|None = actor._parent_chan
if pchan:
to_scan[pchan.uid].append(pchan)
for aid, chans in to_scan.items():
_, peer_name = aid
if name == peer_name:
if not chans:
log.warning(
'No IPC chans for matching peer {peer_name}\n'
)
continue
return chans
return None
@acm
async def query_actor(
name: str,
arbiter_sockaddr: tuple[str, int] | None = None,
regaddr: tuple[str, int] | None = None,
regaddr: tuple[str, int]|None = None,
) -> AsyncGenerator[
tuple[str, int] | None,
tuple[str, int]|None,
None,
]:
'''
Make a transport address lookup for an actor name to a specific
registrar.
Lookup a transport address (by actor name) via querying a registrar
listening @ `regaddr`.
Returns the (socket) address or ``None`` if no entry under that
name exists for the given registrar listening @ `regaddr`.
Returns the transport protocol (socket) address or `None` if no
entry under that name exists.
'''
actor: Actor = current_actor()
@ -137,14 +157,10 @@ async def query_actor(
'The current actor IS the registry!?'
)
if arbiter_sockaddr is not None:
warnings.warn(
'`tractor.query_actor(regaddr=<blah>)` is deprecated.\n'
'Use `registry_addrs: list[tuple]` instead!',
DeprecationWarning,
stacklevel=2,
)
regaddr: list[tuple[str, int]] = arbiter_sockaddr
maybe_peers: list[Channel]|None = get_peer_by_name(name)
if maybe_peers:
yield maybe_peers[0].raddr
return
reg_portal: Portal
regaddr: tuple[str, int] = regaddr or actor.reg_addrs[0]
@ -159,10 +175,28 @@ async def query_actor(
yield sockaddr
@acm
async def maybe_open_portal(
addr: tuple[str, int],
name: str,
):
async with query_actor(
name=name,
regaddr=addr,
) as sockaddr:
pass
if sockaddr:
async with _connect_chan(*sockaddr) as chan:
async with open_portal(chan) as portal:
yield portal
else:
yield None
@acm
async def find_actor(
name: str,
arbiter_sockaddr: tuple[str, int]|None = None,
registry_addrs: list[tuple[str, int]]|None = None,
only_first: bool = True,
@ -179,29 +213,12 @@ async def find_actor(
known to the arbiter.
'''
if arbiter_sockaddr is not None:
warnings.warn(
'`tractor.find_actor(arbiter_sockaddr=<blah>)` is deprecated.\n'
'Use `registry_addrs: list[tuple]` instead!',
DeprecationWarning,
stacklevel=2,
)
registry_addrs: list[tuple[str, int]] = [arbiter_sockaddr]
@acm
async def maybe_open_portal_from_reg_addr(
addr: tuple[str, int],
):
async with query_actor(
name=name,
regaddr=addr,
) as sockaddr:
if sockaddr:
async with _connect_chan(*sockaddr) as chan:
async with open_portal(chan) as portal:
yield portal
else:
yield None
# optimization path, use any pre-existing peer channel
maybe_peers: list[Channel]|None = get_peer_by_name(name)
if maybe_peers and only_first:
async with open_portal(maybe_peers[0]) as peer_portal:
yield peer_portal
return
if not registry_addrs:
# XXX NOTE: make sure to dynamically read the value on
@ -217,10 +234,13 @@ async def find_actor(
maybe_portals: list[
AsyncContextManager[tuple[str, int]]
] = list(
maybe_open_portal_from_reg_addr(addr)
maybe_open_portal(
addr=addr,
name=name,
)
for addr in registry_addrs
)
portals: list[Portal]
async with gather_contexts(
mngrs=maybe_portals,
) as portals:
@ -254,31 +274,31 @@ async def find_actor(
@acm
async def wait_for_actor(
name: str,
arbiter_sockaddr: tuple[str, int] | None = None,
registry_addr: tuple[str, int] | None = None,
) -> AsyncGenerator[Portal, None]:
'''
Wait on an actor to register with the arbiter.
A portal to the first registered actor is returned.
Wait on at least one peer actor to register `name` with the
registrar, yield a `Portal to the first registree.
'''
actor: Actor = current_actor()
if arbiter_sockaddr is not None:
warnings.warn(
'`tractor.wait_for_actor(arbiter_sockaddr=<foo>)` is deprecated.\n'
'Use `registry_addr: tuple` instead!',
DeprecationWarning,
stacklevel=2,
)
registry_addr: tuple[str, int] = arbiter_sockaddr
# optimization path, use any pre-existing peer channel
maybe_peers: list[Channel]|None = get_peer_by_name(name)
if maybe_peers:
async with open_portal(maybe_peers[0]) as peer_portal:
yield peer_portal
return
regaddr: tuple[str, int] = (
registry_addr
or
actor.reg_addrs[0]
)
# TODO: use `.trionics.gather_contexts()` like
# above in `find_actor()` as well?
reg_portal: Portal
regaddr: tuple[str, int] = registry_addr or actor.reg_addrs[0]
async with get_registry(*regaddr) as reg_portal:
sockaddrs = await reg_portal.run_from_ns(
'self',

View File

@ -20,7 +20,9 @@ Sub-process entry points.
"""
from __future__ import annotations
from functools import partial
# import textwrap
import multiprocessing as mp
import os
import textwrap
from typing import (
Any,
TYPE_CHECKING,
@ -58,25 +60,27 @@ def _mp_main(
) -> None:
'''
The routine called *after fork* which invokes a fresh ``trio.run``
The routine called *after fork* which invokes a fresh `trio.run()`
'''
actor._forkserver_info = forkserver_info
from ._spawn import try_set_start_method
spawn_ctx = try_set_start_method(start_method)
spawn_ctx: mp.context.BaseContext = try_set_start_method(start_method)
assert spawn_ctx
if actor.loglevel is not None:
log.info(
f"Setting loglevel for {actor.uid} to {actor.loglevel}")
f'Setting loglevel for {actor.uid} to {actor.loglevel}'
)
get_console_log(actor.loglevel)
assert spawn_ctx
# TODO: use scops headers like for `trio` below!
# (well after we libify it maybe..)
log.info(
f"Started new {spawn_ctx.current_process()} for {actor.uid}")
_state._current_actor = actor
log.debug(f"parent_addr is {parent_addr}")
f'Started new {spawn_ctx.current_process()} for {actor.uid}'
# f"parent_addr is {parent_addr}"
)
_state._current_actor: Actor = actor
trio_main = partial(
async_main,
actor=actor,
@ -93,7 +97,110 @@ def _mp_main(
pass # handle it the same way trio does?
finally:
log.info(f"Subactor {actor.uid} terminated")
log.info(
f'`mp`-subactor {actor.uid} exited'
)
# TODO: move this func to some kinda `.devx._conc_lang.py` eventually
# as we work out our multi-domain state-flow-syntax!
def nest_from_op(
input_op: str,
#
# ?TODO? an idea for a syntax to the state of concurrent systems
# as a "3-domain" (execution, scope, storage) model and using
# a minimal ascii/utf-8 operator-set.
#
# try not to take any of this seriously yet XD
#
# > is a "play operator" indicating (CPU bound)
# exec/work/ops required at the "lowest level computing"
#
# execution primititves (tasks, threads, actors..) denote their
# lifetime with '(' and ')' since parentheses normally are used
# in many langs to denote function calls.
#
# starting = (
# >( opening/starting; beginning of the thread-of-exec (toe?)
# (> opened/started, (finished spawning toe)
# |_<Task: blah blah..> repr of toe, in py these look like <objs>
#
# >) closing/exiting/stopping,
# )> closed/exited/stopped,
# |_<Task: blah blah..>
# [OR <), )< ?? ]
#
# ending = )
# >c) cancelling to close/exit
# c)> cancelled (caused close), OR?
# |_<Actor: ..>
# OR maybe "<c)" which better indicates the cancel being
# "delivered/returned" / returned" to LHS?
#
# >x) erroring to eventuall exit
# x)> errored and terminated
# |_<Actor: ...>
#
# scopes: supers/nurseries, IPC-ctxs, sessions, perms, etc.
# >{ opening
# {> opened
# }> closed
# >} closing
#
# storage: like queues, shm-buffers, files, etc..
# >[ opening
# [> opened
# |_<FileObj: ..>
#
# >] closing
# ]> closed
# IPC ops: channels, transports, msging
# => req msg
# <= resp msg
# <=> 2-way streaming (of msgs)
# <- recv 1 msg
# -> send 1 msg
#
# TODO: still not sure on R/L-HS approach..?
# =>( send-req to exec start (task, actor, thread..)
# (<= recv-req to ^
#
# (<= recv-req ^
# <=( recv-resp opened remote exec primitive
# <=) recv-resp closed
#
# )<=c req to stop due to cancel
# c=>) req to stop due to cancel
#
# =>{ recv-req to open
# <={ send-status that it closed
tree_str: str,
# NOTE: so move back-from-the-left of the `input_op` by
# this amount.
back_from_op: int = 0,
) -> str:
'''
Depth-increment the input (presumably hierarchy/supervision)
input "tree string" below the provided `input_op` execution
operator, so injecting a `"\n|_{input_op}\n"`and indenting the
`tree_str` to nest content aligned with the ops last char.
'''
return (
f'{input_op}\n'
+
textwrap.indent(
tree_str,
prefix=(
len(input_op)
-
(back_from_op + 1)
) * ' ',
)
)
def _trio_main(
@ -107,7 +214,6 @@ def _trio_main(
Entry point for a `trio_run_in_process` subactor.
'''
# __tracebackhide__: bool = True
_debug.hide_runtime_frames()
_state._current_actor = actor
@ -119,7 +225,6 @@ def _trio_main(
if actor.loglevel is not None:
get_console_log(actor.loglevel)
import os
actor_info: str = (
f'|_{actor}\n'
f' uid: {actor.uid}\n'
@ -128,13 +233,24 @@ def _trio_main(
f' loglevel: {actor.loglevel}\n'
)
log.info(
'Started new trio subactor:\n'
'Starting new `trio` subactor:\n'
+
'>\n' # like a "started/play"-icon from super perspective
+
actor_info,
nest_from_op(
input_op='>(', # see syntax ideas above
tree_str=actor_info,
back_from_op=1,
)
)
logmeth = log.info
exit_status: str = (
'Subactor exited\n'
+
nest_from_op(
input_op=')>', # like a "closed-to-play"-icon from super perspective
tree_str=actor_info,
back_from_op=1,
)
)
try:
if infect_asyncio:
actor._infected_aio = True
@ -143,16 +259,28 @@ def _trio_main(
trio.run(trio_main)
except KeyboardInterrupt:
log.cancel(
'Actor received KBI\n'
logmeth = log.cancel
exit_status: str = (
'Actor received KBI (aka an OS-cancel)\n'
+
actor_info
nest_from_op(
input_op='c)>', # closed due to cancel (see above)
tree_str=actor_info,
)
)
except BaseException as err:
logmeth = log.error
exit_status: str = (
'Main actor task exited due to crash?\n'
+
nest_from_op(
input_op='x)>', # closed by error
tree_str=actor_info,
)
)
# NOTE since we raise a tb will already be shown on the
# console, thus we do NOT use `.exception()` above.
raise err
finally:
log.info(
'Subactor terminated\n'
+
'x\n' # like a "crossed-out/killed" from super perspective
+
actor_info
)
logmeth(exit_status)

View File

@ -906,8 +906,59 @@ class StreamOverrun(
'''
class TransportClosed(trio.ClosedResourceError):
"Underlying channel transport was closed prior to use"
class TransportClosed(trio.BrokenResourceError):
'''
IPC transport (protocol) connection was closed or broke and
indicates that the wrapping communication `Channel` can no longer
be used to send/receive msgs from the remote peer.
'''
def __init__(
self,
message: str,
loglevel: str = 'transport',
cause: BaseException|None = None,
raise_on_report: bool = False,
) -> None:
self.message: str = message
self._loglevel = loglevel
super().__init__(message)
if cause is not None:
self.__cause__ = cause
# flag to toggle whether the msg loop should raise
# the exc in its `TransportClosed` handler block.
self._raise_on_report = raise_on_report
def report_n_maybe_raise(
self,
message: str|None = None,
) -> None:
'''
Using the init-specified log level emit a logging report
for this error.
'''
message: str = message or self.message
# when a cause is set, slap it onto the log emission.
if cause := self.__cause__:
cause_tb_str: str = ''.join(
traceback.format_tb(cause.__traceback__)
)
message += (
f'{cause_tb_str}\n' # tb
f' {cause}\n' # exc repr
)
getattr(log, self._loglevel)(message)
# some errors we want to blow up from
# inside the RPC msg loop
if self._raise_on_report:
raise self from cause
class NoResult(RuntimeError):
@ -922,6 +973,12 @@ class NoRuntime(RuntimeError):
"The root actor has not been initialized yet"
class MessagingError(Exception):
'''
IPC related msg (typing), transaction (ordering) or dialog
handling error.
'''
class AsyncioCancelled(Exception):
'''
@ -929,12 +986,8 @@ class AsyncioCancelled(Exception):
for use with the ``to_asyncio`` module
to be raised in the ``trio`` side task
'''
class MessagingError(Exception):
'''
IPC related msg (typing), transaction (ordering) or dialog
handling error.
NOTE: this should NOT inherit from `asyncio.CancelledError` or
tests should break!
'''
@ -1324,7 +1377,9 @@ def _mk_recv_mte(
any_pld: Any = msgpack.decode(msg.pld)
message: str = (
f'invalid `{msg_type.__qualname__}` msg payload\n\n'
f'value: `{any_pld!r}` does not match type-spec: '
f'{any_pld!r}\n\n'
f'has type {type(any_pld)!r}\n\n'
f'and does not match type-spec '
f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`'
)
bad_msg = msg

View File

@ -54,7 +54,7 @@ from tractor._exceptions import (
)
from tractor.msg import (
_ctxvar_MsgCodec,
_codec,
# _codec, XXX see `self._codec` sanity/debug checks
MsgCodec,
types as msgtypes,
pretty_struct,
@ -65,8 +65,18 @@ log = get_logger(__name__)
_is_windows = platform.system() == 'Windows'
def get_stream_addrs(stream: trio.SocketStream) -> tuple:
# should both be IP sockets
def get_stream_addrs(
stream: trio.SocketStream
) -> tuple[
tuple[str, int], # local
tuple[str, int], # remote
]:
'''
Return the `trio` streaming transport prot's socket-addrs for
both the local and remote sides as a pair.
'''
# rn, should both be IP sockets
lsockname = stream.socket.getsockname()
rsockname = stream.socket.getpeername()
return (
@ -75,17 +85,22 @@ def get_stream_addrs(stream: trio.SocketStream) -> tuple:
)
# TODO: this should be our `Union[*msgtypes.__spec__]` now right?
MsgType = TypeVar("MsgType")
# TODO: consider using a generic def and indexing with our eventual
# msg definition/types?
# - https://docs.python.org/3/library/typing.html#typing.Protocol
# - https://jcristharif.com/msgspec/usage.html#structs
# from tractor.msg.types import MsgType
# ?TODO? this should be our `Union[*msgtypes.__spec__]` alias now right..?
# => BLEH, except can't bc prots must inherit typevar or param-spec
# vars..
MsgType = TypeVar('MsgType')
# TODO: break up this mod into a subpkg so we can start adding new
# backends and move this type stuff into a dedicated file.. Bo
#
@runtime_checkable
class MsgTransport(Protocol[MsgType]):
#
# ^-TODO-^ consider using a generic def and indexing with our
# eventual msg definition/types?
# - https://docs.python.org/3/library/typing.html#typing.Protocol
stream: trio.SocketStream
drained: list[MsgType]
@ -120,9 +135,9 @@ class MsgTransport(Protocol[MsgType]):
...
# TODO: not sure why we have to inherit here, but it seems to be an
# issue with ``get_msg_transport()`` returning a ``Type[Protocol]``;
# probably should make a `mypy` issue?
# TODO: typing oddity.. not sure why we have to inherit here, but it
# seems to be an issue with `get_msg_transport()` returning
# a `Type[Protocol]`; probably should make a `mypy` issue?
class MsgpackTCPStream(MsgTransport):
'''
A ``trio.SocketStream`` delivering ``msgpack`` formatted data
@ -145,7 +160,7 @@ class MsgpackTCPStream(MsgTransport):
# https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types
#
# TODO: define this as a `Codec` struct which can be
# overriden dynamically by the application/runtime.
# overriden dynamically by the application/runtime?
codec: tuple[
Callable[[Any], Any]|None, # coder
Callable[[type, Any], Any]|None, # decoder
@ -160,7 +175,7 @@ class MsgpackTCPStream(MsgTransport):
self._laddr, self._raddr = get_stream_addrs(stream)
# create read loop instance
self._agen = self._iter_packets()
self._aiter_pkts = self._iter_packets()
self._send_lock = trio.StrictFIFOLock()
# public i guess?
@ -174,15 +189,12 @@ class MsgpackTCPStream(MsgTransport):
# allow for custom IPC msg interchange format
# dynamic override Bo
self._task = trio.lowlevel.current_task()
self._codec: MsgCodec = (
codec
or
_codec._ctxvar_MsgCodec.get()
)
# TODO: mask out before release?
# log.runtime(
# f'New {self} created with codec\n'
# f'codec: {self._codec}\n'
# XXX for ctxvar debug only!
# self._codec: MsgCodec = (
# codec
# or
# _codec._ctxvar_MsgCodec.get()
# )
async def _iter_packets(self) -> AsyncGenerator[dict, None]:
@ -190,6 +202,11 @@ class MsgpackTCPStream(MsgTransport):
Yield `bytes`-blob decoded packets from the underlying TCP
stream using the current task's `MsgCodec`.
This is a streaming routine implemented as an async generator
func (which was the original design, but could be changed?)
and is allocated by a `.__call__()` inside `.__init__()` where
it is assigned to the `._aiter_pkts` attr.
'''
decodes_failed: int = 0
@ -204,16 +221,82 @@ class MsgpackTCPStream(MsgTransport):
# seem to be getting racy failures here on
# arbiter/registry name subs..
trio.BrokenResourceError,
):
raise TransportClosed(
f'transport {self} was already closed prior ro read'
)
) as trans_err:
loglevel = 'transport'
match trans_err:
# case (
# ConnectionResetError()
# ):
# loglevel = 'transport'
# peer actor (graceful??) TCP EOF but `tricycle`
# seems to raise a 0-bytes-read?
case ValueError() if (
'unclean EOF' in trans_err.args[0]
):
pass
# peer actor (task) prolly shutdown quickly due
# to cancellation
case trio.BrokenResourceError() if (
'Connection reset by peer' in trans_err.args[0]
):
pass
# unless the disconnect condition falls under "a
# normal operation breakage" we usualy console warn
# about it.
case _:
loglevel: str = 'warning'
raise TransportClosed(
message=(
f'IPC transport already closed by peer\n'
f'x)> {type(trans_err)}\n'
f' |_{self}\n'
),
loglevel=loglevel,
) from trans_err
# XXX definitely can happen if transport is closed
# manually by another `trio.lowlevel.Task` in the
# same actor; we use this in some simulated fault
# testing for ex, but generally should never happen
# under normal operation!
#
# NOTE: as such we always re-raise this error from the
# RPC msg loop!
except trio.ClosedResourceError as closure_err:
raise TransportClosed(
message=(
f'IPC transport already manually closed locally?\n'
f'x)> {type(closure_err)} \n'
f' |_{self}\n'
),
loglevel='error',
raise_on_report=(
closure_err.args[0] == 'another task closed this fd'
or
closure_err.args[0] in ['another task closed this fd']
),
) from closure_err
# graceful TCP EOF disconnect
if header == b'':
raise TransportClosed(
f'transport {self} was already closed prior ro read'
message=(
f'IPC transport already gracefully closed\n'
f')>\n'
f'|_{self}\n'
),
loglevel='transport',
# cause=??? # handy or no?
)
size: int
size, = struct.unpack("<I", header)
log.transport(f'received header {size}') # type: ignore
@ -225,33 +308,20 @@ class MsgpackTCPStream(MsgTransport):
# the current `MsgCodec`.
codec: MsgCodec = _ctxvar_MsgCodec.get()
# TODO: mask out before release?
if self._codec.pld_spec != codec.pld_spec:
# assert (
# task := trio.lowlevel.current_task()
# ) is not self._task
# self._task = task
self._codec = codec
log.runtime(
f'Using new codec in {self}.recv()\n'
f'codec: {self._codec}\n\n'
f'msg_bytes: {msg_bytes}\n'
)
# XXX for ctxvar debug only!
# if self._codec.pld_spec != codec.pld_spec:
# assert (
# task := trio.lowlevel.current_task()
# ) is not self._task
# self._task = task
# self._codec = codec
# log.runtime(
# f'Using new codec in {self}.recv()\n'
# f'codec: {self._codec}\n\n'
# f'msg_bytes: {msg_bytes}\n'
# )
yield codec.decode(msg_bytes)
# TODO: remove, was only for orig draft impl
# testing.
#
# curr_codec: MsgCodec = _ctxvar_MsgCodec.get()
# obj = curr_codec.decode(msg_bytes)
# if (
# curr_codec is not
# _codec._def_msgspec_codec
# ):
# print(f'OBJ: {obj}\n')
#
# yield obj
# XXX NOTE: since the below error derives from
# `DecodeError` we need to catch is specially
# and always raise such that spec violations
@ -295,7 +365,8 @@ class MsgpackTCPStream(MsgTransport):
msg: msgtypes.MsgType,
strict_types: bool = True,
# hide_tb: bool = False,
hide_tb: bool = False,
) -> None:
'''
Send a msgpack encoded py-object-blob-as-msg over TCP.
@ -304,21 +375,24 @@ class MsgpackTCPStream(MsgTransport):
invalid msg type
'''
# __tracebackhide__: bool = hide_tb
__tracebackhide__: bool = hide_tb
# XXX see `trio._sync.AsyncContextManagerMixin` for details
# on the `.acquire()`/`.release()` sequencing..
async with self._send_lock:
# NOTE: lookup the `trio.Task.context`'s var for
# the current `MsgCodec`.
codec: MsgCodec = _ctxvar_MsgCodec.get()
# TODO: mask out before release?
if self._codec.pld_spec != codec.pld_spec:
self._codec = codec
log.runtime(
f'Using new codec in {self}.send()\n'
f'codec: {self._codec}\n\n'
f'msg: {msg}\n'
)
# XXX for ctxvar debug only!
# if self._codec.pld_spec != codec.pld_spec:
# self._codec = codec
# log.runtime(
# f'Using new codec in {self}.send()\n'
# f'codec: {self._codec}\n\n'
# f'msg: {msg}\n'
# )
if type(msg) not in msgtypes.__msg_types__:
if strict_types:
@ -352,6 +426,16 @@ class MsgpackTCPStream(MsgTransport):
size: bytes = struct.pack("<I", len(bytes_data))
return await self.stream.send_all(size + bytes_data)
# ?TODO? does it help ever to dynamically show this
# frame?
# try:
# <the-above_code>
# except BaseException as _err:
# err = _err
# if not isinstance(err, MsgTypeError):
# __tracebackhide__: bool = False
# raise
@property
def laddr(self) -> tuple[str, int]:
return self._laddr
@ -361,7 +445,7 @@ class MsgpackTCPStream(MsgTransport):
return self._raddr
async def recv(self) -> Any:
return await self._agen.asend(None)
return await self._aiter_pkts.asend(None)
async def drain(self) -> AsyncIterator[dict]:
'''
@ -378,7 +462,7 @@ class MsgpackTCPStream(MsgTransport):
yield msg
def __aiter__(self):
return self._agen
return self._aiter_pkts
def connected(self) -> bool:
return self.stream.socket.fileno() != -1
@ -433,7 +517,7 @@ class Channel:
# set after handshake - always uid of far end
self.uid: tuple[str, str]|None = None
self._agen = self._aiter_recv()
self._aiter_msgs = self._iter_msgs()
self._exc: Exception|None = None # set if far end actor errors
self._closed: bool = False
@ -497,8 +581,6 @@ class Channel:
)
return self._transport
# TODO: something simliar at the IPC-`Context`
# level so as to support
@cm
def apply_codec(
self,
@ -517,6 +599,7 @@ class Channel:
finally:
self._transport.codec = orig
# TODO: do a .src/.dst: str for maddrs?
def __repr__(self) -> str:
if not self._transport:
return '<Channel with inactive transport?>'
@ -560,27 +643,43 @@ class Channel:
)
return transport
# TODO: something like,
# `pdbp.hideframe_on(errors=[MsgTypeError])`
# instead of the `try/except` hack we have rn..
# seems like a pretty useful thing to have in general
# along with being able to filter certain stack frame(s / sets)
# possibly based on the current log-level?
async def send(
self,
payload: Any,
# hide_tb: bool = False,
hide_tb: bool = False,
) -> None:
'''
Send a coded msg-blob over the transport.
'''
# __tracebackhide__: bool = hide_tb
log.transport(
'=> send IPC msg:\n\n'
f'{pformat(payload)}\n'
) # type: ignore
assert self._transport
await self._transport.send(
payload,
# hide_tb=hide_tb,
)
__tracebackhide__: bool = hide_tb
try:
log.transport(
'=> send IPC msg:\n\n'
f'{pformat(payload)}\n'
)
# assert self._transport # but why typing?
await self._transport.send(
payload,
hide_tb=hide_tb,
)
except BaseException as _err:
err = _err # bind for introspection
if not isinstance(_err, MsgTypeError):
# assert err
__tracebackhide__: bool = False
else:
assert err.cid
raise
async def recv(self) -> Any:
assert self._transport
@ -617,8 +716,11 @@ class Channel:
await self.aclose(*args)
def __aiter__(self):
return self._agen
return self._aiter_msgs
# ?TODO? run any reconnection sequence?
# -[ ] prolly should be impl-ed as deco-API?
#
# async def _reconnect(self) -> None:
# """Handle connection failures by polling until a reconnect can be
# established.
@ -636,7 +738,6 @@ class Channel:
# else:
# log.transport("Stream connection re-established!")
# # TODO: run any reconnection sequence
# # on_recon = self._recon_seq
# # if on_recon:
# # await on_recon(self)
@ -650,11 +751,17 @@ class Channel:
# " for re-establishment")
# await trio.sleep(1)
async def _aiter_recv(
async def _iter_msgs(
self
) -> AsyncGenerator[Any, None]:
'''
Async iterate items from underlying stream.
Yield `MsgType` IPC msgs decoded and deliverd from
an underlying `MsgTransport` protocol.
This is a streaming routine alo implemented as an async-gen
func (same a `MsgTransport._iter_pkts()`) gets allocated by
a `.__call__()` inside `.__init__()` where it is assigned to
the `._aiter_msgs` attr.
'''
assert self._transport
@ -680,15 +787,6 @@ class Channel:
case _:
yield msg
# TODO: if we were gonna do this it should be
# done up at the `MsgStream` layer!
#
# sent = yield item
# if sent is not None:
# # optimization, passing None through all the
# # time is pointless
# await self._transport.send(sent)
except trio.BrokenResourceError:
# if not self._autorecon:

View File

@ -97,7 +97,7 @@ class Portal:
channel: Channel,
) -> None:
self.chan = channel
self._chan: Channel = channel
# during the portal's lifetime
self._final_result_pld: Any|None = None
self._final_result_msg: PayloadMsg|None = None
@ -109,6 +109,10 @@ class Portal:
self._streams: set[MsgStream] = set()
self.actor: Actor = current_actor()
@property
def chan(self) -> Channel:
return self._chan
@property
def channel(self) -> Channel:
'''
@ -121,7 +125,8 @@ class Portal:
)
return self.chan
# TODO: factor this out into an `ActorNursery` wrapper
# TODO: factor this out into a `.highlevel` API-wrapper that uses
# a single `.open_context()` call underneath.
async def _submit_for_result(
self,
ns: str,
@ -141,13 +146,22 @@ class Portal:
portal=self,
)
# TODO: we should deprecate this API right? since if we remove
# `.run_in_actor()` (and instead move it to a `.highlevel`
# wrapper api (around a single `.open_context()` call) we don't
# really have any notion of a "main" remote task any more?
#
# @api_frame
async def result(self) -> Any:
async def wait_for_result(
self,
hide_tb: bool = True,
) -> Any:
'''
Return the result(s) from the remote actor's "main" task.
Return the final result delivered by a `Return`-msg from the
remote peer actor's "main" task's `return` statement.
'''
__tracebackhide__ = True
__tracebackhide__: bool = hide_tb
# Check for non-rpc errors slapped on the
# channel for which we always raise
exc = self.channel._exc
@ -182,6 +196,23 @@ class Portal:
return self._final_result_pld
# TODO: factor this out into a `.highlevel` API-wrapper that uses
# a single `.open_context()` call underneath.
async def result(
self,
*args,
**kwargs,
) -> Any|Exception:
typname: str = type(self).__name__
log.warning(
f'`{typname}.result()` is DEPRECATED!\n'
f'Use `{typname}.wait_for_result()` instead!\n'
)
return await self.wait_for_result(
*args,
**kwargs,
)
async def _cancel_streams(self):
# terminate all locally running async generator
# IPC calls
@ -232,14 +263,15 @@ class Portal:
return False
reminfo: str = (
f'Portal.cancel_actor() => {self.channel.uid}\n'
f'|_{chan}\n'
f'c)=> {self.channel.uid}\n'
f' |_{chan}\n'
)
log.cancel(
f'Requesting runtime cancel for peer\n\n'
f'Requesting actor-runtime cancel for peer\n\n'
f'{reminfo}'
)
# XXX the one spot we set it?
self.channel._cancel_called: bool = True
try:
# send cancel cmd - might not get response
@ -279,6 +311,8 @@ class Portal:
)
return False
# TODO: do we still need this for low level `Actor`-runtime
# method calls or can we also remove it?
async def run_from_ns(
self,
namespace_path: str,
@ -316,6 +350,8 @@ class Portal:
expect_msg=Return,
)
# TODO: factor this out into a `.highlevel` API-wrapper that uses
# a single `.open_context()` call underneath.
async def run(
self,
func: str,
@ -370,6 +406,8 @@ class Portal:
expect_msg=Return,
)
# TODO: factor this out into a `.highlevel` API-wrapper that uses
# a single `.open_context()` call underneath.
@acm
async def open_stream_from(
self,

View File

@ -21,6 +21,7 @@ Root actor runtime ignition(s).
from contextlib import asynccontextmanager as acm
from functools import partial
import importlib
import inspect
import logging
import os
import signal
@ -79,7 +80,7 @@ async def open_root_actor(
# enables the multi-process debugger support
debug_mode: bool = False,
maybe_enable_greenback: bool = False, # `.pause_from_sync()/breakpoint()` support
maybe_enable_greenback: bool = True, # `.pause_from_sync()/breakpoint()` support
enable_stack_on_sig: bool = False,
# internal logging
@ -115,10 +116,16 @@ async def open_root_actor(
if (
debug_mode
and maybe_enable_greenback
and await _debug.maybe_init_greenback(
raise_not_found=False,
and (
maybe_mod := await _debug.maybe_init_greenback(
raise_not_found=False,
)
)
):
logger.info(
f'Found `greenback` installed @ {maybe_mod}\n'
'Enabling `tractor.pause_from_sync()` support!\n'
)
os.environ['PYTHONBREAKPOINT'] = (
'tractor.devx._debug._sync_pause_from_builtin'
)
@ -226,14 +233,8 @@ async def open_root_actor(
and
enable_stack_on_sig
):
try:
logger.info('Enabling `stackscope` traces on SIGUSR1')
from .devx import enable_stack_on_sig
enable_stack_on_sig()
except ImportError:
logger.warning(
'`stackscope` not installed for use in debug mode!'
)
from .devx._stackscope import enable_stack_on_sig
enable_stack_on_sig()
# closed into below ping task-func
ponged_addrs: list[tuple[str, int]] = []
@ -264,7 +265,9 @@ async def open_root_actor(
except OSError:
# TODO: make this a "discovery" log level?
logger.warning(f'No actor registry found @ {addr}')
logger.info(
f'No actor registry found @ {addr}\n'
)
async with trio.open_nursery() as tn:
for addr in registry_addrs:
@ -278,7 +281,6 @@ async def open_root_actor(
# Create a new local root-actor instance which IS NOT THE
# REGISTRAR
if ponged_addrs:
if ensure_registry:
raise RuntimeError(
f'Failed to open `{name}`@{ponged_addrs}: '
@ -365,23 +367,25 @@ async def open_root_actor(
)
try:
yield actor
except (
Exception,
BaseExceptionGroup,
) as err:
import inspect
# XXX NOTE XXX see equiv note inside
# `._runtime.Actor._stream_handler()` where in the
# non-root or root-that-opened-this-mahually case we
# wait for the local actor-nursery to exit before
# exiting the transport channel handler.
entered: bool = await _debug._maybe_enter_pm(
err,
api_frame=inspect.currentframe(),
)
if (
not entered
and not is_multi_cancelled(err)
and
not is_multi_cancelled(err)
):
logger.exception('Root actor crashed:\n')
logger.exception('Root actor crashed\n')
# ALWAYS re-raise any error bubbled up from the
# runtime!

View File

@ -68,7 +68,7 @@ from .msg import (
MsgCodec,
PayloadT,
NamespacePath,
pretty_struct,
# pretty_struct,
_ops as msgops,
)
from tractor.msg.types import (
@ -89,6 +89,16 @@ if TYPE_CHECKING:
log = get_logger('tractor')
# ?TODO? move to a `tractor.lowlevel._rpc` with the below
# func-type-cases implemented "on top of" `@context` defs:
# -[ ] std async func helper decorated with `@rpc_func`?
# -[ ] `Portal.open_stream_from()` with async-gens?
# |_ possibly a duplex form of this with a
# `sent_from_peer = yield send_to_peer` form, which would require
# syncing the send/recv side with possibly `.receive_nowait()`
# on each `yield`?
# -[ ] some kinda `@rpc_acm` maybe that does a fixture style with
# user only defining a single-`yield` generator-func?
async def _invoke_non_context(
actor: Actor,
cancel_scope: CancelScope,
@ -108,8 +118,9 @@ async def _invoke_non_context(
] = trio.TASK_STATUS_IGNORED,
):
__tracebackhide__: bool = True
cs: CancelScope|None = None # ref when activated
# TODO: can we unify this with the `context=True` impl below?
# ?TODO? can we unify this with the `context=True` impl below?
if inspect.isasyncgen(coro):
await chan.send(
StartAck(
@ -160,10 +171,6 @@ async def _invoke_non_context(
functype='asyncgen',
)
)
# XXX: the async-func may spawn further tasks which push
# back values like an async-generator would but must
# manualy construct the response dict-packet-responses as
# above
with cancel_scope as cs:
ctx._scope = cs
task_status.started(ctx)
@ -175,15 +182,13 @@ async def _invoke_non_context(
await chan.send(
Stop(cid=cid)
)
# simplest function/method request-response pattern
# XXX: in the most minimally used case, just a scheduled internal runtime
# call to `Actor._cancel_task()` from the ctx-peer task since we
# don't (yet) have a dedicated IPC msg.
# ------ - ------
else:
# regular async function/method
# XXX: possibly just a scheduled `Actor._cancel_task()`
# from a remote request to cancel some `Context`.
# ------ - ------
# TODO: ideally we unify this with the above `context=True`
# block such that for any remote invocation ftype, we
# always invoke the far end RPC task scheduling the same
# way: using the linked IPC context machinery.
failed_resp: bool = False
try:
ack = StartAck(
@ -354,8 +359,15 @@ async def _errors_relayed_via_ipc(
# channel.
task_status.started(err)
# always reraise KBIs so they propagate at the sys-process level.
if isinstance(err, KeyboardInterrupt):
# always propagate KBIs at the sys-process level.
if (
isinstance(err, KeyboardInterrupt)
# ?TODO? except when running in asyncio mode?
# |_ wut if you want to open a `@context` FROM an
# infected_aio task?
# and not actor.is_infected_aio()
):
raise
# RPC task bookeeping.
@ -458,7 +470,6 @@ async def _invoke(
# tb: TracebackType = None
cancel_scope = CancelScope()
cs: CancelScope|None = None # ref when activated
ctx = actor.get_context(
chan=chan,
cid=cid,
@ -607,6 +618,8 @@ async def _invoke(
# `@context` marked RPC function.
# - `._portal` is never set.
try:
tn: trio.Nursery
rpc_ctx_cs: CancelScope
async with (
trio.open_nursery() as tn,
msgops.maybe_limit_plds(
@ -616,7 +629,7 @@ async def _invoke(
),
):
ctx._scope_nursery = tn
ctx._scope = tn.cancel_scope
rpc_ctx_cs = ctx._scope = tn.cancel_scope
task_status.started(ctx)
# TODO: better `trionics` tooling:
@ -642,7 +655,7 @@ async def _invoke(
# itself calls `ctx._maybe_cancel_and_set_remote_error()`
# which cancels the scope presuming the input error
# is not a `.cancel_acked` pleaser.
if ctx._scope.cancelled_caught:
if rpc_ctx_cs.cancelled_caught:
our_uid: tuple = actor.uid
# first check for and raise any remote error
@ -652,9 +665,7 @@ async def _invoke(
if re := ctx._remote_error:
ctx._maybe_raise_remote_err(re)
cs: CancelScope = ctx._scope
if cs.cancel_called:
if rpc_ctx_cs.cancel_called:
canceller: tuple = ctx.canceller
explain: str = f'{ctx.side!r}-side task was cancelled by '
@ -680,9 +691,15 @@ async def _invoke(
elif canceller == ctx.chan.uid:
explain += f'its {ctx.peer_side!r}-side peer'
else:
elif canceller == our_uid:
explain += 'itself'
elif canceller:
explain += 'a remote peer'
else:
explain += 'an unknown cause?'
explain += (
add_div(message=explain)
+
@ -911,7 +928,10 @@ async def process_messages(
f'IPC msg from peer\n'
f'<= {chan.uid}\n\n'
# TODO: avoid fmting depending on loglevel for perf?
# TODO: use of the pprinting of structs is
# FRAGILE and should prolly not be
#
# avoid fmting depending on loglevel for perf?
# -[ ] specifically `pretty_struct.pformat()` sub-call..?
# - how to only log-level-aware actually call this?
# -[ ] use `.msg.pretty_struct` here now instead!
@ -1177,7 +1197,7 @@ async def process_messages(
parent_chan=chan,
)
except TransportClosed:
except TransportClosed as tc:
# channels "breaking" (for TCP streams by EOF or 104
# connection-reset) is ok since we don't have a teardown
# handshake for them (yet) and instead we simply bail out of
@ -1185,12 +1205,20 @@ async def process_messages(
# up..
#
# TODO: maybe add a teardown handshake? and,
# -[ ] don't show this msg if it's an ephemeral discovery ep call?
# -[x] don't show this msg if it's an ephemeral discovery ep call?
# |_ see the below `.report_n_maybe_raise()` impl as well as
# tc-exc input details in `MsgpackTCPStream._iter_pkts()`
# for different read-failure cases.
# -[ ] figure out how this will break with other transports?
log.runtime(
f'IPC channel closed abruptly\n'
f'<=x peer: {chan.uid}\n'
f' |_{chan.raddr}\n'
tc.report_n_maybe_raise(
message=(
f'peer IPC channel closed abruptly?\n\n'
f'<=x {chan}\n'
f' |_{chan.raddr}\n\n'
)
+
tc.message
)
# transport **WAS** disconnected
@ -1238,7 +1266,7 @@ async def process_messages(
'Exiting IPC msg loop with final msg\n\n'
f'<= peer: {chan.uid}\n'
f' |_{chan}\n\n'
f'{pretty_struct.pformat(msg)}'
# f'{pretty_struct.pformat(msg)}'
)
log.runtime(message)

View File

@ -59,6 +59,7 @@ import os
import warnings
import trio
from trio._core import _run as trio_runtime
from trio import (
CancelScope,
Nursery,
@ -66,10 +67,11 @@ from trio import (
)
from tractor.msg import (
pretty_struct,
NamespacePath,
types as msgtypes,
MsgType,
NamespacePath,
Stop,
pretty_struct,
types as msgtypes,
)
from ._ipc import Channel
from ._context import (
@ -79,6 +81,7 @@ from ._context import (
from .log import get_logger
from ._exceptions import (
ContextCancelled,
InternalError,
ModuleNotExposed,
MsgTypeError,
unpack_error,
@ -101,6 +104,7 @@ from ._rpc import (
if TYPE_CHECKING:
from ._supervise import ActorNursery
from trio._channel import MemoryChannelState
log = get_logger('tractor')
@ -114,25 +118,26 @@ class Actor:
'''
The fundamental "runtime" concurrency primitive.
An *actor* is the combination of a regular Python process executing
a ``trio`` task tree, communicating with other actors through
"memory boundary portals" - which provide a native async API around
IPC transport "channels" which themselves encapsulate various
(swappable) network protocols.
An "actor" is the combination of a regular Python process
executing a `trio.run()` task tree, communicating with other
"actors" through "memory boundary portals": `Portal`, which
provide a high-level async API around IPC "channels" (`Channel`)
which themselves encapsulate various (swappable) network
transport protocols for sending msgs between said memory domains
(processes, hosts, non-GIL threads).
Each "actor" is ``trio.run()`` scheduled "runtime" composed of
many concurrent tasks in a single thread. The "runtime" tasks
conduct a slew of low(er) level functions to make it possible
for message passing between actors as well as the ability to
create new actors (aka new "runtimes" in new processes which
are supervised via a nursery construct). Each task which sends
messages to a task in a "peer" (not necessarily a parent-child,
Each "actor" is `trio.run()` scheduled "runtime" composed of many
concurrent tasks in a single thread. The "runtime" tasks conduct
a slew of low(er) level functions to make it possible for message
passing between actors as well as the ability to create new
actors (aka new "runtimes" in new processes which are supervised
via an "actor-nursery" construct). Each task which sends messages
to a task in a "peer" actor (not necessarily a parent-child,
depth hierarchy) is able to do so via an "address", which maps
IPC connections across memory boundaries, and a task request id
which allows for per-actor tasks to send and receive messages
to specific peer-actor tasks with which there is an ongoing
RPC/IPC dialog.
which allows for per-actor tasks to send and receive messages to
specific peer-actor tasks with which there is an ongoing RPC/IPC
dialog.
'''
# ugh, we need to get rid of this and replace with a "registry" sys
@ -229,17 +234,20 @@ class Actor:
# by the user (currently called the "arbiter")
self._spawn_method: str = spawn_method
self._peers: defaultdict = defaultdict(list)
self._peers: defaultdict[
str, # uaid
list[Channel], # IPC conns from peer
] = defaultdict(list)
self._peer_connected: dict[tuple[str, str], trio.Event] = {}
self._no_more_peers = trio.Event()
self._no_more_peers.set()
# RPC state
self._ongoing_rpc_tasks = trio.Event()
self._ongoing_rpc_tasks.set()
# (chan, cid) -> (cancel_scope, func)
self._rpc_tasks: dict[
tuple[Channel, str],
tuple[Context, Callable, trio.Event]
tuple[Channel, str], # (chan, cid)
tuple[Context, Callable, trio.Event] # (ctx=>, fn(), done?)
] = {}
# map {actor uids -> Context}
@ -316,7 +324,10 @@ class Actor:
event = self._peer_connected.setdefault(uid, trio.Event())
await event.wait()
log.debug(f'{uid!r} successfully connected back to us')
return event, self._peers[uid][-1]
return (
event,
self._peers[uid][-1],
)
def load_modules(
self,
@ -407,26 +418,11 @@ class Actor:
'''
self._no_more_peers = trio.Event() # unset by making new
chan = Channel.from_stream(stream)
their_uid: tuple[str, str]|None = chan.uid
con_status: str = ''
# TODO: remove this branch since can never happen?
# NOTE: `.uid` is only set after first contact
if their_uid:
con_status = (
'IPC Re-connection from already known peer?\n'
)
else:
con_status = (
'New inbound IPC connection <=\n'
)
con_status += (
con_status: str = (
'New inbound IPC connection <=\n'
f'|_{chan}\n'
# f' |_@{chan.raddr}\n\n'
# ^-TODO-^ remove since alfready in chan.__repr__()?
)
# send/receive initial handshake response
try:
uid: tuple|None = await self._do_handshake(chan)
@ -438,10 +434,10 @@ class Actor:
TransportClosed,
):
# XXX: This may propagate up from ``Channel._aiter_recv()``
# and ``MsgpackStream._inter_packets()`` on a read from the
# XXX: This may propagate up from `Channel._aiter_recv()`
# and `MsgpackStream._inter_packets()` on a read from the
# stream particularly when the runtime is first starting up
# inside ``open_root_actor()`` where there is a check for
# inside `open_root_actor()` where there is a check for
# a bound listener on the "arbiter" addr. the reset will be
# because the handshake was never meant took place.
log.runtime(
@ -451,9 +447,22 @@ class Actor:
)
return
familiar: str = 'new-peer'
if _pre_chan := self._peers.get(uid):
familiar: str = 'pre-existing-peer'
uid_short: str = f'{uid[0]}[{uid[1][-6:]}]'
con_status += (
f' -> Handshake with actor `{uid[0]}[{uid[1][-6:]}]` complete\n'
f' -> Handshake with {familiar} `{uid_short}` complete\n'
)
if _pre_chan:
log.warning(
# con_status += (
# ^TODO^ swap once we minimize conn duplication
f' -> Wait, we already have IPC with `{uid_short}`??\n'
f' |_{_pre_chan}\n'
)
# IPC connection tracking for both peers and new children:
# - if this is a new channel to a locally spawned
# sub-actor there will be a spawn wait even registered
@ -506,8 +515,9 @@ class Actor:
)
except trio.Cancelled:
log.cancel(
'IPC transport msg loop was cancelled for \n'
f'|_{chan}\n'
'IPC transport msg loop was cancelled\n'
f'c)>\n'
f' |_{chan}\n'
)
raise
@ -544,8 +554,9 @@ class Actor:
):
log.cancel(
'Waiting on cancel request to peer\n'
f'`Portal.cancel_actor()` => {chan.uid}\n'
'Waiting on cancel request to peer..\n'
f'c)=>\n'
f' |_{chan.uid}\n'
)
# XXX: this is a soft wait on the channel (and its
@ -642,12 +653,18 @@ class Actor:
# and
an_exit_cs.cancelled_caught
):
log.warning(
report: str = (
'Timed out waiting on local actor-nursery to exit?\n'
f'{local_nursery}\n'
f' |_{pformat(local_nursery._children)}\n'
f'c)>\n'
f' |_{local_nursery}\n'
)
# await _debug.pause()
if children := local_nursery._children:
# indent from above local-nurse repr
report += (
f' |_{pformat(children)}\n'
)
log.warning(report)
if disconnected:
# if the transport died and this actor is still
@ -819,14 +836,17 @@ class Actor:
# side,
)]
except KeyError:
log.warning(
report: str = (
'Ignoring invalid IPC ctx msg!\n\n'
f'<= sender: {uid}\n\n'
# XXX don't need right since it's always in msg?
# f'=> cid: {cid}\n\n'
f'{pretty_struct.pformat(msg)}\n'
f'<=? {uid}\n\n'
f' |_{pretty_struct.pformat(msg)}\n'
)
match msg:
case Stop():
log.runtime(report)
case _:
log.warning(report)
return
# if isinstance(msg, MsgTypeError):
@ -880,11 +900,15 @@ class Actor:
f'peer: {chan.uid}\n'
f'cid:{cid}\n'
)
ctx._allow_overruns = allow_overruns
ctx._allow_overruns: bool = allow_overruns
# adjust buffer size if specified
state = ctx._send_chan._state # type: ignore
if msg_buffer_size and state.max_buffer_size != msg_buffer_size:
state: MemoryChannelState = ctx._send_chan._state # type: ignore
if (
msg_buffer_size
and
state.max_buffer_size != msg_buffer_size
):
state.max_buffer_size = msg_buffer_size
except KeyError:
@ -1046,6 +1070,10 @@ class Actor:
# TODO: another `Struct` for rtvs..
rvs: dict[str, Any] = spawnspec._runtime_vars
if rvs['_debug_mode']:
from .devx import (
enable_stack_on_sig,
maybe_init_greenback,
)
try:
# TODO: maybe return some status msgs upward
# to that we can emit them in `con_status`
@ -1053,14 +1081,57 @@ class Actor:
log.devx(
'Enabling `stackscope` traces on SIGUSR1'
)
from .devx import enable_stack_on_sig
enable_stack_on_sig()
except ImportError:
log.warning(
'`stackscope` not installed for use in debug mode!'
)
rvs['_is_root'] = False
if rvs.get('use_greenback', False):
maybe_mod: ModuleType|None = await maybe_init_greenback()
if maybe_mod:
log.devx(
'Activated `greenback` '
'for `tractor.pause_from_sync()` support!'
)
else:
rvs['use_greenback'] = False
log.warning(
'`greenback` not installed for use in debug mode!\n'
'`tractor.pause_from_sync()` not available!'
)
# XXX ensure the "infected `asyncio` mode" setting
# passed down from our spawning parent is consistent
# with `trio`-runtime initialization:
# - during sub-proc boot, the entrypoint func
# (`._entry.<spawn_backend>_main()`) should set
# `._infected_aio = True` before calling
# `run_as_asyncio_guest()`,
# - the value of `infect_asyncio: bool = True` as
# passed to `ActorNursery.start_actor()` must be
# the same as `_runtime_vars['_is_infected_aio']`
if (
(aio_rtv := rvs['_is_infected_aio'])
!=
(aio_attr := self._infected_aio)
):
raise InternalError(
'Parent sent runtime-vars that mismatch for the '
'"infected `asyncio` mode" settings ?!?\n\n'
f'rvs["_is_infected_aio"] = {aio_rtv}\n'
f'self._infected_aio = {aio_attr}\n'
)
if aio_rtv:
assert trio_runtime.GLOBAL_RUN_CONTEXT.runner.is_guest
# ^TODO^ possibly add a `sniffio` or
# `trio` pub-API for `is_guest_mode()`?
rvs['_is_root'] = False # obvi XD
# update process-wide globals
_state._runtime_vars.update(rvs)
# XXX: ``msgspec`` doesn't support serializing tuples
@ -1211,8 +1282,9 @@ class Actor:
# TODO: just use the new `Context.repr_rpc: str` (and
# other) repr fields instead of doing this all manual..
msg: str = (
f'Runtime cancel request from {requester_type}:\n\n'
f'<= .cancel(): {requesting_uid}\n\n'
f'Actor-runtime cancel request from {requester_type}\n\n'
f'<=c) {requesting_uid}\n'
f' |_{self}\n'
)
# TODO: what happens here when we self-cancel tho?
@ -1320,10 +1392,11 @@ class Actor:
return True
log.cancel(
'Cancel request for RPC task\n\n'
f'<= Actor._cancel_task(): {requesting_uid}\n\n'
f'=> {ctx._task}\n'
f' |_ >> {ctx.repr_rpc}\n'
'Rxed cancel request for RPC task\n'
f'<=c) {requesting_uid}\n'
f' |_{ctx._task}\n'
f' >> {ctx.repr_rpc}\n'
# f'=> {ctx._task}\n'
# f' >> Actor._cancel_task() => {ctx._task}\n'
# f' |_ {ctx._task}\n\n'
@ -1439,17 +1512,17 @@ class Actor:
"IPC channel's "
)
rent_chan_repr: str = (
f' |_{parent_chan}\n\n'
f' |_{parent_chan}\n\n'
if parent_chan
else ''
)
log.cancel(
f'Cancelling {descr} RPC tasks\n\n'
f'<= canceller: {req_uid}\n'
f'<=c) {req_uid} [canceller]\n'
f'{rent_chan_repr}'
f'=> cancellee: {self.uid}\n'
f' |_{self}.cancel_rpc_tasks()\n'
f' |_tasks: {len(tasks)}\n'
f'c)=> {self.uid} [cancellee]\n'
f' |_{self} [with {len(tasks)} tasks]\n'
# f' |_tasks: {len(tasks)}\n'
# f'{tasks_str}'
)
for (
@ -1518,7 +1591,7 @@ class Actor:
def accept_addr(self) -> tuple[str, int]:
'''
Primary address to which the IPC transport server is
bound.
bound and listening for new connections.
'''
# throws OSError on failure
@ -1535,6 +1608,7 @@ class Actor:
def get_chans(
self,
uid: tuple[str, str],
) -> list[Channel]:
'''
Return all IPC channels to the actor with provided `uid`.
@ -1717,8 +1791,8 @@ async def async_main(
# Register with the arbiter if we're told its addr
log.runtime(
f'Registering `{actor.name}` ->\n'
f'{pformat(accept_addrs)}'
f'Registering `{actor.name}` => {pformat(accept_addrs)}\n'
# ^-TODO-^ we should instead show the maddr here^^
)
# TODO: ideally we don't fan out to all registrars
@ -1776,57 +1850,90 @@ async def async_main(
# Blocks here as expected until the root nursery is
# killed (i.e. this actor is cancelled or signalled by the parent)
except Exception as err:
log.runtime("Closing all actor lifetime contexts")
actor.lifetime_stack.close()
except Exception as internal_err:
if not is_registered:
err_report: str = (
'\n'
"Actor runtime (internally) failed BEFORE contacting the registry?\n"
f'registrars -> {actor.reg_addrs} ?!?!\n\n'
'^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! ^^^\n\n'
'\t>> CALMLY CANCEL YOUR CHILDREN AND CALL YOUR PARENTS <<\n\n'
'\tIf this is a sub-actor hopefully its parent will keep running '
'and cancel/reap this sub-process..\n'
'(well, presuming this error was propagated upward)\n\n'
'\t---------------------------------------------\n'
'\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT @ ' # oneline
'https://github.com/goodboy/tractor/issues\n'
'\t---------------------------------------------\n'
)
# TODO: I guess we could try to connect back
# to the parent through a channel and engage a debugger
# once we have that all working with std streams locking?
log.exception(
f"Actor errored and failed to register with arbiter "
f"@ {actor.reg_addrs[0]}?")
log.error(
"\n\n\t^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! ^^^\n\n"
"\t>> CALMLY CALL THE AUTHORITIES AND HIDE YOUR CHILDREN <<\n\n"
"\tIf this is a sub-actor hopefully its parent will keep running "
"correctly presuming this error was safely ignored..\n\n"
"\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT: "
"https://github.com/goodboy/tractor/issues\n"
)
log.exception(err_report)
if actor._parent_chan:
await try_ship_error_to_remote(
actor._parent_chan,
err,
internal_err,
)
# always!
match err:
match internal_err:
case ContextCancelled():
log.cancel(
f'Actor: {actor.uid} was task-context-cancelled with,\n'
f'str(err)'
f'str(internal_err)'
)
case _:
log.exception("Actor errored:")
raise
log.exception(
'Main actor-runtime task errored\n'
f'<x)\n'
f' |_{actor}\n'
)
raise internal_err
finally:
log.runtime(
'Runtime nursery complete'
'-> Closing all actor lifetime contexts..'
teardown_report: str = (
'Main actor-runtime task completed\n'
)
# tear down all lifetime contexts if not in guest mode
# XXX: should this just be in the entrypoint?
actor.lifetime_stack.close()
# TODO: we can't actually do this bc the debugger
# uses the _service_n to spawn the lock task, BUT,
# in theory if we had the root nursery surround this finally
# block it might be actually possible to debug THIS
# machinery in the same way as user task code?
# ?TODO? should this be in `._entry`/`._root` mods instead?
#
# teardown any actor-lifetime-bound contexts
ls: ExitStack = actor.lifetime_stack
# only report if there are any registered
cbs: list[Callable] = [
repr(tup[1].__wrapped__)
for tup in ls._exit_callbacks
]
if cbs:
cbs_str: str = '\n'.join(cbs)
teardown_report += (
'-> Closing actor-lifetime-bound callbacks\n\n'
f'}}>\n'
f' |_{ls}\n'
f' |_{cbs_str}\n'
)
# XXX NOTE XXX this will cause an error which
# prevents any `infected_aio` actor from continuing
# and any callbacks in the `ls` here WILL NOT be
# called!!
# await _debug.pause(shield=True)
ls.close()
# XXX TODO but hard XXX
# we can't actually do this bc the debugger uses the
# _service_n to spawn the lock task, BUT, in theory if we had
# the root nursery surround this finally block it might be
# actually possible to debug THIS machinery in the same way
# as user task code?
#
# if actor.name == 'brokerd.ib':
# with CancelScope(shield=True):
# await _debug.breakpoint()
@ -1856,9 +1963,9 @@ async def async_main(
failed = True
if failed:
log.warning(
f'Failed to unregister {actor.name} from '
f'registar @ {addr}'
teardown_report += (
f'-> Failed to unregister {actor.name} from '
f'registar @ {addr}\n'
)
# Ensure all peers (actors connected to us as clients) are finished
@ -1866,13 +1973,23 @@ async def async_main(
if any(
chan.connected() for chan in chain(*actor._peers.values())
):
log.runtime(
f"Waiting for remaining peers {actor._peers} to clear")
teardown_report += (
f'-> Waiting for remaining peers {actor._peers} to clear..\n'
)
log.runtime(teardown_report)
with CancelScope(shield=True):
await actor._no_more_peers.wait()
log.runtime("All peer channels are complete")
log.runtime("Runtime completed")
teardown_report += (
'-> All peer channels are complete\n'
)
teardown_report += (
'Actor runtime exiting\n'
f'>)\n'
f'|_{actor}\n'
)
log.info(teardown_report)
# TODO: rename to `Registry` and move to `._discovery`!

View File

@ -149,7 +149,7 @@ async def exhaust_portal(
# XXX: streams should never be reaped here since they should
# always be established and shutdown using a context manager api
final: Any = await portal.result()
final: Any = await portal.wait_for_result()
except (
Exception,
@ -223,8 +223,8 @@ async def cancel_on_completion(
async def hard_kill(
proc: trio.Process,
terminate_after: int = 1.6,
terminate_after: int = 1.6,
# NOTE: for mucking with `.pause()`-ing inside the runtime
# whilst also hacking on it XD
# terminate_after: int = 99999,
@ -246,8 +246,9 @@ async def hard_kill(
'''
log.cancel(
'Terminating sub-proc:\n'
f'|_{proc}\n'
'Terminating sub-proc\n'
f'>x)\n'
f' |_{proc}\n'
)
# NOTE: this timeout used to do nothing since we were shielding
# the ``.wait()`` inside ``new_proc()`` which will pretty much
@ -293,8 +294,8 @@ async def hard_kill(
log.critical(
# 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n'
'#T-800 deployed to collect zombie B0\n'
f'|\n'
f'|_{proc}\n'
f'>x)\n'
f' |_{proc}\n'
)
proc.kill()
@ -322,8 +323,9 @@ async def soft_kill(
uid: tuple[str, str] = portal.channel.uid
try:
log.cancel(
'Soft killing sub-actor via `Portal.cancel_actor()`\n'
f'|_{proc}\n'
'Soft killing sub-actor via portal request\n'
f'c)> {portal.chan.uid}\n'
f' |_{proc}\n'
)
# wait on sub-proc to signal termination
await wait_func(proc)
@ -552,8 +554,9 @@ async def trio_proc(
# cancel result waiter that may have been spawned in
# tandem if not done already
log.cancel(
'Cancelling existing result waiter task for '
f'{subactor.uid}'
'Cancelling portal result reaper task\n'
f'>c)\n'
f' |_{subactor.uid}\n'
)
nursery.cancel_scope.cancel()
@ -562,7 +565,11 @@ async def trio_proc(
# allowed! Do this **after** cancellation/teardown to avoid
# killing the process too early.
if proc:
log.cancel(f'Hard reap sequence starting for {subactor.uid}')
log.cancel(
f'Hard reap sequence starting for subactor\n'
f'>x)\n'
f' |_{subactor}@{subactor.uid}\n'
)
with trio.CancelScope(shield=True):
# don't clobber an ongoing pdb

View File

@ -44,7 +44,9 @@ _runtime_vars: dict[str, Any] = {
'_root_mailbox': (None, None),
'_registry_addrs': [],
# for `breakpoint()` support
'_is_infected_aio': False,
# for `tractor.pause_from_sync()` & `breakpoint()` support
'use_greenback': False,
}
@ -70,7 +72,8 @@ def current_actor(
'''
if (
err_on_no_runtime
and _current_actor is None
and
_current_actor is None
):
msg: str = 'No local actor has been initialized yet?\n'
from ._exceptions import NoRuntime

View File

@ -36,8 +36,8 @@ import warnings
import trio
from ._exceptions import (
# _raise_from_no_key_in_msg,
ContextCancelled,
RemoteActorError,
)
from .log import get_logger
from .trionics import (
@ -101,7 +101,7 @@ class MsgStream(trio.abc.Channel):
@property
def ctx(self) -> Context:
'''
This stream's IPC `Context` ref.
A read-only ref to this stream's inter-actor-task `Context`.
'''
return self._ctx
@ -145,9 +145,8 @@ class MsgStream(trio.abc.Channel):
'''
__tracebackhide__: bool = hide_tb
# NOTE: `trio.ReceiveChannel` implements
# EOC handling as follows (aka uses it
# to gracefully exit async for loops):
# NOTE FYI: `trio.ReceiveChannel` implements EOC handling as
# follows (aka uses it to gracefully exit async for loops):
#
# async def __anext__(self) -> ReceiveType:
# try:
@ -165,48 +164,29 @@ class MsgStream(trio.abc.Channel):
src_err: Exception|None = None # orig tb
try:
ctx: Context = self._ctx
return await ctx._pld_rx.recv_pld(ipc=self)
# XXX: the stream terminates on either of:
# - via `self._rx_chan.receive()` raising after manual closure
# by the rpc-runtime OR,
# - via a received `{'stop': ...}` msg from remote side.
# |_ NOTE: previously this was triggered by calling
# ``._rx_chan.aclose()`` on the send side of the channel inside
# `Actor._deliver_ctx_payload()`, but now the 'stop' message handling
# has been put just above inside `_raise_from_no_key_in_msg()`.
except (
trio.EndOfChannel,
) as eoc:
src_err = eoc
# - `self._rx_chan.receive()` raising after manual closure
# by the rpc-runtime,
# OR
# - via a `Stop`-msg received from remote peer task.
# NOTE
# |_ previously this was triggered by calling
# ``._rx_chan.aclose()`` on the send side of the channel
# inside `Actor._deliver_ctx_payload()`, but now the 'stop'
# message handling gets delegated to `PldRFx.recv_pld()`
# internals.
except trio.EndOfChannel as eoc:
# a graceful stream finished signal
self._eoc = eoc
src_err = eoc
# TODO: Locally, we want to close this stream gracefully, by
# terminating any local consumers tasks deterministically.
# Once we have broadcast support, we **don't** want to be
# closing this stream and not flushing a final value to
# remaining (clone) consumers who may not have been
# scheduled to receive it yet.
# try:
# maybe_err_msg_or_res: dict = self._rx_chan.receive_nowait()
# if maybe_err_msg_or_res:
# log.warning(
# 'Discarding un-processed msg:\n'
# f'{maybe_err_msg_or_res}'
# )
# except trio.WouldBlock:
# # no queued msgs that might be another remote
# # error, so just raise the original EoC
# pass
# raise eoc
# a ``ClosedResourceError`` indicates that the internal
# feeder memory receive channel was closed likely by the
# runtime after the associated transport-channel
# disconnected or broke.
# a `ClosedResourceError` indicates that the internal feeder
# memory receive channel was closed likely by the runtime
# after the associated transport-channel disconnected or
# broke.
except trio.ClosedResourceError as cre: # by self._rx_chan.receive()
src_err = cre
log.warning(
@ -218,14 +198,15 @@ class MsgStream(trio.abc.Channel):
# terminated and signal this local iterator to stop
drained: list[Exception|dict] = await self.aclose()
if drained:
# ?TODO? pass these to the `._ctx._drained_msgs: deque`
# and then iterate them as part of any `.wait_for_result()` call?
#
# from .devx import pause
# await pause()
log.warning(
'Drained context msgs during closure:\n'
'Drained context msgs during closure\n\n'
f'{drained}'
)
# TODO: pass these to the `._ctx._drained_msgs: deque`
# and then iterate them as part of any `.result()` call?
# NOTE XXX: if the context was cancelled or remote-errored
# but we received the stream close msg first, we
@ -238,28 +219,36 @@ class MsgStream(trio.abc.Channel):
from_src_exc=src_err,
)
# propagate any error but hide low-level frame details
# from the caller by default for debug noise reduction.
# propagate any error but hide low-level frame details from
# the caller by default for console/debug-REPL noise
# reduction.
if (
hide_tb
and (
# XXX NOTE XXX don't reraise on certain
# stream-specific internal error types like,
#
# - `trio.EoC` since we want to use the exact instance
# to ensure that it is the error that bubbles upward
# for silent absorption by `Context.open_stream()`.
and not self._eoc
# XXX NOTE special conditions: don't reraise on
# certain stream-specific internal error types like,
#
# - `trio.EoC` since we want to use the exact instance
# to ensure that it is the error that bubbles upward
# for silent absorption by `Context.open_stream()`.
not self._eoc
# - `RemoteActorError` (or `ContextCancelled`) if it gets
# raised from `_raise_from_no_key_in_msg()` since we
# want the same (as the above bullet) for any
# `.open_context()` block bubbled error raised by
# any nearby ctx API remote-failures.
# and not isinstance(src_err, RemoteActorError)
# - `RemoteActorError` (or subtypes like ctxc)
# since we want to present the error as though it is
# "sourced" directly from this `.receive()` call and
# generally NOT include the stack frames raised from
# inside the `PldRx` and/or the transport stack
# layers.
or isinstance(src_err, RemoteActorError)
)
):
raise type(src_err)(*src_err.args) from src_err
else:
# for any non-graceful-EOC we want to NOT hide this frame
if not self._eoc:
__tracebackhide__: bool = False
raise src_err
async def aclose(self) -> list[Exception|dict]:
@ -385,6 +374,8 @@ class MsgStream(trio.abc.Channel):
if not self._eoc:
message: str = (
f'Stream self-closed by {self._ctx.side!r}-side before EoC\n'
# } bc a stream is a "scope"/msging-phase inside an IPC
f'x}}>\n'
f'|_{self}\n'
)
log.cancel(message)

View File

@ -80,6 +80,7 @@ class ActorNursery:
'''
def __init__(
self,
# TODO: maybe def these as fields of a struct looking type?
actor: Actor,
ria_nursery: trio.Nursery,
da_nursery: trio.Nursery,
@ -88,8 +89,10 @@ class ActorNursery:
) -> None:
# self.supervisor = supervisor # TODO
self._actor: Actor = actor
self._ria_nursery = ria_nursery
# TODO: rename to `._tn` for our conventional "task-nursery"
self._da_nursery = da_nursery
self._children: dict[
tuple[str, str],
tuple[
@ -98,15 +101,13 @@ class ActorNursery:
Portal | None,
]
] = {}
# portals spawned with ``run_in_actor()`` are
# cancelled when their "main" result arrives
self._cancel_after_result_on_exit: set = set()
self.cancelled: bool = False
self._join_procs = trio.Event()
self._at_least_one_child_in_debug: bool = False
self.errors = errors
self.exited = trio.Event()
self._scope_error: BaseException|None = None
self.exited = trio.Event()
# NOTE: when no explicit call is made to
# `.open_root_actor()` by application code,
@ -116,6 +117,13 @@ class ActorNursery:
# and syncing purposes to any actor opened nurseries.
self._implicit_runtime_started: bool = False
# TODO: remove the `.run_in_actor()` API and thus this 2ndary
# nursery when that API get's moved outside this primitive!
self._ria_nursery = ria_nursery
# portals spawned with ``run_in_actor()`` are
# cancelled when their "main" result arrives
self._cancel_after_result_on_exit: set = set()
async def start_actor(
self,
name: str,
@ -126,10 +134,14 @@ class ActorNursery:
rpc_module_paths: list[str]|None = None,
enable_modules: list[str]|None = None,
loglevel: str|None = None, # set log level per subactor
nursery: trio.Nursery|None = None,
debug_mode: bool|None = None,
infect_asyncio: bool = False,
# TODO: ideally we can rm this once we no longer have
# a `._ria_nursery` since the dependent APIs have been
# removed!
nursery: trio.Nursery|None = None,
) -> Portal:
'''
Start a (daemon) actor: an process that has no designated
@ -146,6 +158,7 @@ class ActorNursery:
# configure and pass runtime state
_rtv = _state._runtime_vars.copy()
_rtv['_is_root'] = False
_rtv['_is_infected_aio'] = infect_asyncio
# allow setting debug policy per actor
if debug_mode is not None:
@ -200,6 +213,7 @@ class ActorNursery:
# |_ dynamic @context decoration on child side
# |_ implicit `Portal.open_context() as (ctx, first):`
# and `return first` on parent side.
# |_ mention how it's similar to `trio-parallel` API?
# -[ ] use @api_frame on the wrapper
async def run_in_actor(
self,
@ -269,11 +283,14 @@ class ActorNursery:
) -> None:
'''
Cancel this nursery by instructing each subactor to cancel
itself and wait for all subactors to terminate.
Cancel this actor-nursery by instructing each subactor's
runtime to cancel and wait for all underlying sub-processes
to terminate.
If ``hard_killl`` is set to ``True`` then kill the processes
directly without any far end graceful ``trio`` cancellation.
If `hard_kill` is set then kill the processes directly using
the spawning-backend's API/OS-machinery without any attempt
at (graceful) `trio`-style cancellation using our
`Actor.cancel()`.
'''
__runtimeframe__: int = 1 # noqa
@ -357,11 +374,12 @@ class ActorNursery:
@acm
async def _open_and_supervise_one_cancels_all_nursery(
actor: Actor,
tb_hide: bool = False,
) -> typing.AsyncGenerator[ActorNursery, None]:
# normally don't need to show user by default
__tracebackhide__: bool = True
__tracebackhide__: bool = tb_hide
outer_err: BaseException|None = None
inner_err: BaseException|None = None
@ -629,8 +647,12 @@ async def open_nursery(
f'|_{an}\n'
)
# shutdown runtime if it was started
if implicit_runtime:
# shutdown runtime if it was started and report noisly
# that we're did so.
msg += '=> Shutting down actor runtime <=\n'
log.info(msg)
log.info(msg)
else:
# keep noise low during std operation.
log.runtime(msg)

View File

@ -54,6 +54,25 @@ def examples_dir() -> pathlib.Path:
return repodir() / 'examples'
def mk_cmd(
ex_name: str,
exs_subpath: str = 'debugging',
) -> str:
'''
Generate a shell command suitable to pass to ``pexpect.spawn()``.
'''
script_path: pathlib.Path = (
examples_dir()
/ exs_subpath
/ f'{ex_name}.py'
)
return ' '.join([
'python',
str(script_path)
])
@acm
async def expect_ctxc(
yay: bool,

View File

@ -26,9 +26,10 @@ from ._debug import (
breakpoint as breakpoint,
pause as pause,
pause_from_sync as pause_from_sync,
shield_sigint_handler as shield_sigint_handler,
sigint_shield as sigint_shield,
open_crash_handler as open_crash_handler,
maybe_open_crash_handler as maybe_open_crash_handler,
maybe_init_greenback as maybe_init_greenback,
post_mortem as post_mortem,
mk_pdb as mk_pdb,
)

File diff suppressed because it is too large Load Diff

View File

@ -24,13 +24,24 @@ disjoint, parallel executing tasks in separate actors.
'''
from __future__ import annotations
# from functools import partial
from threading import (
current_thread,
Thread,
RLock,
)
import multiprocessing as mp
from signal import (
signal,
getsignal,
SIGUSR1,
)
import traceback
from typing import TYPE_CHECKING
# import traceback
from types import ModuleType
from typing import (
Callable,
TYPE_CHECKING,
)
import trio
from tractor import (
@ -51,26 +62,45 @@ if TYPE_CHECKING:
@trio.lowlevel.disable_ki_protection
def dump_task_tree() -> None:
import stackscope
from tractor.log import get_console_log
'''
Do a classic `stackscope.extract()` task-tree dump to console at
`.devx()` level.
'''
import stackscope
tree_str: str = str(
stackscope.extract(
trio.lowlevel.current_root_task(),
recurse_child_tasks=True
)
)
log = get_console_log(
name=__name__,
level='cancel',
)
actor: Actor = _state.current_actor()
thr: Thread = current_thread()
log.devx(
f'Dumping `stackscope` tree for actor\n'
f'{actor.name}: {actor}\n'
f' |_{mp.current_process()}\n\n'
f'{actor.uid}:\n'
f'|_{mp.current_process()}\n'
f' |_{thr}\n'
f' |_{actor}\n\n'
# start-of-trace-tree delimiter (mostly for testing)
'------ - ------\n'
'\n'
+
f'{tree_str}\n'
+
# end-of-trace-tree delimiter (mostly for testing)
f'\n'
f'------ {actor.uid!r} ------\n'
)
# TODO: can remove this right?
# -[ ] was original code from author
#
# print(
# 'DUMPING FROM PRINT\n'
# +
# content
# )
# import logging
# try:
# with open("/dev/tty", "w") as tty:
@ -80,58 +110,130 @@ def dump_task_tree() -> None:
# "task_tree"
# ).exception("Error printing task tree")
_handler_lock = RLock()
_tree_dumped: bool = False
def signal_handler(
def dump_tree_on_sig(
sig: int,
frame: object,
relay_to_subs: bool = True,
) -> None:
try:
trio.lowlevel.current_trio_token(
).run_sync_soon(dump_task_tree)
except RuntimeError:
# not in async context -- print a normal traceback
traceback.print_stack()
global _tree_dumped, _handler_lock
with _handler_lock:
if _tree_dumped:
log.warning(
'Already dumped for this actor...??'
)
return
_tree_dumped = True
# actor: Actor = _state.current_actor()
log.devx(
'Trying to dump `stackscope` tree..\n'
)
try:
dump_task_tree()
# await actor._service_n.start_soon(
# partial(
# trio.to_thread.run_sync,
# dump_task_tree,
# )
# )
# trio.lowlevel.current_trio_token().run_sync_soon(
# dump_task_tree
# )
except RuntimeError:
log.exception(
'Failed to dump `stackscope` tree..\n'
)
# not in async context -- print a normal traceback
# traceback.print_stack()
raise
except BaseException:
log.exception(
'Failed to dump `stackscope` tree..\n'
)
raise
log.devx(
'Supposedly we dumped just fine..?'
)
if not relay_to_subs:
return
an: ActorNursery
for an in _state.current_actor()._actoruid2nursery.values():
subproc: ProcessType
subactor: Actor
for subactor, subproc, _ in an._children.values():
log.devx(
log.warning(
f'Relaying `SIGUSR1`[{sig}] to sub-actor\n'
f'{subactor}\n'
f' |_{subproc}\n'
)
if isinstance(subproc, trio.Process):
subproc.send_signal(sig)
# bc of course stdlib can't have a std API.. XD
match subproc:
case trio.Process():
subproc.send_signal(sig)
elif isinstance(subproc, mp.Process):
subproc._send_signal(sig)
case mp.Process():
subproc._send_signal(sig)
def enable_stack_on_sig(
sig: int = SIGUSR1
) -> None:
sig: int = SIGUSR1,
) -> ModuleType:
'''
Enable `stackscope` tracing on reception of a signal; by
default this is SIGUSR1.
HOT TIP: a task/ctx-tree dump can be triggered from a shell with
fancy cmds.
For ex. from `bash` using `pgrep` and cmd-sustitution
(https://www.gnu.org/software/bash/manual/bash.html#Command-Substitution)
you could use:
>> kill -SIGUSR1 $(pgrep -f '<cmd>')
Or with with `xonsh` (which has diff capture-from-subproc syntax)
>> kill -SIGUSR1 @$(pgrep -f '<cmd>')
'''
try:
import stackscope
except ImportError:
log.warning(
'`stackscope` not installed for use in debug mode!'
)
return None
handler: Callable|int = getsignal(sig)
if handler is dump_tree_on_sig:
log.devx(
'A `SIGUSR1` handler already exists?\n'
f'|_ {handler!r}\n'
)
return
signal(
sig,
signal_handler,
dump_tree_on_sig,
)
# NOTE: not the above can be triggered from
# a (xonsh) shell using:
# kill -SIGUSR1 @$(pgrep -f '<cmd>')
#
# for example if you were looking to trace a `pytest` run
# kill -SIGUSR1 @$(pgrep -f 'pytest')
log.devx(
'Enabling trace-trees on `SIGUSR1` '
'since `stackscope` is installed @ \n'
f'{stackscope!r}\n\n'
f'With `SIGUSR1` handler\n'
f'|_{dump_tree_on_sig}\n'
)
return stackscope

View File

@ -54,11 +54,12 @@ LOG_FORMAT = (
DATE_FORMAT = '%b %d %H:%M:%S'
# FYI, ERROR is 40
# TODO: use a `bidict` to avoid the :155 check?
CUSTOM_LEVELS: dict[str, int] = {
'TRANSPORT': 5,
'RUNTIME': 15,
'DEVX': 17,
'CANCEL': 18,
'CANCEL': 22,
'PDB': 500,
}
STD_PALETTE = {
@ -147,6 +148,8 @@ class StackLevelAdapter(LoggerAdapter):
Delegate a log call to the underlying logger, after adding
contextual information from this adapter instance.
NOTE: all custom level methods (above) delegate to this!
'''
if self.isEnabledFor(level):
stacklevel: int = 3

View File

@ -374,7 +374,7 @@ class PldRx(Struct):
case _:
src_err = InternalError(
'Unknown IPC msg ??\n\n'
'Invalid IPC msg ??\n\n'
f'{msg}\n'
)
@ -499,7 +499,7 @@ async def maybe_limit_plds(
yield None
return
# sanity on scoping
# sanity check on IPC scoping
curr_ctx: Context = current_ipc_ctx()
assert ctx is curr_ctx
@ -510,6 +510,8 @@ async def maybe_limit_plds(
) as msgdec:
yield msgdec
# when the applied spec is unwound/removed, the same IPC-ctx
# should still be in scope.
curr_ctx: Context = current_ipc_ctx()
assert ctx is curr_ctx
@ -525,16 +527,26 @@ async def drain_to_final_msg(
list[MsgType]
]:
'''
Drain IPC msgs delivered to the underlying IPC primitive's
rx-mem-chan (eg. `Context._rx_chan`) from the runtime in
search for a final result or error.
Drain IPC msgs delivered to the underlying IPC context's
rx-mem-chan (i.e. from `Context._rx_chan`) in search for a final
`Return` or `Error` msg.
The motivation here is to ideally capture errors during ctxc
conditions where a canc-request/or local error is sent but the
local task also excepts and enters the
`Portal.open_context().__aexit__()` block wherein we prefer to
capture and raise any remote error or ctxc-ack as part of the
`ctx.result()` cleanup and teardown sequence.
Deliver the `Return` + preceding drained msgs (`list[MsgType]`)
as a pair unless an `Error` is found, in which unpack and raise
it.
The motivation here is to always capture any remote error relayed
by the remote peer task during a ctxc condition.
For eg. a ctxc-request may be sent to the peer as part of the
local task's (request for) cancellation but then that same task
**also errors** before executing the teardown in the
`Portal.open_context().__aexit__()` block. In such error-on-exit
cases we want to always capture and raise any delivered remote
error (like an expected ctxc-ACK) as part of the final
`ctx.wait_for_result()` teardown sequence such that the
`Context.outcome` related state always reflect what transpired
even after ctx closure and the `.open_context()` block exit.
'''
__tracebackhide__: bool = hide_tb
@ -572,22 +584,42 @@ async def drain_to_final_msg(
# |_from tractor.devx._debug import pause
# await pause()
# NOTE: we get here if the far end was
# `ContextCancelled` in 2 cases:
# 1. we requested the cancellation and thus
# SHOULD NOT raise that far end error,
# 2. WE DID NOT REQUEST that cancel and thus
# SHOULD RAISE HERE!
except trio.Cancelled as taskc:
except trio.Cancelled as _taskc:
taskc: trio.Cancelled = _taskc
# report when the cancellation wasn't (ostensibly) due to
# RPC operation, some surrounding parent cancel-scope.
if not ctx._scope.cancel_called:
task: trio.lowlevel.Task = trio.lowlevel.current_task()
rent_n: trio.Nursery = task.parent_nursery
if (
(local_cs := rent_n.cancel_scope).cancel_called
):
log.cancel(
'RPC-ctx cancelled by local-parent scope during drain!\n\n'
f'c}}>\n'
f' |_{rent_n}\n'
f' |_.cancel_scope = {local_cs}\n'
f' |_>c}}\n'
f' |_{ctx.pformat(indent=" "*9)}'
# ^TODO, some (other) simpler repr here?
)
__tracebackhide__: bool = False
# CASE 2: mask the local cancelled-error(s)
# only when we are sure the remote error is
# the source cause of this local task's
# cancellation.
ctx.maybe_raise(
# TODO: when use this/
# from_src_exc=taskc,
hide_tb=hide_tb,
from_src_exc=taskc,
# ?TODO? when *should* we use this?
)
# CASE 1: we DID request the cancel we simply
@ -659,7 +691,7 @@ async def drain_to_final_msg(
# Stop()
case Stop():
pre_result_drained.append(msg)
log.cancel(
log.runtime( # normal/expected shutdown transaction
'Remote stream terminated due to "stop" msg:\n\n'
f'{pretty_struct.pformat(msg)}\n'
)
@ -719,13 +751,19 @@ async def drain_to_final_msg(
pre_result_drained.append(msg)
# It's definitely an internal error if any other
# msg type without a`'cid'` field arrives here!
report: str = (
f'Invalid or unknown msg type {type(msg)!r}!?\n'
)
if not msg.cid:
raise InternalError(
'Unexpected cid-missing msg?\n\n'
f'{msg}\n'
report += (
'\nWhich also has no `.cid` field?\n'
)
raise RuntimeError('Unknown msg type: {msg}')
raise MessagingError(
report
+
f'\n{msg}\n'
)
else:
log.cancel(

View File

@ -34,6 +34,9 @@ from pprint import (
saferepr,
)
from tractor.log import get_logger
log = get_logger()
# TODO: auto-gen type sig for input func both for
# type-msgs and logging of RPC tasks?
# taken and modified from:
@ -143,7 +146,13 @@ def pformat(
else: # the `pprint` recursion-safe format:
# https://docs.python.org/3.11/library/pprint.html#pprint.saferepr
val_str: str = saferepr(v)
try:
val_str: str = saferepr(v)
except Exception:
log.exception(
'Failed to `saferepr({type(struct)})` !?\n'
)
return _Struct.__repr__(struct)
# TODO: LOLOL use `textwrap.indent()` instead dawwwwwg!
obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n')
@ -194,12 +203,20 @@ class Struct(
return sin_props
pformat = pformat
# __repr__ = pformat
# __str__ = __repr__ = pformat
# TODO: use a pprint.PrettyPrinter instance around ONLY rendering
# inside a known tty?
# def __repr__(self) -> str:
# ...
__repr__ = pformat
def __repr__(self) -> str:
try:
return pformat(self)
except Exception:
log.exception(
f'Failed to `pformat({type(self)})` !?\n'
)
return _Struct.__repr__(self)
def copy(
self,

View File

@ -18,11 +18,13 @@
Infection apis for ``asyncio`` loops running ``trio`` using guest mode.
'''
from __future__ import annotations
import asyncio
from asyncio.exceptions import CancelledError
from contextlib import asynccontextmanager as acm
from dataclasses import dataclass
import inspect
import traceback
from typing import (
Any,
Callable,
@ -30,25 +32,30 @@ from typing import (
Awaitable,
)
import trio
from outcome import Error
from tractor.log import get_logger
import tractor
from tractor._exceptions import AsyncioCancelled
from tractor._state import (
current_actor,
debug_mode,
)
from tractor.devx import _debug
from tractor._exceptions import AsyncioCancelled
from tractor.log import get_logger
from tractor.trionics._broadcast import (
broadcast_receiver,
BroadcastReceiver,
)
import trio
from outcome import (
Error,
Outcome,
)
log = get_logger(__name__)
__all__ = ['run_task', 'run_as_asyncio_guest']
__all__ = [
'run_task',
'run_as_asyncio_guest',
]
@dataclass
@ -152,16 +159,17 @@ def _run_asyncio_task(
*,
qsize: int = 1,
provide_channels: bool = False,
hide_tb: bool = False,
**kwargs,
) -> LinkedTaskChannel:
'''
Run an ``asyncio`` async function or generator in a task, return
or stream the result back to ``trio``.
or stream the result back to the caller `trio.lowleve.Task`.
'''
__tracebackhide__ = True
if not current_actor().is_infected_aio():
__tracebackhide__: bool = hide_tb
if not tractor.current_actor().is_infected_aio():
raise RuntimeError(
"`infect_asyncio` mode is not enabled!?"
)
@ -172,7 +180,6 @@ def _run_asyncio_task(
to_trio, from_aio = trio.open_memory_channel(qsize) # type: ignore
args = tuple(inspect.getfullargspec(func).args)
if getattr(func, '_tractor_steam_function', None):
# the assumption is that the target async routine accepts the
# send channel then it intends to yield more then one return
@ -222,6 +229,7 @@ def _run_asyncio_task(
try:
result = await coro
except BaseException as aio_err:
chan._aio_err = aio_err
if isinstance(aio_err, CancelledError):
log.runtime(
'`asyncio` task was cancelled..\n'
@ -230,7 +238,6 @@ def _run_asyncio_task(
log.exception(
'`asyncio` task errored\n'
)
chan._aio_err = aio_err
raise
else:
@ -266,10 +273,13 @@ def _run_asyncio_task(
aio_task_complete
)
)
chan._aio_task = task
chan._aio_task: asyncio.Task = task
# XXX TODO XXX get this actually workin.. XD
# maybe setup `greenback` for `asyncio`-side task REPLing
# -[ ] we need logic to setup `greenback` for `asyncio`-side task
# REPLing.. which should normally be nearly the same as for
# `trio`?
# -[ ] add to a new `.devx._greenback.maybe_init_for_asyncio()`?
if (
debug_mode()
and
@ -282,31 +292,38 @@ def _run_asyncio_task(
def cancel_trio(task: asyncio.Task) -> None:
'''
Cancel the calling ``trio`` task on error.
Cancel the calling `trio` task on error.
'''
nonlocal chan
aio_err = chan._aio_err
aio_err: BaseException|None = chan._aio_err
task_err: BaseException|None = None
# only to avoid ``asyncio`` complaining about uncaptured
# only to avoid `asyncio` complaining about uncaptured
# task exceptions
try:
task.exception()
res: Any = task.result()
except BaseException as terr:
task_err = terr
task_err: BaseException = terr
msg: str = (
'Infected `asyncio` task {etype_str}\n'
f'|_{task}\n'
)
if isinstance(terr, CancelledError):
msg += (
f'c)>\n'
f' |_{task}\n'
)
log.cancel(
msg.format(etype_str='cancelled')
)
else:
msg += (
f'x)>\n'
f' |_{task}\n'
)
log.exception(
msg.format(etype_str='cancelled')
msg.format(etype_str='errored')
)
assert type(terr) is type(aio_err), (
@ -326,29 +343,45 @@ def _run_asyncio_task(
if task_err is None:
assert aio_err
aio_err.with_traceback(aio_err.__traceback__)
# log.error(
# 'infected task errorred'
# )
# wait, wut?
# aio_err.with_traceback(aio_err.__traceback__)
# TODO: show that the cancellation originated
# from the ``trio`` side? right?
# elif type(aio_err) is CancelledError:
# TODO: show when cancellation originated
# from each side more pedantically?
# elif (
# type(aio_err) is CancelledError
# and # trio was the cause?
# cancel_scope.cancel_called
# ):
# log.cancel(
# 'infected task was cancelled'
# 'infected task was cancelled by `trio`-side'
# )
# raise aio_err from task_err
# if cancel_scope.cancelled:
# raise aio_err from err
# XXX: alway cancel the scope on error
# in case the trio task is blocking
# on a checkpoint.
# XXX: if not already, alway cancel the scope
# on a task error in case the trio task is blocking on
# a checkpoint.
cancel_scope.cancel()
# raise any ``asyncio`` side error.
if (
task_err
and
aio_err is not task_err
):
raise aio_err from task_err
# raise any `asyncio` side error.
raise aio_err
log.info(
'`trio` received final result from {task}\n'
f'|_{res}\n'
)
# TODO: do we need this?
# if task_err:
# cancel_scope.cancel()
# raise task_err
task.add_done_callback(cancel_trio)
return chan
@ -375,7 +408,9 @@ async def translate_aio_errors(
) -> None:
aio_err = chan._aio_err
if (
aio_err is not None and
aio_err is not None
and
# not isinstance(aio_err, CancelledError)
type(aio_err) != CancelledError
):
# always raise from any captured asyncio error
@ -407,13 +442,17 @@ async def translate_aio_errors(
):
aio_err = chan._aio_err
if (
task.cancelled() and
task.cancelled()
and
type(aio_err) is CancelledError
):
# if an underlying ``asyncio.CancelledError`` triggered this
# if an underlying `asyncio.CancelledError` triggered this
# channel close, raise our (non-``BaseException``) wrapper
# error: ``AsyncioCancelled`` from that source error.
raise AsyncioCancelled from aio_err
raise AsyncioCancelled(
f'Task cancelled\n'
f'|_{task}\n'
) from aio_err
else:
raise
@ -456,8 +495,8 @@ async def run_task(
) -> Any:
'''
Run an ``asyncio`` async function or generator in a task, return
or stream the result back to ``trio``.
Run an `asyncio` async function or generator in a task, return
or stream the result back to `trio`.
'''
# simple async func
@ -515,11 +554,124 @@ async def open_channel_from(
chan._to_trio.close()
def run_as_asyncio_guest(
class AsyncioRuntimeTranslationError(RuntimeError):
'''
We failed to correctly relay runtime semantics and/or maintain SC
supervision rules cross-event-loop.
'''
def run_trio_task_in_future(
async_fn,
*args,
) -> asyncio.Future:
'''
Run an async-func as a `trio` task from an `asyncio.Task` wrapped
in a `asyncio.Future` which is returned to the caller.
Another astounding feat by the great @oremanj !!
Bo
'''
result_future = asyncio.Future()
cancel_scope = trio.CancelScope()
finished: bool = False
# monkey-patch the future's `.cancel()` meth to
# allow cancellation relay to `trio`-task.
cancel_message: str|None = None
orig_cancel = result_future.cancel
def wrapped_cancel(
msg: str|None = None,
):
nonlocal cancel_message
if finished:
# We're being called back after the task completed
if msg is not None:
return orig_cancel(msg)
elif cancel_message is not None:
return orig_cancel(cancel_message)
else:
return orig_cancel()
if result_future.done():
return False
# Forward cancellation to the Trio task, don't mark
# future as cancelled until it completes
cancel_message = msg
cancel_scope.cancel()
return True
result_future.cancel = wrapped_cancel
async def trio_task() -> None:
nonlocal finished
try:
with cancel_scope:
try:
# TODO: type this with new tech in 3.13
result: Any = await async_fn(*args)
finally:
finished = True
# Propagate result or cancellation to the Future
if cancel_scope.cancelled_caught:
result_future.cancel()
elif not result_future.cancelled():
result_future.set_result(result)
except BaseException as exc:
# the result future gets all the non-Cancelled
# exceptions. Any Cancelled need to keep propagating
# out of this stack frame in order to reach the cancel
# scope for which they're intended.
cancelled: BaseException|None
rest: BaseException|None
if isinstance(exc, BaseExceptionGroup):
cancelled, rest = exc.split(trio.Cancelled)
elif isinstance(exc, trio.Cancelled):
cancelled, rest = exc, None
else:
cancelled, rest = None, exc
if not result_future.cancelled():
if rest:
result_future.set_exception(rest)
else:
result_future.cancel()
if cancelled:
raise cancelled
trio.lowlevel.spawn_system_task(
trio_task,
name=async_fn,
)
return result_future
def run_as_asyncio_guest(
trio_main: Callable,
# ^-NOTE-^ when spawned with `infected_aio=True` this func is
# normally `Actor._async_main()` as is passed by some boostrap
# entrypoint like `._entry._trio_main()`.
_sigint_loop_pump_delay: float = 0,
) -> None:
# ^-TODO-^ technically whatever `trio_main` returns.. we should
# try to use func-typevar-params at leaast by 3.13!
# -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#callback-protocols
# -[ ] https://peps.python.org/pep-0646/#using-type-variable-tuples-in-functions
# -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#unpack-for-keyword-arguments
# -[ ] https://peps.python.org/pep-0718/
'''
Entry for an "infected ``asyncio`` actor".
@ -545,51 +697,213 @@ def run_as_asyncio_guest(
# :)
async def aio_main(trio_main):
'''
Main `asyncio.Task` which calls
`trio.lowlevel.start_guest_run()` to "infect" the `asyncio`
event-loop by embedding the `trio` scheduler allowing us to
boot the `tractor` runtime and connect back to our parent.
'''
loop = asyncio.get_running_loop()
trio_done_fut = asyncio.Future()
trio_done_fute = asyncio.Future()
startup_msg: str = (
'Starting `asyncio` guest-loop-run\n'
'-> got running loop\n'
'-> built a `trio`-done future\n'
)
if debug_mode():
# XXX make it obvi we know this isn't supported yet!
log.error(
'Attempting to enter unsupported `greenback` init '
'from `asyncio` task..'
)
await _debug.maybe_init_greenback(
force_reload=True,
)
# TODO: shoudn't this be done in the guest-run trio task?
# if debug_mode():
# # XXX make it obvi we know this isn't supported yet!
# log.error(
# 'Attempting to enter unsupported `greenback` init '
# 'from `asyncio` task..'
# )
# await _debug.maybe_init_greenback(
# force_reload=True,
# )
def trio_done_callback(main_outcome):
log.runtime(
f'`trio` guest-run finishing with outcome\n'
f'>) {main_outcome}\n'
f'|_{trio_done_fute}\n'
)
if isinstance(main_outcome, Error):
error = main_outcome.error
trio_done_fut.set_exception(error)
error: BaseException = main_outcome.error
# TODO: explicit asyncio tb?
# traceback.print_exception(error)
# XXX: do we need this?
# actor.cancel_soon()
# show an dedicated `asyncio`-side tb from the error
tb_str: str = ''.join(traceback.format_exception(error))
log.exception(
'Guest-run errored!?\n\n'
f'{main_outcome}\n'
f'{error}\n\n'
f'{tb_str}\n'
)
trio_done_fute.set_exception(error)
# raise inline
main_outcome.unwrap()
else:
trio_done_fut.set_result(main_outcome)
log.runtime(f"trio_main finished: {main_outcome!r}")
trio_done_fute.set_result(main_outcome)
log.info(
f'`trio` guest-run finished with outcome\n'
f')>\n'
f'|_{trio_done_fute}\n'
)
startup_msg += (
f'-> created {trio_done_callback!r}\n'
f'-> scheduling `trio_main`: {trio_main!r}\n'
)
# start the infection: run trio on the asyncio loop in "guest mode"
log.runtime(
'Infecting `asyncio`-process with a `trio` guest-run of\n\n'
f'{trio_main!r}\n\n'
f'{trio_done_callback}\n'
f'{startup_msg}\n\n'
+
'Infecting `asyncio`-process with a `trio` guest-run!\n'
)
trio.lowlevel.start_guest_run(
trio_main,
run_sync_soon_threadsafe=loop.call_soon_threadsafe,
done_callback=trio_done_callback,
)
# NOTE `.unwrap()` will raise on error
return (await trio_done_fut).unwrap()
fute_err: BaseException|None = None
try:
out: Outcome = await asyncio.shield(trio_done_fute)
# NOTE will raise (via `Error.unwrap()`) from any
# exception packed into the guest-run's `main_outcome`.
return out.unwrap()
except (
# XXX special SIGINT-handling is required since
# `asyncio.shield()`-ing seems to NOT handle that case as
# per recent changes in 3.11:
# https://docs.python.org/3/library/asyncio-runner.html#handling-keyboard-interruption
#
# NOTE: further, apparently ONLY need to handle this
# special SIGINT case since all other `asyncio`-side
# errors can be processed via our `chan._aio_err`
# relaying (right?); SIGINT seems to be totally diff
# error path in `asyncio`'s runtime..?
asyncio.CancelledError,
) as _fute_err:
fute_err = _fute_err
err_message: str = (
'main `asyncio` task '
)
if isinstance(fute_err, asyncio.CancelledError):
err_message += 'was cancelled!\n'
else:
err_message += f'errored with {out.error!r}\n'
actor: tractor.Actor = tractor.current_actor()
log.exception(
err_message
+
'Cancelling `trio`-side `tractor`-runtime..\n'
f'c)>\n'
f' |_{actor}.cancel_soon()\n'
)
# XXX WARNING XXX the next LOCs are super important, since
# without them, we can get guest-run abandonment cases
# where `asyncio` will not schedule or wait on the `trio`
# guest-run task before final shutdown! This is
# particularly true if the `trio` side has tasks doing
# shielded work when a SIGINT condition occurs.
#
# We now have the
# `test_infected_asyncio.test_sigint_closes_lifetime_stack()`
# suite to ensure we do not suffer this issues
# (hopefully) ever again.
#
# The original abandonment issue surfaced as 2 different
# race-condition dependent types scenarios all to do with
# `asyncio` handling SIGINT from the system:
#
# - "silent-abandon" (WORST CASE):
# `asyncio` abandons the `trio` guest-run task silently
# and no `trio`-guest-run or `tractor`-actor-runtime
# teardown happens whatsoever..
#
# - "loud-abandon" (BEST-ish CASE):
# the guest run get's abaondoned "loudly" with `trio`
# reporting a console traceback and further tbs of all
# the (failed) GC-triggered shutdown routines which
# thankfully does get dumped to console..
#
# The abandonment is most easily reproduced if the `trio`
# side has tasks doing shielded work where those tasks
# ignore the normal `Cancelled` condition and continue to
# run, but obviously `asyncio` isn't aware of this and at
# some point bails on the guest-run unless we take manual
# intervention..
#
# To repeat, *WITHOUT THIS* stuff below the guest-run can
# get race-conditionally abandoned!!
#
# XXX SOLUTION XXX
# ------ - ------
# XXX FIRST PART:
# ------ - ------
# the obvious fix to the "silent-abandon" case is to
# explicitly cancel the actor runtime such that no
# runtime tasks are even left unaware that the guest-run
# should be terminated due to OS cancellation.
#
actor.cancel_soon()
# ------ - ------
# XXX SECOND PART:
# ------ - ------
# Pump the `asyncio` event-loop to allow
# `trio`-side to `trio`-guest-run to complete and
# teardown !!
#
# oh `asyncio`, how i don't miss you at all XD
while not trio_done_fute.done():
log.runtime(
'Waiting on main guest-run `asyncio` task to complete..\n'
f'|_trio_done_fut: {trio_done_fute}\n'
)
await asyncio.sleep(_sigint_loop_pump_delay)
# XXX is there any alt API/approach like the internal
# call below but that doesn't block indefinitely..?
# loop._run_once()
try:
return trio_done_fute.result()
except asyncio.exceptions.InvalidStateError as state_err:
# XXX be super dupere noisy about abandonment issues!
aio_task: asyncio.Task = asyncio.current_task()
message: str = (
'The `asyncio`-side task likely exited before the '
'`trio`-side guest-run completed!\n\n'
)
if fute_err:
message += (
f'The main {aio_task}\n'
f'STOPPED due to {type(fute_err)}\n\n'
)
message += (
f'Likely something inside our guest-run-as-task impl is '
f'not effectively waiting on the `trio`-side to complete ?!\n'
f'This code -> {aio_main!r}\n\n'
'Below you will likely see a '
'"RuntimeWarning: Trio guest run got abandoned.." !!\n'
)
raise AsyncioRuntimeTranslationError(message) from state_err
# might as well if it's installed.
try:
@ -597,6 +911,8 @@ def run_as_asyncio_guest(
loop = uvloop.new_event_loop()
asyncio.set_event_loop(loop)
except ImportError:
pass
log.runtime('`uvloop` not available..')
return asyncio.run(aio_main(trio_main))
return asyncio.run(
aio_main(trio_main),
)

View File

@ -156,11 +156,12 @@ class BroadcastState(Struct):
class BroadcastReceiver(ReceiveChannel):
'''
A memory receive channel broadcaster which is non-lossy for the
fastest consumer.
A memory receive channel broadcaster which is non-lossy for
the fastest consumer.
Additional consumer tasks can receive all produced values by registering
with ``.subscribe()`` and receiving from the new instance it delivers.
Additional consumer tasks can receive all produced values by
registering with ``.subscribe()`` and receiving from the new
instance it delivers.
'''
def __init__(

View File

@ -18,8 +18,12 @@
Async context manager primitives with hard ``trio``-aware semantics
'''
from contextlib import asynccontextmanager as acm
from __future__ import annotations
from contextlib import (
asynccontextmanager as acm,
)
import inspect
from types import ModuleType
from typing import (
Any,
AsyncContextManager,
@ -30,13 +34,16 @@ from typing import (
Optional,
Sequence,
TypeVar,
TYPE_CHECKING,
)
import trio
from tractor._state import current_actor
from tractor.log import get_logger
if TYPE_CHECKING:
from tractor import ActorNursery
log = get_logger(__name__)
@ -46,8 +53,10 @@ T = TypeVar("T")
@acm
async def maybe_open_nursery(
nursery: trio.Nursery | None = None,
nursery: trio.Nursery|ActorNursery|None = None,
shield: bool = False,
lib: ModuleType = trio,
) -> AsyncGenerator[trio.Nursery, Any]:
'''
Create a new nursery if None provided.
@ -58,13 +67,12 @@ async def maybe_open_nursery(
if nursery is not None:
yield nursery
else:
async with trio.open_nursery() as nursery:
async with lib.open_nursery() as nursery:
nursery.cancel_scope.shield = shield
yield nursery
async def _enter_and_wait(
mngr: AsyncContextManager[T],
unwrapped: dict[int, T],
all_entered: trio.Event,
@ -91,7 +99,6 @@ async def _enter_and_wait(
@acm
async def gather_contexts(
mngrs: Sequence[AsyncContextManager[T]],
) -> AsyncGenerator[
@ -102,15 +109,17 @@ async def gather_contexts(
None,
]:
'''
Concurrently enter a sequence of async context managers, each in
a separate ``trio`` task and deliver the unwrapped values in the
same order once all managers have entered. On exit all contexts are
subsequently and concurrently exited.
Concurrently enter a sequence of async context managers (acms),
each from a separate `trio` task and deliver the unwrapped
`yield`-ed values in the same order once all managers have entered.
This function is somewhat similar to common usage of
``contextlib.AsyncExitStack.enter_async_context()`` (in a loop) in
combo with ``asyncio.gather()`` except the managers are concurrently
entered and exited, and cancellation just works.
On exit, all acms are subsequently and concurrently exited.
This function is somewhat similar to a batch of non-blocking
calls to `contextlib.AsyncExitStack.enter_async_context()`
(inside a loop) *in combo with* a `asyncio.gather()` to get the
`.__aenter__()`-ed values, except the managers are both
concurrently entered and exited and *cancellation just works*(R).
'''
seed: int = id(mngrs)
@ -210,9 +219,10 @@ async def maybe_open_context(
) -> AsyncIterator[tuple[bool, T]]:
'''
Maybe open a context manager if there is not already a _Cached
version for the provided ``key`` for *this* actor. Return the
_Cached instance on a _Cache hit.
Maybe open an async-context-manager (acm) if there is not already
a `_Cached` version for the provided (input) `key` for *this* actor.
Return the `_Cached` instance on a _Cache hit.
'''
fid = id(acm_func)
@ -273,8 +283,13 @@ async def maybe_open_context(
else:
_Cache.users += 1
log.runtime(
f'Reusing resource for `_Cache` user {_Cache.users}\n\n'
f'{ctx_key!r} -> {yielded!r}\n'
f'Re-using cached resource for user {_Cache.users}\n\n'
f'{ctx_key!r} -> {type(yielded)}\n'
# TODO: make this work with values but without
# `msgspec.Struct` causing frickin crashes on field-type
# lookups..
# f'{ctx_key!r} -> {yielded!r}\n'
)
lock.release()
yield True, yielded