Hack asyncio to not abandon a guest-mode run? #2

Open
goodboy wants to merge 42 commits from aio_abandons into runtime_to_msgspec
41 changed files with 3122 additions and 1163 deletions

View File

@ -2,7 +2,10 @@ import asyncio
import trio import trio
import tractor import tractor
from tractor import to_asyncio from tractor import (
to_asyncio,
Portal,
)
async def aio_sleep_forever(): async def aio_sleep_forever():
@ -43,7 +46,7 @@ async def bp_then_error(
@tractor.context @tractor.context
async def trio_ctx( async def trio_ctx(
ctx: tractor.Context, ctx: tractor.Context,
bp_before_started: bool = False, bp_before_started: bool = True,
): ):
# this will block until the ``asyncio`` task sends a "first" # this will block until the ``asyncio`` task sends a "first"
@ -57,7 +60,6 @@ async def trio_ctx(
trio.open_nursery() as n, trio.open_nursery() as n,
): ):
assert first == 'start' assert first == 'start'
if bp_before_started: if bp_before_started:
@ -73,23 +75,24 @@ async def trio_ctx(
async def main( async def main(
bps_all_over: bool = False, bps_all_over: bool = True,
) -> None: ) -> None:
async with tractor.open_nursery( async with tractor.open_nursery(
# debug_mode=True, debug_mode=True,
maybe_enable_greenback=True,
# loglevel='devx',
) as n: ) as n:
ptl: Portal = await n.start_actor(
p = await n.start_actor(
'aio_daemon', 'aio_daemon',
enable_modules=[__name__], enable_modules=[__name__],
infect_asyncio=True, infect_asyncio=True,
debug_mode=True, debug_mode=True,
loglevel='cancel', # loglevel='cancel',
) )
async with p.open_context( async with ptl.open_context(
trio_ctx, trio_ctx,
bp_before_started=bps_all_over, bp_before_started=bps_all_over,
) as (ctx, first): ) as (ctx, first):
@ -105,7 +108,7 @@ async def main(
# TODO: case where we cancel from trio-side while asyncio task # TODO: case where we cancel from trio-side while asyncio task
# has debugger lock? # has debugger lock?
# await p.cancel_actor() # await ptl.cancel_actor()
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -25,7 +25,8 @@ async def main():
""" """
async with tractor.open_nursery( async with tractor.open_nursery(
debug_mode=True, debug_mode=True,
loglevel='cancel', # loglevel='cancel',
# loglevel='devx',
) as n: ) as n:
p0 = await n.start_actor('bp_forever', enable_modules=[__name__]) p0 = await n.start_actor('bp_forever', enable_modules=[__name__])

View File

@ -0,0 +1,81 @@
'''
Verify we can dump a `stackscope` tree on a hang.
'''
import os
import signal
import trio
import tractor
@tractor.context
async def start_n_shield_hang(
ctx: tractor.Context,
):
# actor: tractor.Actor = tractor.current_actor()
# sync to parent-side task
await ctx.started(os.getpid())
print('Entering shield sleep..')
with trio.CancelScope(shield=True):
await trio.sleep_forever() # in subactor
# XXX NOTE ^^^ since this shields, we expect
# the zombie reaper (aka T800) to engage on
# SIGINT from the user and eventually hard-kill
# this subprocess!
async def main(
from_test: bool = False,
) -> None:
async with (
tractor.open_nursery(
debug_mode=True,
enable_stack_on_sig=True,
# maybe_enable_greenback=False,
loglevel='devx',
) as an,
):
ptl: tractor.Portal = await an.start_actor(
'hanger',
enable_modules=[__name__],
debug_mode=True,
)
async with ptl.open_context(
start_n_shield_hang,
) as (ctx, cpid):
_, proc, _ = an._children[ptl.chan.uid]
assert cpid == proc.pid
print(
'Yo my child hanging..?\n'
'Sending SIGUSR1 to see a tree-trace!\n'
)
# XXX simulate the wrapping test's "user actions"
# (i.e. if a human didn't run this manually but wants to
# know what they should do to reproduce test behaviour)
if from_test:
os.kill(
cpid,
signal.SIGUSR1,
)
# simulate user cancelling program
await trio.sleep(0.5)
os.kill(
os.getpid(),
signal.SIGINT,
)
else:
# actually let user send the ctl-c
await trio.sleep_forever() # in root
if __name__ == '__main__':
trio.run(main)
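The `enable_stack_on_sig=True` flag above is what arms the `stackscope` tree-dump on `SIGUSR1`. As a rough, stdlib-only analogue of that signal-triggered dump idea (using `faulthandler`, which only prints per-thread stacks rather than the full `trio` task tree `stackscope` renders), a hypothetical standalone script might look like:

import faulthandler
import os
import signal
import time

# print every thread's stack to stderr whenever SIGUSR1 arrives
# (POSIX only; `stackscope` additionally walks `trio` task trees)
faulthandler.register(signal.SIGUSR1, all_threads=True)

if __name__ == '__main__':
    print(f'kill -USR1 {os.getpid()} to dump stacks..')
    while True:
        time.sleep(1)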

View File

@ -4,6 +4,13 @@ import time
import trio import trio
import tractor import tractor
# TODO: only import these when not running from test harness?
# can we detect `pexpect` usage maybe?
# from tractor.devx._debug import (
# get_lock,
# get_debug_req,
# )
def sync_pause( def sync_pause(
use_builtin: bool = False, use_builtin: bool = False,
@ -18,7 +25,13 @@ def sync_pause(
breakpoint(hide_tb=hide_tb) breakpoint(hide_tb=hide_tb)
else: else:
# TODO: maybe for testing some kind of cm style interface
# where the `._set_trace()` call doesn't happen until block
# exit?
# assert get_lock().ctx_in_debug is None
# assert get_debug_req().repl is None
tractor.pause_from_sync() tractor.pause_from_sync()
# assert get_debug_req().repl is None
if error: if error:
raise RuntimeError('yoyo sync code error') raise RuntimeError('yoyo sync code error')
@ -41,10 +54,11 @@ async def start_n_sync_pause(
async def main() -> None: async def main() -> None:
async with ( async with (
tractor.open_nursery( tractor.open_nursery(
# NOTE: required for pausing from sync funcs
maybe_enable_greenback=True,
debug_mode=True, debug_mode=True,
# loglevel='cancel', maybe_enable_greenback=True,
enable_stack_on_sig=True,
# loglevel='warning',
# loglevel='devx',
) as an, ) as an,
trio.open_nursery() as tn, trio.open_nursery() as tn,
): ):
@ -138,7 +152,9 @@ async def main() -> None:
# the case 2. from above still exists! # the case 2. from above still exists!
use_builtin=True, use_builtin=True,
), ),
abandon_on_cancel=False, # TODO: with this `False` we can hang!??!
# abandon_on_cancel=False,
abandon_on_cancel=True,
thread_name='inline_root_bg_thread', thread_name='inline_root_bg_thread',
) )

View File

@ -9,7 +9,7 @@ async def main(service_name):
async with tractor.open_nursery() as an: async with tractor.open_nursery() as an:
await an.start_actor(service_name) await an.start_actor(service_name)
async with tractor.get_arbiter('127.0.0.1', 1616) as portal: async with tractor.get_registry('127.0.0.1', 1616) as portal:
print(f"Arbiter is listening on {portal.channel}") print(f"Arbiter is listening on {portal.channel}")
async with tractor.wait_for_actor(service_name) as sockaddr: async with tractor.wait_for_actor(service_name) as sockaddr:

View File

View File

@ -0,0 +1,168 @@
'''
`tractor.devx.*` tooling sub-pkg test space.
'''
from typing import (
Callable,
)
import pytest
from pexpect.exceptions import (
TIMEOUT,
)
from pexpect.spawnbase import SpawnBase
from tractor._testing import (
mk_cmd,
)
@pytest.fixture
def spawn(
start_method,
testdir: pytest.Testdir,
reg_addr: tuple[str, int],
) -> Callable[[str], None]:
'''
Use the `pexpect` module shipped via `testdir.spawn()` to
run an `./examples/..` script by name.
'''
if start_method != 'trio':
pytest.skip(
'`pexpect` based tests only supported on `trio` backend'
)
def _spawn(
cmd: str,
**mkcmd_kwargs,
):
return testdir.spawn(
cmd=mk_cmd(
cmd,
**mkcmd_kwargs,
),
expect_timeout=3,
)
# such that the test dep can pass the input script name.
return _spawn
@pytest.fixture(
params=[False, True],
ids='ctl-c={}'.format,
)
def ctlc(
request,
ci_env: bool,
) -> bool:
use_ctlc = request.param
node = request.node
markers = node.own_markers
for mark in markers:
if mark.name == 'has_nested_actors':
pytest.skip(
f'Test {node} has nested actors and fails with Ctrl-C.\n'
f'The test can sometimes run fine locally but until'
' we solve this issue this CI test will be xfail:\n'
'https://github.com/goodboy/tractor/issues/320'
)
if use_ctlc:
# XXX: disable pygments highlighting for auto-tests
# since some envs (like actions CI) will struggle
# with the added color-char encoding..
from tractor.devx._debug import TractorConfig
TractorConfig.use_pygements = False
yield use_ctlc
def expect(
child,
# normally a `pdb` prompt by default
patt: str,
**kwargs,
) -> None:
'''
Expect wrapper that prints last seen console
data before failing.
'''
try:
child.expect(
patt,
**kwargs,
)
except TIMEOUT:
before = str(child.before.decode())
print(before)
raise
def in_prompt_msg(
child: SpawnBase,
parts: list[str],
pause_on_false: bool = False,
err_on_false: bool = False,
print_prompt_on_false: bool = True,
) -> bool:
'''
Predicate check if (the prompt's) std-streams output has all
`str`-parts in it.
Can be used in test asserts for bulk matching expected
log/REPL output for a given `pdb` interact point.
'''
__tracebackhide__: bool = False
before: str = str(child.before.decode())
for part in parts:
if part not in before:
if pause_on_false:
import pdbp
pdbp.set_trace()
if print_prompt_on_false:
print(before)
if err_on_false:
raise ValueError(
f'Could not find pattern in `before` output?\n'
f'part: {part!r}\n'
)
return False
return True
# TODO: support terminal color-char stripping so we can match
# against call stack frame output from the 'll' command and the like!
# -[ ] SO answer for stripping ANSI codes: https://stackoverflow.com/a/14693789
def assert_before(
child: SpawnBase,
patts: list[str],
**kwargs,
) -> None:
__tracebackhide__: bool = False
assert in_prompt_msg(
child=child,
parts=patts,
# since this is an "assert" helper ;)
err_on_false=True,
**kwargs
)
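As used by the debugger test suites below, a minimal (illustrative) test combining these helpers looks roughly like the following; the example-script name and the `PROMPT` regex are taken from the test modules themselves, not defined in this conftest:

PROMPT = r"\(Pdb\+\)"  # the `pdbp` REPL prompt matched by the tests

def test_usage_sketch(spawn, ctlc: bool):
    # run ./examples/debugging/subactor_breakpoint.py under `pexpect`
    child = spawn('subactor_breakpoint')

    # wait for the REPL prompt, dumping captured output on timeout
    expect(child, PROMPT)

    # bulk-match expected log/REPL lines in the pre-prompt output
    assert_before(
        child,
        ["('breakpoint_forever'"],
    )

    # resume the child program
    child.sendline('c')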

View File

@ -13,11 +13,9 @@ TODO:
from functools import partial from functools import partial
import itertools import itertools
import platform import platform
import pathlib
import time import time
import pytest import pytest
import pexpect
from pexpect.exceptions import ( from pexpect.exceptions import (
TIMEOUT, TIMEOUT,
EOF, EOF,
@ -28,12 +26,14 @@ from tractor.devx._debug import (
_crash_msg, _crash_msg,
_repl_fail_msg, _repl_fail_msg,
) )
from tractor._testing import (
examples_dir,
)
from conftest import ( from conftest import (
_ci_env, _ci_env,
) )
from .conftest import (
expect,
in_prompt_msg,
assert_before,
)
# TODO: The next great debugger audit could be done by you! # TODO: The next great debugger audit could be done by you!
# - recurrent entry to breakpoint() from single actor *after* and an # - recurrent entry to breakpoint() from single actor *after* and an
@ -52,15 +52,6 @@ if platform.system() == 'Windows':
) )
def mk_cmd(ex_name: str) -> str:
'''
Generate a command suitable to pass to ``pexpect.spawn()``.
'''
script_path: pathlib.Path = examples_dir() / 'debugging' / f'{ex_name}.py'
return ' '.join(['python', str(script_path)])
# TODO: was trying to this xfail style but some weird bug i see in CI # TODO: was trying to this xfail style but some weird bug i see in CI
# that's happening at collect time.. pretty soon gonna dump actions i'm # that's happening at collect time.. pretty soon gonna dump actions i'm
# thinkin... # thinkin...
@ -79,142 +70,9 @@ has_nested_actors = pytest.mark.has_nested_actors
# ) # )
@pytest.fixture
def spawn(
start_method,
testdir,
reg_addr,
) -> 'pexpect.spawn':
if start_method != 'trio':
pytest.skip(
"Debugger tests are only supported on the trio backend"
)
def _spawn(cmd):
return testdir.spawn(
cmd=mk_cmd(cmd),
expect_timeout=3,
)
return _spawn
PROMPT = r"\(Pdb\+\)" PROMPT = r"\(Pdb\+\)"
def expect(
child,
# prompt by default
patt: str = PROMPT,
**kwargs,
) -> None:
'''
Expect wrapper that prints last seen console
data before failing.
'''
try:
child.expect(
patt,
**kwargs,
)
except TIMEOUT:
before = str(child.before.decode())
print(before)
raise
def in_prompt_msg(
prompt: str,
parts: list[str],
pause_on_false: bool = False,
print_prompt_on_false: bool = True,
) -> bool:
'''
Predicate check if (the prompt's) std-streams output has all
`str`-parts in it.
Can be used in test asserts for bulk matching expected
log/REPL output for a given `pdb` interact point.
'''
__tracebackhide__: bool = False
for part in parts:
if part not in prompt:
if pause_on_false:
import pdbp
pdbp.set_trace()
if print_prompt_on_false:
print(prompt)
return False
return True
# TODO: todo support terminal color-chars stripping so we can match
# against call stack frame output from the the 'll' command the like!
# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789
def assert_before(
child,
patts: list[str],
**kwargs,
) -> None:
__tracebackhide__: bool = False
# as in before the prompt end
before: str = str(child.before.decode())
assert in_prompt_msg(
prompt=before,
parts=patts,
**kwargs
)
@pytest.fixture(
params=[False, True],
ids='ctl-c={}'.format,
)
def ctlc(
request,
ci_env: bool,
) -> bool:
use_ctlc = request.param
node = request.node
markers = node.own_markers
for mark in markers:
if mark.name == 'has_nested_actors':
pytest.skip(
f'Test {node} has nested actors and fails with Ctrl-C.\n'
f'The test can sometimes run fine locally but until'
' we solve' 'this issue this CI test will be xfail:\n'
'https://github.com/goodboy/tractor/issues/320'
)
if use_ctlc:
# XXX: disable pygments highlighting for auto-tests
# since some envs (like actions CI) will struggle
# the the added color-char encoding..
from tractor.devx._debug import TractorConfig
TractorConfig.use_pygements = False
yield use_ctlc
@pytest.mark.parametrize( @pytest.mark.parametrize(
'user_in_out', 'user_in_out',
[ [
@ -238,14 +96,15 @@ def test_root_actor_error(
# scan for the prompt # scan for the prompt
expect(child, PROMPT) expect(child, PROMPT)
before = str(child.before.decode())
# make sure expected logging and error arrives # make sure expected logging and error arrives
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_crash_msg, "('root'"] [
_crash_msg,
"('root'",
'AssertionError',
]
) )
assert 'AssertionError' in before
# send user command # send user command
child.sendline(user_input) child.sendline(user_input)
@ -279,7 +138,7 @@ def test_root_actor_bp(spawn, user_in_out):
child.expect('\r\n') child.expect('\r\n')
# process should exit # process should exit
child.expect(pexpect.EOF) child.expect(EOF)
if expect_err_str is None: if expect_err_str is None:
assert 'Error' not in str(child.before) assert 'Error' not in str(child.before)
@ -299,7 +158,9 @@ def do_ctlc(
# needs some further investigation potentially... # needs some further investigation potentially...
expect_prompt: bool = not _ci_env, expect_prompt: bool = not _ci_env,
) -> None: ) -> str|None:
before: str|None = None
# make sure ctl-c sends don't do anything but repeat output # make sure ctl-c sends don't do anything but repeat output
for _ in range(count): for _ in range(count):
@ -309,15 +170,18 @@ def do_ctlc(
# TODO: figure out why this makes CI fail.. # TODO: figure out why this makes CI fail..
# if you run this test manually it works just fine.. # if you run this test manually it works just fine..
if expect_prompt: if expect_prompt:
before = str(child.before.decode())
time.sleep(delay) time.sleep(delay)
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode())
time.sleep(delay) time.sleep(delay)
if patt: if patt:
# should see the last line on console # should see the last line on console
assert patt in before assert patt in before
# return the console content up to the final prompt
return before
def test_root_actor_bp_forever( def test_root_actor_bp_forever(
spawn, spawn,
@ -358,7 +222,7 @@ def test_root_actor_bp_forever(
# quit out of the loop # quit out of the loop
child.sendline('q') child.sendline('q')
child.expect(pexpect.EOF) child.expect(EOF)
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -380,10 +244,12 @@ def test_subactor_error(
# scan for the prompt # scan for the prompt
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_crash_msg, "('name_error'"] [
_crash_msg,
"('name_error'",
]
) )
if do_next: if do_next:
@ -402,17 +268,15 @@ def test_subactor_error(
child.sendline('continue') child.sendline('continue')
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode())
# root actor gets debugger engaged
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_crash_msg, "('root'"] [
) _crash_msg,
# error is a remote error propagated from the subactor # root actor gets debugger engaged
assert in_prompt_msg( "('root'",
before, # error is a remote error propagated from the subactor
[_crash_msg, "('name_error'"] "('name_error'",
]
) )
# another round # another round
@ -423,7 +287,7 @@ def test_subactor_error(
child.expect('\r\n') child.expect('\r\n')
# process should exit # process should exit
child.expect(pexpect.EOF) child.expect(EOF)
def test_subactor_breakpoint( def test_subactor_breakpoint(
@ -433,14 +297,11 @@ def test_subactor_breakpoint(
"Single subactor with an infinite breakpoint loop" "Single subactor with an infinite breakpoint loop"
child = spawn('subactor_breakpoint') child = spawn('subactor_breakpoint')
# scan for the prompt
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_pause_msg, "('breakpoint_forever'"] [_pause_msg,
"('breakpoint_forever'",]
) )
# do some "next" commands to demonstrate recurrent breakpoint # do some "next" commands to demonstrate recurrent breakpoint
@ -456,9 +317,8 @@ def test_subactor_breakpoint(
for _ in range(5): for _ in range(5):
child.sendline('continue') child.sendline('continue')
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_pause_msg, "('breakpoint_forever'"] [_pause_msg, "('breakpoint_forever'"]
) )
@ -471,9 +331,8 @@ def test_subactor_breakpoint(
# child process should exit but parent will capture pdb.BdbQuit # child process should exit but parent will capture pdb.BdbQuit
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
['RemoteActorError:', ['RemoteActorError:',
"('breakpoint_forever'", "('breakpoint_forever'",
'bdb.BdbQuit',] 'bdb.BdbQuit',]
@ -486,11 +345,10 @@ def test_subactor_breakpoint(
child.sendline('c') child.sendline('c')
# process should exit # process should exit
child.expect(pexpect.EOF) child.expect(EOF)
before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
['RemoteActorError:', ['RemoteActorError:',
"('breakpoint_forever'", "('breakpoint_forever'",
'bdb.BdbQuit',] 'bdb.BdbQuit',]
@ -514,7 +372,7 @@ def test_multi_subactors(
before = str(child.before.decode()) before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_pause_msg, "('breakpoint_forever'"] [_pause_msg, "('breakpoint_forever'"]
) )
@ -535,12 +393,14 @@ def test_multi_subactors(
# first name_error failure # first name_error failure
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_crash_msg, "('name_error'"] [
_crash_msg,
"('name_error'",
"NameError",
]
) )
assert "NameError" in before
if ctlc: if ctlc:
do_ctlc(child) do_ctlc(child)
@ -564,9 +424,8 @@ def test_multi_subactors(
# breakpoint loop should re-engage # breakpoint loop should re-engage
child.sendline('c') child.sendline('c')
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_pause_msg, "('breakpoint_forever'"] [_pause_msg, "('breakpoint_forever'"]
) )
@ -629,7 +488,7 @@ def test_multi_subactors(
# process should exit # process should exit
child.sendline('c') child.sendline('c')
child.expect(pexpect.EOF) child.expect(EOF)
# repeat of previous multierror for final output # repeat of previous multierror for final output
assert_before(child, [ assert_before(child, [
@ -659,25 +518,28 @@ def test_multi_daemon_subactors(
# the root's tty lock first so anticipate either crash # the root's tty lock first so anticipate either crash
# message on the first entry. # message on the first entry.
bp_forev_parts = [_pause_msg, "('bp_forever'"] bp_forev_parts = [
_pause_msg,
"('bp_forever'",
]
bp_forev_in_msg = partial( bp_forev_in_msg = partial(
in_prompt_msg, in_prompt_msg,
parts=bp_forev_parts, parts=bp_forev_parts,
) )
name_error_msg = "NameError: name 'doggypants' is not defined" name_error_msg: str = "NameError: name 'doggypants' is not defined"
name_error_parts = [name_error_msg] name_error_parts: list[str] = [name_error_msg]
before = str(child.before.decode()) before = str(child.before.decode())
if bp_forev_in_msg(prompt=before): if bp_forev_in_msg(child=child):
next_parts = name_error_parts next_parts = name_error_parts
elif name_error_msg in before: elif name_error_msg in before:
next_parts = bp_forev_parts next_parts = bp_forev_parts
else: else:
raise ValueError("Neither log msg was found !?") raise ValueError('Neither log msg was found !?')
if ctlc: if ctlc:
do_ctlc(child) do_ctlc(child)
@ -746,14 +608,12 @@ def test_multi_daemon_subactors(
# wait for final error in root # wait for final error in root
# where it crashs with boxed error # where it crashs with boxed error
while True: while True:
try: child.sendline('c')
child.sendline('c') child.expect(PROMPT)
child.expect(PROMPT) if not in_prompt_msg(
assert_before( child,
child, bp_forev_parts
bp_forev_parts ):
)
except AssertionError:
break break
assert_before( assert_before(
@ -769,7 +629,7 @@ def test_multi_daemon_subactors(
) )
child.sendline('c') child.sendline('c')
child.expect(pexpect.EOF) child.expect(EOF)
@has_nested_actors @has_nested_actors
@ -845,7 +705,7 @@ def test_multi_subactors_root_errors(
]) ])
child.sendline('c') child.sendline('c')
child.expect(pexpect.EOF) child.expect(EOF)
assert_before(child, [ assert_before(child, [
# "Attaching to pdb in crashed actor: ('root'", # "Attaching to pdb in crashed actor: ('root'",
@ -934,10 +794,13 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
child = spawn('root_cancelled_but_child_is_in_tty_lock') child = spawn('root_cancelled_but_child_is_in_tty_lock')
child.expect(PROMPT) child.expect(PROMPT)
assert_before(
before = str(child.before.decode()) child,
assert "NameError: name 'doggypants' is not defined" in before [
assert "tractor._exceptions.RemoteActorError: ('name_error'" not in before "NameError: name 'doggypants' is not defined",
"tractor._exceptions.RemoteActorError: ('name_error'",
],
)
time.sleep(0.5) time.sleep(0.5)
if ctlc: if ctlc:
@ -975,7 +838,7 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
for i in range(3): for i in range(3):
try: try:
child.expect(pexpect.EOF, timeout=0.5) child.expect(EOF, timeout=0.5)
break break
except TIMEOUT: except TIMEOUT:
child.sendline('c') child.sendline('c')
@ -1017,7 +880,7 @@ def test_root_cancels_child_context_during_startup(
do_ctlc(child) do_ctlc(child)
child.sendline('c') child.sendline('c')
child.expect(pexpect.EOF) child.expect(EOF)
def test_different_debug_mode_per_actor( def test_different_debug_mode_per_actor(
@ -1028,9 +891,8 @@ def test_different_debug_mode_per_actor(
child.expect(PROMPT) child.expect(PROMPT)
# only one actor should enter the debugger # only one actor should enter the debugger
before = str(child.before.decode())
assert in_prompt_msg( assert in_prompt_msg(
before, child,
[_crash_msg, "('debugged_boi'", "RuntimeError"], [_crash_msg, "('debugged_boi'", "RuntimeError"],
) )
@ -1038,9 +900,7 @@ def test_different_debug_mode_per_actor(
do_ctlc(child) do_ctlc(child)
child.sendline('c') child.sendline('c')
child.expect(pexpect.EOF) child.expect(EOF)
before = str(child.before.decode())
# NOTE: this debugged actor error currently WON'T show up since the # NOTE: this debugged actor error currently WON'T show up since the
# root will actually cancel and terminate the nursery before the error # root will actually cancel and terminate the nursery before the error
@ -1085,17 +945,16 @@ def test_pause_from_sync(
) )
if ctlc: if ctlc:
do_ctlc(child) do_ctlc(child)
# ^NOTE^ subactor not spawned yet; don't need extra delay.
child.sendline('c') child.sendline('c')
# first `await tractor.pause()` inside `p.open_context()` body # first `await tractor.pause()` inside `p.open_context()` body
child.expect(PROMPT) child.expect(PROMPT)
# XXX shouldn't see gb loaded message with PDB loglevel! # XXX shouldn't see gb loaded message with PDB loglevel!
before = str(child.before.decode())
assert not in_prompt_msg( assert not in_prompt_msg(
before, child,
['`greenback` portal opened!'], ['`greenback` portal opened!'],
) )
# should be same root task # should be same root task
@ -1109,7 +968,27 @@ def test_pause_from_sync(
) )
if ctlc: if ctlc:
do_ctlc(child) do_ctlc(
child,
# NOTE: setting this to 0 (or some other sufficient
# small val) can cause the test to fail since the
# `subactor` suffers a race where the root/parent
# sends an actor-cancel prior to it hitting its pause
# point; by def the value is 0.1
delay=0.4,
)
# XXX, fwiw without a brief sleep here the SIGINT might actually
# trigger "subactor" cancellation by its parent before the
# shield-handler is engaged.
#
# => similar to the `delay` input to `do_ctlc()` below, setting
# this too low can cause the test to fail since the `subactor`
# suffers a race where the root/parent sends an actor-cancel
# prior to the context task hitting its pause point (and thus
# engaging the `sigint_shield()` handler in time); this value
# seems to be good enuf?
time.sleep(0.6)
# one of the bg thread or subactor should have # one of the bg thread or subactor should have
# `Lock.acquire()`-ed # `Lock.acquire()`-ed
@ -1128,32 +1007,48 @@ def test_pause_from_sync(
"('root'", "('root'",
], ],
} }
conts: int = 0 # for debugging below matching logic on failure
while attach_patts: while attach_patts:
child.sendline('c') child.sendline('c')
conts += 1
child.expect(PROMPT) child.expect(PROMPT)
before = str(child.before.decode()) before = str(child.before.decode())
for key in attach_patts.copy(): for key in attach_patts:
if key in before: if key in before:
attach_key: str = key
expected_patts: str = attach_patts.pop(key) expected_patts: str = attach_patts.pop(key)
assert_before( assert_before(
child, child,
[_pause_msg] + expected_patts [_pause_msg]
+
expected_patts
) )
break break
else:
pytest.fail(
f'No keys found?\n\n'
f'{attach_patts.keys()}\n\n'
f'{before}\n'
)
# ensure no other task/threads engaged a REPL # ensure no other task/threads engaged a REPL
# at the same time as the one that was detected above. # at the same time as the one that was detected above.
for key, other_patts in attach_patts.items(): for key, other_patts in attach_patts.copy().items():
assert not in_prompt_msg( assert not in_prompt_msg(
before, child,
other_patts, other_patts,
) )
if ctlc: if ctlc:
do_ctlc(child) do_ctlc(
child,
patt=attach_key,
# NOTE same as comment above
delay=0.4,
)
child.sendline('c') child.sendline('c')
child.expect(pexpect.EOF) child.expect(EOF)
def test_post_mortem_api( def test_post_mortem_api(
@ -1258,7 +1153,7 @@ def test_post_mortem_api(
# ) # )
child.sendline('c') child.sendline('c')
child.expect(pexpect.EOF) child.expect(EOF)
def test_shield_pause( def test_shield_pause(
@ -1333,9 +1228,26 @@ def test_shield_pause(
] ]
) )
child.sendline('c') child.sendline('c')
child.expect(pexpect.EOF) child.expect(EOF)
# TODO: better error for "non-ideal" usage from the root actor.
# -[ ] if called from an async scope emit a message that suggests
# using `await tractor.pause()` instead since it's less overhead
# (in terms of `greenback` and/or extra threads) and if it's from
# a sync scope suggest that usage must first call
# `ensure_portal()` in the (eventual parent) async calling scope?
def test_sync_pause_from_bg_task_in_root_actor_():
'''
When used from the root actor, normally we can only implicitly
support `.pause_from_sync()` from the main-parent-task (that
opens the runtime via `open_root_actor()`) since `greenback`
requires a `.ensure_portal()` call per `trio.Task` where it is
used.
'''
...
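A rough sketch of the scenario this placeholder will eventually exercise, assuming the per-task `greenback.ensure_portal()` requirement noted in the docstring (the helper names and exact kwargs here are illustrative, not part of this changeset):

import greenback
import trio
import tractor


async def bg_task_wanting_sync_pause():
    # each background `trio.Task` must install its own `greenback`
    # portal before any sync frame below it can pause
    await greenback.ensure_portal()

    def some_sync_frame():
        tractor.pause_from_sync()

    some_sync_frame()


async def root_main():
    async with tractor.open_nursery(
        debug_mode=True,
        maybe_enable_greenback=True,
    ):
        async with trio.open_nursery() as tn:
            tn.start_soon(bg_task_wanting_sync_pause)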
# TODO: needs ANSI code stripping tho, see `assert_before()` # above! # TODO: needs ANSI code stripping tho, see `assert_before()` # above!
def test_correct_frames_below_hidden(): def test_correct_frames_below_hidden():
''' '''

View File

@ -0,0 +1,120 @@
'''
That "native" runtime-hackin toolset better be dang useful!
Verify the function of a variety of "developer-experience" tools we
offer from the `.devx` sub-pkg:
- use of the lovely `stackscope` for dumping actor `trio`-task trees
during operation and hangs.
TODO:
- demonstration of `CallerInfo` call stack frame filtering such that
for logging and REPL purposes a user sees exactly the layers needed
when debugging a problem inside the stack vs. in their app.
'''
import os
import signal
from .conftest import (
expect,
assert_before,
# in_prompt_msg,
)
def test_shield_pause(
spawn,
):
'''
Verify the `tractor.pause()/.post_mortem()` API works inside an
already cancelled `trio.CancelScope` and that you can step to the
next checkpoint wherein the `Cancelled` will get raised.
'''
child = spawn(
'shield_hang_in_sub'
)
expect(
child,
'Yo my child hanging..?',
)
assert_before(
child,
[
'Entering shield sleep..',
'Enabling trace-trees on `SIGUSR1` since `stackscope` is installed @',
]
)
print(
'Sending SIGUSR1 to see a tree-trace!',
)
os.kill(
child.pid,
signal.SIGUSR1,
)
expect(
child,
# end-of-tree delimiter
"------ \('root', ",
)
assert_before(
child,
[
'Trying to dump `stackscope` tree..',
'Dumping `stackscope` tree for actor',
"('root'", # uid line
# parent block point (non-shielded)
'await trio.sleep_forever() # in root',
]
)
# expect(
# child,
# # relay to the sub should be reported
# 'Relaying `SIGUSR1`[10] to sub-actor',
# )
expect(
child,
# end-of-tree delimiter
"------ \('hanger', ",
)
assert_before(
child,
[
# relay to the sub should be reported
'Relaying `SIGUSR1`[10] to sub-actor',
"('hanger'", # uid line
# hanger LOC where it's shield-halted
'await trio.sleep_forever() # in subactor',
]
)
# breakpoint()
# simulate the user sending a ctl-c to the hanging program.
# this should result in the terminator kicking in since
# the sub is shield blocking and can't respond to SIGINT.
os.kill(
child.pid,
signal.SIGINT,
)
expect(
child,
'Shutting down actor runtime',
timeout=6,
)
assert_before(
child,
[
'raise KeyboardInterrupt',
# 'Shutting down actor runtime',
'#T-800 deployed to collect zombie B0',
"'--uid', \"('hanger',",
]
)

View File

@ -91,7 +91,8 @@ def test_ipc_channel_break_during_stream(
# non-`trio` spawners should never hit the hang condition that # non-`trio` spawners should never hit the hang condition that
# requires the user to do ctl-c to cancel the actor tree. # requires the user to do ctl-c to cancel the actor tree.
expect_final_exc = trio.ClosedResourceError # expect_final_exc = trio.ClosedResourceError
expect_final_exc = tractor.TransportClosed
mod: ModuleType = import_path( mod: ModuleType = import_path(
examples_dir() / 'advanced_faults' examples_dir() / 'advanced_faults'
@ -157,7 +158,7 @@ def test_ipc_channel_break_during_stream(
if pre_aclose_msgstream: if pre_aclose_msgstream:
expect_final_exc = KeyboardInterrupt expect_final_exc = KeyboardInterrupt
# NOTE when the parent IPC side dies (even if the child's does as well # NOTE when the parent IPC side dies (even if the child does as well
# but the child fails BEFORE the parent) we always expect the # but the child fails BEFORE the parent) we always expect the
# IPC layer to raise a closed-resource, NEVER do we expect # IPC layer to raise a closed-resource, NEVER do we expect
# a stop msg since the parent-side ctx apis will error out # a stop msg since the parent-side ctx apis will error out
@ -169,7 +170,8 @@ def test_ipc_channel_break_during_stream(
and and
ipc_break['break_child_ipc_after'] is False ipc_break['break_child_ipc_after'] is False
): ):
expect_final_exc = trio.ClosedResourceError # expect_final_exc = trio.ClosedResourceError
expect_final_exc = tractor.TransportClosed
# BOTH but, PARENT breaks FIRST # BOTH but, PARENT breaks FIRST
elif ( elif (
@ -180,7 +182,8 @@ def test_ipc_channel_break_during_stream(
ipc_break['break_parent_ipc_after'] ipc_break['break_parent_ipc_after']
) )
): ):
expect_final_exc = trio.ClosedResourceError # expect_final_exc = trio.ClosedResourceError
expect_final_exc = tractor.TransportClosed
with pytest.raises( with pytest.raises(
expected_exception=( expected_exception=(
@ -199,8 +202,8 @@ def test_ipc_channel_break_during_stream(
**ipc_break, **ipc_break,
) )
) )
except KeyboardInterrupt as kbi: except KeyboardInterrupt as _kbi:
_err = kbi kbi = _kbi
if expect_final_exc is not KeyboardInterrupt: if expect_final_exc is not KeyboardInterrupt:
pytest.fail( pytest.fail(
'Rxed unexpected KBI !?\n' 'Rxed unexpected KBI !?\n'
@ -209,6 +212,21 @@ def test_ipc_channel_break_during_stream(
raise raise
except tractor.TransportClosed as _tc:
tc = _tc
if expect_final_exc is KeyboardInterrupt:
pytest.fail(
'Unexpected transport failure !?\n'
f'{repr(tc)}'
)
cause: Exception = tc.__cause__
assert (
type(cause) is trio.ClosedResourceError
and
cause.args[0] == 'another task closed this fd'
)
raise
# get raw instance from pytest wrapper # get raw instance from pytest wrapper
value = excinfo.value value = excinfo.value
if isinstance(value, ExceptionGroup): if isinstance(value, ExceptionGroup):

View File

@ -11,9 +11,6 @@ from typing import (
Type, Type,
Union, Union,
) )
from contextvars import (
Context,
)
from msgspec import ( from msgspec import (
structs, structs,
@ -27,6 +24,7 @@ import tractor
from tractor import ( from tractor import (
_state, _state,
MsgTypeError, MsgTypeError,
Context,
) )
from tractor.msg import ( from tractor.msg import (
_codec, _codec,
@ -41,7 +39,7 @@ from tractor.msg import (
from tractor.msg.types import ( from tractor.msg.types import (
_payload_msgs, _payload_msgs,
log, log,
Msg, PayloadMsg,
Started, Started,
mk_msg_spec, mk_msg_spec,
) )
@ -61,7 +59,7 @@ def mk_custom_codec(
uid: tuple[str, str] = tractor.current_actor().uid uid: tuple[str, str] = tractor.current_actor().uid
# XXX NOTE XXX: despite defining `NamespacePath` as a type # XXX NOTE XXX: despite defining `NamespacePath` as a type
# field on our `Msg.pld`, we still need a enc/dec_hook() pair # field on our `PayloadMsg.pld`, we still need a enc/dec_hook() pair
# to cast to/from that type on the wire. See the docs: # to cast to/from that type on the wire. See the docs:
# https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types
@ -321,12 +319,12 @@ def dec_type_union(
import importlib import importlib
types: list[Type] = [] types: list[Type] = []
for type_name in type_names: for type_name in type_names:
for ns in [ for mod in [
typing, typing,
importlib.import_module(__name__), importlib.import_module(__name__),
]: ]:
if type_ref := getattr( if type_ref := getattr(
ns, mod,
type_name, type_name,
False, False,
): ):
@ -744,7 +742,7 @@ def chk_pld_type(
# 'Error', .pld: ErrorData # 'Error', .pld: ErrorData
codec: MsgCodec = mk_codec( codec: MsgCodec = mk_codec(
# NOTE: this ONLY accepts `Msg.pld` fields of a specified # NOTE: this ONLY accepts `PayloadMsg.pld` fields of a specified
# type union. # type union.
ipc_pld_spec=payload_spec, ipc_pld_spec=payload_spec,
) )
@ -752,7 +750,7 @@ def chk_pld_type(
# make a one-off dec to compare with our `MsgCodec` instance # make a one-off dec to compare with our `MsgCodec` instance
# which does the below `mk_msg_spec()` call internally # which does the below `mk_msg_spec()` call internally
ipc_msg_spec: Union[Type[Struct]] ipc_msg_spec: Union[Type[Struct]]
msg_types: list[Msg[payload_spec]] msg_types: list[PayloadMsg[payload_spec]]
( (
ipc_msg_spec, ipc_msg_spec,
msg_types, msg_types,
@ -761,7 +759,7 @@ def chk_pld_type(
) )
_enc = msgpack.Encoder() _enc = msgpack.Encoder()
_dec = msgpack.Decoder( _dec = msgpack.Decoder(
type=ipc_msg_spec or Any, # like `Msg[Any]` type=ipc_msg_spec or Any, # like `PayloadMsg[Any]`
) )
assert ( assert (
@ -806,7 +804,7 @@ def chk_pld_type(
'cid': '666', 'cid': '666',
'pld': pld, 'pld': pld,
} }
enc_msg: Msg = typedef(**kwargs) enc_msg: PayloadMsg = typedef(**kwargs)
_wire_bytes: bytes = _enc.encode(enc_msg) _wire_bytes: bytes = _enc.encode(enc_msg)
wire_bytes: bytes = codec.enc.encode(enc_msg) wire_bytes: bytes = codec.enc.encode(enc_msg)
@ -883,25 +881,16 @@ def test_limit_msgspec():
debug_mode=True debug_mode=True
): ):
# ensure we can round-trip a boxing `Msg` # ensure we can round-trip a boxing `PayloadMsg`
assert chk_pld_type( assert chk_pld_type(
# Msg, payload_spec=Any,
Any, pld=None,
None,
expect_roundtrip=True, expect_roundtrip=True,
) )
# TODO: don't need this any more right since
# `msgspec>=0.15` has the nice generics stuff yah??
#
# manually override the type annot of the payload
# field and ensure it propagates to all msg-subtypes.
# Msg.__annotations__['pld'] = Any
# verify that a mis-typed payload value won't decode # verify that a mis-typed payload value won't decode
assert not chk_pld_type( assert not chk_pld_type(
# Msg, payload_spec=int,
int,
pld='doggy', pld='doggy',
) )
@ -913,18 +902,16 @@ def test_limit_msgspec():
value: Any value: Any
assert not chk_pld_type( assert not chk_pld_type(
# Msg, payload_spec=CustomPayload,
CustomPayload,
pld='doggy', pld='doggy',
) )
assert chk_pld_type( assert chk_pld_type(
# Msg, payload_spec=CustomPayload,
CustomPayload,
pld=CustomPayload(name='doggy', value='urmom') pld=CustomPayload(name='doggy', value='urmom')
) )
# uhh bc we can `.pause_from_sync()` now! :surfer: # yah, we can `.pause_from_sync()` now!
# breakpoint() # breakpoint()
trio.run(main) trio.run(main)

View File

@ -26,7 +26,7 @@ async def test_reg_then_unreg(reg_addr):
portal = await n.start_actor('actor', enable_modules=[__name__]) portal = await n.start_actor('actor', enable_modules=[__name__])
uid = portal.channel.uid uid = portal.channel.uid
async with tractor.get_arbiter(*reg_addr) as aportal: async with tractor.get_registry(*reg_addr) as aportal:
# this local actor should be the arbiter # this local actor should be the arbiter
assert actor is aportal.actor assert actor is aportal.actor
@ -160,7 +160,7 @@ async def spawn_and_check_registry(
async with tractor.open_root_actor( async with tractor.open_root_actor(
registry_addrs=[reg_addr], registry_addrs=[reg_addr],
): ):
async with tractor.get_arbiter(*reg_addr) as portal: async with tractor.get_registry(*reg_addr) as portal:
# runtime needs to be up to call this # runtime needs to be up to call this
actor = tractor.current_actor() actor = tractor.current_actor()
@ -298,7 +298,7 @@ async def close_chans_before_nursery(
async with tractor.open_root_actor( async with tractor.open_root_actor(
registry_addrs=[reg_addr], registry_addrs=[reg_addr],
): ):
async with tractor.get_arbiter(*reg_addr) as aportal: async with tractor.get_registry(*reg_addr) as aportal:
try: try:
get_reg = partial(unpack_reg, aportal) get_reg = partial(unpack_reg, aportal)

View File

@ -19,7 +19,7 @@ from tractor._testing import (
@pytest.fixture @pytest.fixture
def run_example_in_subproc( def run_example_in_subproc(
loglevel: str, loglevel: str,
testdir, testdir: pytest.Testdir,
reg_addr: tuple[str, int], reg_addr: tuple[str, int],
): ):

View File

@ -2,19 +2,30 @@
The hipster way to force SC onto the stdlib's "async": 'infection mode'. The hipster way to force SC onto the stdlib's "async": 'infection mode'.
''' '''
from typing import Optional, Iterable, Union
import asyncio import asyncio
import builtins import builtins
from contextlib import ExitStack
import itertools import itertools
import importlib import importlib
import os
from pathlib import Path
import signal
from typing import (
Callable,
Iterable,
Union,
)
import pytest import pytest
import trio import trio
import tractor import tractor
from tractor import ( from tractor import (
current_actor,
Actor,
to_asyncio, to_asyncio,
RemoteActorError, RemoteActorError,
ContextCancelled, ContextCancelled,
_state,
) )
from tractor.trionics import BroadcastReceiver from tractor.trionics import BroadcastReceiver
from tractor._testing import expect_ctxc from tractor._testing import expect_ctxc
@ -25,8 +36,8 @@ async def sleep_and_err(
# just signature placeholders for compat with # just signature placeholders for compat with
# ``to_asyncio.open_channel_from()`` # ``to_asyncio.open_channel_from()``
to_trio: Optional[trio.MemorySendChannel] = None, to_trio: trio.MemorySendChannel|None = None,
from_trio: Optional[asyncio.Queue] = None, from_trio: asyncio.Queue|None = None,
): ):
if to_trio: if to_trio:
@ -36,7 +47,7 @@ async def sleep_and_err(
assert 0 assert 0
async def sleep_forever(): async def aio_sleep_forever():
await asyncio.sleep(float('inf')) await asyncio.sleep(float('inf'))
@ -44,7 +55,7 @@ async def trio_cancels_single_aio_task():
# spawn an ``asyncio`` task to run a func and return result # spawn an ``asyncio`` task to run a func and return result
with trio.move_on_after(.2): with trio.move_on_after(.2):
await tractor.to_asyncio.run_task(sleep_forever) await tractor.to_asyncio.run_task(aio_sleep_forever)
def test_trio_cancels_aio_on_actor_side(reg_addr): def test_trio_cancels_aio_on_actor_side(reg_addr):
@ -66,14 +77,22 @@ def test_trio_cancels_aio_on_actor_side(reg_addr):
async def asyncio_actor( async def asyncio_actor(
target: str, target: str,
expect_err: Exception|None = None expect_err: Exception|None = None
) -> None: ) -> None:
assert tractor.current_actor().is_infected_aio() # ensure internal runtime state is consistent
target = globals()[target] actor: Actor = tractor.current_actor()
assert (
actor.is_infected_aio()
and
actor._infected_aio
and
_state._runtime_vars['_is_infected_aio']
)
target: Callable = globals()[target]
if '.' in expect_err: if '.' in expect_err:
modpath, _, name = expect_err.rpartition('.') modpath, _, name = expect_err.rpartition('.')
@ -128,7 +147,7 @@ def test_aio_simple_error(reg_addr):
assert err assert err
assert isinstance(err, RemoteActorError) assert isinstance(err, RemoteActorError)
assert err.boxed_type == AssertionError assert err.boxed_type is AssertionError
def test_tractor_cancels_aio(reg_addr): def test_tractor_cancels_aio(reg_addr):
@ -140,7 +159,7 @@ def test_tractor_cancels_aio(reg_addr):
async with tractor.open_nursery() as n: async with tractor.open_nursery() as n:
portal = await n.run_in_actor( portal = await n.run_in_actor(
asyncio_actor, asyncio_actor,
target='sleep_forever', target='aio_sleep_forever',
expect_err='trio.Cancelled', expect_err='trio.Cancelled',
infect_asyncio=True, infect_asyncio=True,
) )
@ -164,7 +183,7 @@ def test_trio_cancels_aio(reg_addr):
async with tractor.open_nursery() as n: async with tractor.open_nursery() as n:
await n.run_in_actor( await n.run_in_actor(
asyncio_actor, asyncio_actor,
target='sleep_forever', target='aio_sleep_forever',
expect_err='trio.Cancelled', expect_err='trio.Cancelled',
infect_asyncio=True, infect_asyncio=True,
) )
@ -195,7 +214,7 @@ async def trio_ctx(
# spawn another asyncio task for the cuck of it. # spawn another asyncio task for the cuck of it.
n.start_soon( n.start_soon(
tractor.to_asyncio.run_task, tractor.to_asyncio.run_task,
sleep_forever, aio_sleep_forever,
) )
await trio.sleep_forever() await trio.sleep_forever()
@ -272,7 +291,7 @@ def test_context_spawns_aio_task_that_errors(
err = excinfo.value err = excinfo.value
assert isinstance(err, expect) assert isinstance(err, expect)
assert err.boxed_type == AssertionError assert err.boxed_type is AssertionError
async def aio_cancel(): async def aio_cancel():
@ -281,23 +300,35 @@ async def aio_cancel():
''' '''
await asyncio.sleep(0.5) await asyncio.sleep(0.5)
task = asyncio.current_task()
# cancel and enter sleep # cancel and enter sleep
task = asyncio.current_task()
task.cancel() task.cancel()
await sleep_forever() await aio_sleep_forever()
def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr): def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr):
'''
When the `asyncio.Task` cancels itself the `trio` side should
also cancel, tear down, and relay the cancellation cross-process
to the caller (parent).
'''
async def main(): async def main():
async with tractor.open_nursery() as n:
await n.run_in_actor( an: tractor.ActorNursery
async with tractor.open_nursery() as an:
p: tractor.Portal = await an.run_in_actor(
asyncio_actor, asyncio_actor,
target='aio_cancel', target='aio_cancel',
expect_err='tractor.to_asyncio.AsyncioCancelled', expect_err='tractor.to_asyncio.AsyncioCancelled',
infect_asyncio=True, infect_asyncio=True,
) )
# NOTE: normally the `an.__aexit__()` waits on the
# portal's result but we do it explicitly here
# to avoid indent levels.
with trio.fail_after(1):
await p.wait_for_result()
with pytest.raises( with pytest.raises(
expected_exception=(RemoteActorError, ExceptionGroup), expected_exception=(RemoteActorError, ExceptionGroup),
@ -305,7 +336,7 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr):
trio.run(main) trio.run(main)
# might get multiple `trio.Cancelled`s as well inside an inception # might get multiple `trio.Cancelled`s as well inside an inception
err = excinfo.value err: RemoteActorError|ExceptionGroup = excinfo.value
if isinstance(err, ExceptionGroup): if isinstance(err, ExceptionGroup):
err = next(itertools.dropwhile( err = next(itertools.dropwhile(
lambda exc: not isinstance(exc, tractor.RemoteActorError), lambda exc: not isinstance(exc, tractor.RemoteActorError),
@ -313,7 +344,8 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr):
)) ))
assert err assert err
# ensure boxed error is correct # relayed boxed error should be our `trio`-task's
# cancel-signal-proxy-equivalent of `asyncio.CancelledError`.
assert err.boxed_type == to_asyncio.AsyncioCancelled assert err.boxed_type == to_asyncio.AsyncioCancelled
@ -355,7 +387,6 @@ async def push_from_aio_task(
async def stream_from_aio( async def stream_from_aio(
exit_early: bool = False, exit_early: bool = False,
raise_err: bool = False, raise_err: bool = False,
aio_raise_err: bool = False, aio_raise_err: bool = False,
@ -466,7 +497,7 @@ def test_trio_error_cancels_intertask_chan(reg_addr):
trio.run(main) trio.run(main)
# ensure boxed error type # ensure boxed error type
excinfo.value.boxed_type == Exception excinfo.value.boxed_type is Exception
def test_trio_closes_early_and_channel_exits(reg_addr): def test_trio_closes_early_and_channel_exits(reg_addr):
@ -502,7 +533,7 @@ def test_aio_errors_and_channel_propagates_and_closes(reg_addr):
) as excinfo: ) as excinfo:
trio.run(main) trio.run(main)
excinfo.value.boxed_type == Exception excinfo.value.boxed_type is Exception
@tractor.context @tractor.context
@ -618,6 +649,242 @@ def test_echoserver_detailed_mechanics(
trio.run(main) trio.run(main)
@tractor.context
async def manage_file(
ctx: tractor.Context,
tmp_path_str: str,
send_sigint_to: str,
trio_side_is_shielded: bool = True,
bg_aio_task: bool = False,
):
'''
Start an `asyncio` task that just sleeps after registering a teardown
callback with `Actor.lifetime_stack`. Trigger a SIGINT to kill the actor tree
and ensure the stack is closed in the infected mode child.
To verify the teardown state just write a tmpfile to the `testdir`
and delete it on actor close.
'''
tmp_path: Path = Path(tmp_path_str)
tmp_file: Path = tmp_path / f'{" ".join(ctx._actor.uid)}.file'
# create the tmp file and tell the parent where it's at
assert not tmp_file.is_file()
tmp_file.touch()
stack: ExitStack = current_actor().lifetime_stack
stack.callback(tmp_file.unlink)
await ctx.started((
str(tmp_file),
os.getpid(),
))
# expect to be cancelled from here!
try:
# NOTE: turns out you don't even need to sched an aio task
# since the original issue, even though seemingly was due to
# the guest-run being abandoned + a `._debug.pause()` inside
# `._runtime._async_main()` (which was originally trying to
# debug the `.lifetime_stack` not closing), IS NOT actually
# the core issue?
#
# further notes:
#
# - `trio` only issues the " RuntimeWarning: Trio guest run
# got abandoned without properly finishing... weird stuff
# might happen" IFF you DO run a asyncio task here, BUT
# - the original issue of the `.lifetime_stack` not closing
# will still happen even if you don't run an `asyncio` task
# here even though the "abandon" message won't be shown..
#
# => ????? honestly i'm lost but it seems to be some issue
# with `asyncio` and SIGINT..
#
# honestly, this REALLY reminds me why i haven't used
# `asyncio` by choice in years.. XD
#
async with trio.open_nursery() as tn:
if bg_aio_task:
tn.start_soon(
tractor.to_asyncio.run_task,
aio_sleep_forever,
)
# XXX don't-need/doesn't-make-a-diff right
# since we're already doing it from parent?
# if send_sigint_to == 'child':
# os.kill(
# os.getpid(),
# signal.SIGINT,
# )
# XXX spend a half sec doing shielded checkpointing to
# ensure that despite the `trio`-side task ignoring the
# SIGINT, the `asyncio` side won't abandon the guest-run!
if trio_side_is_shielded:
with trio.CancelScope(shield=True):
for i in range(5):
await trio.sleep(0.1)
await trio.sleep_forever()
# signalled manually at the OS level (aka KBI) by the parent actor.
except KeyboardInterrupt:
print('child raised KBI..')
assert tmp_file.exists()
raise
raise RuntimeError('shoulda received a KBI?')
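For reference, the "abandonment" discussed in the comments above comes from `trio`'s guest mode: the guest run is driven by callbacks scheduled onto the host `asyncio` loop, so if that loop stops before the guest finishes, `trio` emits the quoted RuntimeWarning. A minimal sketch of guest-mode hosting (independent of `tractor`'s internals, following the pattern from the `trio` docs) makes the failure mode concrete:

import asyncio
import trio


async def trio_main() -> str:
    await trio.sleep(1)
    return 'trio-side done'


def done_callback(outcome) -> None:
    # invoked by `trio` only if the guest run actually completes
    print(f'trio_main finished: {outcome!r}')


async def asyncio_main() -> None:
    loop = asyncio.get_running_loop()
    trio.lowlevel.start_guest_run(
        trio_main,
        run_sync_soon_threadsafe=loop.call_soon_threadsafe,
        done_callback=done_callback,
    )
    # if this wait is shorter than `trio_main()`'s runtime (or the
    # loop is torn down early, say by a SIGINT-driven shutdown),
    # the guest run is "abandoned" instead of finishing cleanly.
    await asyncio.sleep(2)


if __name__ == '__main__':
    asyncio.run(asyncio_main())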
@pytest.mark.parametrize(
'trio_side_is_shielded',
[
False,
True,
],
ids=[
'trio_side_no_shielding',
'trio_side_does_shielded_work',
],
)
@pytest.mark.parametrize(
'send_sigint_to',
[
'child',
'parent',
],
ids='send_SIGINT_to={}'.format,
)
@pytest.mark.parametrize(
'bg_aio_task',
[
False,
# NOTE: (and see notes in `manage_file()` above as well) if
# we FOR SURE SPAWN AN AIO TASK in the child it seems the
# "silent-abandon" case (as is described in detail in
# `to_asyncio.run_as_asyncio_guest()`) does not happen and
# `asyncio`'s loop will at least abandon the `trio` side
# loudly? .. prolly the state-spot to start looking for
# a soln that results in NO ABANDONMENT.. XD
True,
],
ids=[
'bg_aio_task',
'just_trio_slee',
],
)
@pytest.mark.parametrize(
'wait_for_ctx',
[
False,
True,
],
ids=[
'raise_KBI_in_rent',
'wait_for_ctx',
],
)
def test_sigint_closes_lifetime_stack(
tmp_path: Path,
wait_for_ctx: bool,
bg_aio_task: bool,
trio_side_is_shielded: bool,
debug_mode: bool,
send_sigint_to: str,
):
'''
Ensure that an infected child can use the `Actor.lifetime_stack`
to make a file on boot and it's automatically cleaned up by the
actor-lifetime-linked exit stack closure.
'''
async def main():
try:
an: tractor.ActorNursery
async with tractor.open_nursery(
debug_mode=debug_mode,
) as an:
p: tractor.Portal = await an.start_actor(
'file_mngr',
enable_modules=[__name__],
infect_asyncio=True,
)
async with p.open_context(
manage_file,
tmp_path_str=str(tmp_path),
send_sigint_to=send_sigint_to,
bg_aio_task=bg_aio_task,
trio_side_is_shielded=trio_side_is_shielded,
) as (ctx, first):
path_str, cpid = first
tmp_file: Path = Path(path_str)
assert tmp_file.exists()
# XXX originally to simulate what (hopefully)
# the below now triggers.. had to manually
# trigger a SIGINT from a ctl-c in the root.
# await trio.sleep_forever()
# XXX NOTE XXX signal infected-`asyncio` child to
# OS-cancel with SIGINT; this should trigger the
# bad `asyncio` cancel behaviour that can cause
# a guest-run abandon as was seen causing
# shm-buffer leaks in `piker`'s live quote stream
# subsys!
#
await trio.sleep(.2)
pid: int = (
cpid if send_sigint_to == 'child'
else os.getpid()
)
os.kill(
pid,
signal.SIGINT,
)
# XXX CASE 1: without the bug fixed, in
# the non-KBI-raised-in-parent case, this
# timeout should trigger!
if wait_for_ctx:
print('waiting for ctx outcome in parent..')
try:
with trio.fail_after(1):
await ctx.wait_for_result()
except tractor.ContextCancelled as ctxc:
assert ctxc.canceller == ctx.chan.uid
raise
# XXX CASE 2: this seems to be the source of the
# original issue which exhibited BEFORE we put
# a `Actor.cancel_soon()` inside
# `run_as_asyncio_guest()`..
else:
raise KeyboardInterrupt
pytest.fail('should have raised some kinda error?!?')
except (
KeyboardInterrupt,
ContextCancelled,
):
# XXX CASE 2: without the bug fixed, in the
# KBI-raised-in-parent case, the actor teardown should
# never get run (silently abandoned by `asyncio`..) and
# thus the file should leak!
assert not tmp_file.exists()
assert ctx.maybe_error
trio.run(main)
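The tmpfile check works because `Actor.lifetime_stack` is a plain `contextlib.ExitStack` (as typed in `manage_file()` above): its registered callbacks only fire if the stack is actually closed during actor teardown, which is exactly what a silently-abandoned guest run skips. A standalone sketch of that contract:

from contextlib import ExitStack
from pathlib import Path

stack = ExitStack()

tmp_file = Path('actor_lifetime.file')
tmp_file.touch()
stack.callback(tmp_file.unlink)  # teardown registered, runs LIFO

# ... actor does its work ...

# on a clean shutdown the runtime closes the stack and the file is
# removed; an abandoned teardown never gets here and the file leaks.
stack.close()
assert not tmp_file.exists()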
# TODO: debug_mode tests once we get support for `asyncio`! # TODO: debug_mode tests once we get support for `asyncio`!
# #
# -[ ] need tests to wrap both scripts: # -[ ] need tests to wrap both scripts:

View File

@ -38,7 +38,7 @@ async def test_self_is_registered_localportal(reg_addr):
"Verify waiting on the arbiter to register itself using a local portal." "Verify waiting on the arbiter to register itself using a local portal."
actor = tractor.current_actor() actor = tractor.current_actor()
assert actor.is_arbiter assert actor.is_arbiter
async with tractor.get_arbiter(*reg_addr) as portal: async with tractor.get_registry(*reg_addr) as portal:
assert isinstance(portal, tractor._portal.LocalPortal) assert isinstance(portal, tractor._portal.LocalPortal)
with trio.fail_after(0.2): with trio.fail_after(0.2):

View File

@ -32,7 +32,7 @@ def test_abort_on_sigint(daemon):
@tractor_test @tractor_test
async def test_cancel_remote_arbiter(daemon, reg_addr): async def test_cancel_remote_arbiter(daemon, reg_addr):
assert not tractor.current_actor().is_arbiter assert not tractor.current_actor().is_arbiter
async with tractor.get_arbiter(*reg_addr) as portal: async with tractor.get_registry(*reg_addr) as portal:
await portal.cancel_actor() await portal.cancel_actor()
time.sleep(0.1) time.sleep(0.1)
@ -41,7 +41,7 @@ async def test_cancel_remote_arbiter(daemon, reg_addr):
# no arbiter socket should exist # no arbiter socket should exist
with pytest.raises(OSError): with pytest.raises(OSError):
async with tractor.get_arbiter(*reg_addr) as portal: async with tractor.get_registry(*reg_addr) as portal:
pass pass

View File

@ -285,14 +285,14 @@ def test_basic_payload_spec(
if invalid_started: if invalid_started:
msg_type_str: str = 'Started' msg_type_str: str = 'Started'
bad_value_str: str = '10' bad_value: int = 10
elif invalid_return: elif invalid_return:
msg_type_str: str = 'Return' msg_type_str: str = 'Return'
bad_value_str: str = "'yo'" bad_value: str = 'yo'
else: else:
# XXX but should never be used below then.. # XXX but should never be used below then..
msg_type_str: str = '' msg_type_str: str = ''
bad_value_str: str = '' bad_value: str = ''
maybe_mte: MsgTypeError|None = None maybe_mte: MsgTypeError|None = None
should_raise: Exception|None = ( should_raise: Exception|None = (
@ -307,8 +307,10 @@ def test_basic_payload_spec(
raises=should_raise, raises=should_raise,
ensure_in_message=[ ensure_in_message=[
f"invalid `{msg_type_str}` msg payload", f"invalid `{msg_type_str}` msg payload",
f"value: `{bad_value_str}` does not " f'{bad_value}',
f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", f'has type {type(bad_value)!r}',
'not match type-spec',
f'`{msg_type_str}.pld: PldMsg|NoneType`',
], ],
# only for debug # only for debug
# post_mortem=True, # post_mortem=True,

View File

@ -30,7 +30,7 @@ from ._streaming import (
stream as stream, stream as stream,
) )
from ._discovery import ( from ._discovery import (
get_arbiter as get_arbiter, get_registry as get_registry,
find_actor as find_actor, find_actor as find_actor,
wait_for_actor as wait_for_actor, wait_for_actor as wait_for_actor,
query_actor as query_actor, query_actor as query_actor,
@ -49,6 +49,7 @@ from ._exceptions import (
ModuleNotExposed as ModuleNotExposed, ModuleNotExposed as ModuleNotExposed,
MsgTypeError as MsgTypeError, MsgTypeError as MsgTypeError,
RemoteActorError as RemoteActorError, RemoteActorError as RemoteActorError,
TransportClosed as TransportClosed,
) )
from .devx import ( from .devx import (
breakpoint as breakpoint, breakpoint as breakpoint,

View File

@ -38,6 +38,7 @@ from collections import deque
from contextlib import ( from contextlib import (
asynccontextmanager as acm, asynccontextmanager as acm,
) )
from contextvars import Token
from dataclasses import ( from dataclasses import (
dataclass, dataclass,
field, field,
@ -45,6 +46,7 @@ from dataclasses import (
from functools import partial from functools import partial
import inspect import inspect
from pprint import pformat from pprint import pformat
import textwrap
from typing import ( from typing import (
Any, Any,
AsyncGenerator, AsyncGenerator,
@ -121,10 +123,19 @@ class Unresolved:
@dataclass @dataclass
class Context: class Context:
''' '''
An inter-actor, SC transitive, `Task` communication context. An inter-actor, SC transitive, `trio.Task` (pair)
communication context.
NB: This class should **never be instatiated directly**, it is allocated (We've also considered other names and ideas:
by the runtime in 2 ways: - "communicating tasks scope": cts
- "distributed task scope": dts
- "communicating tasks context": ctc
**Got a better idea for naming? Make an issue dawg!**
)
NB: This class should **never be instantiated directly**, it is
allocated by the runtime in 2 ways:
- by entering `Portal.open_context()` which is the primary - by entering `Portal.open_context()` which is the primary
public API for any "parent" task or, public API for any "parent" task or,
- by the RPC machinery's `._rpc._invoke()` as a `ctx` arg - by the RPC machinery's `._rpc._invoke()` as a `ctx` arg
@ -210,6 +221,16 @@ class Context:
# more the the `Context` is needed? # more the the `Context` is needed?
_portal: Portal | None = None _portal: Portal | None = None
@property
def portal(self) -> Portal|None:
'''
Return any wrapping memory-`Portal` if this is
a 'parent'-side task which called `Portal.open_context()`,
otherwise `None`.
'''
return self._portal
# NOTE: each side of the context has its own cancel scope # NOTE: each side of the context has its own cancel scope
# which is exactly the primitive that allows for # which is exactly the primitive that allows for
# cross-actor-task-supervision and thus SC. # cross-actor-task-supervision and thus SC.
@ -299,6 +320,8 @@ class Context:
# boxed exception. NOW, it's used for spawning overrun queuing # boxed exception. NOW, it's used for spawning overrun queuing
# tasks when `.allow_overruns == True` !!! # tasks when `.allow_overruns == True` !!!
_scope_nursery: trio.Nursery|None = None _scope_nursery: trio.Nursery|None = None
# ^-TODO-^ change name?
# -> `._scope_tn` "scope task nursery"
# streaming overrun state tracking # streaming overrun state tracking
_in_overrun: bool = False _in_overrun: bool = False
@ -313,6 +336,7 @@ class Context:
extra_fields: dict[str, Any]|None = None, extra_fields: dict[str, Any]|None = None,
# ^-TODO-^ some built-in extra state fields # ^-TODO-^ some built-in extra state fields
# we'll want in some devx specific cases? # we'll want in some devx specific cases?
indent: str|None = None,
) -> str: ) -> str:
ds: str = '=' ds: str = '='
@ -332,7 +356,6 @@ class Context:
show_error_fields=True show_error_fields=True
) )
fmtstr: str = ( fmtstr: str = (
f'<Context(\n'
# f'\n' # f'\n'
# f' ---\n' # f' ---\n'
f' |_ipc: {self.dst_maddr}\n' f' |_ipc: {self.dst_maddr}\n'
@ -379,11 +402,20 @@ class Context:
f' {key}{ds}{val!r}\n' f' {key}{ds}{val!r}\n'
) )
if indent:
fmtstr = textwrap.indent(
fmtstr,
prefix=indent,
)
return ( return (
'<Context(\n'
+
fmtstr fmtstr
+ +
')>\n' f'{indent})>\n'
) )
# NOTE: making this return a value that can be passed to # NOTE: making this return a value that can be passed to
# `eval()` is entirely **optional** dawggg B) # `eval()` is entirely **optional** dawggg B)
# https://docs.python.org/3/library/functions.html#repr # https://docs.python.org/3/library/functions.html#repr
@ -408,10 +440,23 @@ class Context:
''' '''
return self._cancel_called return self._cancel_called
@cancel_called.setter
def cancel_called(self, val: bool) -> None:
'''
Set the self-cancelled request `bool` value.
'''
# to debug who frickin sets it..
# if val:
# from .devx import pause_from_sync
# pause_from_sync()
self._cancel_called = val
@property @property
def canceller(self) -> tuple[str, str]|None: def canceller(self) -> tuple[str, str]|None:
''' '''
``Actor.uid: tuple[str, str]`` of the (remote) `Actor.uid: tuple[str, str]` of the (remote)
actor-process whose task was cancelled thus causing this actor-process whose task was cancelled thus causing this
(side of the) context to also be cancelled. (side of the) context to also be cancelled.
@ -515,7 +560,7 @@ class Context:
# the local scope was never cancelled # the local scope was never cancelled
# and instead likely we received a remote side # and instead likely we received a remote side
# # cancellation that was raised inside `.result()` # # cancellation that was raised inside `.wait_for_result()`
# or ( # or (
# (se := self._local_error) # (se := self._local_error)
# and se is re # and se is re
@ -585,6 +630,10 @@ class Context:
self, self,
error: BaseException, error: BaseException,
# TODO: manual toggle for cases where we wouldn't normally
# mark ourselves cancelled but want to?
# set_cancel_called: bool = False,
) -> None: ) -> None:
''' '''
(Maybe) cancel this local scope due to a received remote (Maybe) cancel this local scope due to a received remote
@ -603,7 +652,7 @@ class Context:
- `Portal.open_context()` - `Portal.open_context()`
- `Portal.result()` - `Portal.result()`
- `Context.open_stream()` - `Context.open_stream()`
- `Context.result()` - `Context.wait_for_result()`
when called/closed by actor local task(s). when called/closed by actor local task(s).
@ -729,7 +778,7 @@ class Context:
# Cancel the local `._scope`, catch that # Cancel the local `._scope`, catch that
# `._scope.cancelled_caught` and re-raise any remote error # `._scope.cancelled_caught` and re-raise any remote error
# once exiting (or manually calling `.result()`) the # once exiting (or manually calling `.wait_for_result()`) the
# `.open_context()` block. # `.open_context()` block.
cs: trio.CancelScope = self._scope cs: trio.CancelScope = self._scope
if ( if (
@ -764,8 +813,9 @@ class Context:
# `trio.Cancelled` subtype here ;) # `trio.Cancelled` subtype here ;)
# https://github.com/goodboy/tractor/issues/368 # https://github.com/goodboy/tractor/issues/368
message: str = 'Cancelling `Context._scope` !\n\n' message: str = 'Cancelling `Context._scope` !\n\n'
# from .devx import pause_from_sync
# pause_from_sync()
self._scope.cancel() self._scope.cancel()
else: else:
message: str = 'NOT cancelling `Context._scope` !\n\n' message: str = 'NOT cancelling `Context._scope` !\n\n'
# from .devx import mk_pdb # from .devx import mk_pdb
@ -845,15 +895,15 @@ class Context:
@property @property
def repr_api(self) -> str: def repr_api(self) -> str:
return 'Portal.open_context()'
# TODO: use `.dev._frame_stack` scanning to find caller!
# ci: CallerInfo|None = self._caller_info # ci: CallerInfo|None = self._caller_info
# if ci: # if ci:
# return ( # return (
# f'{ci.api_nsp}()\n' # f'{ci.api_nsp}()\n'
# ) # )
# TODO: use `.dev._frame_stack` scanning to find caller!
return 'Portal.open_context()'
async def cancel( async def cancel(
self, self,
timeout: float = 0.616, timeout: float = 0.616,
@ -889,16 +939,18 @@ class Context:
''' '''
side: str = self.side side: str = self.side
self._cancel_called: bool = True # XXX for debug via the `@.setter`
self.cancel_called = True
header: str = ( header: str = (
f'Cancelling ctx with peer from {side.upper()} side\n\n' f'Cancelling ctx from {side.upper()}-side\n'
) )
reminfo: str = ( reminfo: str = (
# ' =>\n' # ' =>\n'
f'Context.cancel() => {self.chan.uid}\n' # f'Context.cancel() => {self.chan.uid}\n'
f'c)=> {self.chan.uid}\n'
# f'{self.chan.uid}\n' # f'{self.chan.uid}\n'
f' |_ @{self.dst_maddr}\n' f' |_ @{self.dst_maddr}\n'
f' >> {self.repr_rpc}\n' f' >> {self.repr_rpc}\n'
# f' >> {self._nsf}() -> {codec}[dict]:\n\n' # f' >> {self._nsf}() -> {codec}[dict]:\n\n'
# TODO: pull msg-type from spec re #320 # TODO: pull msg-type from spec re #320
@ -912,7 +964,7 @@ class Context:
# `._scope.cancel()` since we expect the eventual # `._scope.cancel()` since we expect the eventual
# `ContextCancelled` from the other side to trigger this # `ContextCancelled` from the other side to trigger this
# when the runtime finally receives it during teardown # when the runtime finally receives it during teardown
# (normally in `.result()` called from # (normally in `.wait_for_result()` called from
# `Portal.open_context().__aexit__()`) # `Portal.open_context().__aexit__()`)
if side == 'parent': if side == 'parent':
if not self._portal: if not self._portal:
@ -1025,10 +1077,10 @@ class Context:
''' '''
__tracebackhide__: bool = hide_tb __tracebackhide__: bool = hide_tb
our_uid: tuple = self.chan.uid peer_uid: tuple = self.chan.uid
# XXX NOTE XXX: `ContextCancelled`/`StreamOverrun` absorption # XXX NOTE XXX: `ContextCancelled`/`StreamOverrun` absorption
# for "graceful cancellation" case: # for "graceful cancellation" case(s):
# #
# Whenever a "side" of a context (a `Task` running in # Whenever a "side" of a context (a `Task` running in
# an actor) **is** the side which requested ctx # an actor) **is** the side which requested ctx
@ -1045,9 +1097,11 @@ class Context:
# set to the `Actor.uid` of THIS task (i.e. the # set to the `Actor.uid` of THIS task (i.e. the
# cancellation requesting task's actor is the actor # cancellation requesting task's actor is the actor
# checking whether it should absorb the ctxc). # checking whether it should absorb the ctxc).
self_ctxc: bool = self._is_self_cancelled(remote_error)
if ( if (
self_ctxc
and
not raise_ctxc_from_self_call not raise_ctxc_from_self_call
and self._is_self_cancelled(remote_error)
# TODO: ?potentially it is useful to emit certain # TODO: ?potentially it is useful to emit certain
# warning/cancel logs for the cases where the # warning/cancel logs for the cases where the
@ -1077,8 +1131,8 @@ class Context:
and isinstance(remote_error, RemoteActorError) and isinstance(remote_error, RemoteActorError)
and remote_error.boxed_type is StreamOverrun and remote_error.boxed_type is StreamOverrun
# and tuple(remote_error.msgdata['sender']) == our_uid # and tuple(remote_error.msgdata['sender']) == peer_uid
and tuple(remote_error.sender) == our_uid and tuple(remote_error.sender) == peer_uid
): ):
# NOTE: we set the local scope error to any "self # NOTE: we set the local scope error to any "self
# cancellation" error-response thus "absorbing" # cancellation" error-response thus "absorbing"
@ -1140,9 +1194,9 @@ class Context:
of the remote cancellation. of the remote cancellation.
''' '''
__tracebackhide__ = hide_tb __tracebackhide__: bool = hide_tb
assert self._portal, ( assert self._portal, (
"Context.result() can not be called from callee side!" '`Context.wait_for_result()` can not be called from callee side!'
) )
if self._final_result_is_set(): if self._final_result_is_set():
return self._result return self._result
@ -1197,10 +1251,11 @@ class Context:
# raising something we know might happen # raising something we know might happen
# during cancellation ;) # during cancellation ;)
(not self._cancel_called) (not self._cancel_called)
) ),
hide_tb=hide_tb,
) )
# TODO: eventually make `.outcome: Outcome` and thus return # TODO: eventually make `.outcome: Outcome` and thus return
# `self.outcome.unwrap()` here! # `self.outcome.unwrap()` here?
return self.outcome return self.outcome
# TODO: switch this with above! # TODO: switch this with above!
@ -1223,6 +1278,12 @@ class Context:
@property @property
def maybe_error(self) -> BaseException|None: def maybe_error(self) -> BaseException|None:
'''
Return the (remote) error as outcome or `None`.
Remote errors take precedence over local ones.
'''
le: BaseException|None = self._local_error le: BaseException|None = self._local_error
re: RemoteActorError|ContextCancelled|None = self._remote_error re: RemoteActorError|ContextCancelled|None = self._remote_error
@ -1284,17 +1345,24 @@ class Context:
Any| Any|
RemoteActorError| RemoteActorError|
ContextCancelled ContextCancelled
# TODO: make this a `outcome.Outcome`!
): ):
''' '''
The final "outcome" from an IPC context which can either be Return the "final outcome" (state) of the far end peer task
some Value returned from the target `@context`-decorated non-blocking. If the remote task has not completed then this
remote task-as-func, or an `Error` wrapping an exception field always resolves to the module defined `Unresolved`
raised from an RPC task fault or cancellation. handle.
Note that if the remote task has not terminated then this ------ - ------
field always resolves to the module defined `Unresolved` handle. TODO->( this is doc-driven-dev content not yet actual ;P )
TODO: implement this using `outcome.Outcome` types? The final "outcome" from an IPC context which can be any of:
- some `outcome.Value` which boxes the returned output from the peer task's
`@context`-decorated remote task-as-func, or
- an `outcome.Error` wrapping an exception raised that same RPC task
after a fault or cancellation, or
- an unresolved `outcome.Outcome` when the peer task is still
executing and has not yet completed.
''' '''
return ( return (
@ -1583,7 +1651,7 @@ class Context:
- NEVER `return` early before delivering the msg! - NEVER `return` early before delivering the msg!
bc if the error is a ctxc and there is a task waiting on bc if the error is a ctxc and there is a task waiting on
`.result()` we need the msg to be `.wait_for_result()` we need the msg to be
`send_chan.send_nowait()`-ed over the `._rx_chan` so `send_chan.send_nowait()`-ed over the `._rx_chan` so
that the error is relayed to that waiter task and thus that the error is relayed to that waiter task and thus
raised in user code! raised in user code!
@ -1828,7 +1896,7 @@ async def open_context_from_portal(
When the "callee" (side that is "called"/started by a call When the "callee" (side that is "called"/started by a call
to *this* method) returns, the caller side (this) unblocks to *this* method) returns, the caller side (this) unblocks
and any final value delivered from the other end can be and any final value delivered from the other end can be
retrieved using the `Context.result()` api. retrieved using the `Context.wait_for_result()` api.
The yielded ``Context`` instance further allows for opening The yielded ``Context`` instance further allows for opening
bidirectional streams, explicit cancellation and bidirectional streams, explicit cancellation and
@ -1893,7 +1961,7 @@ async def open_context_from_portal(
) )
assert ctx._remote_func_type == 'context' assert ctx._remote_func_type == 'context'
assert ctx._caller_info assert ctx._caller_info
_ctxvar_Context.set(ctx) prior_ctx_tok: Token = _ctxvar_Context.set(ctx)
# placeholder for any exception raised in the runtime # placeholder for any exception raised in the runtime
# or by user tasks which cause this context's closure. # or by user tasks which cause this context's closure.
@ -1965,14 +2033,14 @@ async def open_context_from_portal(
yield ctx, first yield ctx, first
# ??TODO??: do we still want to consider this or is # ??TODO??: do we still want to consider this or is
# the `else:` block handling via a `.result()` # the `else:` block handling via a `.wait_for_result()`
# call below enough?? # call below enough??
# #
# -[ ] pretty sure `.result()` internals do the # -[ ] pretty sure `.wait_for_result()` internals do the
# same as our ctxc handler below so it ended up # same as our ctxc handler below so it ended up
# being same (repeated?) behaviour, but ideally we # being same (repeated?) behaviour, but ideally we
# wouldn't have that duplication either by somehow # wouldn't have that duplication either by somehow
# factoring the `.result()` handler impl in a way # factoring the `.wait_for_result()` handler impl in a way
# that we can re-use it around the `yield` ^ here # that we can re-use it around the `yield` ^ here
# or vice versa? # or vice versa?
# #
@ -2110,7 +2178,7 @@ async def open_context_from_portal(
# AND a group-exc is only raised if there was > 1 # AND a group-exc is only raised if there was > 1
# tasks started *here* in the "caller" / opener # tasks started *here* in the "caller" / opener
# block. If any one of those tasks calls # block. If any one of those tasks calls
# `.result()` or `MsgStream.receive()` # `.wait_for_result()` or `MsgStream.receive()`
# `._maybe_raise_remote_err()` will be transitively # `._maybe_raise_remote_err()` will be transitively
# called and the remote error raised causing all # called and the remote error raised causing all
# tasks to be cancelled. # tasks to be cancelled.
@ -2131,9 +2199,16 @@ async def open_context_from_portal(
# handled in the block above ^^^ !! # handled in the block above ^^^ !!
# await _debug.pause() # await _debug.pause()
# log.cancel( # log.cancel(
log.exception( match scope_err:
f'{ctx.side}-side of `Context` terminated with ' case trio.Cancelled():
f'.outcome => {ctx.repr_outcome()}\n' logmeth = log.cancel
# XXX explicitly report on any non-graceful-taskc cases
case _:
logmeth = log.exception
logmeth(
f'ctx {ctx.side!r}-side exited with {ctx.repr_outcome()}\n'
) )
if debug_mode(): if debug_mode():
@ -2180,7 +2255,7 @@ async def open_context_from_portal(
f'|_{ctx._task}\n' f'|_{ctx._task}\n'
) )
# XXX NOTE XXX: the below call to # XXX NOTE XXX: the below call to
# `Context.result()` will ALWAYS raise # `Context.wait_for_result()` will ALWAYS raise
# a `ContextCancelled` (via an embedded call to # a `ContextCancelled` (via an embedded call to
# `Context._maybe_raise_remote_err()`) IFF # `Context._maybe_raise_remote_err()`) IFF
# a `Context._remote_error` was set by the runtime # a `Context._remote_error` was set by the runtime
@ -2190,10 +2265,10 @@ async def open_context_from_portal(
# ALWAYS SET any time "callee" side fails and causes "caller # ALWAYS SET any time "callee" side fails and causes "caller
# side" cancellation via a `ContextCancelled` here. # side" cancellation via a `ContextCancelled` here.
try: try:
result_or_err: Exception|Any = await ctx.result() result_or_err: Exception|Any = await ctx.wait_for_result()
except BaseException as berr: except BaseException as berr:
# on normal teardown, if we get some error # on normal teardown, if we get some error
# raised in `Context.result()` we still want to # raised in `Context.wait_for_result()` we still want to
# save that error on the ctx's state to # save that error on the ctx's state to
# determine things like `.cancelled_caught` for # determine things like `.cancelled_caught` for
# cases where there was remote cancellation but # cases where there was remote cancellation but
@ -2311,8 +2386,9 @@ async def open_context_from_portal(
and ctx.cancel_acked and ctx.cancel_acked
): ):
log.cancel( log.cancel(
f'Context cancelled by {ctx.side!r}-side task\n' f'Context cancelled by local {ctx.side!r}-side task\n'
f'|_{ctx._task}\n\n' f'c)>\n'
f' |_{ctx._task}\n\n'
f'{repr(scope_err)}\n' f'{repr(scope_err)}\n'
) )
@ -2328,8 +2404,10 @@ async def open_context_from_portal(
# type_only=True, # type_only=True,
) )
log.cancel( log.cancel(
f'Context terminated due to local {ctx.side!r}-side error:\n\n' f'Context terminated due to {ctx.side!r}-side\n\n'
f'{ctx.chan.uid} => {outcome_str}\n' # TODO: do an x)> on err and c)> only for ctxc?
f'c)> {outcome_str}\n'
f' |_{ctx.repr_rpc}\n'
) )
# FINALLY, remove the context from runtime tracking and # FINALLY, remove the context from runtime tracking and
@ -2344,6 +2422,9 @@ async def open_context_from_portal(
None, None,
) )
# XXX revert to prior IPC-task-ctx scope
_ctxvar_Context.reset(prior_ctx_tok)
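The `Token` save/restore added here is the stock `contextvars` pattern; a minimal standalone sketch of the same idea (the var below is a stand-in, not tractor's actual module-level `_ctxvar_Context`):

from contextvars import ContextVar, Token

# stand-in for the module-level `_ctxvar_Context` used above
_ctxvar_demo: ContextVar[object|None] = ContextVar(
    'ipc_ctx',
    default=None,
)

def scoped_set(ctx: object) -> None:
    # install the per-task value and keep the token..
    prior_tok: Token = _ctxvar_demo.set(ctx)
    try:
        assert _ctxvar_demo.get() is ctx
    finally:
        # ..so the prior value is restored on exit, mirroring the
        # `_ctxvar_Context.reset(prior_ctx_tok)` call above.
        _ctxvar_demo.reset(prior_tok)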
def mk_context( def mk_context(
chan: Channel, chan: Channel,

View File

@ -26,8 +26,8 @@ from typing import (
TYPE_CHECKING, TYPE_CHECKING,
) )
from contextlib import asynccontextmanager as acm from contextlib import asynccontextmanager as acm
import warnings
from tractor.log import get_logger
from .trionics import gather_contexts from .trionics import gather_contexts
from ._ipc import _connect_chan, Channel from ._ipc import _connect_chan, Channel
from ._portal import ( from ._portal import (
@ -40,11 +40,13 @@ from ._state import (
_runtime_vars, _runtime_vars,
) )
if TYPE_CHECKING: if TYPE_CHECKING:
from ._runtime import Actor from ._runtime import Actor
log = get_logger(__name__)
@acm @acm
async def get_registry( async def get_registry(
host: str, host: str,
@ -56,14 +58,12 @@ async def get_registry(
]: ]:
''' '''
Return a portal instance connected to a local or remote Return a portal instance connected to a local or remote
arbiter. registry-service actor; if a connection already exists re-use it
(presumably to call a `.register_actor()` registry runtime RPC
ep).
''' '''
actor = current_actor() actor: Actor = current_actor()
if not actor:
raise RuntimeError("No actor instance has been defined yet?")
if actor.is_registrar: if actor.is_registrar:
# we're already the arbiter # we're already the arbiter
# (likely a re-entrant call from the arbiter actor) # (likely a re-entrant call from the arbiter actor)
@ -72,6 +72,8 @@ async def get_registry(
Channel((host, port)) Channel((host, port))
) )
else: else:
# TODO: try to look pre-existing connection from
# `Actor._peers` and use it instead?
async with ( async with (
_connect_chan(host, port) as chan, _connect_chan(host, port) as chan,
open_portal(chan) as regstr_ptl, open_portal(chan) as regstr_ptl,
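A rough caller-side sketch of the renamed `get_registry()` API; the registrar (host, port) is an assumption for illustration only:

import tractor

async def connect_to_registrar() -> None:
    # assumes a registrar is listening on this (host, port)
    async with tractor.get_registry('127.0.0.1', 1616) as reg_portal:
        # any registrar-side RPC ep can now be called via the portal,
        # eg. the `.run_from_ns('self', ...)` style used by
        # `wait_for_actor()` further below.
        print(f'connected to registrar @ {reg_portal.chan.raddr}')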
@ -80,19 +82,6 @@ async def get_registry(
# TODO: deprecate and this remove _arbiter form!
@acm
async def get_arbiter(*args, **kwargs):
warnings.warn(
'`tractor.get_arbiter()` is now deprecated!\n'
'Use `.get_registry()` instead!',
DeprecationWarning,
stacklevel=2,
)
async with get_registry(*args, **kwargs) as to_yield:
yield to_yield
@acm @acm
async def get_root( async def get_root(
**kwargs, **kwargs,
@ -110,22 +99,53 @@ async def get_root(
yield portal yield portal
def get_peer_by_name(
name: str,
# uuid: str|None = None,
) -> list[Channel]|None: # at least 1
'''
Scan for an existing connection (set) to a named actor
and return any channels from `Actor._peers`.
This is an optimization method over querying the registrar for
the same info.
'''
actor: Actor = current_actor()
to_scan: dict[tuple, list[Channel]] = actor._peers.copy()
pchan: Channel|None = actor._parent_chan
if pchan:
to_scan[pchan.uid].append(pchan)
for aid, chans in to_scan.items():
_, peer_name = aid
if name == peer_name:
if not chans:
log.warning(
f'No IPC chans for matching peer {peer_name}\n'
)
continue
return chans
return None
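The new `get_peer_by_name()` fast-path is what the discovery APIs further below (`find_actor()`, `wait_for_actor()`) consult before falling back to a registrar lookup; a rough caller-side sketch (actor name made up), see how it stays transparent to callers:

import tractor

async def discover() -> None:
    # yields a `Portal` over an existing peer channel when possible,
    # otherwise falls back to a registrar lookup (or `None`).
    async with tractor.find_actor('data_feed') as portal:
        if portal is None:
            print('no such peer registered')
        else:
            print(f'found peer @ {portal.chan.raddr}')

    # blocks until at least one 'data_feed' registers.
    async with tractor.wait_for_actor('data_feed') as portal:
        print(f'peer up: {portal.chan.uid}')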
@acm @acm
async def query_actor( async def query_actor(
name: str, name: str,
arbiter_sockaddr: tuple[str, int] | None = None, regaddr: tuple[str, int]|None = None,
regaddr: tuple[str, int] | None = None,
) -> AsyncGenerator[ ) -> AsyncGenerator[
tuple[str, int] | None, tuple[str, int]|None,
None, None,
]: ]:
''' '''
Make a transport address lookup for an actor name to a specific Lookup a transport address (by actor name) by querying a registrar
registrar. listening @ `regaddr`.
Returns the (socket) address or ``None`` if no entry under that Returns the transport protocol (socket) address or `None` if no
name exists for the given registrar listening @ `regaddr`. entry under that name exists.
''' '''
actor: Actor = current_actor() actor: Actor = current_actor()
@ -137,14 +157,10 @@ async def query_actor(
'The current actor IS the registry!?' 'The current actor IS the registry!?'
) )
if arbiter_sockaddr is not None: maybe_peers: list[Channel]|None = get_peer_by_name(name)
warnings.warn( if maybe_peers:
'`tractor.query_actor(regaddr=<blah>)` is deprecated.\n' yield maybe_peers[0].raddr
'Use `registry_addrs: list[tuple]` instead!', return
DeprecationWarning,
stacklevel=2,
)
regaddr: list[tuple[str, int]] = arbiter_sockaddr
reg_portal: Portal reg_portal: Portal
regaddr: tuple[str, int] = regaddr or actor.reg_addrs[0] regaddr: tuple[str, int] = regaddr or actor.reg_addrs[0]
@ -159,10 +175,28 @@ async def query_actor(
yield sockaddr yield sockaddr
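A sketch of the slimmed-down `query_actor()` lookup; the name and registrar addr are illustrative only:

import tractor

async def where_is(name: str = 'logger') -> None:
    async with tractor.query_actor(
        name=name,
        regaddr=('127.0.0.1', 1616),  # assumed registrar addr
    ) as sockaddr:
        if sockaddr:
            host, port = sockaddr
            print(f'{name!r} listening @ {host}:{port}')
        else:
            print(f'no registry entry for {name!r}')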
@acm
async def maybe_open_portal(
addr: tuple[str, int],
name: str,
):
async with query_actor(
name=name,
regaddr=addr,
) as sockaddr:
pass
if sockaddr:
async with _connect_chan(*sockaddr) as chan:
async with open_portal(chan) as portal:
yield portal
else:
yield None
@acm @acm
async def find_actor( async def find_actor(
name: str, name: str,
arbiter_sockaddr: tuple[str, int]|None = None,
registry_addrs: list[tuple[str, int]]|None = None, registry_addrs: list[tuple[str, int]]|None = None,
only_first: bool = True, only_first: bool = True,
@ -179,29 +213,12 @@ async def find_actor(
known to the arbiter. known to the arbiter.
''' '''
if arbiter_sockaddr is not None: # optimization path, use any pre-existing peer channel
warnings.warn( maybe_peers: list[Channel]|None = get_peer_by_name(name)
'`tractor.find_actor(arbiter_sockaddr=<blah>)` is deprecated.\n' if maybe_peers and only_first:
'Use `registry_addrs: list[tuple]` instead!', async with open_portal(maybe_peers[0]) as peer_portal:
DeprecationWarning, yield peer_portal
stacklevel=2, return
)
registry_addrs: list[tuple[str, int]] = [arbiter_sockaddr]
@acm
async def maybe_open_portal_from_reg_addr(
addr: tuple[str, int],
):
async with query_actor(
name=name,
regaddr=addr,
) as sockaddr:
if sockaddr:
async with _connect_chan(*sockaddr) as chan:
async with open_portal(chan) as portal:
yield portal
else:
yield None
if not registry_addrs: if not registry_addrs:
# XXX NOTE: make sure to dynamically read the value on # XXX NOTE: make sure to dynamically read the value on
@ -217,10 +234,13 @@ async def find_actor(
maybe_portals: list[ maybe_portals: list[
AsyncContextManager[tuple[str, int]] AsyncContextManager[tuple[str, int]]
] = list( ] = list(
maybe_open_portal_from_reg_addr(addr) maybe_open_portal(
addr=addr,
name=name,
)
for addr in registry_addrs for addr in registry_addrs
) )
portals: list[Portal]
async with gather_contexts( async with gather_contexts(
mngrs=maybe_portals, mngrs=maybe_portals,
) as portals: ) as portals:
@ -254,31 +274,31 @@ async def find_actor(
@acm @acm
async def wait_for_actor( async def wait_for_actor(
name: str, name: str,
arbiter_sockaddr: tuple[str, int] | None = None,
registry_addr: tuple[str, int] | None = None, registry_addr: tuple[str, int] | None = None,
) -> AsyncGenerator[Portal, None]: ) -> AsyncGenerator[Portal, None]:
''' '''
Wait on an actor to register with the arbiter. Wait on at least one peer actor to register `name` with the
registrar, yield a `Portal` to the first registree.
A portal to the first registered actor is returned.
''' '''
actor: Actor = current_actor() actor: Actor = current_actor()
if arbiter_sockaddr is not None: # optimization path, use any pre-existing peer channel
warnings.warn( maybe_peers: list[Channel]|None = get_peer_by_name(name)
'`tractor.wait_for_actor(arbiter_sockaddr=<foo>)` is deprecated.\n' if maybe_peers:
'Use `registry_addr: tuple` instead!', async with open_portal(maybe_peers[0]) as peer_portal:
DeprecationWarning, yield peer_portal
stacklevel=2, return
)
registry_addr: tuple[str, int] = arbiter_sockaddr
regaddr: tuple[str, int] = (
registry_addr
or
actor.reg_addrs[0]
)
# TODO: use `.trionics.gather_contexts()` like # TODO: use `.trionics.gather_contexts()` like
# above in `find_actor()` as well? # above in `find_actor()` as well?
reg_portal: Portal reg_portal: Portal
regaddr: tuple[str, int] = registry_addr or actor.reg_addrs[0]
async with get_registry(*regaddr) as reg_portal: async with get_registry(*regaddr) as reg_portal:
sockaddrs = await reg_portal.run_from_ns( sockaddrs = await reg_portal.run_from_ns(
'self', 'self',

View File

@ -20,7 +20,9 @@ Sub-process entry points.
""" """
from __future__ import annotations from __future__ import annotations
from functools import partial from functools import partial
# import textwrap import multiprocessing as mp
import os
import textwrap
from typing import ( from typing import (
Any, Any,
TYPE_CHECKING, TYPE_CHECKING,
@ -58,25 +60,27 @@ def _mp_main(
) -> None: ) -> None:
''' '''
The routine called *after fork* which invokes a fresh ``trio.run`` The routine called *after fork* which invokes a fresh `trio.run()`
''' '''
actor._forkserver_info = forkserver_info actor._forkserver_info = forkserver_info
from ._spawn import try_set_start_method from ._spawn import try_set_start_method
spawn_ctx = try_set_start_method(start_method) spawn_ctx: mp.context.BaseContext = try_set_start_method(start_method)
assert spawn_ctx
if actor.loglevel is not None: if actor.loglevel is not None:
log.info( log.info(
f"Setting loglevel for {actor.uid} to {actor.loglevel}") f'Setting loglevel for {actor.uid} to {actor.loglevel}'
)
get_console_log(actor.loglevel) get_console_log(actor.loglevel)
assert spawn_ctx # TODO: use scops headers like for `trio` below!
# (well after we libify it maybe..)
log.info( log.info(
f"Started new {spawn_ctx.current_process()} for {actor.uid}") f'Started new {spawn_ctx.current_process()} for {actor.uid}'
# f"parent_addr is {parent_addr}"
_state._current_actor = actor )
_state._current_actor: Actor = actor
log.debug(f"parent_addr is {parent_addr}")
trio_main = partial( trio_main = partial(
async_main, async_main,
actor=actor, actor=actor,
@ -93,7 +97,110 @@ def _mp_main(
pass # handle it the same way trio does? pass # handle it the same way trio does?
finally: finally:
log.info(f"Subactor {actor.uid} terminated") log.info(
f'`mp`-subactor {actor.uid} exited'
)
# TODO: move this func to some kinda `.devx._conc_lang.py` eventually
# as we work out our multi-domain state-flow-syntax!
def nest_from_op(
input_op: str,
#
# ?TODO? an idea for a syntax to the state of concurrent systems
# as a "3-domain" (execution, scope, storage) model and using
# a minimal ascii/utf-8 operator-set.
#
# try not to take any of this seriously yet XD
#
# > is a "play operator" indicating (CPU bound)
# exec/work/ops required at the "lowest level computing"
#
# execution primitives (tasks, threads, actors..) denote their # execution primitives (tasks, threads, actors..) denote their
# lifetime with '(' and ')' since parentheses normally are used
# in many langs to denote function calls.
#
# starting = (
# >( opening/starting; beginning of the thread-of-exec (toe?)
# (> opened/started, (finished spawning toe)
# |_<Task: blah blah..> repr of toe, in py these look like <objs>
#
# >) closing/exiting/stopping,
# )> closed/exited/stopped,
# |_<Task: blah blah..>
# [OR <), )< ?? ]
#
# ending = )
# >c) cancelling to close/exit
# c)> cancelled (caused close), OR?
# |_<Actor: ..>
# OR maybe "<c)" which better indicates the cancel being
# "delivered/returned" / returned" to LHS?
#
# >x) erroring to eventually exit
# x)> errored and terminated
# |_<Actor: ...>
#
# scopes: supers/nurseries, IPC-ctxs, sessions, perms, etc.
# >{ opening
# {> opened
# }> closed
# >} closing
#
# storage: like queues, shm-buffers, files, etc..
# >[ opening
# [> opened
# |_<FileObj: ..>
#
# >] closing
# ]> closed
# IPC ops: channels, transports, msging
# => req msg
# <= resp msg
# <=> 2-way streaming (of msgs)
# <- recv 1 msg
# -> send 1 msg
#
# TODO: still not sure on R/L-HS approach..?
# =>( send-req to exec start (task, actor, thread..)
# (<= recv-req to ^
#
# (<= recv-req ^
# <=( recv-resp opened remote exec primitive
# <=) recv-resp closed
#
# )<=c req to stop due to cancel
# c=>) req to stop due to cancel
#
# =>{ recv-req to open
# <={ send-status that it closed
tree_str: str,
# NOTE: so move back-from-the-left of the `input_op` by
# this amount.
back_from_op: int = 0,
) -> str:
'''
Depth-increment the (presumably hierarchy/supervision) input
"tree string" below the provided `input_op` execution
operator, injecting a `"\n|_{input_op}\n"` and indenting the
`tree_str` to nest content aligned with the op's last char.
'''
return (
f'{input_op}\n'
+
textwrap.indent(
tree_str,
prefix=(
len(input_op)
-
(back_from_op + 1)
) * ' ',
)
)
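A quick sketch of what `nest_from_op()` renders (assuming it is in scope; the actor repr is made up):

actor_info: str = (
    '<Actor sub_worker>\n'
    ' uid: (sub_worker, 1234-5678)\n'
)
print(
    nest_from_op(
        input_op='>(',  # "opening/starting" op from the table above
        tree_str=actor_info,
    )
)
# roughly renders as:
#
# >(
#  <Actor sub_worker>
#   uid: (sub_worker, 1234-5678)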
def _trio_main( def _trio_main(
@ -107,7 +214,6 @@ def _trio_main(
Entry point for a `trio_run_in_process` subactor. Entry point for a `trio_run_in_process` subactor.
''' '''
# __tracebackhide__: bool = True
_debug.hide_runtime_frames() _debug.hide_runtime_frames()
_state._current_actor = actor _state._current_actor = actor
@ -119,7 +225,6 @@ def _trio_main(
if actor.loglevel is not None: if actor.loglevel is not None:
get_console_log(actor.loglevel) get_console_log(actor.loglevel)
import os
actor_info: str = ( actor_info: str = (
f'|_{actor}\n' f'|_{actor}\n'
f' uid: {actor.uid}\n' f' uid: {actor.uid}\n'
@ -128,13 +233,24 @@ def _trio_main(
f' loglevel: {actor.loglevel}\n' f' loglevel: {actor.loglevel}\n'
) )
log.info( log.info(
'Started new trio subactor:\n' 'Starting new `trio` subactor:\n'
+ +
'>\n' # like a "started/play"-icon from super perspective nest_from_op(
+ input_op='>(', # see syntax ideas above
actor_info, tree_str=actor_info,
back_from_op=1,
)
) )
logmeth = log.info
exit_status: str = (
'Subactor exited\n'
+
nest_from_op(
input_op=')>', # like a "closed-to-play"-icon from super perspective
tree_str=actor_info,
back_from_op=1,
)
)
try: try:
if infect_asyncio: if infect_asyncio:
actor._infected_aio = True actor._infected_aio = True
@ -143,16 +259,28 @@ def _trio_main(
trio.run(trio_main) trio.run(trio_main)
except KeyboardInterrupt: except KeyboardInterrupt:
log.cancel( logmeth = log.cancel
'Actor received KBI\n' exit_status: str = (
'Actor received KBI (aka an OS-cancel)\n'
+ +
actor_info nest_from_op(
input_op='c)>', # closed due to cancel (see above)
tree_str=actor_info,
)
) )
except BaseException as err:
logmeth = log.error
exit_status: str = (
'Main actor task exited due to crash?\n'
+
nest_from_op(
input_op='x)>', # closed by error
tree_str=actor_info,
)
)
# NOTE since we raise a tb will already be shown on the
# console, thus we do NOT use `.exception()` above.
raise err
finally: finally:
log.info( logmeth(exit_status)
'Subactor terminated\n'
+
'x\n' # like a "crossed-out/killed" from super perspective
+
actor_info
)

View File

@ -906,8 +906,59 @@ class StreamOverrun(
''' '''
class TransportClosed(trio.ClosedResourceError): class TransportClosed(trio.BrokenResourceError):
"Underlying channel transport was closed prior to use" '''
IPC transport (protocol) connection was closed or broke and
indicates that the wrapping communication `Channel` can no longer
be used to send/receive msgs from the remote peer.
'''
def __init__(
self,
message: str,
loglevel: str = 'transport',
cause: BaseException|None = None,
raise_on_report: bool = False,
) -> None:
self.message: str = message
self._loglevel = loglevel
super().__init__(message)
if cause is not None:
self.__cause__ = cause
# flag to toggle whether the msg loop should raise
# the exc in its `TransportClosed` handler block.
self._raise_on_report = raise_on_report
def report_n_maybe_raise(
self,
message: str|None = None,
) -> None:
'''
Using the init-specified log level emit a logging report
for this error.
'''
message: str = message or self.message
# when a cause is set, slap it onto the log emission.
if cause := self.__cause__:
cause_tb_str: str = ''.join(
traceback.format_tb(cause.__traceback__)
)
message += (
f'{cause_tb_str}\n' # tb
f' {cause}\n' # exc repr
)
getattr(log, self._loglevel)(message)
# some errors we want to blow up from
# inside the RPC msg loop
if self._raise_on_report:
raise self from cause
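A hedged sketch of how a consumer (eg. the RPC msg loop) might use the new exc API; the channel arg and handler name are illustrative only:

from tractor import TransportClosed

async def drain_channel(chan) -> None:
    try:
        async for msg in chan:
            ...  # dispatch each msg
    except TransportClosed as tc:
        # emit the report at the init-specified log level and only
        # re-raise when the closure was flagged as non-graceful via
        # `raise_on_report=True`.
        tc.report_n_maybe_raise(
            message=(
                'Peer IPC chan closed mid-dialog\n'
                + tc.message
            ),
        )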
class NoResult(RuntimeError): class NoResult(RuntimeError):
@ -922,6 +973,12 @@ class NoRuntime(RuntimeError):
"The root actor has not been initialized yet" "The root actor has not been initialized yet"
class MessagingError(Exception):
'''
IPC related msg (typing), transaction (ordering) or dialog
handling error.
'''
class AsyncioCancelled(Exception): class AsyncioCancelled(Exception):
''' '''
@ -929,12 +986,8 @@ class AsyncioCancelled(Exception):
for use with the ``to_asyncio`` module for use with the ``to_asyncio`` module
to be raised in the ``trio`` side task to be raised in the ``trio`` side task
''' NOTE: this should NOT inherit from `asyncio.CancelledError` or
tests should break!
class MessagingError(Exception):
'''
IPC related msg (typing), transaction (ordering) or dialog
handling error.
''' '''
@ -1324,7 +1377,9 @@ def _mk_recv_mte(
any_pld: Any = msgpack.decode(msg.pld) any_pld: Any = msgpack.decode(msg.pld)
message: str = ( message: str = (
f'invalid `{msg_type.__qualname__}` msg payload\n\n' f'invalid `{msg_type.__qualname__}` msg payload\n\n'
f'value: `{any_pld!r}` does not match type-spec: ' f'{any_pld!r}\n\n'
f'has type {type(any_pld)!r}\n\n'
f'and does not match type-spec '
f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`'
) )
bad_msg = msg bad_msg = msg
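For reference, the reworked `_mk_recv_mte()` message body (the same fragments checked via `ensure_in_message=[..]` in the payload-spec test above) composes roughly like so; all values here are stand-ins:

any_pld = 10                      # the offending decoded payload
msg_qualname = 'Started'          # offending msg type's qualname
pld_spec_str = 'PldMsg|NoneType'  # the codec's pld-spec repr

message: str = (
    f'invalid `{msg_qualname}` msg payload\n\n'
    f'{any_pld!r}\n\n'
    f'has type {type(any_pld)!r}\n\n'
    f'and does not match type-spec '
    f'`{msg_qualname}.pld: {pld_spec_str}`'
)
print(message)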

View File

@ -54,7 +54,7 @@ from tractor._exceptions import (
) )
from tractor.msg import ( from tractor.msg import (
_ctxvar_MsgCodec, _ctxvar_MsgCodec,
_codec, # _codec, XXX see `self._codec` sanity/debug checks
MsgCodec, MsgCodec,
types as msgtypes, types as msgtypes,
pretty_struct, pretty_struct,
@ -65,8 +65,18 @@ log = get_logger(__name__)
_is_windows = platform.system() == 'Windows' _is_windows = platform.system() == 'Windows'
def get_stream_addrs(stream: trio.SocketStream) -> tuple: def get_stream_addrs(
# should both be IP sockets stream: trio.SocketStream
) -> tuple[
tuple[str, int], # local
tuple[str, int], # remote
]:
'''
Return the `trio` streaming transport prot's socket-addrs for
both the local and remote sides as a pair.
'''
# rn, should both be IP sockets
lsockname = stream.socket.getsockname() lsockname = stream.socket.getsockname()
rsockname = stream.socket.getpeername() rsockname = stream.socket.getpeername()
return ( return (
@ -75,17 +85,22 @@ def get_stream_addrs(stream: trio.SocketStream) -> tuple:
) )
# TODO: this should be our `Union[*msgtypes.__spec__]` now right? # from tractor.msg.types import MsgType
MsgType = TypeVar("MsgType") # ?TODO? this should be our `Union[*msgtypes.__spec__]` alias now right..?
# => BLEH, except can't bc prots must inherit typevar or param-spec
# TODO: consider using a generic def and indexing with our eventual # vars..
# msg definition/types? MsgType = TypeVar('MsgType')
# - https://docs.python.org/3/library/typing.html#typing.Protocol
# - https://jcristharif.com/msgspec/usage.html#structs
# TODO: break up this mod into a subpkg so we can start adding new
# backends and move this type stuff into a dedicated file.. Bo
#
@runtime_checkable @runtime_checkable
class MsgTransport(Protocol[MsgType]): class MsgTransport(Protocol[MsgType]):
#
# ^-TODO-^ consider using a generic def and indexing with our
# eventual msg definition/types?
# - https://docs.python.org/3/library/typing.html#typing.Protocol
stream: trio.SocketStream stream: trio.SocketStream
drained: list[MsgType] drained: list[MsgType]
@ -120,9 +135,9 @@ class MsgTransport(Protocol[MsgType]):
... ...
# TODO: not sure why we have to inherit here, but it seems to be an # TODO: typing oddity.. not sure why we have to inherit here, but it
# issue with ``get_msg_transport()`` returning a ``Type[Protocol]``; # seems to be an issue with `get_msg_transport()` returning
# probably should make a `mypy` issue? # a `Type[Protocol]`; probably should make a `mypy` issue?
class MsgpackTCPStream(MsgTransport): class MsgpackTCPStream(MsgTransport):
''' '''
A ``trio.SocketStream`` delivering ``msgpack`` formatted data A ``trio.SocketStream`` delivering ``msgpack`` formatted data
@ -145,7 +160,7 @@ class MsgpackTCPStream(MsgTransport):
# https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types
# #
# TODO: define this as a `Codec` struct which can be # TODO: define this as a `Codec` struct which can be
# overriden dynamically by the application/runtime. # overriden dynamically by the application/runtime?
codec: tuple[ codec: tuple[
Callable[[Any], Any]|None, # coder Callable[[Any], Any]|None, # coder
Callable[[type, Any], Any]|None, # decoder Callable[[type, Any], Any]|None, # decoder
@ -160,7 +175,7 @@ class MsgpackTCPStream(MsgTransport):
self._laddr, self._raddr = get_stream_addrs(stream) self._laddr, self._raddr = get_stream_addrs(stream)
# create read loop instance # create read loop instance
self._agen = self._iter_packets() self._aiter_pkts = self._iter_packets()
self._send_lock = trio.StrictFIFOLock() self._send_lock = trio.StrictFIFOLock()
# public i guess? # public i guess?
@ -174,15 +189,12 @@ class MsgpackTCPStream(MsgTransport):
# allow for custom IPC msg interchange format # allow for custom IPC msg interchange format
# dynamic override Bo # dynamic override Bo
self._task = trio.lowlevel.current_task() self._task = trio.lowlevel.current_task()
self._codec: MsgCodec = (
codec # XXX for ctxvar debug only!
or # self._codec: MsgCodec = (
_codec._ctxvar_MsgCodec.get() # codec
) # or
# TODO: mask out before release? # _codec._ctxvar_MsgCodec.get()
# log.runtime(
# f'New {self} created with codec\n'
# f'codec: {self._codec}\n'
# ) # )
async def _iter_packets(self) -> AsyncGenerator[dict, None]: async def _iter_packets(self) -> AsyncGenerator[dict, None]:
@ -190,6 +202,11 @@ class MsgpackTCPStream(MsgTransport):
Yield `bytes`-blob decoded packets from the underlying TCP Yield `bytes`-blob decoded packets from the underlying TCP
stream using the current task's `MsgCodec`. stream using the current task's `MsgCodec`.
This is a streaming routine implemented as an async generator
func (which was the original design, but could be changed?)
and is allocated by a `.__call__()` inside `.__init__()` where
it is assigned to the `._aiter_pkts` attr.
''' '''
decodes_failed: int = 0 decodes_failed: int = 0
@ -204,16 +221,82 @@ class MsgpackTCPStream(MsgTransport):
# seem to be getting racy failures here on # seem to be getting racy failures here on
# arbiter/registry name subs.. # arbiter/registry name subs..
trio.BrokenResourceError, trio.BrokenResourceError,
):
raise TransportClosed(
f'transport {self} was already closed prior ro read'
)
) as trans_err:
loglevel = 'transport'
match trans_err:
# case (
# ConnectionResetError()
# ):
# loglevel = 'transport'
# peer actor (graceful??) TCP EOF but `tricycle`
# seems to raise a 0-bytes-read?
case ValueError() if (
'unclean EOF' in trans_err.args[0]
):
pass
# peer actor (task) prolly shutdown quickly due
# to cancellation
case trio.BrokenResourceError() if (
'Connection reset by peer' in trans_err.args[0]
):
pass
# unless the disconnect condition falls under "a
# normal operation breakage" we usualy console warn
# about it.
case _:
loglevel: str = 'warning'
raise TransportClosed(
message=(
f'IPC transport already closed by peer\n'
f'x)> {type(trans_err)}\n'
f' |_{self}\n'
),
loglevel=loglevel,
) from trans_err
# XXX definitely can happen if transport is closed
# manually by another `trio.lowlevel.Task` in the
# same actor; we use this in some simulated fault
# testing for ex, but generally should never happen
# under normal operation!
#
# NOTE: as such we always re-raise this error from the
# RPC msg loop!
except trio.ClosedResourceError as closure_err:
raise TransportClosed(
message=(
f'IPC transport already manually closed locally?\n'
f'x)> {type(closure_err)} \n'
f' |_{self}\n'
),
loglevel='error',
raise_on_report=(
closure_err.args[0] == 'another task closed this fd'
or
closure_err.args[0] in ['another task closed this fd']
),
) from closure_err
# graceful TCP EOF disconnect
if header == b'': if header == b'':
raise TransportClosed( raise TransportClosed(
f'transport {self} was already closed prior ro read' message=(
f'IPC transport already gracefully closed\n'
f')>\n'
f'|_{self}\n'
),
loglevel='transport',
# cause=??? # handy or no?
) )
size: int
size, = struct.unpack("<I", header) size, = struct.unpack("<I", header)
log.transport(f'received header {size}') # type: ignore log.transport(f'received header {size}') # type: ignore
@ -225,33 +308,20 @@ class MsgpackTCPStream(MsgTransport):
# the current `MsgCodec`. # the current `MsgCodec`.
codec: MsgCodec = _ctxvar_MsgCodec.get() codec: MsgCodec = _ctxvar_MsgCodec.get()
# TODO: mask out before release? # XXX for ctxvar debug only!
if self._codec.pld_spec != codec.pld_spec: # if self._codec.pld_spec != codec.pld_spec:
# assert ( # assert (
# task := trio.lowlevel.current_task() # task := trio.lowlevel.current_task()
# ) is not self._task # ) is not self._task
# self._task = task # self._task = task
self._codec = codec # self._codec = codec
log.runtime( # log.runtime(
f'Using new codec in {self}.recv()\n' # f'Using new codec in {self}.recv()\n'
f'codec: {self._codec}\n\n' # f'codec: {self._codec}\n\n'
f'msg_bytes: {msg_bytes}\n' # f'msg_bytes: {msg_bytes}\n'
) # )
yield codec.decode(msg_bytes) yield codec.decode(msg_bytes)
# TODO: remove, was only for orig draft impl
# testing.
#
# curr_codec: MsgCodec = _ctxvar_MsgCodec.get()
# obj = curr_codec.decode(msg_bytes)
# if (
# curr_codec is not
# _codec._def_msgspec_codec
# ):
# print(f'OBJ: {obj}\n')
#
# yield obj
# XXX NOTE: since the below error derives from # XXX NOTE: since the below error derives from
# `DecodeError` we need to catch is specially # `DecodeError` we need to catch is specially
# and always raise such that spec violations # and always raise such that spec violations
@ -295,7 +365,8 @@ class MsgpackTCPStream(MsgTransport):
msg: msgtypes.MsgType, msg: msgtypes.MsgType,
strict_types: bool = True, strict_types: bool = True,
# hide_tb: bool = False, hide_tb: bool = False,
) -> None: ) -> None:
''' '''
Send a msgpack encoded py-object-blob-as-msg over TCP. Send a msgpack encoded py-object-blob-as-msg over TCP.
@ -304,21 +375,24 @@ class MsgpackTCPStream(MsgTransport):
invalid msg type invalid msg type
''' '''
# __tracebackhide__: bool = hide_tb __tracebackhide__: bool = hide_tb
# XXX see `trio._sync.AsyncContextManagerMixin` for details
# on the `.acquire()`/`.release()` sequencing..
async with self._send_lock: async with self._send_lock:
# NOTE: lookup the `trio.Task.context`'s var for # NOTE: lookup the `trio.Task.context`'s var for
# the current `MsgCodec`. # the current `MsgCodec`.
codec: MsgCodec = _ctxvar_MsgCodec.get() codec: MsgCodec = _ctxvar_MsgCodec.get()
# TODO: mask out before release? # XXX for ctxvar debug only!
if self._codec.pld_spec != codec.pld_spec: # if self._codec.pld_spec != codec.pld_spec:
self._codec = codec # self._codec = codec
log.runtime( # log.runtime(
f'Using new codec in {self}.send()\n' # f'Using new codec in {self}.send()\n'
f'codec: {self._codec}\n\n' # f'codec: {self._codec}\n\n'
f'msg: {msg}\n' # f'msg: {msg}\n'
) # )
if type(msg) not in msgtypes.__msg_types__: if type(msg) not in msgtypes.__msg_types__:
if strict_types: if strict_types:
@ -352,6 +426,16 @@ class MsgpackTCPStream(MsgTransport):
size: bytes = struct.pack("<I", len(bytes_data)) size: bytes = struct.pack("<I", len(bytes_data))
return await self.stream.send_all(size + bytes_data) return await self.stream.send_all(size + bytes_data)
# ?TODO? does it help ever to dynamically show this
# frame?
# try:
# <the-above_code>
# except BaseException as _err:
# err = _err
# if not isinstance(err, MsgTypeError):
# __tracebackhide__: bool = False
# raise
@property @property
def laddr(self) -> tuple[str, int]: def laddr(self) -> tuple[str, int]:
return self._laddr return self._laddr
@ -361,7 +445,7 @@ class MsgpackTCPStream(MsgTransport):
return self._raddr return self._raddr
async def recv(self) -> Any: async def recv(self) -> Any:
return await self._agen.asend(None) return await self._aiter_pkts.asend(None)
async def drain(self) -> AsyncIterator[dict]: async def drain(self) -> AsyncIterator[dict]:
''' '''
@ -378,7 +462,7 @@ class MsgpackTCPStream(MsgTransport):
yield msg yield msg
def __aiter__(self): def __aiter__(self):
return self._agen return self._aiter_pkts
def connected(self) -> bool: def connected(self) -> bool:
return self.stream.socket.fileno() != -1 return self.stream.socket.fileno() != -1
@ -433,7 +517,7 @@ class Channel:
# set after handshake - always uid of far end # set after handshake - always uid of far end
self.uid: tuple[str, str]|None = None self.uid: tuple[str, str]|None = None
self._agen = self._aiter_recv() self._aiter_msgs = self._iter_msgs()
self._exc: Exception|None = None # set if far end actor errors self._exc: Exception|None = None # set if far end actor errors
self._closed: bool = False self._closed: bool = False
@ -497,8 +581,6 @@ class Channel:
) )
return self._transport return self._transport
# TODO: something simliar at the IPC-`Context`
# level so as to support
@cm @cm
def apply_codec( def apply_codec(
self, self,
@ -517,6 +599,7 @@ class Channel:
finally: finally:
self._transport.codec = orig self._transport.codec = orig
# TODO: do a .src/.dst: str for maddrs?
def __repr__(self) -> str: def __repr__(self) -> str:
if not self._transport: if not self._transport:
return '<Channel with inactive transport?>' return '<Channel with inactive transport?>'
@ -560,27 +643,43 @@ class Channel:
) )
return transport return transport
# TODO: something like,
# `pdbp.hideframe_on(errors=[MsgTypeError])`
# instead of the `try/except` hack we have rn..
# seems like a pretty useful thing to have in general
# along with being able to filter certain stack frame(s / sets)
# possibly based on the current log-level?
async def send( async def send(
self, self,
payload: Any, payload: Any,
# hide_tb: bool = False, hide_tb: bool = False,
) -> None: ) -> None:
''' '''
Send a coded msg-blob over the transport. Send a coded msg-blob over the transport.
''' '''
# __tracebackhide__: bool = hide_tb __tracebackhide__: bool = hide_tb
log.transport( try:
'=> send IPC msg:\n\n' log.transport(
f'{pformat(payload)}\n' '=> send IPC msg:\n\n'
) # type: ignore f'{pformat(payload)}\n'
assert self._transport )
await self._transport.send( # assert self._transport # but why typing?
payload, await self._transport.send(
# hide_tb=hide_tb, payload,
) hide_tb=hide_tb,
)
except BaseException as _err:
err = _err # bind for introspection
if not isinstance(_err, MsgTypeError):
# assert err
__tracebackhide__: bool = False
else:
assert err.cid
raise
async def recv(self) -> Any: async def recv(self) -> Any:
assert self._transport assert self._transport
@ -617,8 +716,11 @@ class Channel:
await self.aclose(*args) await self.aclose(*args)
def __aiter__(self): def __aiter__(self):
return self._agen return self._aiter_msgs
# ?TODO? run any reconnection sequence?
# -[ ] prolly should be impl-ed as deco-API?
#
# async def _reconnect(self) -> None: # async def _reconnect(self) -> None:
# """Handle connection failures by polling until a reconnect can be # """Handle connection failures by polling until a reconnect can be
# established. # established.
@ -636,7 +738,6 @@ class Channel:
# else: # else:
# log.transport("Stream connection re-established!") # log.transport("Stream connection re-established!")
# # TODO: run any reconnection sequence
# # on_recon = self._recon_seq # # on_recon = self._recon_seq
# # if on_recon: # # if on_recon:
# # await on_recon(self) # # await on_recon(self)
@ -650,11 +751,17 @@ class Channel:
# " for re-establishment") # " for re-establishment")
# await trio.sleep(1) # await trio.sleep(1)
async def _aiter_recv( async def _iter_msgs(
self self
) -> AsyncGenerator[Any, None]: ) -> AsyncGenerator[Any, None]:
''' '''
Async iterate items from underlying stream. Yield `MsgType` IPC msgs decoded and delivered from
an underlying `MsgTransport` protocol.
This is a streaming routine also implemented as an async-gen
func (same as `MsgTransport._iter_packets()`) and gets allocated by
a `.__call__()` inside `.__init__()` where it is assigned to
the `._aiter_msgs` attr.
''' '''
assert self._transport assert self._transport
@ -680,15 +787,6 @@ class Channel:
case _: case _:
yield msg yield msg
# TODO: if we were gonna do this it should be
# done up at the `MsgStream` layer!
#
# sent = yield item
# if sent is not None:
# # optimization, passing None through all the
# # time is pointless
# await self._transport.send(sent)
except trio.BrokenResourceError: except trio.BrokenResourceError:
# if not self._autorecon: # if not self._autorecon:

View File

@ -97,7 +97,7 @@ class Portal:
channel: Channel, channel: Channel,
) -> None: ) -> None:
self.chan = channel self._chan: Channel = channel
# during the portal's lifetime # during the portal's lifetime
self._final_result_pld: Any|None = None self._final_result_pld: Any|None = None
self._final_result_msg: PayloadMsg|None = None self._final_result_msg: PayloadMsg|None = None
@ -109,6 +109,10 @@ class Portal:
self._streams: set[MsgStream] = set() self._streams: set[MsgStream] = set()
self.actor: Actor = current_actor() self.actor: Actor = current_actor()
@property
def chan(self) -> Channel:
return self._chan
@property @property
def channel(self) -> Channel: def channel(self) -> Channel:
''' '''
@ -121,7 +125,8 @@ class Portal:
) )
return self.chan return self.chan
# TODO: factor this out into an `ActorNursery` wrapper # TODO: factor this out into a `.highlevel` API-wrapper that uses
# a single `.open_context()` call underneath.
async def _submit_for_result( async def _submit_for_result(
self, self,
ns: str, ns: str,
@ -141,13 +146,22 @@ class Portal:
portal=self, portal=self,
) )
# TODO: we should deprecate this API right? since if we remove
# `.run_in_actor()` (and instead move it to a `.highlevel`
# wrapper api (around a single `.open_context()` call) we don't
# really have any notion of a "main" remote task any more?
#
# @api_frame # @api_frame
async def result(self) -> Any: async def wait_for_result(
self,
hide_tb: bool = True,
) -> Any:
''' '''
Return the result(s) from the remote actor's "main" task. Return the final result delivered by a `Return`-msg from the
remote peer actor's "main" task's `return` statement.
''' '''
__tracebackhide__ = True __tracebackhide__: bool = hide_tb
# Check for non-rpc errors slapped on the # Check for non-rpc errors slapped on the
# channel for which we always raise # channel for which we always raise
exc = self.channel._exc exc = self.channel._exc
@ -182,6 +196,23 @@ class Portal:
return self._final_result_pld return self._final_result_pld
# TODO: factor this out into a `.highlevel` API-wrapper that uses
# a single `.open_context()` call underneath.
async def result(
self,
*args,
**kwargs,
) -> Any|Exception:
typname: str = type(self).__name__
log.warning(
f'`{typname}.result()` is DEPRECATED!\n'
f'Use `{typname}.wait_for_result()` instead!\n'
)
return await self.wait_for_result(
*args,
**kwargs,
)
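A rough usage sketch of the rename; the deprecated `.result()` shim just forwards (with a warning) and the actor/target names below are made up:

import tractor

async def compute() -> int:
    # trivial remote "main" task body
    return 42

async def main() -> None:
    async with tractor.open_nursery() as an:
        portal = await an.run_in_actor(
            compute,
            name='worker',
        )
        # new spelling,
        assert await portal.wait_for_result() == 42
        # old spelling still works but emits a deprecation warning.
        assert await portal.result() == 42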
async def _cancel_streams(self): async def _cancel_streams(self):
# terminate all locally running async generator # terminate all locally running async generator
# IPC calls # IPC calls
@ -232,14 +263,15 @@ class Portal:
return False return False
reminfo: str = ( reminfo: str = (
f'Portal.cancel_actor() => {self.channel.uid}\n' f'c)=> {self.channel.uid}\n'
f'|_{chan}\n' f' |_{chan}\n'
) )
log.cancel( log.cancel(
f'Requesting runtime cancel for peer\n\n' f'Requesting actor-runtime cancel for peer\n\n'
f'{reminfo}' f'{reminfo}'
) )
# XXX the one spot we set it?
self.channel._cancel_called: bool = True self.channel._cancel_called: bool = True
try: try:
# send cancel cmd - might not get response # send cancel cmd - might not get response
@ -279,6 +311,8 @@ class Portal:
) )
return False return False
# TODO: do we still need this for low level `Actor`-runtime
# method calls or can we also remove it?
async def run_from_ns( async def run_from_ns(
self, self,
namespace_path: str, namespace_path: str,
@ -316,6 +350,8 @@ class Portal:
expect_msg=Return, expect_msg=Return,
) )
# TODO: factor this out into a `.highlevel` API-wrapper that uses
# a single `.open_context()` call underneath.
async def run( async def run(
self, self,
func: str, func: str,
@ -370,6 +406,8 @@ class Portal:
expect_msg=Return, expect_msg=Return,
) )
# TODO: factor this out into a `.highlevel` API-wrapper that uses
# a single `.open_context()` call underneath.
@acm @acm
async def open_stream_from( async def open_stream_from(
self, self,

View File

@ -21,6 +21,7 @@ Root actor runtime ignition(s).
from contextlib import asynccontextmanager as acm from contextlib import asynccontextmanager as acm
from functools import partial from functools import partial
import importlib import importlib
import inspect
import logging import logging
import os import os
import signal import signal
@ -79,7 +80,7 @@ async def open_root_actor(
# enables the multi-process debugger support # enables the multi-process debugger support
debug_mode: bool = False, debug_mode: bool = False,
maybe_enable_greenback: bool = False, # `.pause_from_sync()/breakpoint()` support maybe_enable_greenback: bool = True, # `.pause_from_sync()/breakpoint()` support
enable_stack_on_sig: bool = False, enable_stack_on_sig: bool = False,
# internal logging # internal logging
@ -115,10 +116,16 @@ async def open_root_actor(
if ( if (
debug_mode debug_mode
and maybe_enable_greenback and maybe_enable_greenback
and await _debug.maybe_init_greenback( and (
raise_not_found=False, maybe_mod := await _debug.maybe_init_greenback(
raise_not_found=False,
)
) )
): ):
logger.info(
f'Found `greenback` installed @ {maybe_mod}\n'
'Enabling `tractor.pause_from_sync()` support!\n'
)
os.environ['PYTHONBREAKPOINT'] = ( os.environ['PYTHONBREAKPOINT'] = (
'tractor.devx._debug._sync_pause_from_builtin' 'tractor.devx._debug._sync_pause_from_builtin'
) )
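With `greenback` importable this means a plain `breakpoint()` in sync code (inside an actor opened with `debug_mode=True`) routes through the sync-pause shim via the `PYTHONBREAKPOINT` hook set above; a rough sketch:

import tractor
import trio

def sync_helper() -> None:
    # no `await` possible here, but with greenback installed this
    # still acquires the root actor's debug/TTY lock via the
    # `PYTHONBREAKPOINT` hook set above.
    breakpoint()

async def main() -> None:
    async with tractor.open_root_actor(
        debug_mode=True,
        # `maybe_enable_greenback` now defaults to `True`
    ):
        sync_helper()

if __name__ == '__main__':
    trio.run(main)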
@ -226,14 +233,8 @@ async def open_root_actor(
and and
enable_stack_on_sig enable_stack_on_sig
): ):
try: from .devx._stackscope import enable_stack_on_sig
logger.info('Enabling `stackscope` traces on SIGUSR1') enable_stack_on_sig()
from .devx import enable_stack_on_sig
enable_stack_on_sig()
except ImportError:
logger.warning(
'`stackscope` not installed for use in debug mode!'
)
# closed into below ping task-func # closed into below ping task-func
ponged_addrs: list[tuple[str, int]] = [] ponged_addrs: list[tuple[str, int]] = []
@ -264,7 +265,9 @@ async def open_root_actor(
except OSError: except OSError:
# TODO: make this a "discovery" log level? # TODO: make this a "discovery" log level?
logger.warning(f'No actor registry found @ {addr}') logger.info(
f'No actor registry found @ {addr}\n'
)
async with trio.open_nursery() as tn: async with trio.open_nursery() as tn:
for addr in registry_addrs: for addr in registry_addrs:
@ -278,7 +281,6 @@ async def open_root_actor(
# Create a new local root-actor instance which IS NOT THE # Create a new local root-actor instance which IS NOT THE
# REGISTRAR # REGISTRAR
if ponged_addrs: if ponged_addrs:
if ensure_registry: if ensure_registry:
raise RuntimeError( raise RuntimeError(
f'Failed to open `{name}`@{ponged_addrs}: ' f'Failed to open `{name}`@{ponged_addrs}: '
@ -365,23 +367,25 @@ async def open_root_actor(
) )
try: try:
yield actor yield actor
except ( except (
Exception, Exception,
BaseExceptionGroup, BaseExceptionGroup,
) as err: ) as err:
# XXX NOTE XXX see equiv note inside
import inspect # `._runtime.Actor._stream_handler()` where in the
# non-root or root-that-opened-this-manually case we
# wait for the local actor-nursery to exit before
# exiting the transport channel handler.
entered: bool = await _debug._maybe_enter_pm( entered: bool = await _debug._maybe_enter_pm(
err, err,
api_frame=inspect.currentframe(), api_frame=inspect.currentframe(),
) )
if ( if (
not entered not entered
and not is_multi_cancelled(err) and
not is_multi_cancelled(err)
): ):
logger.exception('Root actor crashed:\n') logger.exception('Root actor crashed\n')
# ALWAYS re-raise any error bubbled up from the # ALWAYS re-raise any error bubbled up from the
# runtime! # runtime!

View File

@ -68,7 +68,7 @@ from .msg import (
MsgCodec, MsgCodec,
PayloadT, PayloadT,
NamespacePath, NamespacePath,
pretty_struct, # pretty_struct,
_ops as msgops, _ops as msgops,
) )
from tractor.msg.types import ( from tractor.msg.types import (
@ -89,6 +89,16 @@ if TYPE_CHECKING:
log = get_logger('tractor') log = get_logger('tractor')
# ?TODO? move to a `tractor.lowlevel._rpc` with the below
# func-type-cases implemented "on top of" `@context` defs:
# -[ ] std async func helper decorated with `@rpc_func`?
# -[ ] `Portal.open_stream_from()` with async-gens?
# |_ possibly a duplex form of this with a
# `sent_from_peer = yield send_to_peer` form, which would require
# syncing the send/recv side with possibly `.receive_nowait()`
# on each `yield`?
# -[ ] some kinda `@rpc_acm` maybe that does a fixture style with
# user only defining a single-`yield` generator-func?
async def _invoke_non_context( async def _invoke_non_context(
actor: Actor, actor: Actor,
cancel_scope: CancelScope, cancel_scope: CancelScope,
@ -108,8 +118,9 @@ async def _invoke_non_context(
] = trio.TASK_STATUS_IGNORED, ] = trio.TASK_STATUS_IGNORED,
): ):
__tracebackhide__: bool = True __tracebackhide__: bool = True
cs: CancelScope|None = None # ref when activated
# TODO: can we unify this with the `context=True` impl below? # ?TODO? can we unify this with the `context=True` impl below?
if inspect.isasyncgen(coro): if inspect.isasyncgen(coro):
await chan.send( await chan.send(
StartAck( StartAck(
@ -160,10 +171,6 @@ async def _invoke_non_context(
functype='asyncgen', functype='asyncgen',
) )
) )
# XXX: the async-func may spawn further tasks which push
# back values like an async-generator would but must
# manualy construct the response dict-packet-responses as
# above
with cancel_scope as cs: with cancel_scope as cs:
ctx._scope = cs ctx._scope = cs
task_status.started(ctx) task_status.started(ctx)
@ -175,15 +182,13 @@ async def _invoke_non_context(
await chan.send( await chan.send(
Stop(cid=cid) Stop(cid=cid)
) )
# simplest function/method request-response pattern
# XXX: in the most minimally used case, just a scheduled internal runtime
# call to `Actor._cancel_task()` from the ctx-peer task since we
# don't (yet) have a dedicated IPC msg.
# ------ - ------
else: else:
# regular async function/method
# XXX: possibly just a scheduled `Actor._cancel_task()`
# from a remote request to cancel some `Context`.
# ------ - ------
# TODO: ideally we unify this with the above `context=True`
# block such that for any remote invocation ftype, we
# always invoke the far end RPC task scheduling the same
# way: using the linked IPC context machinery.
failed_resp: bool = False failed_resp: bool = False
try: try:
ack = StartAck( ack = StartAck(
@ -354,8 +359,15 @@ async def _errors_relayed_via_ipc(
# channel. # channel.
task_status.started(err) task_status.started(err)
# always reraise KBIs so they propagate at the sys-process level. # always propagate KBIs at the sys-process level.
if isinstance(err, KeyboardInterrupt): if (
isinstance(err, KeyboardInterrupt)
# ?TODO? except when running in asyncio mode?
# |_ wut if you want to open a `@context` FROM an
# infected_aio task?
# and not actor.is_infected_aio()
):
raise raise
# RPC task bookkeeping. # RPC task bookkeeping.
@ -458,7 +470,6 @@ async def _invoke(
# tb: TracebackType = None # tb: TracebackType = None
cancel_scope = CancelScope() cancel_scope = CancelScope()
cs: CancelScope|None = None # ref when activated
ctx = actor.get_context( ctx = actor.get_context(
chan=chan, chan=chan,
cid=cid, cid=cid,
@ -607,6 +618,8 @@ async def _invoke(
# `@context` marked RPC function. # `@context` marked RPC function.
# - `._portal` is never set. # - `._portal` is never set.
try: try:
tn: trio.Nursery
rpc_ctx_cs: CancelScope
async with ( async with (
trio.open_nursery() as tn, trio.open_nursery() as tn,
msgops.maybe_limit_plds( msgops.maybe_limit_plds(
@ -616,7 +629,7 @@ async def _invoke(
), ),
): ):
ctx._scope_nursery = tn ctx._scope_nursery = tn
ctx._scope = tn.cancel_scope rpc_ctx_cs = ctx._scope = tn.cancel_scope
task_status.started(ctx) task_status.started(ctx)
# TODO: better `trionics` tooling: # TODO: better `trionics` tooling:
@ -642,7 +655,7 @@ async def _invoke(
# itself calls `ctx._maybe_cancel_and_set_remote_error()` # itself calls `ctx._maybe_cancel_and_set_remote_error()`
# which cancels the scope presuming the input error # which cancels the scope presuming the input error
# is not a `.cancel_acked` pleaser. # is not a `.cancel_acked` pleaser.
if ctx._scope.cancelled_caught: if rpc_ctx_cs.cancelled_caught:
our_uid: tuple = actor.uid our_uid: tuple = actor.uid
# first check for and raise any remote error # first check for and raise any remote error
@ -652,9 +665,7 @@ async def _invoke(
if re := ctx._remote_error: if re := ctx._remote_error:
ctx._maybe_raise_remote_err(re) ctx._maybe_raise_remote_err(re)
cs: CancelScope = ctx._scope if rpc_ctx_cs.cancel_called:
if cs.cancel_called:
canceller: tuple = ctx.canceller canceller: tuple = ctx.canceller
explain: str = f'{ctx.side!r}-side task was cancelled by ' explain: str = f'{ctx.side!r}-side task was cancelled by '
@ -680,9 +691,15 @@ async def _invoke(
elif canceller == ctx.chan.uid: elif canceller == ctx.chan.uid:
explain += f'its {ctx.peer_side!r}-side peer' explain += f'its {ctx.peer_side!r}-side peer'
else: elif canceller == our_uid:
explain += 'itself'
elif canceller:
explain += 'a remote peer' explain += 'a remote peer'
else:
explain += 'an unknown cause?'
explain += ( explain += (
add_div(message=explain) add_div(message=explain)
+ +
@ -911,7 +928,10 @@ async def process_messages(
f'IPC msg from peer\n' f'IPC msg from peer\n'
f'<= {chan.uid}\n\n' f'<= {chan.uid}\n\n'
# TODO: avoid fmting depending on loglevel for perf? # TODO: use of the pprinting of structs is
# FRAGILE and should prolly not be
#
# avoid fmting depending on loglevel for perf?
# -[ ] specifically `pretty_struct.pformat()` sub-call..? # -[ ] specifically `pretty_struct.pformat()` sub-call..?
# - how to only log-level-aware actually call this? # - how to only log-level-aware actually call this?
# -[ ] use `.msg.pretty_struct` here now instead! # -[ ] use `.msg.pretty_struct` here now instead!
@ -1177,7 +1197,7 @@ async def process_messages(
parent_chan=chan, parent_chan=chan,
) )
except TransportClosed: except TransportClosed as tc:
# channels "breaking" (for TCP streams by EOF or 104 # channels "breaking" (for TCP streams by EOF or 104
# connection-reset) is ok since we don't have a teardown # connection-reset) is ok since we don't have a teardown
# handshake for them (yet) and instead we simply bail out of # handshake for them (yet) and instead we simply bail out of
@ -1185,12 +1205,20 @@ async def process_messages(
# up.. # up..
# #
# TODO: maybe add a teardown handshake? and, # TODO: maybe add a teardown handshake? and,
# -[ ] don't show this msg if it's an ephemeral discovery ep call? # -[x] don't show this msg if it's an ephemeral discovery ep call?
# |_ see the below `.report_n_maybe_raise()` impl as well as
# tc-exc input details in `MsgpackTCPStream._iter_pkts()`
# for different read-failure cases.
# -[ ] figure out how this will break with other transports? # -[ ] figure out how this will break with other transports?
log.runtime( tc.report_n_maybe_raise(
f'IPC channel closed abruptly\n' message=(
f'<=x peer: {chan.uid}\n' f'peer IPC channel closed abruptly?\n\n'
f' |_{chan.raddr}\n' f'<=x {chan}\n'
f' |_{chan.raddr}\n\n'
)
+
tc.message
) )
# transport **WAS** disconnected # transport **WAS** disconnected
@ -1238,7 +1266,7 @@ async def process_messages(
'Exiting IPC msg loop with final msg\n\n' 'Exiting IPC msg loop with final msg\n\n'
f'<= peer: {chan.uid}\n' f'<= peer: {chan.uid}\n'
f' |_{chan}\n\n' f' |_{chan}\n\n'
f'{pretty_struct.pformat(msg)}' # f'{pretty_struct.pformat(msg)}'
) )
log.runtime(message) log.runtime(message)

View File

@ -59,6 +59,7 @@ import os
import warnings import warnings
import trio import trio
from trio._core import _run as trio_runtime
from trio import ( from trio import (
CancelScope, CancelScope,
Nursery, Nursery,
@ -66,10 +67,11 @@ from trio import (
) )
from tractor.msg import ( from tractor.msg import (
pretty_struct,
NamespacePath,
types as msgtypes,
MsgType, MsgType,
NamespacePath,
Stop,
pretty_struct,
types as msgtypes,
) )
from ._ipc import Channel from ._ipc import Channel
from ._context import ( from ._context import (
@ -79,6 +81,7 @@ from ._context import (
from .log import get_logger from .log import get_logger
from ._exceptions import ( from ._exceptions import (
ContextCancelled, ContextCancelled,
InternalError,
ModuleNotExposed, ModuleNotExposed,
MsgTypeError, MsgTypeError,
unpack_error, unpack_error,
@ -101,6 +104,7 @@ from ._rpc import (
if TYPE_CHECKING: if TYPE_CHECKING:
from ._supervise import ActorNursery from ._supervise import ActorNursery
from trio._channel import MemoryChannelState
log = get_logger('tractor') log = get_logger('tractor')
@ -114,25 +118,26 @@ class Actor:
''' '''
The fundamental "runtime" concurrency primitive. The fundamental "runtime" concurrency primitive.
An *actor* is the combination of a regular Python process executing An "actor" is the combination of a regular Python process
a ``trio`` task tree, communicating with other actors through executing a `trio.run()` task tree, communicating with other
"memory boundary portals" - which provide a native async API around "actors" through "memory boundary portals": `Portal`, which
IPC transport "channels" which themselves encapsulate various provide a high-level async API around IPC "channels" (`Channel`)
(swappable) network protocols. which themselves encapsulate various (swappable) network
transport protocols for sending msgs between said memory domains
(processes, hosts, non-GIL threads).
Each "actor" is `trio.run()` scheduled "runtime" composed of many
Each "actor" is ``trio.run()`` scheduled "runtime" composed of concurrent tasks in a single thread. The "runtime" tasks conduct
many concurrent tasks in a single thread. The "runtime" tasks a slew of low(er) level functions to make it possible for message
conduct a slew of low(er) level functions to make it possible passing between actors as well as the ability to create new
for message passing between actors as well as the ability to actors (aka new "runtimes" in new processes which are supervised
create new actors (aka new "runtimes" in new processes which via an "actor-nursery" construct). Each task which sends messages
are supervised via a nursery construct). Each task which sends to a task in a "peer" actor (not necessarily a parent-child,
messages to a task in a "peer" (not necessarily a parent-child,
depth hierarchy) is able to do so via an "address", which maps depth hierarchy) is able to do so via an "address", which maps
IPC connections across memory boundaries, and a task request id IPC connections across memory boundaries, and a task request id
which allows for per-actor tasks to send and receive messages which allows for per-actor tasks to send and receive messages to
to specific peer-actor tasks with which there is an ongoing specific peer-actor tasks with which there is an ongoing RPC/IPC
RPC/IPC dialog. dialog.
''' '''
# ugh, we need to get rid of this and replace with a "registry" sys # ugh, we need to get rid of this and replace with a "registry" sys
@ -229,17 +234,20 @@ class Actor:
# by the user (currently called the "arbiter") # by the user (currently called the "arbiter")
self._spawn_method: str = spawn_method self._spawn_method: str = spawn_method
self._peers: defaultdict = defaultdict(list) self._peers: defaultdict[
str, # uaid
list[Channel], # IPC conns from peer
] = defaultdict(list)
self._peer_connected: dict[tuple[str, str], trio.Event] = {} self._peer_connected: dict[tuple[str, str], trio.Event] = {}
self._no_more_peers = trio.Event() self._no_more_peers = trio.Event()
self._no_more_peers.set() self._no_more_peers.set()
# RPC state
self._ongoing_rpc_tasks = trio.Event() self._ongoing_rpc_tasks = trio.Event()
self._ongoing_rpc_tasks.set() self._ongoing_rpc_tasks.set()
# (chan, cid) -> (cancel_scope, func)
self._rpc_tasks: dict[ self._rpc_tasks: dict[
tuple[Channel, str], tuple[Channel, str], # (chan, cid)
tuple[Context, Callable, trio.Event] tuple[Context, Callable, trio.Event] # (ctx=>, fn(), done?)
] = {} ] = {}
# map {actor uids -> Context} # map {actor uids -> Context}
@ -316,7 +324,10 @@ class Actor:
event = self._peer_connected.setdefault(uid, trio.Event()) event = self._peer_connected.setdefault(uid, trio.Event())
await event.wait() await event.wait()
log.debug(f'{uid!r} successfully connected back to us') log.debug(f'{uid!r} successfully connected back to us')
return event, self._peers[uid][-1] return (
event,
self._peers[uid][-1],
)
def load_modules( def load_modules(
self, self,
@ -407,26 +418,11 @@ class Actor:
''' '''
self._no_more_peers = trio.Event() # unset by making new self._no_more_peers = trio.Event() # unset by making new
chan = Channel.from_stream(stream) chan = Channel.from_stream(stream)
their_uid: tuple[str, str]|None = chan.uid con_status: str = (
'New inbound IPC connection <=\n'
con_status: str = ''
# TODO: remove this branch since can never happen?
# NOTE: `.uid` is only set after first contact
if their_uid:
con_status = (
'IPC Re-connection from already known peer?\n'
)
else:
con_status = (
'New inbound IPC connection <=\n'
)
con_status += (
f'|_{chan}\n' f'|_{chan}\n'
# f' |_@{chan.raddr}\n\n'
# ^-TODO-^ remove since alfready in chan.__repr__()?
) )
# send/receive initial handshake response # send/receive initial handshake response
try: try:
uid: tuple|None = await self._do_handshake(chan) uid: tuple|None = await self._do_handshake(chan)
@ -438,10 +434,10 @@ class Actor:
TransportClosed, TransportClosed,
): ):
# XXX: This may propagate up from ``Channel._aiter_recv()`` # XXX: This may propagate up from `Channel._aiter_recv()`
# and ``MsgpackStream._inter_packets()`` on a read from the # and `MsgpackStream._inter_packets()` on a read from the
# stream particularly when the runtime is first starting up # stream particularly when the runtime is first starting up
# inside ``open_root_actor()`` where there is a check for # inside `open_root_actor()` where there is a check for
# a bound listener on the "arbiter" addr. the reset will be # a bound listener on the "arbiter" addr. the reset will be
# because the handshake was never meant to take place. # because the handshake was never meant to take place.
log.runtime( log.runtime(
@ -451,9 +447,22 @@ class Actor:
) )
return return
familiar: str = 'new-peer'
if _pre_chan := self._peers.get(uid):
familiar: str = 'pre-existing-peer'
uid_short: str = f'{uid[0]}[{uid[1][-6:]}]'
con_status += ( con_status += (
f' -> Handshake with actor `{uid[0]}[{uid[1][-6:]}]` complete\n' f' -> Handshake with {familiar} `{uid_short}` complete\n'
) )
if _pre_chan:
log.warning(
# con_status += (
# ^TODO^ swap once we minimize conn duplication
f' -> Wait, we already have IPC with `{uid_short}`??\n'
f' |_{_pre_chan}\n'
)
# IPC connection tracking for both peers and new children: # IPC connection tracking for both peers and new children:
# - if this is a new channel to a locally spawned # - if this is a new channel to a locally spawned
# sub-actor there will be a spawn wait event registered # sub-actor there will be a spawn wait event registered
@ -506,8 +515,9 @@ class Actor:
) )
except trio.Cancelled: except trio.Cancelled:
log.cancel( log.cancel(
'IPC transport msg loop was cancelled for \n' 'IPC transport msg loop was cancelled\n'
f'|_{chan}\n' f'c)>\n'
f' |_{chan}\n'
) )
raise raise
@ -544,8 +554,9 @@ class Actor:
): ):
log.cancel( log.cancel(
'Waiting on cancel request to peer\n' 'Waiting on cancel request to peer..\n'
f'`Portal.cancel_actor()` => {chan.uid}\n' f'c)=>\n'
f' |_{chan.uid}\n'
) )
# XXX: this is a soft wait on the channel (and its # XXX: this is a soft wait on the channel (and its
@ -642,12 +653,18 @@ class Actor:
# and # and
an_exit_cs.cancelled_caught an_exit_cs.cancelled_caught
): ):
log.warning( report: str = (
'Timed out waiting on local actor-nursery to exit?\n' 'Timed out waiting on local actor-nursery to exit?\n'
f'{local_nursery}\n' f'c)>\n'
f' |_{pformat(local_nursery._children)}\n' f' |_{local_nursery}\n'
) )
# await _debug.pause() if children := local_nursery._children:
# indent from above local-nurse repr
report += (
f' |_{pformat(children)}\n'
)
log.warning(report)
if disconnected: if disconnected:
# if the transport died and this actor is still # if the transport died and this actor is still
@ -819,14 +836,17 @@ class Actor:
# side, # side,
)] )]
except KeyError: except KeyError:
log.warning( report: str = (
'Ignoring invalid IPC ctx msg!\n\n' 'Ignoring invalid IPC ctx msg!\n\n'
f'<= sender: {uid}\n\n' f'<=? {uid}\n\n'
# XXX don't need right since it's always in msg? f' |_{pretty_struct.pformat(msg)}\n'
# f'=> cid: {cid}\n\n'
f'{pretty_struct.pformat(msg)}\n'
) )
match msg:
case Stop():
log.runtime(report)
case _:
log.warning(report)
return return
# if isinstance(msg, MsgTypeError): # if isinstance(msg, MsgTypeError):
@ -880,11 +900,15 @@ class Actor:
f'peer: {chan.uid}\n' f'peer: {chan.uid}\n'
f'cid:{cid}\n' f'cid:{cid}\n'
) )
ctx._allow_overruns = allow_overruns ctx._allow_overruns: bool = allow_overruns
# adjust buffer size if specified # adjust buffer size if specified
state = ctx._send_chan._state # type: ignore state: MemoryChannelState = ctx._send_chan._state # type: ignore
if msg_buffer_size and state.max_buffer_size != msg_buffer_size: if (
msg_buffer_size
and
state.max_buffer_size != msg_buffer_size
):
state.max_buffer_size = msg_buffer_size state.max_buffer_size = msg_buffer_size
except KeyError: except KeyError:
@ -1046,6 +1070,10 @@ class Actor:
# TODO: another `Struct` for rtvs.. # TODO: another `Struct` for rtvs..
rvs: dict[str, Any] = spawnspec._runtime_vars rvs: dict[str, Any] = spawnspec._runtime_vars
if rvs['_debug_mode']: if rvs['_debug_mode']:
from .devx import (
enable_stack_on_sig,
maybe_init_greenback,
)
try: try:
# TODO: maybe return some status msgs upward # TODO: maybe return some status msgs upward
# to that we can emit them in `con_status` # to that we can emit them in `con_status`
@ -1053,14 +1081,57 @@ class Actor:
log.devx( log.devx(
'Enabling `stackscope` traces on SIGUSR1' 'Enabling `stackscope` traces on SIGUSR1'
) )
from .devx import enable_stack_on_sig
enable_stack_on_sig() enable_stack_on_sig()
except ImportError: except ImportError:
log.warning( log.warning(
'`stackscope` not installed for use in debug mode!' '`stackscope` not installed for use in debug mode!'
) )
rvs['_is_root'] = False if rvs.get('use_greenback', False):
maybe_mod: ModuleType|None = await maybe_init_greenback()
if maybe_mod:
log.devx(
'Activated `greenback` '
'for `tractor.pause_from_sync()` support!'
)
else:
rvs['use_greenback'] = False
log.warning(
'`greenback` not installed for use in debug mode!\n'
'`tractor.pause_from_sync()` not available!'
)
# XXX ensure the "infected `asyncio` mode" setting
# passed down from our spawning parent is consistent
# with `trio`-runtime initialization:
# - during sub-proc boot, the entrypoint func
# (`._entry.<spawn_backend>_main()`) should set
# `._infected_aio = True` before calling
# `run_as_asyncio_guest()`,
# - the value of `infect_asyncio: bool = True` as
# passed to `ActorNursery.start_actor()` must be
# the same as `_runtime_vars['_is_infected_aio']`
if (
(aio_rtv := rvs['_is_infected_aio'])
!=
(aio_attr := self._infected_aio)
):
raise InternalError(
'Parent sent runtime-vars that mismatch for the '
'"infected `asyncio` mode" settings ?!?\n\n'
f'rvs["_is_infected_aio"] = {aio_rtv}\n'
f'self._infected_aio = {aio_attr}\n'
)
if aio_rtv:
assert trio_runtime.GLOBAL_RUN_CONTEXT.runner.is_guest
# ^TODO^ possibly add a `sniffio` or
# `trio` pub-API for `is_guest_mode()`?
rvs['_is_root'] = False # obvi XD
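Editor's note: the TODO above asks for a public `is_guest_mode()` predicate; a minimal sketch (name and placement hypothetical) built on the same private `trio` run-context attribute the assert uses:

from trio._core import _run as trio_runtime

def is_guest_mode() -> bool:
    # `True` when this thread's `trio` run was started via
    # `trio.lowlevel.start_guest_run()`, i.e. "infected asyncio" mode.
    try:
        return trio_runtime.GLOBAL_RUN_CONTEXT.runner.is_guest
    except AttributeError:
        return False  # no `trio` runner active in this thread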
# update process-wide globals
_state._runtime_vars.update(rvs) _state._runtime_vars.update(rvs)
# XXX: ``msgspec`` doesn't support serializing tuples # XXX: ``msgspec`` doesn't support serializing tuples
@ -1211,8 +1282,9 @@ class Actor:
# TODO: just use the new `Context.repr_rpc: str` (and # TODO: just use the new `Context.repr_rpc: str` (and
# other) repr fields instead of doing this all manual.. # other) repr fields instead of doing this all manual..
msg: str = ( msg: str = (
f'Runtime cancel request from {requester_type}:\n\n' f'Actor-runtime cancel request from {requester_type}\n\n'
f'<= .cancel(): {requesting_uid}\n\n' f'<=c) {requesting_uid}\n'
f' |_{self}\n'
) )
# TODO: what happens here when we self-cancel tho? # TODO: what happens here when we self-cancel tho?
@ -1320,10 +1392,11 @@ class Actor:
return True return True
log.cancel( log.cancel(
'Cancel request for RPC task\n\n' 'Rxed cancel request for RPC task\n'
f'<= Actor._cancel_task(): {requesting_uid}\n\n' f'<=c) {requesting_uid}\n'
f'=> {ctx._task}\n' f' |_{ctx._task}\n'
f' |_ >> {ctx.repr_rpc}\n' f' >> {ctx.repr_rpc}\n'
# f'=> {ctx._task}\n'
# f' >> Actor._cancel_task() => {ctx._task}\n' # f' >> Actor._cancel_task() => {ctx._task}\n'
# f' |_ {ctx._task}\n\n' # f' |_ {ctx._task}\n\n'
@ -1439,17 +1512,17 @@ class Actor:
"IPC channel's " "IPC channel's "
) )
rent_chan_repr: str = ( rent_chan_repr: str = (
f' |_{parent_chan}\n\n' f' |_{parent_chan}\n\n'
if parent_chan if parent_chan
else '' else ''
) )
log.cancel( log.cancel(
f'Cancelling {descr} RPC tasks\n\n' f'Cancelling {descr} RPC tasks\n\n'
f'<= canceller: {req_uid}\n' f'<=c) {req_uid} [canceller]\n'
f'{rent_chan_repr}' f'{rent_chan_repr}'
f'=> cancellee: {self.uid}\n' f'c)=> {self.uid} [cancellee]\n'
f' |_{self}.cancel_rpc_tasks()\n' f' |_{self} [with {len(tasks)} tasks]\n'
f' |_tasks: {len(tasks)}\n' # f' |_tasks: {len(tasks)}\n'
# f'{tasks_str}' # f'{tasks_str}'
) )
for ( for (
@ -1518,7 +1591,7 @@ class Actor:
def accept_addr(self) -> tuple[str, int]: def accept_addr(self) -> tuple[str, int]:
''' '''
Primary address to which the IPC transport server is Primary address to which the IPC transport server is
bound. bound and listening for new connections.
''' '''
# throws OSError on failure # throws OSError on failure
@ -1535,6 +1608,7 @@ class Actor:
def get_chans( def get_chans(
self, self,
uid: tuple[str, str], uid: tuple[str, str],
) -> list[Channel]: ) -> list[Channel]:
''' '''
Return all IPC channels to the actor with provided `uid`. Return all IPC channels to the actor with provided `uid`.
@ -1717,8 +1791,8 @@ async def async_main(
# Register with the arbiter if we're told its addr # Register with the arbiter if we're told its addr
log.runtime( log.runtime(
f'Registering `{actor.name}` ->\n' f'Registering `{actor.name}` => {pformat(accept_addrs)}\n'
f'{pformat(accept_addrs)}' # ^-TODO-^ we should instead show the maddr here^^
) )
# TODO: ideally we don't fan out to all registrars # TODO: ideally we don't fan out to all registrars
@ -1776,57 +1850,90 @@ async def async_main(
# Blocks here as expected until the root nursery is # Blocks here as expected until the root nursery is
# killed (i.e. this actor is cancelled or signalled by the parent) # killed (i.e. this actor is cancelled or signalled by the parent)
except Exception as err: except Exception as internal_err:
log.runtime("Closing all actor lifetime contexts")
actor.lifetime_stack.close()
if not is_registered: if not is_registered:
err_report: str = (
'\n'
"Actor runtime (internally) failed BEFORE contacting the registry?\n"
f'registrars -> {actor.reg_addrs} ?!?!\n\n'
'^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! ^^^\n\n'
'\t>> CALMLY CANCEL YOUR CHILDREN AND CALL YOUR PARENTS <<\n\n'
'\tIf this is a sub-actor hopefully its parent will keep running '
'and cancel/reap this sub-process..\n'
'(well, presuming this error was propagated upward)\n\n'
'\t---------------------------------------------\n'
'\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT @ ' # oneline
'https://github.com/goodboy/tractor/issues\n'
'\t---------------------------------------------\n'
)
# TODO: I guess we could try to connect back # TODO: I guess we could try to connect back
# to the parent through a channel and engage a debugger # to the parent through a channel and engage a debugger
# once we have that all working with std streams locking? # once we have that all working with std streams locking?
log.exception( log.exception(err_report)
f"Actor errored and failed to register with arbiter "
f"@ {actor.reg_addrs[0]}?")
log.error(
"\n\n\t^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! ^^^\n\n"
"\t>> CALMLY CALL THE AUTHORITIES AND HIDE YOUR CHILDREN <<\n\n"
"\tIf this is a sub-actor hopefully its parent will keep running "
"correctly presuming this error was safely ignored..\n\n"
"\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT: "
"https://github.com/goodboy/tractor/issues\n"
)
if actor._parent_chan: if actor._parent_chan:
await try_ship_error_to_remote( await try_ship_error_to_remote(
actor._parent_chan, actor._parent_chan,
err, internal_err,
) )
# always! # always!
match err: match internal_err:
case ContextCancelled(): case ContextCancelled():
log.cancel( log.cancel(
f'Actor: {actor.uid} was task-context-cancelled with,\n' f'Actor: {actor.uid} was task-context-cancelled with,\n'
f'str(err)' f'{internal_err}'
) )
case _: case _:
log.exception("Actor errored:") log.exception(
raise 'Main actor-runtime task errored\n'
f'<x)\n'
f' |_{actor}\n'
)
raise internal_err
finally: finally:
log.runtime( teardown_report: str = (
'Runtime nursery complete' 'Main actor-runtime task completed\n'
'-> Closing all actor lifetime contexts..'
) )
# tear down all lifetime contexts if not in guest mode
# XXX: should this just be in the entrypoint?
actor.lifetime_stack.close()
# TODO: we can't actually do this bc the debugger # ?TODO? should this be in `._entry`/`._root` mods instead?
# uses the _service_n to spawn the lock task, BUT, #
# in theory if we had the root nursery surround this finally # teardown any actor-lifetime-bound contexts
# block it might be actually possible to debug THIS ls: ExitStack = actor.lifetime_stack
# machinery in the same way as user task code? # only report if there are any registered
cbs: list[Callable] = [
repr(tup[1].__wrapped__)
for tup in ls._exit_callbacks
]
if cbs:
cbs_str: str = '\n'.join(cbs)
teardown_report += (
'-> Closing actor-lifetime-bound callbacks\n\n'
f'}}>\n'
f' |_{ls}\n'
f' |_{cbs_str}\n'
)
# XXX NOTE XXX this will cause an error which
# prevents any `infected_aio` actor from continuing
# and any callbacks in the `ls` here WILL NOT be
# called!!
# await _debug.pause(shield=True)
ls.close()
# XXX TODO but hard XXX
# we can't actually do this bc the debugger uses the
# _service_n to spawn the lock task, BUT, in theory if we had
# the root nursery surround this finally block it might be
# actually possible to debug THIS machinery in the same way
# as user task code?
#
# if actor.name == 'brokerd.ib': # if actor.name == 'brokerd.ib':
# with CancelScope(shield=True): # with CancelScope(shield=True):
# await _debug.breakpoint() # await _debug.breakpoint()
@ -1856,9 +1963,9 @@ async def async_main(
failed = True failed = True
if failed: if failed:
log.warning( teardown_report += (
f'Failed to unregister {actor.name} from ' f'-> Failed to unregister {actor.name} from '
f'registar @ {addr}' f'registrar @ {addr}\n'
) )
# Ensure all peers (actors connected to us as clients) are finished # Ensure all peers (actors connected to us as clients) are finished
@ -1866,13 +1973,23 @@ async def async_main(
if any( if any(
chan.connected() for chan in chain(*actor._peers.values()) chan.connected() for chan in chain(*actor._peers.values())
): ):
log.runtime( teardown_report += (
f"Waiting for remaining peers {actor._peers} to clear") f'-> Waiting for remaining peers {actor._peers} to clear..\n'
)
log.runtime(teardown_report)
with CancelScope(shield=True): with CancelScope(shield=True):
await actor._no_more_peers.wait() await actor._no_more_peers.wait()
log.runtime("All peer channels are complete")
log.runtime("Runtime completed") teardown_report += (
'-> All peer channels are complete\n'
)
teardown_report += (
'Actor runtime exiting\n'
f'>)\n'
f'|_{actor}\n'
)
log.info(teardown_report)
# TODO: rename to `Registry` and move to `._discovery`! # TODO: rename to `Registry` and move to `._discovery`!

View File

@ -149,7 +149,7 @@ async def exhaust_portal(
# XXX: streams should never be reaped here since they should # XXX: streams should never be reaped here since they should
# always be established and shutdown using a context manager api # always be established and shutdown using a context manager api
final: Any = await portal.result() final: Any = await portal.wait_for_result()
except ( except (
Exception, Exception,
@ -223,8 +223,8 @@ async def cancel_on_completion(
async def hard_kill( async def hard_kill(
proc: trio.Process, proc: trio.Process,
terminate_after: int = 1.6,
terminate_after: int = 1.6,
# NOTE: for mucking with `.pause()`-ing inside the runtime # NOTE: for mucking with `.pause()`-ing inside the runtime
# whilst also hacking on it XD # whilst also hacking on it XD
# terminate_after: int = 99999, # terminate_after: int = 99999,
@ -246,8 +246,9 @@ async def hard_kill(
''' '''
log.cancel( log.cancel(
'Terminating sub-proc:\n' 'Terminating sub-proc\n'
f'|_{proc}\n' f'>x)\n'
f' |_{proc}\n'
) )
# NOTE: this timeout used to do nothing since we were shielding # NOTE: this timeout used to do nothing since we were shielding
# the ``.wait()`` inside ``new_proc()`` which will pretty much # the ``.wait()`` inside ``new_proc()`` which will pretty much
@ -293,8 +294,8 @@ async def hard_kill(
log.critical( log.critical(
# 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n' # 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n'
'#T-800 deployed to collect zombie B0\n' '#T-800 deployed to collect zombie B0\n'
f'|\n' f'>x)\n'
f'|_{proc}\n' f' |_{proc}\n'
) )
proc.kill() proc.kill()
@ -322,8 +323,9 @@ async def soft_kill(
uid: tuple[str, str] = portal.channel.uid uid: tuple[str, str] = portal.channel.uid
try: try:
log.cancel( log.cancel(
'Soft killing sub-actor via `Portal.cancel_actor()`\n' 'Soft killing sub-actor via portal request\n'
f'|_{proc}\n' f'c)> {portal.chan.uid}\n'
f' |_{proc}\n'
) )
# wait on sub-proc to signal termination # wait on sub-proc to signal termination
await wait_func(proc) await wait_func(proc)
@ -552,8 +554,9 @@ async def trio_proc(
# cancel result waiter that may have been spawned in # cancel result waiter that may have been spawned in
# tandem if not done already # tandem if not done already
log.cancel( log.cancel(
'Cancelling existing result waiter task for ' 'Cancelling portal result reaper task\n'
f'{subactor.uid}' f'>c)\n'
f' |_{subactor.uid}\n'
) )
nursery.cancel_scope.cancel() nursery.cancel_scope.cancel()
@ -562,7 +565,11 @@ async def trio_proc(
# allowed! Do this **after** cancellation/teardown to avoid # allowed! Do this **after** cancellation/teardown to avoid
# killing the process too early. # killing the process too early.
if proc: if proc:
log.cancel(f'Hard reap sequence starting for {subactor.uid}') log.cancel(
f'Hard reap sequence starting for subactor\n'
f'>x)\n'
f' |_{subactor}@{subactor.uid}\n'
)
with trio.CancelScope(shield=True): with trio.CancelScope(shield=True):
# don't clobber an ongoing pdb # don't clobber an ongoing pdb

View File

@ -44,7 +44,9 @@ _runtime_vars: dict[str, Any] = {
'_root_mailbox': (None, None), '_root_mailbox': (None, None),
'_registry_addrs': [], '_registry_addrs': [],
# for `breakpoint()` support '_is_infected_aio': False,
# for `tractor.pause_from_sync()` & `breakpoint()` support
'use_greenback': False, 'use_greenback': False,
} }
@ -70,7 +72,8 @@ def current_actor(
''' '''
if ( if (
err_on_no_runtime err_on_no_runtime
and _current_actor is None and
_current_actor is None
): ):
msg: str = 'No local actor has been initialized yet?\n' msg: str = 'No local actor has been initialized yet?\n'
from ._exceptions import NoRuntime from ._exceptions import NoRuntime

View File

@ -36,8 +36,8 @@ import warnings
import trio import trio
from ._exceptions import ( from ._exceptions import (
# _raise_from_no_key_in_msg,
ContextCancelled, ContextCancelled,
RemoteActorError,
) )
from .log import get_logger from .log import get_logger
from .trionics import ( from .trionics import (
@ -101,7 +101,7 @@ class MsgStream(trio.abc.Channel):
@property @property
def ctx(self) -> Context: def ctx(self) -> Context:
''' '''
This stream's IPC `Context` ref. A read-only ref to this stream's inter-actor-task `Context`.
''' '''
return self._ctx return self._ctx
@ -145,9 +145,8 @@ class MsgStream(trio.abc.Channel):
''' '''
__tracebackhide__: bool = hide_tb __tracebackhide__: bool = hide_tb
# NOTE: `trio.ReceiveChannel` implements # NOTE FYI: `trio.ReceiveChannel` implements EOC handling as
# EOC handling as follows (aka uses it # follows (aka uses it to gracefully exit async for loops):
# to gracefully exit async for loops):
# #
# async def __anext__(self) -> ReceiveType: # async def __anext__(self) -> ReceiveType:
# try: # try:
@ -165,48 +164,29 @@ class MsgStream(trio.abc.Channel):
src_err: Exception|None = None # orig tb src_err: Exception|None = None # orig tb
try: try:
ctx: Context = self._ctx ctx: Context = self._ctx
return await ctx._pld_rx.recv_pld(ipc=self) return await ctx._pld_rx.recv_pld(ipc=self)
# XXX: the stream terminates on either of: # XXX: the stream terminates on either of:
# - via `self._rx_chan.receive()` raising after manual closure # - `self._rx_chan.receive()` raising after manual closure
# by the rpc-runtime OR, # by the rpc-runtime,
# - via a received `{'stop': ...}` msg from remote side. # OR
# |_ NOTE: previously this was triggered by calling # - via a `Stop`-msg received from remote peer task.
# ``._rx_chan.aclose()`` on the send side of the channel inside # NOTE
# `Actor._deliver_ctx_payload()`, but now the 'stop' message handling # |_ previously this was triggered by calling
# has been put just above inside `_raise_from_no_key_in_msg()`. # ``._rx_chan.aclose()`` on the send side of the channel
except ( # inside `Actor._deliver_ctx_payload()`, but now the 'stop'
trio.EndOfChannel, # message handling gets delegated to `PldRFx.recv_pld()`
) as eoc: # internals.
src_err = eoc except trio.EndOfChannel as eoc:
# a graceful stream finished signal
self._eoc = eoc self._eoc = eoc
src_err = eoc
# TODO: Locally, we want to close this stream gracefully, by # a `ClosedResourceError` indicates that the internal feeder
# terminating any local consumers tasks deterministically. # memory receive channel was closed likely by the runtime
# Once we have broadcast support, we **don't** want to be # after the associated transport-channel disconnected or
# closing this stream and not flushing a final value to # broke.
# remaining (clone) consumers who may not have been
# scheduled to receive it yet.
# try:
# maybe_err_msg_or_res: dict = self._rx_chan.receive_nowait()
# if maybe_err_msg_or_res:
# log.warning(
# 'Discarding un-processed msg:\n'
# f'{maybe_err_msg_or_res}'
# )
# except trio.WouldBlock:
# # no queued msgs that might be another remote
# # error, so just raise the original EoC
# pass
# raise eoc
# a ``ClosedResourceError`` indicates that the internal
# feeder memory receive channel was closed likely by the
# runtime after the associated transport-channel
# disconnected or broke.
except trio.ClosedResourceError as cre: # by self._rx_chan.receive() except trio.ClosedResourceError as cre: # by self._rx_chan.receive()
src_err = cre src_err = cre
log.warning( log.warning(
@ -218,14 +198,15 @@ class MsgStream(trio.abc.Channel):
# terminated and signal this local iterator to stop # terminated and signal this local iterator to stop
drained: list[Exception|dict] = await self.aclose() drained: list[Exception|dict] = await self.aclose()
if drained: if drained:
# ?TODO? pass these to the `._ctx._drained_msgs: deque`
# and then iterate them as part of any `.wait_for_result()` call?
#
# from .devx import pause # from .devx import pause
# await pause() # await pause()
log.warning( log.warning(
'Drained context msgs during closure:\n' 'Drained context msgs during closure\n\n'
f'{drained}' f'{drained}'
) )
# TODO: pass these to the `._ctx._drained_msgs: deque`
# and then iterate them as part of any `.result()` call?
# NOTE XXX: if the context was cancelled or remote-errored # NOTE XXX: if the context was cancelled or remote-errored
# but we received the stream close msg first, we # but we received the stream close msg first, we
@ -238,28 +219,36 @@ class MsgStream(trio.abc.Channel):
from_src_exc=src_err, from_src_exc=src_err,
) )
# propagate any error but hide low-level frame details # propagate any error but hide low-level frame details from
# from the caller by default for debug noise reduction. # the caller by default for console/debug-REPL noise
# reduction.
if ( if (
hide_tb hide_tb
and (
# XXX NOTE XXX don't reraise on certain # XXX NOTE special conditions: don't reraise on
# stream-specific internal error types like, # certain stream-specific internal error types like,
# #
# - `trio.EoC` since we want to use the exact instance # - `trio.EoC` since we want to use the exact instance
# to ensure that it is the error that bubbles upward # to ensure that it is the error that bubbles upward
# for silent absorption by `Context.open_stream()`. # for silent absorption by `Context.open_stream()`.
and not self._eoc not self._eoc
# - `RemoteActorError` (or `ContextCancelled`) if it gets # - `RemoteActorError` (or subtypes like ctxc)
# raised from `_raise_from_no_key_in_msg()` since we # since we want to present the error as though it is
# want the same (as the above bullet) for any # "sourced" directly from this `.receive()` call and
# `.open_context()` block bubbled error raised by # generally NOT include the stack frames raised from
# any nearby ctx API remote-failures. # inside the `PldRx` and/or the transport stack
# and not isinstance(src_err, RemoteActorError) # layers.
or isinstance(src_err, RemoteActorError)
)
): ):
raise type(src_err)(*src_err.args) from src_err raise type(src_err)(*src_err.args) from src_err
else: else:
# for any non-graceful-EOC we want to NOT hide this frame
if not self._eoc:
__tracebackhide__: bool = False
raise src_err raise src_err
async def aclose(self) -> list[Exception|dict]: async def aclose(self) -> list[Exception|dict]:
@ -385,6 +374,8 @@ class MsgStream(trio.abc.Channel):
if not self._eoc: if not self._eoc:
message: str = ( message: str = (
f'Stream self-closed by {self._ctx.side!r}-side before EoC\n' f'Stream self-closed by {self._ctx.side!r}-side before EoC\n'
# } bc a stream is a "scope"/msging-phase inside an IPC
f'x}}>\n'
f'|_{self}\n' f'|_{self}\n'
) )
log.cancel(message) log.cancel(message)

View File

@ -80,6 +80,7 @@ class ActorNursery:
''' '''
def __init__( def __init__(
self, self,
# TODO: maybe def these as fields of a struct looking type?
actor: Actor, actor: Actor,
ria_nursery: trio.Nursery, ria_nursery: trio.Nursery,
da_nursery: trio.Nursery, da_nursery: trio.Nursery,
@ -88,8 +89,10 @@ class ActorNursery:
) -> None: ) -> None:
# self.supervisor = supervisor # TODO # self.supervisor = supervisor # TODO
self._actor: Actor = actor self._actor: Actor = actor
self._ria_nursery = ria_nursery
# TODO: rename to `._tn` for our conventional "task-nursery"
self._da_nursery = da_nursery self._da_nursery = da_nursery
self._children: dict[ self._children: dict[
tuple[str, str], tuple[str, str],
tuple[ tuple[
@ -98,15 +101,13 @@ class ActorNursery:
Portal | None, Portal | None,
] ]
] = {} ] = {}
# portals spawned with ``run_in_actor()`` are
# cancelled when their "main" result arrives
self._cancel_after_result_on_exit: set = set()
self.cancelled: bool = False self.cancelled: bool = False
self._join_procs = trio.Event() self._join_procs = trio.Event()
self._at_least_one_child_in_debug: bool = False self._at_least_one_child_in_debug: bool = False
self.errors = errors self.errors = errors
self.exited = trio.Event()
self._scope_error: BaseException|None = None self._scope_error: BaseException|None = None
self.exited = trio.Event()
# NOTE: when no explicit call is made to # NOTE: when no explicit call is made to
# `.open_root_actor()` by application code, # `.open_root_actor()` by application code,
@ -116,6 +117,13 @@ class ActorNursery:
# and syncing purposes to any actor opened nurseries. # and syncing purposes to any actor opened nurseries.
self._implicit_runtime_started: bool = False self._implicit_runtime_started: bool = False
# TODO: remove the `.run_in_actor()` API and thus this 2ndary
# nursery when that API get's moved outside this primitive!
self._ria_nursery = ria_nursery
# portals spawned with ``run_in_actor()`` are
# cancelled when their "main" result arrives
self._cancel_after_result_on_exit: set = set()
async def start_actor( async def start_actor(
self, self,
name: str, name: str,
@ -126,10 +134,14 @@ class ActorNursery:
rpc_module_paths: list[str]|None = None, rpc_module_paths: list[str]|None = None,
enable_modules: list[str]|None = None, enable_modules: list[str]|None = None,
loglevel: str|None = None, # set log level per subactor loglevel: str|None = None, # set log level per subactor
nursery: trio.Nursery|None = None,
debug_mode: bool|None = None, debug_mode: bool|None = None,
infect_asyncio: bool = False, infect_asyncio: bool = False,
# TODO: ideally we can rm this once we no longer have
# a `._ria_nursery` since the dependent APIs have been
# removed!
nursery: trio.Nursery|None = None,
) -> Portal: ) -> Portal:
''' '''
Start a (daemon) actor: a process that has no designated Start a (daemon) actor: a process that has no designated
@ -146,6 +158,7 @@ class ActorNursery:
# configure and pass runtime state # configure and pass runtime state
_rtv = _state._runtime_vars.copy() _rtv = _state._runtime_vars.copy()
_rtv['_is_root'] = False _rtv['_is_root'] = False
_rtv['_is_infected_aio'] = infect_asyncio
# allow setting debug policy per actor # allow setting debug policy per actor
if debug_mode is not None: if debug_mode is not None:
@ -200,6 +213,7 @@ class ActorNursery:
# |_ dynamic @context decoration on child side # |_ dynamic @context decoration on child side
# |_ implicit `Portal.open_context() as (ctx, first):` # |_ implicit `Portal.open_context() as (ctx, first):`
# and `return first` on parent side. # and `return first` on parent side.
# |_ mention how it's similar to `trio-parallel` API?
# -[ ] use @api_frame on the wrapper # -[ ] use @api_frame on the wrapper
async def run_in_actor( async def run_in_actor(
self, self,
@ -269,11 +283,14 @@ class ActorNursery:
) -> None: ) -> None:
''' '''
Cancel this nursery by instructing each subactor to cancel Cancel this actor-nursery by instructing each subactor's
itself and wait for all subactors to terminate. runtime to cancel and wait for all underlying sub-processes
to terminate.
If ``hard_killl`` is set to ``True`` then kill the processes If `hard_kill` is set then kill the processes directly using
directly without any far end graceful ``trio`` cancellation. the spawning-backend's API/OS-machinery without any attempt
at (graceful) `trio`-style cancellation using our
`Actor.cancel()`.
''' '''
__runtimeframe__: int = 1 # noqa __runtimeframe__: int = 1 # noqa
@ -357,11 +374,12 @@ class ActorNursery:
@acm @acm
async def _open_and_supervise_one_cancels_all_nursery( async def _open_and_supervise_one_cancels_all_nursery(
actor: Actor, actor: Actor,
tb_hide: bool = False,
) -> typing.AsyncGenerator[ActorNursery, None]: ) -> typing.AsyncGenerator[ActorNursery, None]:
# normally don't need to show user by default # normally don't need to show user by default
__tracebackhide__: bool = True __tracebackhide__: bool = tb_hide
outer_err: BaseException|None = None outer_err: BaseException|None = None
inner_err: BaseException|None = None inner_err: BaseException|None = None
@ -629,8 +647,12 @@ async def open_nursery(
f'|_{an}\n' f'|_{an}\n'
) )
# shutdown runtime if it was started
if implicit_runtime: if implicit_runtime:
# shutdown runtime if it was started and report noisly
# that we're did so.
msg += '=> Shutting down actor runtime <=\n' msg += '=> Shutting down actor runtime <=\n'
log.info(msg)
log.info(msg) else:
# keep noise low during std operation.
log.runtime(msg)

View File

@ -54,6 +54,25 @@ def examples_dir() -> pathlib.Path:
return repodir() / 'examples' return repodir() / 'examples'
def mk_cmd(
ex_name: str,
exs_subpath: str = 'debugging',
) -> str:
'''
Generate a shell command suitable to pass to ``pexpect.spawn()``.
'''
script_path: pathlib.Path = (
examples_dir()
/ exs_subpath
/ f'{ex_name}.py'
)
return ' '.join([
'python',
str(script_path)
])
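Editor's note: a hedged usage sketch for the new helper (the 'asyncio_bp' example-module name is hypothetical), mirroring how the debug-mode tests drive example scripts under `pexpect`:

import pexpect

child = pexpect.spawn(mk_cmd('asyncio_bp'))
child.expect(pexpect.EOF)  # run the example script to completion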
@acm @acm
async def expect_ctxc( async def expect_ctxc(
yay: bool, yay: bool,

View File

@ -26,9 +26,10 @@ from ._debug import (
breakpoint as breakpoint, breakpoint as breakpoint,
pause as pause, pause as pause,
pause_from_sync as pause_from_sync, pause_from_sync as pause_from_sync,
shield_sigint_handler as shield_sigint_handler, sigint_shield as sigint_shield,
open_crash_handler as open_crash_handler, open_crash_handler as open_crash_handler,
maybe_open_crash_handler as maybe_open_crash_handler, maybe_open_crash_handler as maybe_open_crash_handler,
maybe_init_greenback as maybe_init_greenback,
post_mortem as post_mortem, post_mortem as post_mortem,
mk_pdb as mk_pdb, mk_pdb as mk_pdb,
) )

File diff suppressed because it is too large

View File

@ -24,13 +24,24 @@ disjoint, parallel executing tasks in separate actors.
''' '''
from __future__ import annotations from __future__ import annotations
# from functools import partial
from threading import (
current_thread,
Thread,
RLock,
)
import multiprocessing as mp import multiprocessing as mp
from signal import ( from signal import (
signal, signal,
getsignal,
SIGUSR1, SIGUSR1,
) )
import traceback # import traceback
from typing import TYPE_CHECKING from types import ModuleType
from typing import (
Callable,
TYPE_CHECKING,
)
import trio import trio
from tractor import ( from tractor import (
@ -51,26 +62,45 @@ if TYPE_CHECKING:
@trio.lowlevel.disable_ki_protection @trio.lowlevel.disable_ki_protection
def dump_task_tree() -> None: def dump_task_tree() -> None:
import stackscope '''
from tractor.log import get_console_log Do a classic `stackscope.extract()` task-tree dump to console at
`.devx()` level.
'''
import stackscope
tree_str: str = str( tree_str: str = str(
stackscope.extract( stackscope.extract(
trio.lowlevel.current_root_task(), trio.lowlevel.current_root_task(),
recurse_child_tasks=True recurse_child_tasks=True
) )
) )
log = get_console_log(
name=__name__,
level='cancel',
)
actor: Actor = _state.current_actor() actor: Actor = _state.current_actor()
thr: Thread = current_thread()
log.devx( log.devx(
f'Dumping `stackscope` tree for actor\n' f'Dumping `stackscope` tree for actor\n'
f'{actor.name}: {actor}\n' f'{actor.uid}:\n'
f' |_{mp.current_process()}\n\n' f'|_{mp.current_process()}\n'
f' |_{thr}\n'
f' |_{actor}\n\n'
# start-of-trace-tree delimiter (mostly for testing)
'------ - ------\n'
'\n'
+
f'{tree_str}\n' f'{tree_str}\n'
+
# end-of-trace-tree delimiter (mostly for testing)
f'\n'
f'------ {actor.uid!r} ------\n'
) )
# TODO: can remove this right?
# -[ ] was original code from author
#
# print(
# 'DUMPING FROM PRINT\n'
# +
# content
# )
# import logging # import logging
# try: # try:
# with open("/dev/tty", "w") as tty: # with open("/dev/tty", "w") as tty:
@ -80,58 +110,130 @@ def dump_task_tree() -> None:
# "task_tree" # "task_tree"
# ).exception("Error printing task tree") # ).exception("Error printing task tree")
_handler_lock = RLock()
_tree_dumped: bool = False
def signal_handler(
def dump_tree_on_sig(
sig: int, sig: int,
frame: object, frame: object,
relay_to_subs: bool = True, relay_to_subs: bool = True,
) -> None: ) -> None:
try: global _tree_dumped, _handler_lock
trio.lowlevel.current_trio_token( with _handler_lock:
).run_sync_soon(dump_task_tree) if _tree_dumped:
except RuntimeError: log.warning(
# not in async context -- print a normal traceback 'Already dumped for this actor...??'
traceback.print_stack() )
return
_tree_dumped = True
# actor: Actor = _state.current_actor()
log.devx(
'Trying to dump `stackscope` tree..\n'
)
try:
dump_task_tree()
# await actor._service_n.start_soon(
# partial(
# trio.to_thread.run_sync,
# dump_task_tree,
# )
# )
# trio.lowlevel.current_trio_token().run_sync_soon(
# dump_task_tree
# )
except RuntimeError:
log.exception(
'Failed to dump `stackscope` tree..\n'
)
# not in async context -- print a normal traceback
# traceback.print_stack()
raise
except BaseException:
log.exception(
'Failed to dump `stackscope` tree..\n'
)
raise
log.devx(
'Supposedly we dumped just fine..?'
)
if not relay_to_subs: if not relay_to_subs:
return return
an: ActorNursery an: ActorNursery
for an in _state.current_actor()._actoruid2nursery.values(): for an in _state.current_actor()._actoruid2nursery.values():
subproc: ProcessType subproc: ProcessType
subactor: Actor subactor: Actor
for subactor, subproc, _ in an._children.values(): for subactor, subproc, _ in an._children.values():
log.devx( log.warning(
f'Relaying `SIGUSR1`[{sig}] to sub-actor\n' f'Relaying `SIGUSR1`[{sig}] to sub-actor\n'
f'{subactor}\n' f'{subactor}\n'
f' |_{subproc}\n' f' |_{subproc}\n'
) )
if isinstance(subproc, trio.Process): # bc of course stdlib can't have a std API.. XD
subproc.send_signal(sig) match subproc:
case trio.Process():
subproc.send_signal(sig)
elif isinstance(subproc, mp.Process): case mp.Process():
subproc._send_signal(sig) subproc._send_signal(sig)
def enable_stack_on_sig( def enable_stack_on_sig(
sig: int = SIGUSR1 sig: int = SIGUSR1,
) -> None: ) -> ModuleType:
''' '''
Enable `stackscope` tracing on reception of a signal; by Enable `stackscope` tracing on reception of a signal; by
default this is SIGUSR1. default this is SIGUSR1.
HOT TIP: a task/ctx-tree dump can be triggered from a shell with
fancy cmds.
For ex. from `bash` using `pgrep` and cmd-substitution
(https://www.gnu.org/software/bash/manual/bash.html#Command-Substitution)
you could use:
>> kill -SIGUSR1 $(pgrep -f '<cmd>')
Or with `xonsh` (which has diff capture-from-subproc syntax)
>> kill -SIGUSR1 @$(pgrep -f '<cmd>')
''' '''
try:
import stackscope
except ImportError:
log.warning(
'`stackscope` not installed for use in debug mode!'
)
return None
handler: Callable|int = getsignal(sig)
if handler is dump_tree_on_sig:
log.devx(
'A `SIGUSR1` handler already exists?\n'
f'|_ {handler!r}\n'
)
return
signal( signal(
sig, sig,
signal_handler, dump_tree_on_sig,
) )
# NOTE: not the above can be triggered from log.devx(
# a (xonsh) shell using: 'Enabling trace-trees on `SIGUSR1` '
# kill -SIGUSR1 @$(pgrep -f '<cmd>') 'since `stackscope` is installed @ \n'
# f'{stackscope!r}\n\n'
# for example if you were looking to trace a `pytest` run f'With `SIGUSR1` handler\n'
# kill -SIGUSR1 @$(pgrep -f 'pytest') f'|_{dump_tree_on_sig}\n'
)
return stackscope
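Editor's note: beyond the shell one-liners in the docstring above, the same dump can be requested programmatically; a minimal sketch (the `request_tree_dump()` name is hypothetical):

import os
import signal

def request_tree_dump(pid: int) -> None:
    # the target must have installed the handler via `enable_stack_on_sig()`
    # (e.g. `open_root_actor(enable_stack_on_sig=True)`); otherwise SIGUSR1
    # keeps its default (terminate) disposition.
    os.kill(pid, signal.SIGUSR1)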

View File

@ -54,11 +54,12 @@ LOG_FORMAT = (
DATE_FORMAT = '%b %d %H:%M:%S' DATE_FORMAT = '%b %d %H:%M:%S'
# FYI, ERROR is 40 # FYI, ERROR is 40
# TODO: use a `bidict` to avoid the :155 check?
CUSTOM_LEVELS: dict[str, int] = { CUSTOM_LEVELS: dict[str, int] = {
'TRANSPORT': 5, 'TRANSPORT': 5,
'RUNTIME': 15, 'RUNTIME': 15,
'DEVX': 17, 'DEVX': 17,
'CANCEL': 18, 'CANCEL': 22,
'PDB': 500, 'PDB': 500,
} }
STD_PALETTE = { STD_PALETTE = {
@ -147,6 +148,8 @@ class StackLevelAdapter(LoggerAdapter):
Delegate a log call to the underlying logger, after adding Delegate a log call to the underlying logger, after adding
contextual information from this adapter instance. contextual information from this adapter instance.
NOTE: all custom level methods (above) delegate to this!
''' '''
if self.isEnabledFor(level): if self.isEnabledFor(level):
stacklevel: int = 3 stacklevel: int = 3
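Editor's note: with the CANCEL bump from 18 to 22 above, cancellation logs now sort above stdlib INFO; a hedged sketch of the resulting ordering (the 'my_app' logger name is hypothetical):

from tractor.log import get_logger

log = get_logger('my_app')

# TRANSPORT=5 < DEBUG=10 < RUNTIME=15 < DEVX=17 < INFO=20 < CANCEL=22 < WARNING=30 < PDB=500
log.runtime('low-level runtime detail')       # 15
log.devx('debug-machinery detail')            # 17
log.cancel('cancellation lifecycle message')  # 22, now louder than INFO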

View File

@ -374,7 +374,7 @@ class PldRx(Struct):
case _: case _:
src_err = InternalError( src_err = InternalError(
'Unknown IPC msg ??\n\n' 'Invalid IPC msg ??\n\n'
f'{msg}\n' f'{msg}\n'
) )
@ -499,7 +499,7 @@ async def maybe_limit_plds(
yield None yield None
return return
# sanity on scoping # sanity check on IPC scoping
curr_ctx: Context = current_ipc_ctx() curr_ctx: Context = current_ipc_ctx()
assert ctx is curr_ctx assert ctx is curr_ctx
@ -510,6 +510,8 @@ async def maybe_limit_plds(
) as msgdec: ) as msgdec:
yield msgdec yield msgdec
# when the applied spec is unwound/removed, the same IPC-ctx
# should still be in scope.
curr_ctx: Context = current_ipc_ctx() curr_ctx: Context = current_ipc_ctx()
assert ctx is curr_ctx assert ctx is curr_ctx
@ -525,16 +527,26 @@ async def drain_to_final_msg(
list[MsgType] list[MsgType]
]: ]:
''' '''
Drain IPC msgs delivered to the underlying IPC primitive's Drain IPC msgs delivered to the underlying IPC context's
rx-mem-chan (eg. `Context._rx_chan`) from the runtime in rx-mem-chan (i.e. from `Context._rx_chan`) in search for a final
search for a final result or error. `Return` or `Error` msg.
The motivation here is to ideally capture errors during ctxc Deliver the `Return` + preceding drained msgs (`list[MsgType]`)
conditions where a canc-request/or local error is sent but the as a pair unless an `Error` is found, in which case unpack and raise
local task also excepts and enters the it.
`Portal.open_context().__aexit__()` block wherein we prefer to
capture and raise any remote error or ctxc-ack as part of the The motivation here is to always capture any remote error relayed
`ctx.result()` cleanup and teardown sequence. by the remote peer task during a ctxc condition.
For eg. a ctxc-request may be sent to the peer as part of the
local task's (request for) cancellation but then that same task
**also errors** before executing the teardown in the
`Portal.open_context().__aexit__()` block. In such error-on-exit
cases we want to always capture and raise any delivered remote
error (like an expected ctxc-ACK) as part of the final
`ctx.wait_for_result()` teardown sequence such that the
`Context.outcome` related state always reflect what transpired
even after ctx closure and the `.open_context()` block exit.
''' '''
__tracebackhide__: bool = hide_tb __tracebackhide__: bool = hide_tb
@ -572,22 +584,42 @@ async def drain_to_final_msg(
# |_from tractor.devx._debug import pause # |_from tractor.devx._debug import pause
# await pause() # await pause()
# NOTE: we get here if the far end was # NOTE: we get here if the far end was
# `ContextCancelled` in 2 cases: # `ContextCancelled` in 2 cases:
# 1. we requested the cancellation and thus # 1. we requested the cancellation and thus
# SHOULD NOT raise that far end error, # SHOULD NOT raise that far end error,
# 2. WE DID NOT REQUEST that cancel and thus # 2. WE DID NOT REQUEST that cancel and thus
# SHOULD RAISE HERE! # SHOULD RAISE HERE!
except trio.Cancelled as taskc: except trio.Cancelled as _taskc:
taskc: trio.Cancelled = _taskc
# report when the cancellation wasn't (ostensibly) due to
# RPC operation, some surrounding parent cancel-scope.
if not ctx._scope.cancel_called:
task: trio.lowlevel.Task = trio.lowlevel.current_task()
rent_n: trio.Nursery = task.parent_nursery
if (
(local_cs := rent_n.cancel_scope).cancel_called
):
log.cancel(
'RPC-ctx cancelled by local-parent scope during drain!\n\n'
f'c}}>\n'
f' |_{rent_n}\n'
f' |_.cancel_scope = {local_cs}\n'
f' |_>c}}\n'
f' |_{ctx.pformat(indent=" "*9)}'
# ^TODO, some (other) simpler repr here?
)
__tracebackhide__: bool = False
# CASE 2: mask the local cancelled-error(s) # CASE 2: mask the local cancelled-error(s)
# only when we are sure the remote error is # only when we are sure the remote error is
# the source cause of this local task's # the source cause of this local task's
# cancellation. # cancellation.
ctx.maybe_raise( ctx.maybe_raise(
# TODO: when use this/ hide_tb=hide_tb,
# from_src_exc=taskc, from_src_exc=taskc,
# ?TODO? when *should* we use this?
) )
# CASE 1: we DID request the cancel we simply # CASE 1: we DID request the cancel we simply
@ -659,7 +691,7 @@ async def drain_to_final_msg(
# Stop() # Stop()
case Stop(): case Stop():
pre_result_drained.append(msg) pre_result_drained.append(msg)
log.cancel( log.runtime( # normal/expected shutdown transaction
'Remote stream terminated due to "stop" msg:\n\n' 'Remote stream terminated due to "stop" msg:\n\n'
f'{pretty_struct.pformat(msg)}\n' f'{pretty_struct.pformat(msg)}\n'
) )
@ -719,13 +751,19 @@ async def drain_to_final_msg(
pre_result_drained.append(msg) pre_result_drained.append(msg)
# It's definitely an internal error if any other # It's definitely an internal error if any other
# msg type without a`'cid'` field arrives here! # msg type without a`'cid'` field arrives here!
report: str = (
f'Invalid or unknown msg type {type(msg)!r}!?\n'
)
if not msg.cid: if not msg.cid:
raise InternalError( report += (
'Unexpected cid-missing msg?\n\n' '\nWhich also has no `.cid` field?\n'
f'{msg}\n'
) )
raise RuntimeError('Unknown msg type: {msg}') raise MessagingError(
report
+
f'\n{msg}\n'
)
else: else:
log.cancel( log.cancel(

View File

@ -34,6 +34,9 @@ from pprint import (
saferepr, saferepr,
) )
from tractor.log import get_logger
log = get_logger()
# TODO: auto-gen type sig for input func both for # TODO: auto-gen type sig for input func both for
# type-msgs and logging of RPC tasks? # type-msgs and logging of RPC tasks?
# taken and modified from: # taken and modified from:
@ -143,7 +146,13 @@ def pformat(
else: # the `pprint` recursion-safe format: else: # the `pprint` recursion-safe format:
# https://docs.python.org/3.11/library/pprint.html#pprint.saferepr # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr
val_str: str = saferepr(v) try:
val_str: str = saferepr(v)
except Exception:
log.exception(
f'Failed to `saferepr({type(struct)})` !?\n'
)
return _Struct.__repr__(struct)
# TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg!
obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n')
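A minimal illustration of the failure mode the new fallback guards against (the types below are hypothetical, not from this diff): any field whose own `__repr__` raises will blow up `saferepr()` and therefore any log call that renders the struct.

import msgspec

class Exploding:
    def __repr__(self) -> str:
        # simulate a field whose repr is broken
        raise RuntimeError('nope')

class Report(msgspec.Struct):
    val: Exploding

# without the try/except above, pretty-printing `Report(Exploding())`
# propagates the RuntimeError; with it we degrade to the base
# `msgspec.Struct.__repr__()` instead.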
@ -194,12 +203,20 @@ class Struct(
return sin_props return sin_props
pformat = pformat pformat = pformat
# __repr__ = pformat
# __str__ = __repr__ = pformat # __str__ = __repr__ = pformat
# TODO: use a pprint.PrettyPrinter instance around ONLY rendering # TODO: use a pprint.PrettyPrinter instance around ONLY rendering
# inside a known tty? # inside a known tty?
# def __repr__(self) -> str: # def __repr__(self) -> str:
# ... # ...
__repr__ = pformat def __repr__(self) -> str:
try:
return pformat(self)
except Exception:
log.exception(
f'Failed to `pformat({type(self)})` !?\n'
)
return _Struct.__repr__(self)
def copy( def copy(
self, self,

View File

@ -18,11 +18,13 @@
Infection apis for ``asyncio`` loops running ``trio`` using guest mode. Infection apis for ``asyncio`` loops running ``trio`` using guest mode.
''' '''
from __future__ import annotations
import asyncio import asyncio
from asyncio.exceptions import CancelledError from asyncio.exceptions import CancelledError
from contextlib import asynccontextmanager as acm from contextlib import asynccontextmanager as acm
from dataclasses import dataclass from dataclasses import dataclass
import inspect import inspect
import traceback
from typing import ( from typing import (
Any, Any,
Callable, Callable,
@ -30,25 +32,30 @@ from typing import (
Awaitable, Awaitable,
) )
import trio import tractor
from outcome import Error from tractor._exceptions import AsyncioCancelled
from tractor.log import get_logger
from tractor._state import ( from tractor._state import (
current_actor,
debug_mode, debug_mode,
) )
from tractor.devx import _debug from tractor.devx import _debug
from tractor._exceptions import AsyncioCancelled from tractor.log import get_logger
from tractor.trionics._broadcast import ( from tractor.trionics._broadcast import (
broadcast_receiver, broadcast_receiver,
BroadcastReceiver, BroadcastReceiver,
) )
import trio
from outcome import (
Error,
Outcome,
)
log = get_logger(__name__) log = get_logger(__name__)
__all__ = ['run_task', 'run_as_asyncio_guest'] __all__ = [
'run_task',
'run_as_asyncio_guest',
]
@dataclass @dataclass
@ -152,16 +159,17 @@ def _run_asyncio_task(
*, *,
qsize: int = 1, qsize: int = 1,
provide_channels: bool = False, provide_channels: bool = False,
hide_tb: bool = False,
**kwargs, **kwargs,
) -> LinkedTaskChannel: ) -> LinkedTaskChannel:
''' '''
Run an ``asyncio`` async function or generator in a task, return Run an ``asyncio`` async function or generator in a task, return
or stream the result back to ``trio``. or stream the result back to the caller `trio.lowlevel.Task`.
''' '''
__tracebackhide__ = True __tracebackhide__: bool = hide_tb
if not current_actor().is_infected_aio(): if not tractor.current_actor().is_infected_aio():
raise RuntimeError( raise RuntimeError(
"`infect_asyncio` mode is not enabled!?" "`infect_asyncio` mode is not enabled!?"
) )
@ -172,7 +180,6 @@ def _run_asyncio_task(
to_trio, from_aio = trio.open_memory_channel(qsize) # type: ignore to_trio, from_aio = trio.open_memory_channel(qsize) # type: ignore
args = tuple(inspect.getfullargspec(func).args) args = tuple(inspect.getfullargspec(func).args)
if getattr(func, '_tractor_steam_function', None): if getattr(func, '_tractor_steam_function', None):
# the assumption is that the target async routine accepts the # the assumption is that the target async routine accepts the
# send channel then it intends to yield more than one return # send channel then it intends to yield more than one return
@ -222,6 +229,7 @@ def _run_asyncio_task(
try: try:
result = await coro result = await coro
except BaseException as aio_err: except BaseException as aio_err:
chan._aio_err = aio_err
if isinstance(aio_err, CancelledError): if isinstance(aio_err, CancelledError):
log.runtime( log.runtime(
'`asyncio` task was cancelled..\n' '`asyncio` task was cancelled..\n'
@ -230,7 +238,6 @@ def _run_asyncio_task(
log.exception( log.exception(
'`asyncio` task errored\n' '`asyncio` task errored\n'
) )
chan._aio_err = aio_err
raise raise
else: else:
@ -266,10 +273,13 @@ def _run_asyncio_task(
aio_task_complete aio_task_complete
) )
) )
chan._aio_task = task chan._aio_task: asyncio.Task = task
# XXX TODO XXX get this actually workin.. XD # XXX TODO XXX get this actually workin.. XD
# maybe setup `greenback` for `asyncio`-side task REPLing # -[ ] we need logic to setup `greenback` for `asyncio`-side task
# REPLing.. which should normally be nearly the same as for
# `trio`?
# -[ ] add to a new `.devx._greenback.maybe_init_for_asyncio()`?
if ( if (
debug_mode() debug_mode()
and and
@ -282,31 +292,38 @@ def _run_asyncio_task(
def cancel_trio(task: asyncio.Task) -> None: def cancel_trio(task: asyncio.Task) -> None:
''' '''
Cancel the calling ``trio`` task on error. Cancel the calling `trio` task on error.
''' '''
nonlocal chan nonlocal chan
aio_err = chan._aio_err aio_err: BaseException|None = chan._aio_err
task_err: BaseException|None = None task_err: BaseException|None = None
# only to avoid ``asyncio`` complaining about uncaptured # only to avoid `asyncio` complaining about uncaptured
# task exceptions # task exceptions
try: try:
task.exception() res: Any = task.result()
except BaseException as terr: except BaseException as terr:
task_err = terr task_err: BaseException = terr
msg: str = ( msg: str = (
'Infected `asyncio` task {etype_str}\n' 'Infected `asyncio` task {etype_str}\n'
f'|_{task}\n'
) )
if isinstance(terr, CancelledError): if isinstance(terr, CancelledError):
msg += (
f'c)>\n'
f' |_{task}\n'
)
log.cancel( log.cancel(
msg.format(etype_str='cancelled') msg.format(etype_str='cancelled')
) )
else: else:
msg += (
f'x)>\n'
f' |_{task}\n'
)
log.exception( log.exception(
msg.format(etype_str='cancelled') msg.format(etype_str='errored')
) )
assert type(terr) is type(aio_err), ( assert type(terr) is type(aio_err), (
@ -326,29 +343,45 @@ def _run_asyncio_task(
if task_err is None: if task_err is None:
assert aio_err assert aio_err
aio_err.with_traceback(aio_err.__traceback__) # wait, wut?
# log.error( # aio_err.with_traceback(aio_err.__traceback__)
# 'infected task errorred'
# )
# TODO: show that the cancellation originated # TODO: show when cancellation originated
# from the ``trio`` side? right? # from each side more pedantically?
# elif type(aio_err) is CancelledError: # elif (
# type(aio_err) is CancelledError
# and # trio was the cause?
# cancel_scope.cancel_called
# ):
# log.cancel( # log.cancel(
# 'infected task was cancelled' # 'infected task was cancelled by `trio`-side'
# ) # )
# raise aio_err from task_err
# if cancel_scope.cancelled: # XXX: if not already, alway cancel the scope
# raise aio_err from err # on a task error in case the trio task is blocking on
# a checkpoint.
# XXX: alway cancel the scope on error
# in case the trio task is blocking
# on a checkpoint.
cancel_scope.cancel() cancel_scope.cancel()
# raise any ``asyncio`` side error. if (
task_err
and
aio_err is not task_err
):
raise aio_err from task_err
# raise any `asyncio` side error.
raise aio_err raise aio_err
log.info(
f'`trio` received final result from {task}\n'
f'|_{res}\n'
)
# TODO: do we need this?
# if task_err:
# cancel_scope.cancel()
# raise task_err
task.add_done_callback(cancel_trio) task.add_done_callback(cancel_trio)
return chan return chan
@ -375,7 +408,9 @@ async def translate_aio_errors(
) -> None: ) -> None:
aio_err = chan._aio_err aio_err = chan._aio_err
if ( if (
aio_err is not None and aio_err is not None
and
# not isinstance(aio_err, CancelledError)
type(aio_err) != CancelledError type(aio_err) != CancelledError
): ):
# always raise from any captured asyncio error # always raise from any captured asyncio error
@ -407,13 +442,17 @@ async def translate_aio_errors(
): ):
aio_err = chan._aio_err aio_err = chan._aio_err
if ( if (
task.cancelled() and task.cancelled()
and
type(aio_err) is CancelledError type(aio_err) is CancelledError
): ):
# if an underlying ``asyncio.CancelledError`` triggered this # if an underlying `asyncio.CancelledError` triggered this
# channel close, raise our (non-``BaseException``) wrapper # channel close, raise our (non-``BaseException``) wrapper
# error: ``AsyncioCancelled`` from that source error. # error: ``AsyncioCancelled`` from that source error.
raise AsyncioCancelled from aio_err raise AsyncioCancelled(
f'Task cancelled\n'
f'|_{task}\n'
) from aio_err
else: else:
raise raise
@ -456,8 +495,8 @@ async def run_task(
) -> Any: ) -> Any:
''' '''
Run an ``asyncio`` async function or generator in a task, return Run an `asyncio` async function or generator in a task, return
or stream the result back to ``trio``. or stream the result back to `trio`.
''' '''
# simple async func # simple async func
@ -515,11 +554,124 @@ async def open_channel_from(
chan._to_trio.close() chan._to_trio.close()
def run_as_asyncio_guest( class AsyncioRuntimeTranslationError(RuntimeError):
'''
We failed to correctly relay runtime semantics and/or maintain SC
supervision rules cross-event-loop.
'''
def run_trio_task_in_future(
async_fn,
*args,
) -> asyncio.Future:
'''
Run an async-func as a `trio` task from an `asyncio.Task` wrapped
in an `asyncio.Future` which is returned to the caller.
Another astounding feat by the great @oremanj !!
Bo
'''
result_future = asyncio.Future()
cancel_scope = trio.CancelScope()
finished: bool = False
# monkey-patch the future's `.cancel()` meth to
# allow cancellation relay to `trio`-task.
cancel_message: str|None = None
orig_cancel = result_future.cancel
def wrapped_cancel(
msg: str|None = None,
):
nonlocal cancel_message
if finished:
# We're being called back after the task completed
if msg is not None:
return orig_cancel(msg)
elif cancel_message is not None:
return orig_cancel(cancel_message)
else:
return orig_cancel()
if result_future.done():
return False
# Forward cancellation to the Trio task, don't mark
# future as cancelled until it completes
cancel_message = msg
cancel_scope.cancel()
return True
result_future.cancel = wrapped_cancel
async def trio_task() -> None:
nonlocal finished
try:
with cancel_scope:
try:
# TODO: type this with new tech in 3.13
result: Any = await async_fn(*args)
finally:
finished = True
# Propagate result or cancellation to the Future
if cancel_scope.cancelled_caught:
result_future.cancel()
elif not result_future.cancelled():
result_future.set_result(result)
except BaseException as exc:
# the result future gets all the non-Cancelled
# exceptions. Any Cancelled need to keep propagating
# out of this stack frame in order to reach the cancel
# scope for which they're intended.
cancelled: BaseException|None
rest: BaseException|None
if isinstance(exc, BaseExceptionGroup):
cancelled, rest = exc.split(trio.Cancelled)
elif isinstance(exc, trio.Cancelled):
cancelled, rest = exc, None
else:
cancelled, rest = None, exc
if not result_future.cancelled():
if rest:
result_future.set_exception(rest)
else:
result_future.cancel()
if cancelled:
raise cancelled
trio.lowlevel.spawn_system_task(
trio_task,
name=async_fn,
)
return result_future
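A hedged usage sketch from the `asyncio` side of an infected actor (i.e. with the `trio` guest-run already live); `do_trio_work` is a hypothetical `trio` async fn:

async def from_the_aio_side() -> None:
    # wrap a `trio` async fn as a guest-run task and await its
    # result from `asyncio` via the returned future.
    fut: asyncio.Future = run_trio_task_in_future(do_trio_work, 'some_arg')
    result = await fut
    # calling `fut.cancel()` instead would relay the cancellation to
    # the `trio` task via the monkey-patched `.cancel()` above.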
def run_as_asyncio_guest(
trio_main: Callable, trio_main: Callable,
# ^-NOTE-^ when spawned with `infected_aio=True` this func is
# normally `Actor._async_main()` as is passed by some bootstrap
# entrypoint like `._entry._trio_main()`.
_sigint_loop_pump_delay: float = 0,
) -> None: ) -> None:
# ^-TODO-^ technically whatever `trio_main` returns.. we should
# try to use func-typevar-params at least by 3.13!
# -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#callback-protocols
# -[ ] https://peps.python.org/pep-0646/#using-type-variable-tuples-in-functions
# -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#unpack-for-keyword-arguments
# -[ ] https://peps.python.org/pep-0718/
''' '''
Entry for an "infected ``asyncio`` actor". Entry for an "infected ``asyncio`` actor".
@ -545,51 +697,213 @@ def run_as_asyncio_guest(
# :) # :)
async def aio_main(trio_main): async def aio_main(trio_main):
'''
Main `asyncio.Task` which calls
`trio.lowlevel.start_guest_run()` to "infect" the `asyncio`
event-loop by embedding the `trio` scheduler allowing us to
boot the `tractor` runtime and connect back to our parent.
'''
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
trio_done_fut = asyncio.Future() trio_done_fute = asyncio.Future()
startup_msg: str = (
'Starting `asyncio` guest-loop-run\n'
'-> got running loop\n'
'-> built a `trio`-done future\n'
)
if debug_mode(): # TODO: shouldn't this be done in the guest-run trio task?
# XXX make it obvi we know this isn't supported yet! # if debug_mode():
log.error( # # XXX make it obvi we know this isn't supported yet!
'Attempting to enter unsupported `greenback` init ' # log.error(
'from `asyncio` task..' # 'Attempting to enter unsupported `greenback` init '
) # 'from `asyncio` task..'
await _debug.maybe_init_greenback( # )
force_reload=True, # await _debug.maybe_init_greenback(
) # force_reload=True,
# )
def trio_done_callback(main_outcome): def trio_done_callback(main_outcome):
log.runtime(
f'`trio` guest-run finishing with outcome\n'
f'>) {main_outcome}\n'
f'|_{trio_done_fute}\n'
)
if isinstance(main_outcome, Error): if isinstance(main_outcome, Error):
error = main_outcome.error error: BaseException = main_outcome.error
trio_done_fut.set_exception(error)
# TODO: explicit asyncio tb? # show a dedicated `asyncio`-side tb from the error
# traceback.print_exception(error) tb_str: str = ''.join(traceback.format_exception(error))
log.exception(
# XXX: do we need this? 'Guest-run errored!?\n\n'
# actor.cancel_soon() f'{main_outcome}\n'
f'{error}\n\n'
f'{tb_str}\n'
)
trio_done_fute.set_exception(error)
# raise inline
main_outcome.unwrap() main_outcome.unwrap()
else: else:
trio_done_fut.set_result(main_outcome) trio_done_fute.set_result(main_outcome)
log.runtime(f"trio_main finished: {main_outcome!r}")
log.info(
f'`trio` guest-run finished with outcome\n'
f')>\n'
f'|_{trio_done_fute}\n'
)
startup_msg += (
f'-> created {trio_done_callback!r}\n'
f'-> scheduling `trio_main`: {trio_main!r}\n'
)
# start the infection: run trio on the asyncio loop in "guest mode" # start the infection: run trio on the asyncio loop in "guest mode"
log.runtime( log.runtime(
'Infecting `asyncio`-process with a `trio` guest-run of\n\n' f'{startup_msg}\n\n'
f'{trio_main!r}\n\n' +
'Infecting `asyncio`-process with a `trio` guest-run!\n'
f'{trio_done_callback}\n'
) )
trio.lowlevel.start_guest_run( trio.lowlevel.start_guest_run(
trio_main, trio_main,
run_sync_soon_threadsafe=loop.call_soon_threadsafe, run_sync_soon_threadsafe=loop.call_soon_threadsafe,
done_callback=trio_done_callback, done_callback=trio_done_callback,
) )
# NOTE `.unwrap()` will raise on error fute_err: BaseException|None = None
return (await trio_done_fut).unwrap() try:
out: Outcome = await asyncio.shield(trio_done_fute)
# NOTE will raise (via `Error.unwrap()`) from any
# exception packed into the guest-run's `main_outcome`.
return out.unwrap()
except (
# XXX special SIGINT-handling is required since
# `asyncio.shield()`-ing seems to NOT handle that case as
# per recent changes in 3.11:
# https://docs.python.org/3/library/asyncio-runner.html#handling-keyboard-interruption
#
# NOTE: further, apparently ONLY need to handle this
# special SIGINT case since all other `asyncio`-side
# errors can be processed via our `chan._aio_err`
# relaying (right?); SIGINT seems to be totally diff
# error path in `asyncio`'s runtime..?
asyncio.CancelledError,
) as _fute_err:
fute_err = _fute_err
err_message: str = (
'main `asyncio` task '
)
if isinstance(fute_err, asyncio.CancelledError):
err_message += 'was cancelled!\n'
else:
err_message += f'errored with {fute_err!r}\n'
actor: tractor.Actor = tractor.current_actor()
log.exception(
err_message
+
'Cancelling `trio`-side `tractor`-runtime..\n'
f'c)>\n'
f' |_{actor}.cancel_soon()\n'
)
# XXX WARNING XXX the next LOCs are super important, since
# without them, we can get guest-run abandonment cases
# where `asyncio` will not schedule or wait on the `trio`
# guest-run task before final shutdown! This is
# particularly true if the `trio` side has tasks doing
# shielded work when a SIGINT condition occurs.
#
# We now have the
# `test_infected_asyncio.test_sigint_closes_lifetime_stack()`
# suite to ensure we do not suffer this issue
# (hopefully) ever again.
#
# The original abandonment issue surfaced as 2 different
# race-condition-dependent scenarios, all to do with
# `asyncio` handling SIGINT from the system:
#
# - "silent-abandon" (WORST CASE):
# `asyncio` abandons the `trio` guest-run task silently
# and no `trio`-guest-run or `tractor`-actor-runtime
# teardown happens whatsoever..
#
# - "loud-abandon" (BEST-ish CASE):
# the guest run gets abandoned "loudly" with `trio`
# reporting a console traceback and further tbs of all
# the (failed) GC-triggered shutdown routines which
# thankfully does get dumped to console..
#
# The abandonment is most easily reproduced if the `trio`
# side has tasks doing shielded work where those tasks
# ignore the normal `Cancelled` condition and continue to
# run, but obviously `asyncio` isn't aware of this and at
# some point bails on the guest-run unless we take manual
# intervention..
#
# To repeat, *WITHOUT THIS* stuff below the guest-run can
# get race-conditionally abandoned!!
#
# XXX SOLUTION XXX
# ------ - ------
# XXX FIRST PART:
# ------ - ------
# the obvious fix to the "silent-abandon" case is to
# explicitly cancel the actor runtime such that no
# runtime task is left unaware that the guest-run
# should be terminated due to OS cancellation.
#
actor.cancel_soon()
# ------ - ------
# XXX SECOND PART:
# ------ - ------
# Pump the `asyncio` event-loop to allow
# the `trio`-side guest-run to complete and
# teardown !!
#
# oh `asyncio`, how i don't miss you at all XD
while not trio_done_fute.done():
log.runtime(
'Waiting on main guest-run `asyncio` task to complete..\n'
f'|_trio_done_fute: {trio_done_fute}\n'
)
await asyncio.sleep(_sigint_loop_pump_delay)
# XXX is there any alt API/approach like the internal
# call below but that doesn't block indefinitely..?
# loop._run_once()
try:
return trio_done_fute.result()
except asyncio.exceptions.InvalidStateError as state_err:
# XXX be super duper noisy about abandonment issues!
aio_task: asyncio.Task = asyncio.current_task()
message: str = (
'The `asyncio`-side task likely exited before the '
'`trio`-side guest-run completed!\n\n'
)
if fute_err:
message += (
f'The main {aio_task}\n'
f'STOPPED due to {type(fute_err)}\n\n'
)
message += (
f'Likely something inside our guest-run-as-task impl is '
f'not effectively waiting on the `trio`-side to complete ?!\n'
f'This code -> {aio_main!r}\n\n'
'Below you will likely see a '
'"RuntimeWarning: Trio guest run got abandoned.." !!\n'
)
raise AsyncioRuntimeTranslationError(message) from state_err
# might as well if it's installed. # might as well if it's installed.
try: try:
@ -597,6 +911,8 @@ def run_as_asyncio_guest(
loop = uvloop.new_event_loop() loop = uvloop.new_event_loop()
asyncio.set_event_loop(loop) asyncio.set_event_loop(loop)
except ImportError: except ImportError:
pass log.runtime('`uvloop` not available..')
return asyncio.run(aio_main(trio_main)) return asyncio.run(
aio_main(trio_main),
)

View File

@ -156,11 +156,12 @@ class BroadcastState(Struct):
class BroadcastReceiver(ReceiveChannel): class BroadcastReceiver(ReceiveChannel):
''' '''
A memory receive channel broadcaster which is non-lossy for the A memory receive channel broadcaster which is non-lossy for
fastest consumer. the fastest consumer.
Additional consumer tasks can receive all produced values by registering Additional consumer tasks can receive all produced values by
with ``.subscribe()`` and receiving from the new instance it delivers. registering with ``.subscribe()`` and receiving from the new
instance it delivers.
''' '''
def __init__( def __init__(

View File

@ -18,8 +18,12 @@
Async context manager primitives with hard ``trio``-aware semantics Async context manager primitives with hard ``trio``-aware semantics
''' '''
from contextlib import asynccontextmanager as acm from __future__ import annotations
from contextlib import (
asynccontextmanager as acm,
)
import inspect import inspect
from types import ModuleType
from typing import ( from typing import (
Any, Any,
AsyncContextManager, AsyncContextManager,
@ -30,13 +34,16 @@ from typing import (
Optional, Optional,
Sequence, Sequence,
TypeVar, TypeVar,
TYPE_CHECKING,
) )
import trio import trio
from tractor._state import current_actor from tractor._state import current_actor
from tractor.log import get_logger from tractor.log import get_logger
if TYPE_CHECKING:
from tractor import ActorNursery
log = get_logger(__name__) log = get_logger(__name__)
@ -46,8 +53,10 @@ T = TypeVar("T")
@acm @acm
async def maybe_open_nursery( async def maybe_open_nursery(
nursery: trio.Nursery | None = None, nursery: trio.Nursery|ActorNursery|None = None,
shield: bool = False, shield: bool = False,
lib: ModuleType = trio,
) -> AsyncGenerator[trio.Nursery, Any]: ) -> AsyncGenerator[trio.Nursery, Any]:
''' '''
Create a new nursery if None provided. Create a new nursery if None provided.
@ -58,13 +67,12 @@ async def maybe_open_nursery(
if nursery is not None: if nursery is not None:
yield nursery yield nursery
else: else:
async with trio.open_nursery() as nursery: async with lib.open_nursery() as nursery:
nursery.cancel_scope.shield = shield nursery.cancel_scope.shield = shield
yield nursery yield nursery
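A hedged usage sketch of the helper; the caller and its args are hypothetical, and `lib` could instead be set to `tractor` to get an `ActorNursery` via `tractor.open_nursery()`:

async def run_some_tasks(n: trio.Nursery|None = None):
    # re-use a passed-in nursery or open a fresh `trio` one.
    async with maybe_open_nursery(n, shield=False) as nursery:
        nursery.start_soon(trio.sleep, 1)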
async def _enter_and_wait( async def _enter_and_wait(
mngr: AsyncContextManager[T], mngr: AsyncContextManager[T],
unwrapped: dict[int, T], unwrapped: dict[int, T],
all_entered: trio.Event, all_entered: trio.Event,
@ -91,7 +99,6 @@ async def _enter_and_wait(
@acm @acm
async def gather_contexts( async def gather_contexts(
mngrs: Sequence[AsyncContextManager[T]], mngrs: Sequence[AsyncContextManager[T]],
) -> AsyncGenerator[ ) -> AsyncGenerator[
@ -102,15 +109,17 @@ async def gather_contexts(
None, None,
]: ]:
''' '''
Concurrently enter a sequence of async context managers, each in Concurrently enter a sequence of async context managers (acms),
a separate ``trio`` task and deliver the unwrapped values in the each from a separate `trio` task and deliver the unwrapped
same order once all managers have entered. On exit all contexts are `yield`-ed values in the same order once all managers have entered.
subsequently and concurrently exited.
This function is somewhat similar to common usage of On exit, all acms are subsequently and concurrently exited.
``contextlib.AsyncExitStack.enter_async_context()`` (in a loop) in
combo with ``asyncio.gather()`` except the managers are concurrently This function is somewhat similar to a batch of non-blocking
entered and exited, and cancellation just works. calls to `contextlib.AsyncExitStack.enter_async_context()`
(inside a loop) *in combo with* an `asyncio.gather()` to get the
`.__aenter__()`-ed values, except the managers are both
concurrently entered and exited and *cancellation just works*(R).
''' '''
seed: int = id(mngrs) seed: int = id(mngrs)
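A minimal usage sketch (the acm factory below is hypothetical): enter a batch of acms concurrently and receive their yielded values in input order.

from contextlib import asynccontextmanager as acm

@acm
async def open_resource(name: str):
    # hypothetical acm standing in for any async setup/teardown
    yield f'resource-{name}'

async def use_them() -> None:
    async with gather_contexts([
        open_resource('a'),
        open_resource('b'),
    ]) as (a, b):
        assert (a, b) == ('resource-a', 'resource-b')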
@ -210,9 +219,10 @@ async def maybe_open_context(
) -> AsyncIterator[tuple[bool, T]]: ) -> AsyncIterator[tuple[bool, T]]:
''' '''
Maybe open a context manager if there is not already a _Cached Maybe open an async-context-manager (acm) if there is not already
version for the provided ``key`` for *this* actor. Return the a `_Cached` version for the provided (input) `key` for *this* actor.
_Cached instance on a _Cache hit.
Return the `_Cached` instance on a _Cache hit.
''' '''
fid = id(acm_func) fid = id(acm_func)
@ -273,8 +283,13 @@ async def maybe_open_context(
else: else:
_Cache.users += 1 _Cache.users += 1
log.runtime( log.runtime(
f'Reusing resource for `_Cache` user {_Cache.users}\n\n' f'Re-using cached resource for user {_Cache.users}\n\n'
f'{ctx_key!r} -> {yielded!r}\n' f'{ctx_key!r} -> {type(yielded)}\n'
# TODO: make this work with values but without
# `msgspec.Struct` causing frickin crashes on field-type
# lookups..
# f'{ctx_key!r} -> {yielded!r}\n'
) )
lock.release() lock.release()
yield True, yielded yield True, yielded
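A hedged usage sketch of the cache-hit flow; `open_client` is a hypothetical acm and only the `acm_func` kwarg visible in this hunk is assumed to be part of the real signature:

async def get_shared_client():
    # per-actor cache: concurrent callers in the same actor share
    # one live `client` instance instead of re-entering the acm.
    async with maybe_open_context(
        acm_func=open_client,  # hypothetical expensive-to-open acm
    ) as (cache_hit, client):
        if cache_hit:
            log.runtime(f'Re-using cached client {type(client)}')
        ...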