Compare commits
37 Commits
4bdf7f79f2
...
4d675deb24
Author | SHA1 | Date |
---|---|---|
|
4d675deb24 | |
|
31297171fc | |
|
d7a9ddd4a9 | |
|
43b84c99b6 | |
|
b7f2258f15 | |
|
1dfa109879 | |
|
0453e3565e | |
|
0bf13c50b4 | |
|
2e69aa0f67 | |
|
fc9c7e6e3f | |
|
354a4c2226 | |
|
d2c88e9709 | |
|
ef179b69f2 | |
|
837602a011 | |
|
debacef30e | |
|
7ae405ef5a | |
|
e428bf0a34 | |
|
d448bb81bd | |
|
77108a9759 | |
|
1720fefa1d | |
|
a3c1f8e419 | |
|
db58f6e1b5 | |
|
76b7006977 | |
|
bd1885bce1 | |
|
066a35322e | |
|
2ebc30d708 | |
|
57a5b7eb6f | |
|
e269aa3751 | |
|
7fc9297104 | |
|
9208708b3a | |
|
cf2f2adec2 | |
|
f28abc6720 | |
|
6f33a9891e | |
|
79604b7f98 | |
|
cec4a2a0ab | |
|
4089e4b3ac | |
|
5ec48310b6 |
|
@ -93,12 +93,18 @@ _reg_addr: tuple[str, int] = (
|
|||
'127.0.0.1',
|
||||
random.randint(1000, 9999),
|
||||
)
|
||||
_arb_addr = _reg_addr
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
def arb_addr():
|
||||
return _arb_addr
|
||||
def reg_addr() -> tuple[str, int]:
|
||||
|
||||
# globally override the runtime to the per-test-session-dynamic
|
||||
# addr so that all tests never conflict with any other actor
|
||||
# tree using the default.
|
||||
from tractor import _root
|
||||
_root._default_lo_addrs = [_reg_addr]
|
||||
|
||||
return _reg_addr
|
||||
|
||||
|
||||
def pytest_generate_tests(metafunc):
|
||||
|
@ -140,30 +146,35 @@ def sig_prog(proc, sig):
|
|||
def daemon(
|
||||
loglevel: str,
|
||||
testdir,
|
||||
arb_addr: tuple[str, int],
|
||||
reg_addr: tuple[str, int],
|
||||
):
|
||||
'''
|
||||
Run a daemon actor as a "remote arbiter".
|
||||
Run a daemon root actor as a separate actor-process tree and
|
||||
"remote registrar" for discovery-protocol related tests.
|
||||
|
||||
'''
|
||||
if loglevel in ('trace', 'debug'):
|
||||
# too much logging will lock up the subproc (smh)
|
||||
loglevel = 'info'
|
||||
# XXX: too much logging will lock up the subproc (smh)
|
||||
loglevel: str = 'info'
|
||||
|
||||
cmdargs = [
|
||||
sys.executable, '-c',
|
||||
"import tractor; tractor.run_daemon([], registry_addr={}, loglevel={})"
|
||||
.format(
|
||||
arb_addr,
|
||||
"'{}'".format(loglevel) if loglevel else None)
|
||||
code: str = (
|
||||
"import tractor; "
|
||||
"tractor.run_daemon([], registry_addrs={reg_addrs}, loglevel={ll})"
|
||||
).format(
|
||||
reg_addrs=str([reg_addr]),
|
||||
ll="'{}'".format(loglevel) if loglevel else None,
|
||||
)
|
||||
cmd: list[str] = [
|
||||
sys.executable,
|
||||
'-c', code,
|
||||
]
|
||||
kwargs = dict()
|
||||
kwargs = {}
|
||||
if platform.system() == 'Windows':
|
||||
# without this, tests hang on windows forever
|
||||
kwargs['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
|
||||
|
||||
proc = testdir.popen(
|
||||
cmdargs,
|
||||
cmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
**kwargs,
|
||||
|
|
|
@ -10,6 +10,7 @@ TODO:
|
|||
- wonder if any of it'll work on OS X?
|
||||
|
||||
"""
|
||||
from functools import partial
|
||||
import itertools
|
||||
from typing import Optional
|
||||
import platform
|
||||
|
@ -26,6 +27,10 @@ from pexpect.exceptions import (
|
|||
from tractor._testing import (
|
||||
examples_dir,
|
||||
)
|
||||
from tractor.devx._debug import (
|
||||
_pause_msg,
|
||||
_crash_msg,
|
||||
)
|
||||
from conftest import (
|
||||
_ci_env,
|
||||
)
|
||||
|
@ -123,20 +128,52 @@ def expect(
|
|||
raise
|
||||
|
||||
|
||||
def in_prompt_msg(
    prompt: str,
    parts: list[str],

    pause_on_false: bool = False,
    print_prompt_on_false: bool = True,

) -> bool:
    '''
    Predicate check if (the prompt's) std-streams output has all
    `str`-parts in it.

    Can be used in test asserts for bulk matching expected
    log/REPL output for a given `pdb` interact point.

    Parameters:
    - `prompt`: the captured output text to search.
    - `parts`: sub-strings which must all be present in `prompt`.
    - `pause_on_false`: when set, drop into a `pdbp` REPL at the
      first missing part (before returning).
    - `print_prompt_on_false`: when set, print `prompt` at the first
      missing part so the failing output is shown.

    Returns `True` only if every entry of `parts` is a sub-string of
    `prompt`; an empty `parts` trivially passes.

    '''
    for part in parts:
        if part in prompt:
            continue

        # first missing part: optionally pause and/or dump the
        # output, then report the mismatch.
        if pause_on_false:
            # imported lazily so it is only needed when pausing
            import pdbp
            pdbp.set_trace()

        if print_prompt_on_false:
            print(prompt)

        return False

    return True
|
||||
|
||||
def assert_before(
|
||||
child,
|
||||
patts: list[str],
|
||||
|
||||
**kwargs,
|
||||
|
||||
) -> None:
|
||||
|
||||
before = str(child.before.decode())
|
||||
# as in before the prompt end
|
||||
before: str = str(child.before.decode())
|
||||
assert in_prompt_msg(
|
||||
prompt=before,
|
||||
parts=patts,
|
||||
|
||||
for patt in patts:
|
||||
try:
|
||||
assert patt in before
|
||||
except AssertionError:
|
||||
print(before)
|
||||
raise
|
||||
**kwargs
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
|
@ -166,7 +203,7 @@ def ctlc(
|
|||
# XXX: disable pygments highlighting for auto-tests
|
||||
# since some envs (like actions CI) will struggle
|
||||
# with the added color-char encoding..
|
||||
from tractor._debug import TractorConfig
|
||||
from tractor.devx._debug import TractorConfig
|
||||
TractorConfig.use_pygements = False
|
||||
|
||||
yield use_ctlc
|
||||
|
@ -195,7 +232,10 @@ def test_root_actor_error(spawn, user_in_out):
|
|||
before = str(child.before.decode())
|
||||
|
||||
# make sure expected logging and error arrives
|
||||
assert "Attaching to pdb in crashed actor: ('root'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_crash_msg, "('root'"]
|
||||
)
|
||||
assert 'AssertionError' in before
|
||||
|
||||
# send user command
|
||||
|
@ -332,7 +372,10 @@ def test_subactor_error(
|
|||
child.expect(PROMPT)
|
||||
|
||||
before = str(child.before.decode())
|
||||
assert "Attaching to pdb in crashed actor: ('name_error'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_crash_msg, "('name_error'"]
|
||||
)
|
||||
|
||||
if do_next:
|
||||
child.sendline('n')
|
||||
|
@ -353,9 +396,15 @@ def test_subactor_error(
|
|||
before = str(child.before.decode())
|
||||
|
||||
# root actor gets debugger engaged
|
||||
assert "Attaching to pdb in crashed actor: ('root'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_crash_msg, "('root'"]
|
||||
)
|
||||
# error is a remote error propagated from the subactor
|
||||
assert "RemoteActorError: ('name_error'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_crash_msg, "('name_error'"]
|
||||
)
|
||||
|
||||
# another round
|
||||
if ctlc:
|
||||
|
@ -380,7 +429,10 @@ def test_subactor_breakpoint(
|
|||
child.expect(PROMPT)
|
||||
|
||||
before = str(child.before.decode())
|
||||
assert "Attaching pdb to actor: ('breakpoint_forever'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_pause_msg, "('breakpoint_forever'"]
|
||||
)
|
||||
|
||||
# do some "next" commands to demonstrate recurrent breakpoint
|
||||
# entries
|
||||
|
@ -396,7 +448,10 @@ def test_subactor_breakpoint(
|
|||
child.sendline('continue')
|
||||
child.expect(PROMPT)
|
||||
before = str(child.before.decode())
|
||||
assert "Attaching pdb to actor: ('breakpoint_forever'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_pause_msg, "('breakpoint_forever'"]
|
||||
)
|
||||
|
||||
if ctlc:
|
||||
do_ctlc(child)
|
||||
|
@ -441,7 +496,10 @@ def test_multi_subactors(
|
|||
child.expect(PROMPT)
|
||||
|
||||
before = str(child.before.decode())
|
||||
assert "Attaching pdb to actor: ('breakpoint_forever'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_pause_msg, "('breakpoint_forever'"]
|
||||
)
|
||||
|
||||
if ctlc:
|
||||
do_ctlc(child)
|
||||
|
@ -461,7 +519,10 @@ def test_multi_subactors(
|
|||
# first name_error failure
|
||||
child.expect(PROMPT)
|
||||
before = str(child.before.decode())
|
||||
assert "Attaching to pdb in crashed actor: ('name_error'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_crash_msg, "('name_error'"]
|
||||
)
|
||||
assert "NameError" in before
|
||||
|
||||
if ctlc:
|
||||
|
@ -487,7 +548,10 @@ def test_multi_subactors(
|
|||
child.sendline('c')
|
||||
child.expect(PROMPT)
|
||||
before = str(child.before.decode())
|
||||
assert "Attaching pdb to actor: ('breakpoint_forever'" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_pause_msg, "('breakpoint_forever'"]
|
||||
)
|
||||
|
||||
if ctlc:
|
||||
do_ctlc(child)
|
||||
|
@ -527,9 +591,12 @@ def test_multi_subactors(
|
|||
child.expect(PROMPT)
|
||||
before = str(child.before.decode())
|
||||
|
||||
assert_before(child, [
|
||||
assert_before(
|
||||
child, [
|
||||
# debugger attaches to root
|
||||
"Attaching to pdb in crashed actor: ('root'",
|
||||
# "Attaching to pdb in crashed actor: ('root'",
|
||||
_crash_msg,
|
||||
"('root'",
|
||||
|
||||
# expect a multierror with exceptions for each sub-actor
|
||||
"RemoteActorError: ('breakpoint_forever'",
|
||||
|
@ -537,7 +604,8 @@ def test_multi_subactors(
|
|||
"RemoteActorError: ('spawn_error'",
|
||||
"RemoteActorError: ('name_error_1'",
|
||||
'bdb.BdbQuit',
|
||||
])
|
||||
]
|
||||
)
|
||||
|
||||
if ctlc:
|
||||
do_ctlc(child)
|
||||
|
@ -574,15 +642,22 @@ def test_multi_daemon_subactors(
|
|||
# the root's tty lock first so anticipate either crash
|
||||
# message on the first entry.
|
||||
|
||||
bp_forever_msg = "Attaching pdb to actor: ('bp_forever'"
|
||||
bp_forev_parts = [_pause_msg, "('bp_forever'"]
|
||||
bp_forev_in_msg = partial(
|
||||
in_prompt_msg,
|
||||
parts=bp_forev_parts,
|
||||
)
|
||||
|
||||
name_error_msg = "NameError: name 'doggypants' is not defined"
|
||||
name_error_parts = [name_error_msg]
|
||||
|
||||
before = str(child.before.decode())
|
||||
if bp_forever_msg in before:
|
||||
next_msg = name_error_msg
|
||||
|
||||
if bp_forev_in_msg(prompt=before):
|
||||
next_parts = name_error_parts
|
||||
|
||||
elif name_error_msg in before:
|
||||
next_msg = bp_forever_msg
|
||||
next_parts = bp_forev_parts
|
||||
|
||||
else:
|
||||
raise ValueError("Neither log msg was found !?")
|
||||
|
@ -599,7 +674,10 @@ def test_multi_daemon_subactors(
|
|||
|
||||
child.sendline('c')
|
||||
child.expect(PROMPT)
|
||||
assert_before(child, [next_msg])
|
||||
assert_before(
|
||||
child,
|
||||
next_parts,
|
||||
)
|
||||
|
||||
# XXX: hooray the root clobbering the child here was fixed!
|
||||
# IMO, this demonstrates the true power of SC system design.
|
||||
|
@ -607,7 +685,7 @@ def test_multi_daemon_subactors(
|
|||
# now the root actor won't clobber the bp_forever child
|
||||
# during its first access to the debug lock, but will instead
|
||||
# wait for the lock to release, by the edge triggered
|
||||
# ``_debug.Lock.no_remote_has_tty`` event before sending cancel messages
|
||||
# ``devx._debug.Lock.no_remote_has_tty`` event before sending cancel messages
|
||||
# (via portals) to its underlings B)
|
||||
|
||||
# at some point here there should have been some warning msg from
|
||||
|
@ -623,9 +701,15 @@ def test_multi_daemon_subactors(
|
|||
child.expect(PROMPT)
|
||||
|
||||
try:
|
||||
assert_before(child, [bp_forever_msg])
|
||||
assert_before(
|
||||
child,
|
||||
bp_forev_parts,
|
||||
)
|
||||
except AssertionError:
|
||||
assert_before(child, [name_error_msg])
|
||||
assert_before(
|
||||
child,
|
||||
name_error_parts,
|
||||
)
|
||||
|
||||
else:
|
||||
if ctlc:
|
||||
|
@ -637,7 +721,10 @@ def test_multi_daemon_subactors(
|
|||
|
||||
child.sendline('c')
|
||||
child.expect(PROMPT)
|
||||
assert_before(child, [name_error_msg])
|
||||
assert_before(
|
||||
child,
|
||||
name_error_parts,
|
||||
)
|
||||
|
||||
# wait for final error in root
|
||||
# where it crashes with a boxed error
|
||||
|
@ -647,7 +734,7 @@ def test_multi_daemon_subactors(
|
|||
child.expect(PROMPT)
|
||||
assert_before(
|
||||
child,
|
||||
[bp_forever_msg]
|
||||
bp_forev_parts
|
||||
)
|
||||
except AssertionError:
|
||||
break
|
||||
|
@ -656,7 +743,9 @@ def test_multi_daemon_subactors(
|
|||
child,
|
||||
[
|
||||
# boxed error raised in root task
|
||||
"Attaching to pdb in crashed actor: ('root'",
|
||||
# "Attaching to pdb in crashed actor: ('root'",
|
||||
_crash_msg,
|
||||
"('root'",
|
||||
"_exceptions.RemoteActorError: ('name_error'",
|
||||
]
|
||||
)
|
||||
|
@ -770,7 +859,7 @@ def test_multi_nested_subactors_error_through_nurseries(
|
|||
|
||||
child = spawn('multi_nested_subactors_error_up_through_nurseries')
|
||||
|
||||
timed_out_early: bool = False
|
||||
# timed_out_early: bool = False
|
||||
|
||||
for send_char in itertools.cycle(['c', 'q']):
|
||||
try:
|
||||
|
@ -871,11 +960,14 @@ def test_root_nursery_cancels_before_child_releases_tty_lock(
|
|||
|
||||
if not timed_out_early:
|
||||
before = str(child.before.decode())
|
||||
assert_before(child, [
|
||||
assert_before(
|
||||
child,
|
||||
[
|
||||
"tractor._exceptions.RemoteActorError: ('spawner0'",
|
||||
"tractor._exceptions.RemoteActorError: ('name_error'",
|
||||
"NameError: name 'doggypants' is not defined",
|
||||
])
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def test_root_cancels_child_context_during_startup(
|
||||
|
@ -909,8 +1001,10 @@ def test_different_debug_mode_per_actor(
|
|||
|
||||
# only one actor should enter the debugger
|
||||
before = str(child.before.decode())
|
||||
assert "Attaching to pdb in crashed actor: ('debugged_boi'" in before
|
||||
assert "RuntimeError" in before
|
||||
assert in_prompt_msg(
|
||||
before,
|
||||
[_crash_msg, "('debugged_boi'", "RuntimeError"],
|
||||
)
|
||||
|
||||
if ctlc:
|
||||
do_ctlc(child)
|
||||
|
|
|
@ -20,7 +20,7 @@ from tractor._testing import (
|
|||
def run_example_in_subproc(
|
||||
loglevel: str,
|
||||
testdir,
|
||||
arb_addr: tuple[str, int],
|
||||
reg_addr: tuple[str, int],
|
||||
):
|
||||
|
||||
@contextmanager
|
||||
|
|
|
@ -15,9 +15,19 @@ async def sleep_back_actor(
|
|||
func_name,
|
||||
func_defined,
|
||||
exposed_mods,
|
||||
*,
|
||||
reg_addr: tuple,
|
||||
):
|
||||
if actor_name:
|
||||
async with tractor.find_actor(actor_name) as portal:
|
||||
async with tractor.find_actor(
|
||||
actor_name,
|
||||
# NOTE: must be set manually since
|
||||
# the subactor doesn't have the reg_addr
|
||||
# fixture code run in it!
|
||||
# TODO: maybe we should just set this once in the
|
||||
# _state mod and derive to all children?
|
||||
registry_addrs=[reg_addr],
|
||||
) as portal:
|
||||
try:
|
||||
await portal.run(__name__, func_name)
|
||||
except tractor.RemoteActorError as err:
|
||||
|
@ -52,11 +62,17 @@ async def short_sleep():
|
|||
'fail_on_syntax',
|
||||
],
|
||||
)
|
||||
def test_rpc_errors(reg_addr, to_call, testdir):
|
||||
"""Test errors when making various RPC requests to an actor
|
||||
def test_rpc_errors(
|
||||
reg_addr,
|
||||
to_call,
|
||||
testdir,
|
||||
):
|
||||
'''
|
||||
Test errors when making various RPC requests to an actor
|
||||
that either doesn't have the requested module exposed or doesn't define
|
||||
the named function.
|
||||
"""
|
||||
|
||||
'''
|
||||
exposed_mods, funcname, inside_err = to_call
|
||||
subactor_exposed_mods = []
|
||||
func_defined = globals().get(funcname, False)
|
||||
|
@ -84,8 +100,13 @@ def test_rpc_errors(reg_addr, to_call, testdir):
|
|||
|
||||
# spawn a subactor which calls us back
|
||||
async with tractor.open_nursery(
|
||||
arbiter_addr=reg_addr,
|
||||
registry_addrs=[reg_addr],
|
||||
enable_modules=exposed_mods.copy(),
|
||||
|
||||
# NOTE: will halt test in REPL if uncommented, so only
|
||||
# do that if actually debugging subactor but keep it
|
||||
# disabled for the test.
|
||||
# debug_mode=True,
|
||||
) as n:
|
||||
|
||||
actor = tractor.current_actor()
|
||||
|
@ -102,6 +123,7 @@ def test_rpc_errors(reg_addr, to_call, testdir):
|
|||
exposed_mods=exposed_mods,
|
||||
func_defined=True if func_defined else False,
|
||||
enable_modules=subactor_exposed_mods,
|
||||
reg_addr=reg_addr,
|
||||
)
|
||||
|
||||
def run():
|
||||
|
|
|
@ -32,8 +32,7 @@ async def spawn(
|
|||
|
||||
if actor.is_arbiter:
|
||||
|
||||
async with tractor.open_nursery(
|
||||
) as nursery:
|
||||
async with tractor.open_nursery() as nursery:
|
||||
|
||||
# forks here
|
||||
portal = await nursery.run_in_actor(
|
||||
|
@ -55,7 +54,9 @@ async def spawn(
|
|||
return 10
|
||||
|
||||
|
||||
def test_local_arbiter_subactor_global_state(reg_addr):
|
||||
def test_local_arbiter_subactor_global_state(
|
||||
reg_addr,
|
||||
):
|
||||
result = trio.run(
|
||||
spawn,
|
||||
True,
|
||||
|
|
|
@ -18,76 +18,49 @@
|
|||
tractor: structured concurrent ``trio``-"actors".
|
||||
|
||||
"""
|
||||
from exceptiongroup import BaseExceptionGroup
|
||||
from exceptiongroup import BaseExceptionGroup as BaseExceptionGroup
|
||||
|
||||
from ._clustering import open_actor_cluster
|
||||
from ._clustering import (
|
||||
open_actor_cluster as open_actor_cluster,
|
||||
)
|
||||
from ._context import (
|
||||
Context, # the type
|
||||
context, # a func-decorator
|
||||
Context as Context, # the type
|
||||
context as context, # a func-decorator
|
||||
)
|
||||
from ._streaming import (
|
||||
MsgStream,
|
||||
stream,
|
||||
MsgStream as MsgStream,
|
||||
stream as stream,
|
||||
)
|
||||
from ._discovery import (
|
||||
get_arbiter,
|
||||
find_actor,
|
||||
wait_for_actor,
|
||||
query_actor,
|
||||
get_arbiter as get_arbiter,
|
||||
find_actor as find_actor,
|
||||
wait_for_actor as wait_for_actor,
|
||||
query_actor as query_actor,
|
||||
)
|
||||
from ._supervise import (
|
||||
open_nursery as open_nursery,
|
||||
ActorNursery as ActorNursery,
|
||||
)
|
||||
from ._supervise import open_nursery
|
||||
from ._state import (
|
||||
current_actor,
|
||||
is_root_process,
|
||||
current_actor as current_actor,
|
||||
is_root_process as is_root_process,
|
||||
)
|
||||
from ._exceptions import (
|
||||
RemoteActorError,
|
||||
ModuleNotExposed,
|
||||
ContextCancelled,
|
||||
RemoteActorError as RemoteActorError,
|
||||
ModuleNotExposed as ModuleNotExposed,
|
||||
ContextCancelled as ContextCancelled,
|
||||
)
|
||||
from ._debug import (
|
||||
breakpoint,
|
||||
pause,
|
||||
pause_from_sync,
|
||||
post_mortem,
|
||||
from .devx import (
|
||||
breakpoint as breakpoint,
|
||||
pause as pause,
|
||||
pause_from_sync as pause_from_sync,
|
||||
post_mortem as post_mortem,
|
||||
)
|
||||
from . import msg
|
||||
from . import msg as msg
|
||||
from ._root import (
|
||||
run_daemon,
|
||||
open_root_actor,
|
||||
run_daemon as run_daemon,
|
||||
open_root_actor as open_root_actor,
|
||||
)
|
||||
from ._ipc import Channel
|
||||
from ._portal import Portal
|
||||
from ._runtime import Actor
|
||||
|
||||
|
||||
__all__ = [
|
||||
'Actor',
|
||||
'BaseExceptionGroup',
|
||||
'Channel',
|
||||
'Context',
|
||||
'ContextCancelled',
|
||||
'ModuleNotExposed',
|
||||
'MsgStream',
|
||||
'Portal',
|
||||
'RemoteActorError',
|
||||
'breakpoint',
|
||||
'context',
|
||||
'current_actor',
|
||||
'find_actor',
|
||||
'query_actor',
|
||||
'get_arbiter',
|
||||
'is_root_process',
|
||||
'msg',
|
||||
'open_actor_cluster',
|
||||
'open_nursery',
|
||||
'open_root_actor',
|
||||
'pause',
|
||||
'post_mortem',
|
||||
'pause_from_sync',
|
||||
'query_actor',
|
||||
'run_daemon',
|
||||
'stream',
|
||||
'to_asyncio',
|
||||
'wait_for_actor',
|
||||
]
|
||||
from ._ipc import Channel as Channel
|
||||
from ._portal import Portal as Portal
|
||||
from ._runtime import Actor as Actor
|
||||
|
|
|
@ -868,6 +868,9 @@ class Context:
|
|||
|
||||
# TODO: maybe we should also call `._res_scope.cancel()` if it
|
||||
# exists to support cancelling any drain loop hangs?
|
||||
# NOTE: this usage actually works here B)
|
||||
# from .devx._debug import breakpoint
|
||||
# await breakpoint()
|
||||
|
||||
# TODO: add to `Channel`?
|
||||
@property
|
||||
|
|
|
@ -15,16 +15,20 @@
|
|||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
"""
|
||||
Actor discovery API.
|
||||
Discovery (protocols) API for automatic addressing and location
|
||||
management of (service) actors.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import (
|
||||
Optional,
|
||||
Union,
|
||||
AsyncGenerator,
|
||||
AsyncContextManager,
|
||||
TYPE_CHECKING,
|
||||
)
|
||||
from contextlib import asynccontextmanager as acm
|
||||
import warnings
|
||||
|
||||
from .trionics import gather_contexts
|
||||
from ._ipc import _connect_chan, Channel
|
||||
from ._portal import (
|
||||
Portal,
|
||||
|
@ -34,13 +38,19 @@ from ._portal import (
|
|||
from ._state import current_actor, _runtime_vars
|
||||
|
||||
|
||||
@acm
|
||||
async def get_arbiter(
|
||||
if TYPE_CHECKING:
|
||||
from ._runtime import Actor
|
||||
|
||||
|
||||
@acm
|
||||
async def get_registry(
|
||||
host: str,
|
||||
port: int,
|
||||
|
||||
) -> AsyncGenerator[Union[Portal, LocalPortal], None]:
|
||||
) -> AsyncGenerator[
|
||||
Portal | LocalPortal | None,
|
||||
None,
|
||||
]:
|
||||
'''
|
||||
Return a portal instance connected to a local or remote
|
||||
arbiter.
|
||||
|
@ -51,16 +61,33 @@ async def get_arbiter(
|
|||
if not actor:
|
||||
raise RuntimeError("No actor instance has been defined yet?")
|
||||
|
||||
if actor.is_arbiter:
|
||||
if actor.is_registrar:
|
||||
# we're already the arbiter
|
||||
# (likely a re-entrant call from the arbiter actor)
|
||||
yield LocalPortal(actor, Channel((host, port)))
|
||||
yield LocalPortal(
|
||||
actor,
|
||||
Channel((host, port))
|
||||
)
|
||||
else:
|
||||
async with _connect_chan(host, port) as chan:
|
||||
async with (
|
||||
_connect_chan(host, port) as chan,
|
||||
open_portal(chan) as regstr_ptl,
|
||||
):
|
||||
yield regstr_ptl
|
||||
|
||||
async with open_portal(chan) as arb_portal:
|
||||
|
||||
yield arb_portal
|
||||
|
||||
# TODO: deprecate and then remove this `_arbiter` form!
|
||||
@acm
async def get_arbiter(*args, **kwargs):
    '''
    Deprecated alias for `get_registry()`.

    Emits a `DeprecationWarning` on entry and then forwards all
    positional and keyword arguments to `get_registry()`, yielding
    whatever that context manager yields.

    '''
    warnings.warn(
        '`tractor.get_arbiter()` is now deprecated!\n'
        'Use `.get_registry()` instead!',
        DeprecationWarning,
        stacklevel=2,
    )
    async with get_registry(*args, **kwargs) as to_yield:
        yield to_yield
|
||||
|
||||
|
||||
@acm
|
||||
|
@ -68,51 +95,80 @@ async def get_root(
|
|||
**kwargs,
|
||||
) -> AsyncGenerator[Portal, None]:
|
||||
|
||||
# TODO: rename mailbox to `_root_maddr` when we finally
|
||||
# add and impl libp2p multi-addrs?
|
||||
host, port = _runtime_vars['_root_mailbox']
|
||||
assert host is not None
|
||||
|
||||
async with _connect_chan(host, port) as chan:
|
||||
async with open_portal(chan, **kwargs) as portal:
|
||||
async with (
|
||||
_connect_chan(host, port) as chan,
|
||||
open_portal(chan, **kwargs) as portal,
|
||||
):
|
||||
yield portal
|
||||
|
||||
|
||||
@acm
|
||||
async def query_actor(
|
||||
name: str,
|
||||
arbiter_sockaddr: Optional[tuple[str, int]] = None,
|
||||
arbiter_sockaddr: tuple[str, int] | None = None,
|
||||
regaddr: tuple[str, int] | None = None,
|
||||
|
||||
) -> AsyncGenerator[tuple[str, int], None]:
|
||||
) -> AsyncGenerator[
|
||||
tuple[str, int] | None,
|
||||
None,
|
||||
]:
|
||||
'''
|
||||
Simple address lookup for a given actor name.
|
||||
Make a transport address lookup for an actor name to a specific
|
||||
registrar.
|
||||
|
||||
Returns the (socket) address or ``None``.
|
||||
Returns the (socket) address or ``None`` if no entry under that
|
||||
name exists for the given registrar listening @ `regaddr`.
|
||||
|
||||
'''
|
||||
actor = current_actor()
|
||||
async with get_arbiter(
|
||||
*arbiter_sockaddr or actor._arb_addr
|
||||
) as arb_portal:
|
||||
actor: Actor = current_actor()
|
||||
if (
|
||||
name == 'registrar'
|
||||
and actor.is_registrar
|
||||
):
|
||||
raise RuntimeError(
|
||||
'The current actor IS the registry!?'
|
||||
)
|
||||
|
||||
sockaddr = await arb_portal.run_from_ns(
|
||||
if arbiter_sockaddr is not None:
|
||||
warnings.warn(
|
||||
'`tractor.query_actor(regaddr=<blah>)` is deprecated.\n'
|
||||
'Use `registry_addrs: list[tuple]` instead!',
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
regaddr: list[tuple[str, int]] = arbiter_sockaddr
|
||||
|
||||
reg_portal: Portal
|
||||
regaddr: tuple[str, int] = regaddr or actor.reg_addrs[0]
|
||||
async with get_registry(*regaddr) as reg_portal:
|
||||
# TODO: return portals to all available actors - for now
|
||||
# just the last one that registered
|
||||
sockaddr: tuple[str, int] = await reg_portal.run_from_ns(
|
||||
'self',
|
||||
'find_actor',
|
||||
name=name,
|
||||
)
|
||||
|
||||
# TODO: return portals to all available actors - for now just
|
||||
# the last one that registered
|
||||
if name == 'arbiter' and actor.is_arbiter:
|
||||
raise RuntimeError("The current actor is the arbiter")
|
||||
|
||||
yield sockaddr if sockaddr else None
|
||||
yield sockaddr
|
||||
|
||||
|
||||
@acm
|
||||
async def find_actor(
|
||||
name: str,
|
||||
arbiter_sockaddr: tuple[str, int] | None = None
|
||||
arbiter_sockaddr: tuple[str, int] | None = None,
|
||||
registry_addrs: list[tuple[str, int]] | None = None,
|
||||
|
||||
) -> AsyncGenerator[Optional[Portal], None]:
|
||||
only_first: bool = True,
|
||||
raise_on_none: bool = False,
|
||||
|
||||
) -> AsyncGenerator[
|
||||
Portal | list[Portal] | None,
|
||||
None,
|
||||
]:
|
||||
'''
|
||||
Ask the arbiter to find actor(s) by name.
|
||||
|
||||
|
@ -120,11 +176,23 @@ async def find_actor(
|
|||
known to the arbiter.
|
||||
|
||||
'''
|
||||
if arbiter_sockaddr is not None:
|
||||
warnings.warn(
|
||||
'`tractor.find_actor(arbiter_sockaddr=<blah>)` is deprecated.\n'
|
||||
'Use `registry_addrs: list[tuple]` instead!',
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
registry_addrs: list[tuple[str, int]] = [arbiter_sockaddr]
|
||||
|
||||
@acm
|
||||
async def maybe_open_portal_from_reg_addr(
|
||||
addr: tuple[str, int],
|
||||
):
|
||||
async with query_actor(
|
||||
name=name,
|
||||
arbiter_sockaddr=arbiter_sockaddr,
|
||||
regaddr=addr,
|
||||
) as sockaddr:
|
||||
|
||||
if sockaddr:
|
||||
async with _connect_chan(*sockaddr) as chan:
|
||||
async with open_portal(chan) as portal:
|
||||
|
@ -132,12 +200,55 @@ async def find_actor(
|
|||
else:
|
||||
yield None
|
||||
|
||||
if not registry_addrs:
|
||||
# XXX NOTE: make sure to dynamically read the value on
|
||||
# every call since something may change it globally (eg.
|
||||
# like in our discovery test suite)!
|
||||
from . import _root
|
||||
registry_addrs = _root._default_lo_addrs
|
||||
|
||||
maybe_portals: list[
|
||||
AsyncContextManager[tuple[str, int]]
|
||||
] = list(
|
||||
maybe_open_portal_from_reg_addr(addr)
|
||||
for addr in registry_addrs
|
||||
)
|
||||
|
||||
async with gather_contexts(
|
||||
mngrs=maybe_portals,
|
||||
) as portals:
|
||||
# log.runtime(
|
||||
# 'Gathered portals:\n'
|
||||
# f'{portals}'
|
||||
# )
|
||||
# NOTE: `gather_contexts()` will return a
|
||||
# `tuple[None, None, ..., None]` if no contact
|
||||
# can be made with any registrar at any of the
|
||||
# N provided addrs!
|
||||
if not any(portals):
|
||||
if raise_on_none:
|
||||
raise RuntimeError(
|
||||
f'No actor "{name}" found registered @ {registry_addrs}'
|
||||
)
|
||||
yield None
|
||||
return
|
||||
|
||||
portals: list[Portal] = list(portals)
|
||||
if only_first:
|
||||
yield portals[0]
|
||||
|
||||
else:
|
||||
# TODO: currently this may return multiple portals
|
||||
# given there are multi-homed or multiple registrars..
|
||||
# SO, we probably need de-duplication logic?
|
||||
yield portals
|
||||
|
||||
|
||||
@acm
|
||||
async def wait_for_actor(
|
||||
name: str,
|
||||
arbiter_sockaddr: tuple[str, int] | None = None,
|
||||
# registry_addr: tuple[str, int] | None = None,
|
||||
registry_addr: tuple[str, int] | None = None,
|
||||
|
||||
) -> AsyncGenerator[Portal, None]:
|
||||
'''
|
||||
|
@ -146,17 +257,31 @@ async def wait_for_actor(
|
|||
A portal to the first registered actor is returned.
|
||||
|
||||
'''
|
||||
actor = current_actor()
|
||||
actor: Actor = current_actor()
|
||||
|
||||
async with get_arbiter(
|
||||
*arbiter_sockaddr or actor._arb_addr,
|
||||
) as arb_portal:
|
||||
sockaddrs = await arb_portal.run_from_ns(
|
||||
if arbiter_sockaddr is not None:
|
||||
warnings.warn(
|
||||
'`tractor.wait_for_actor(arbiter_sockaddr=<foo>)` is deprecated.\n'
|
||||
'Use `registry_addr: tuple` instead!',
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
registry_addr: tuple[str, int] = arbiter_sockaddr
|
||||
|
||||
# TODO: use `.trionics.gather_contexts()` like
|
||||
# above in `find_actor()` as well?
|
||||
reg_portal: Portal
|
||||
regaddr: tuple[str, int] = registry_addr or actor.reg_addrs[0]
|
||||
async with get_registry(*regaddr) as reg_portal:
|
||||
sockaddrs = await reg_portal.run_from_ns(
|
||||
'self',
|
||||
'wait_for_actor',
|
||||
name=name,
|
||||
)
|
||||
sockaddr = sockaddrs[-1]
|
||||
|
||||
# get latest registered addr by default?
|
||||
# TODO: offer multi-portal yields in multi-homed case?
|
||||
sockaddr: tuple[str, int] = sockaddrs[-1]
|
||||
|
||||
async with _connect_chan(*sockaddr) as chan:
|
||||
async with open_portal(chan) as portal:
|
||||
|
|
|
@ -47,8 +47,8 @@ log = get_logger(__name__)
|
|||
|
||||
def _mp_main(
|
||||
|
||||
actor: Actor, # type: ignore
|
||||
accept_addr: tuple[str, int],
|
||||
actor: Actor,
|
||||
accept_addrs: list[tuple[str, int]],
|
||||
forkserver_info: tuple[Any, Any, Any, Any, Any],
|
||||
start_method: SpawnMethodKey,
|
||||
parent_addr: tuple[str, int] | None = None,
|
||||
|
@ -77,8 +77,8 @@ def _mp_main(
|
|||
log.debug(f"parent_addr is {parent_addr}")
|
||||
trio_main = partial(
|
||||
async_main,
|
||||
actor,
|
||||
accept_addr,
|
||||
actor=actor,
|
||||
accept_addrs=accept_addrs,
|
||||
parent_addr=parent_addr
|
||||
)
|
||||
try:
|
||||
|
@ -96,7 +96,7 @@ def _mp_main(
|
|||
|
||||
def _trio_main(
|
||||
|
||||
actor: Actor, # type: ignore
|
||||
actor: Actor,
|
||||
*,
|
||||
parent_addr: tuple[str, int] | None = None,
|
||||
infect_asyncio: bool = False,
|
||||
|
|
|
@ -517,7 +517,9 @@ class Channel:
|
|||
|
||||
@acm
|
||||
async def _connect_chan(
|
||||
host: str, port: int
|
||||
host: str,
|
||||
port: int
|
||||
|
||||
) -> typing.AsyncGenerator[Channel, None]:
|
||||
'''
|
||||
Create and connect a channel with disconnect on context manager
|
||||
|
|
|
@ -0,0 +1,151 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright 2018-eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
'''
|
||||
Multiaddress parser and utils according to the spec(s) defined by
|
||||
`libp2p` and used in dependent projects such as `ipfs`:
|
||||
|
||||
- https://docs.libp2p.io/concepts/fundamentals/addressing/
|
||||
- https://github.com/libp2p/specs/blob/master/addressing/README.md
|
||||
|
||||
'''
|
||||
from typing import Iterator
|
||||
|
||||
from bidict import bidict
|
||||
|
||||
# TODO: see if we can leverage libp2p ecosys projects instead of
|
||||
# rolling our own (parser) impls of the above addressing specs:
|
||||
# - https://github.com/libp2p/py-libp2p
|
||||
# - https://docs.libp2p.io/concepts/nat/circuit-relay/#relay-addresses
|
||||
# Maps each supported protocol key to its approximate OSI layer
# number.
# NOTE: this is a plain `dict` and not a `bidict` since several
# protocols share the same layer number, so the values are not
# unique and the table can not be inverted one-to-one.
prots: dict[str, int] = {
    'ipv4': 3,
    'ipv6': 3,
    'wg': 3,

    'tcp': 4,
    'udp': 4,

    # TODO: support the next-gen shite Bo
    # 'quic': 4,
    # 'ssh': 7,  # via rsyscall bootstrapping
}
|
||||
|
||||
# Ordered names of the path parameters expected after each protocol
# key in a multiaddress, eg. `/wg/<addr>/<port>/<pubkey>`.
# The tuples are variable length, hence `tuple[str, ...]`.
prot_params: dict[str, tuple[str, ...]] = {
    'ipv4': ('addr',),
    'ipv6': ('addr',),
    'wg': ('addr', 'port', 'pubkey'),

    'tcp': ('port',),
    'udp': ('port',),

    # 'quic': ('port',),
    # 'ssh': ('port',),
}
|
||||
|
||||
|
||||
def iter_prot_layers(
    multiaddr: str,
) -> Iterator[
    tuple[
        str,
        list[str]
    ]
]:
    '''
    Unpack a libp2p style "multiaddress" into multiple "segments"
    for each "layer" of the protocol stack (in OSI terms).

    Each yielded pair is `(<protocol-key>, <params>)` where the key
    is an entry of the (module global) `prots` table and `<params>`
    holds, in order, the path tokens found between that key and the
    next known key (or the end of the address).

    An address without any tokens (`''` or `'/'`) yields nothing.

    Raises `ValueError` when the address does not start with the
    root '/' or when a non-empty token shows up before any known
    protocol key.

    '''
    root, *tokens = multiaddr.split('/')
    if root:
        # there must be a root '/' on LHS.
        # NOTE: an explicit raise instead of an `assert` so that the
        # check is not stripped when running under `python -O`.
        raise ValueError(
            f'Multiaddress must start with a root "/": {multiaddr!r}'
        )

    prot: str | None = None
    params: list[str] = []
    for token in tokens:
        # every prot path should start with a known key-str.
        if token in prots:
            if prot is not None:
                # a new key closes the previous layer's segment
                yield prot, params

            prot = token
            params = []

        elif prot is not None:
            params.append(token)

        elif token:
            # a param with no protocol to attach it to; never emit
            # a `(None, params)` pseudo-layer for it.
            raise ValueError(
                f'Expected a protocol key but got {token!r} in '
                f'multiaddress {multiaddr!r}'
            )

    # flush the last opened segment, if there was any at all
    if prot is not None:
        yield prot, params
|
||||
|
||||
|
||||
def parse_maddr(
    multiaddr: str,
) -> dict[str, str | int | dict]:
    '''
    Parse a libp2p style "multiaddress" into its distinct protocol
    segments where each segment is of the form:

        `../<protocol>/<param0>/<param1>/../<paramN>`

    and is loaded into a (order preserving) `layers: dict[str,
    dict[str, Any]]` which holds each protocol-layer-segment of the
    original `str` path as a separate entry according to its approx
    OSI "layer number".

    Any `paramN` in the path must be distinctly defined by a str-token in the
    (module global) `prot_params` table.

    For eg. for wireguard which requires an address, port number and publickey
    the protocol params are specified as the entry:

        'wg': ('addr', 'port', 'pubkey'),

    and are thus parsed from a maddr in that order:
        `'/wg/1.1.1.1/51820/<pubkey>'`

    Every entry also carries the protocol's `prots` value under the
    `'layer'` key and any `'port'` param is converted to `int`.
    Path tokens beyond the ones declared for a protocol are ignored.

    Raises `ValueError` when a segment has fewer params than its
    `prot_params` entry declares or when a port is not an integer.

    '''
    layers: dict[str, str | int | dict] = {}
    for prot_key, params in iter_prot_layers(multiaddr):

        layer: int = prots[prot_key]  # OSI layer used for sorting
        ep: dict[str, int | str] = {'layer': layer}
        layers[prot_key] = ep

        # TODO; validation and resolving of names:
        # - each param via a validator provided as part of the
        #   prot_params def? (also see `"port"` case below..)
        # - do a resolv step that will check addrs against
        #   any loaded network.resolv: dict[str, str]
        expected: tuple[str, ...] = prot_params[prot_key]
        if len(params) < len(expected):
            # report which named params are missing instead of
            # failing with an opaque "pop from empty list".
            missing = expected[len(params):]
            raise ValueError(
                f'Protocol {prot_key!r} in {multiaddr!r} is missing '
                f'param(s): {missing}'
            )

        # pair each declared param name with its path token, in order
        for key, val in zip(expected, params):

            # TODO: UGHH, dunno what we should do for validation
            # here, put it in the params spec somehow?
            if key == 'port':
                val = int(val)

            ep[key] = val

    return layers
|
|
@ -461,7 +461,12 @@ class LocalPortal:
|
|||
actor: 'Actor' # type: ignore # noqa
|
||||
channel: Channel
|
||||
|
||||
async def run_from_ns(self, ns: str, func_name: str, **kwargs) -> Any:
|
||||
async def run_from_ns(
|
||||
self,
|
||||
ns: str,
|
||||
func_name: str,
|
||||
**kwargs,
|
||||
) -> Any:
|
||||
'''
|
||||
Run a requested local function from a namespace path and
|
||||
return its result.
|
||||
|
|
180
tractor/_root.py
180
tractor/_root.py
|
@ -25,7 +25,6 @@ import logging
|
|||
import signal
|
||||
import sys
|
||||
import os
|
||||
import typing
|
||||
import warnings
|
||||
|
||||
|
||||
|
@ -38,7 +37,7 @@ from ._runtime import (
|
|||
# Arbiter as Registry,
|
||||
async_main,
|
||||
)
|
||||
from . import _debug
|
||||
from .devx import _debug
|
||||
from . import _spawn
|
||||
from . import _state
|
||||
from . import log
|
||||
|
@ -47,8 +46,14 @@ from ._exceptions import is_multi_cancelled
|
|||
|
||||
|
||||
# set at startup and after forks
|
||||
_default_arbiter_host: str = '127.0.0.1'
|
||||
_default_arbiter_port: int = 1616
|
||||
_default_host: str = '127.0.0.1'
|
||||
_default_port: int = 1616
|
||||
|
||||
# default registry always on localhost
|
||||
_default_lo_addrs: list[tuple[str, int]] = [(
|
||||
_default_host,
|
||||
_default_port,
|
||||
)]
|
||||
|
||||
|
||||
logger = log.get_logger('tractor')
|
||||
|
@ -59,10 +64,10 @@ async def open_root_actor(
|
|||
|
||||
*,
|
||||
# defaults are above
|
||||
arbiter_addr: tuple[str, int] | None = None,
|
||||
registry_addrs: list[tuple[str, int]] | None = None,
|
||||
|
||||
# defaults are above
|
||||
registry_addr: tuple[str, int] | None = None,
|
||||
arbiter_addr: tuple[str, int] | None = None,
|
||||
|
||||
name: str | None = 'root',
|
||||
|
||||
|
@ -80,7 +85,11 @@ async def open_root_actor(
|
|||
enable_modules: list | None = None,
|
||||
rpc_module_paths: list | None = None,
|
||||
|
||||
) -> typing.Any:
|
||||
# NOTE: allow caller to ensure that only one registry exists
|
||||
# and that this call creates it.
|
||||
ensure_registry: bool = False,
|
||||
|
||||
) -> Actor:
|
||||
'''
|
||||
Runtime init entry point for ``tractor``.
|
||||
|
||||
|
@ -90,7 +99,7 @@ async def open_root_actor(
|
|||
# https://github.com/python-trio/trio/issues/1155#issuecomment-742964018
|
||||
builtin_bp_handler = sys.breakpointhook
|
||||
orig_bp_path: str | None = os.environ.get('PYTHONBREAKPOINT', None)
|
||||
os.environ['PYTHONBREAKPOINT'] = 'tractor._debug.pause_from_sync'
|
||||
os.environ['PYTHONBREAKPOINT'] = 'tractor.devx._debug.pause_from_sync'
|
||||
|
||||
# attempt to retrieve ``trio``'s sigint handler and stash it
|
||||
# on our debugger lock state.
|
||||
|
@ -116,20 +125,19 @@ async def open_root_actor(
|
|||
|
||||
if arbiter_addr is not None:
|
||||
warnings.warn(
|
||||
'`arbiter_addr` is now deprecated and has been renamed to'
|
||||
'`registry_addr`.\nUse that instead..',
|
||||
'`arbiter_addr` is now deprecated\n'
|
||||
'Use `registry_addrs: list[tuple]` instead..',
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
registry_addrs = [arbiter_addr]
|
||||
|
||||
registry_addr = (host, port) = (
|
||||
registry_addr
|
||||
or arbiter_addr
|
||||
or (
|
||||
_default_arbiter_host,
|
||||
_default_arbiter_port,
|
||||
)
|
||||
registry_addrs: list[tuple[str, int]] = (
|
||||
registry_addrs
|
||||
or
|
||||
_default_lo_addrs
|
||||
)
|
||||
assert registry_addrs
|
||||
|
||||
loglevel = (loglevel or log._default_loglevel).upper()
|
||||
|
||||
|
@ -138,7 +146,7 @@ async def open_root_actor(
|
|||
|
||||
# expose internal debug module to every actor allowing
|
||||
# for use of ``await tractor.breakpoint()``
|
||||
enable_modules.append('tractor._debug')
|
||||
enable_modules.append('tractor.devx._debug')
|
||||
|
||||
# if debug mode get's enabled *at least* use that level of
|
||||
# logging for some informative console prompts.
|
||||
|
@ -158,73 +166,131 @@ async def open_root_actor(
|
|||
|
||||
log.get_console_log(loglevel)
|
||||
|
||||
try:
|
||||
# make a temporary connection to see if an arbiter exists,
|
||||
# if one can't be made quickly we assume none exists.
|
||||
arbiter_found = False
|
||||
# closed into below ping task-func
|
||||
ponged_addrs: list[tuple[str, int]] = []
|
||||
|
||||
# TODO: this connect-and-bail forces us to have to carefully
|
||||
# rewrap TCP 104-connection-reset errors as EOF so as to avoid
|
||||
# propagating cancel-causing errors to the channel-msg loop
|
||||
# machinery. Likely it would be better to eventually have
|
||||
# a "discovery" protocol with basic handshake instead.
|
||||
with trio.move_on_after(1):
|
||||
async with _connect_chan(host, port):
|
||||
arbiter_found = True
|
||||
async def ping_tpt_socket(
|
||||
addr: tuple[str, int],
|
||||
timeout: float = 1,
|
||||
) -> None:
|
||||
'''
|
||||
Attempt temporary connection to see if a registry is
|
||||
listening at the requested address by a transport layer
|
||||
ping.
|
||||
|
||||
If a connection can't be made quickly we assume no
|
||||
server is listening at that addr.
|
||||
|
||||
'''
|
||||
try:
|
||||
# TODO: this connect-and-bail forces us to have to
|
||||
# carefully rewrap TCP 104-connection-reset errors as
|
||||
# EOF so as to avoid propagating cancel-causing errors
|
||||
# to the channel-msg loop machinery. Likely it would
|
||||
# be better to eventually have a "discovery" protocol
|
||||
# with basic handshake instead?
|
||||
with trio.move_on_after(timeout):
|
||||
async with _connect_chan(*addr):
|
||||
ponged_addrs.append(addr)
|
||||
|
||||
except OSError:
|
||||
# TODO: make this a "discovery" log level?
|
||||
logger.warning(f"No actor registry found @ {host}:{port}")
|
||||
logger.warning(f'No actor registry found @ {addr}')
|
||||
|
||||
# create a local actor and start up its main routine/task
|
||||
if arbiter_found:
|
||||
async with trio.open_nursery() as tn:
|
||||
for addr in registry_addrs:
|
||||
tn.start_soon(
|
||||
ping_tpt_socket,
|
||||
tuple(addr), # TODO: just drop this requirement?
|
||||
)
|
||||
|
||||
trans_bind_addrs: list[tuple[str, int]] = []
|
||||
|
||||
# Create a new local root-actor instance which IS NOT THE
|
||||
# REGISTRAR
|
||||
if ponged_addrs:
|
||||
|
||||
if ensure_registry:
|
||||
raise RuntimeError(
|
||||
f'Failed to open `{name}`@{ponged_addrs}: '
|
||||
'registry socket(s) already bound'
|
||||
)
|
||||
|
||||
# we were able to connect to an arbiter
|
||||
logger.info(f"Arbiter seems to exist @ {host}:{port}")
|
||||
logger.info(
|
||||
f'Registry(s) seem(s) to exist @ {ponged_addrs}'
|
||||
)
|
||||
|
||||
actor = Actor(
|
||||
name or 'anonymous',
|
||||
arbiter_addr=registry_addr,
|
||||
name=name or 'anonymous',
|
||||
registry_addrs=ponged_addrs,
|
||||
loglevel=loglevel,
|
||||
enable_modules=enable_modules,
|
||||
)
|
||||
host, port = (host, 0)
|
||||
# DO NOT use the registry_addrs as the transport server
|
||||
# addrs for this new non-registrar, root-actor.
|
||||
for host, port in ponged_addrs:
|
||||
# NOTE: zero triggers dynamic OS port allocation
|
||||
trans_bind_addrs.append((host, 0))
|
||||
|
||||
# Start this local actor as the "registrar", aka a regular
|
||||
# actor who manages the local registry of "mailboxes" of
|
||||
# other process-tree-local sub-actors.
|
||||
else:
|
||||
# start this local actor as the arbiter (aka a regular actor who
|
||||
# manages the local registry of "mailboxes")
|
||||
|
||||
# Note that if the current actor is the arbiter it is desirable
|
||||
# for it to stay up indefinitely until a re-election process has
|
||||
# taken place - which is not implemented yet FYI).
|
||||
# NOTE that if the current actor IS THE REGISTRAR, the
|
||||
# following init steps are taken:
|
||||
# - the transport layer server is bound to each (host, port)
|
||||
# pair defined in provided registry_addrs, or the default.
|
||||
trans_bind_addrs = registry_addrs
|
||||
|
||||
# - it is normally desirable for any registrar to stay up
|
||||
# indefinitely until either all registered (child/sub)
|
||||
# actors are terminated (via SC supervision) or,
|
||||
# a re-election process has taken place.
|
||||
# NOTE: all of ^ which is not implemented yet - see:
|
||||
# https://github.com/goodboy/tractor/issues/216
|
||||
# https://github.com/goodboy/tractor/pull/348
|
||||
# https://github.com/goodboy/tractor/issues/296
|
||||
|
||||
actor = Arbiter(
|
||||
name or 'arbiter',
|
||||
arbiter_addr=registry_addr,
|
||||
name or 'registrar',
|
||||
registry_addrs=registry_addrs,
|
||||
loglevel=loglevel,
|
||||
enable_modules=enable_modules,
|
||||
)
|
||||
|
||||
# Start up main task set via core actor-runtime nurseries.
|
||||
try:
|
||||
# assign process-local actor
|
||||
_state._current_actor = actor
|
||||
|
||||
# start local channel-server and fake the portal API
|
||||
# NOTE: this won't block since we provide the nursery
|
||||
logger.info(f"Starting local {actor} @ {host}:{port}")
|
||||
ml_addrs_str: str = '\n'.join(
|
||||
f'@{addr}' for addr in trans_bind_addrs
|
||||
)
|
||||
logger.info(
|
||||
f'Starting local {actor.uid} on the following transport addrs:\n'
|
||||
f'{ml_addrs_str}'
|
||||
)
|
||||
|
||||
# start the actor runtime in a new task
|
||||
async with trio.open_nursery() as nursery:
|
||||
|
||||
# ``_runtime.async_main()`` creates an internal nursery and
|
||||
# thus blocks here until the entire underlying actor tree has
|
||||
# terminated thereby conducting structured concurrency.
|
||||
|
||||
# ``_runtime.async_main()`` creates an internal nursery
|
||||
# and blocks here until any underlying actor(-process)
|
||||
# tree has terminated thereby conducting so called
|
||||
# "end-to-end" structured concurrency throughout an
|
||||
# entire hierarchical python sub-process set; all
|
||||
# "actor runtime" primitives are SC-compat and thus all
|
||||
# transitively spawned actors/processes must be as
|
||||
# well.
|
||||
await nursery.start(
|
||||
partial(
|
||||
async_main,
|
||||
actor,
|
||||
accept_addr=(host, port),
|
||||
accept_addrs=trans_bind_addrs,
|
||||
parent_addr=None
|
||||
)
|
||||
)
|
||||
|
@ -236,7 +302,7 @@ async def open_root_actor(
|
|||
BaseExceptionGroup,
|
||||
) as err:
|
||||
|
||||
entered = await _debug._maybe_enter_pm(err)
|
||||
entered: bool = await _debug._maybe_enter_pm(err)
|
||||
if (
|
||||
not entered
|
||||
and
|
||||
|
@ -244,7 +310,8 @@ async def open_root_actor(
|
|||
):
|
||||
logger.exception('Root actor crashed:\n')
|
||||
|
||||
# always re-raise
|
||||
# ALWAYS re-raise any error bubbled up from the
|
||||
# runtime!
|
||||
raise
|
||||
|
||||
finally:
|
||||
|
@ -265,7 +332,7 @@ async def open_root_actor(
|
|||
_state._current_actor = None
|
||||
_state._last_actor_terminated = actor
|
||||
|
||||
# restore breakpoint hook state
|
||||
# restore built-in `breakpoint()` hook state
|
||||
sys.breakpointhook = builtin_bp_handler
|
||||
if orig_bp_path is not None:
|
||||
os.environ['PYTHONBREAKPOINT'] = orig_bp_path
|
||||
|
@ -281,10 +348,7 @@ def run_daemon(
|
|||
|
||||
# runtime kwargs
|
||||
name: str | None = 'root',
|
||||
registry_addr: tuple[str, int] = (
|
||||
_default_arbiter_host,
|
||||
_default_arbiter_port,
|
||||
),
|
||||
registry_addrs: list[tuple[str, int]] = _default_lo_addrs,
|
||||
|
||||
start_method: str | None = None,
|
||||
debug_mode: bool = False,
|
||||
|
@ -308,7 +372,7 @@ def run_daemon(
|
|||
async def _main():
|
||||
|
||||
async with open_root_actor(
|
||||
registry_addr=registry_addr,
|
||||
registry_addrs=registry_addrs,
|
||||
name=name,
|
||||
start_method=start_method,
|
||||
debug_mode=debug_mode,
|
||||
|
|
|
@ -45,6 +45,7 @@ from functools import partial
|
|||
from itertools import chain
|
||||
import importlib
|
||||
import importlib.util
|
||||
import os
|
||||
from pprint import pformat
|
||||
import signal
|
||||
import sys
|
||||
|
@ -55,7 +56,7 @@ from typing import (
|
|||
)
|
||||
import uuid
|
||||
from types import ModuleType
|
||||
import os
|
||||
import warnings
|
||||
|
||||
import trio
|
||||
from trio import (
|
||||
|
@ -77,8 +78,8 @@ from ._exceptions import (
|
|||
ContextCancelled,
|
||||
TransportClosed,
|
||||
)
|
||||
from ._discovery import get_arbiter
|
||||
from . import _debug
|
||||
from .devx import _debug
|
||||
from ._discovery import get_registry
|
||||
from ._portal import Portal
|
||||
from . import _state
|
||||
from . import _mp_fixup_main
|
||||
|
@ -127,6 +128,11 @@ class Actor:
|
|||
# ugh, we need to get rid of this and replace with a "registry" sys
|
||||
# https://github.com/goodboy/tractor/issues/216
|
||||
is_arbiter: bool = False
|
||||
|
||||
@property
def is_registrar(self) -> bool:
    '''
    Alias reporting the value of the `.is_arbiter` flag under the
    newer "registrar" naming.

    '''
    return self.is_arbiter
|
||||
|
||||
msg_buffer_size: int = 2**6
|
||||
|
||||
# nursery placeholders filled in by `async_main()` after fork
|
||||
|
@ -164,8 +170,12 @@ class Actor:
|
|||
enable_modules: list[str] = [],
|
||||
uid: str | None = None,
|
||||
loglevel: str | None = None,
|
||||
registry_addrs: list[tuple[str, int]] | None = None,
|
||||
spawn_method: str | None = None,
|
||||
|
||||
# TODO: remove!
|
||||
arbiter_addr: tuple[str, int] | None = None,
|
||||
spawn_method: str | None = None
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
This constructor is called in the parent actor **before** the spawning
|
||||
|
@ -187,29 +197,32 @@ class Actor:
|
|||
self._parent_main_data = _mp_fixup_main._mp_figure_out_main()
|
||||
|
||||
# always include debugging tools module
|
||||
enable_modules.append('tractor._debug')
|
||||
enable_modules.append('tractor.devx._debug')
|
||||
|
||||
mods = {}
|
||||
self.enable_modules: dict[str, str] = {}
|
||||
for name in enable_modules:
|
||||
mod = importlib.import_module(name)
|
||||
mods[name] = _get_mod_abspath(mod)
|
||||
mod: ModuleType = importlib.import_module(name)
|
||||
self.enable_modules[name] = _get_mod_abspath(mod)
|
||||
|
||||
self.enable_modules = mods
|
||||
self._mods: dict[str, ModuleType] = {}
|
||||
self.loglevel = loglevel
|
||||
self.loglevel: str = loglevel
|
||||
|
||||
self._arb_addr: tuple[str, int] | None = (
|
||||
str(arbiter_addr[0]),
|
||||
int(arbiter_addr[1])
|
||||
) if arbiter_addr else None
|
||||
if arbiter_addr is not None:
|
||||
warnings.warn(
|
||||
'`Actor(arbiter_addr=<blah>)` is now deprecated.\n'
|
||||
'Use `registry_addrs: list[tuple]` instead.',
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
registry_addrs: list[tuple[str, int]] = [arbiter_addr]
|
||||
|
||||
# marked by the process spawning backend at startup
|
||||
# will be None for the parent most process started manually
|
||||
# by the user (currently called the "arbiter")
|
||||
self._spawn_method = spawn_method
|
||||
self._spawn_method: str = spawn_method
|
||||
|
||||
self._peers: defaultdict = defaultdict(list)
|
||||
self._peer_connected: dict = {}
|
||||
self._peer_connected: dict[tuple[str, str], trio.Event] = {}
|
||||
self._no_more_peers = trio.Event()
|
||||
self._no_more_peers.set()
|
||||
self._ongoing_rpc_tasks = trio.Event()
|
||||
|
@ -239,6 +252,44 @@ class Actor:
|
|||
ActorNursery | None,
|
||||
] = {} # type: ignore # noqa
|
||||
|
||||
# when provided, init the registry addresses property from
|
||||
# input via the validator.
|
||||
self._reg_addrs: list[tuple[str, int]] = []
|
||||
if registry_addrs:
|
||||
self.reg_addrs: list[tuple[str, int]] = registry_addrs
|
||||
|
||||
@property
def reg_addrs(self) -> list[tuple[str, int]]:
    '''
    The (socket) addresses of every registry actor that is known
    to (and should be contactable by) this actor.

    '''
    return self._reg_addrs

@reg_addrs.setter
def reg_addrs(
    self,
    addrs: list[tuple[str, int]],
) -> None:
    # a falsy input (eg. an empty list) is only warned about; the
    # currently stored addresses are left as they are.
    if not addrs:
        log.warning(
            'Empty registry address list is invalid:\n'
            f'{addrs}'
        )
        return

    # the discovery sys depends on these being correct so every
    # entry is checked to be a `tuple` before anything is stored.
    if any(
        not isinstance(entry, tuple)
        for entry in addrs
    ):
        raise ValueError(
            'Expected `Actor.reg_addrs: list[tuple[str, int]]`\n'
            f'Got {addrs}'
        )

    self._reg_addrs = addrs
|
||||
|
||||
async def wait_for_peer(
|
||||
self, uid: tuple[str, str]
|
||||
) -> tuple[trio.Event, Channel]:
|
||||
|
@ -336,6 +387,12 @@ class Actor:
|
|||
self._no_more_peers = trio.Event() # unset by making new
|
||||
chan = Channel.from_stream(stream)
|
||||
their_uid: tuple[str, str]|None = chan.uid
|
||||
if their_uid:
|
||||
log.warning(
|
||||
f'Re-connection from already known {their_uid}'
|
||||
)
|
||||
else:
|
||||
log.runtime(f'New connection to us @{chan.raddr}')
|
||||
|
||||
con_msg: str = ''
|
||||
if their_uid:
|
||||
|
@ -517,16 +574,19 @@ class Actor:
|
|||
|
||||
if disconnected:
|
||||
# if the transport died and this actor is still
|
||||
# registered within a local nursery, we report that the
|
||||
# IPC layer may have failed unexpectedly since it may be
|
||||
# the cause of other downstream errors.
|
||||
# registered within a local nursery, we report
|
||||
# that the IPC layer may have failed
|
||||
# unexpectedly since it may be the cause of
|
||||
# other downstream errors.
|
||||
entry = local_nursery._children.get(uid)
|
||||
if entry:
|
||||
proc: trio.Process
|
||||
_, proc, _ = entry
|
||||
|
||||
poll = getattr(proc, 'poll', None)
|
||||
if poll and poll() is None:
|
||||
if (
|
||||
(poll := getattr(proc, 'poll', None))
|
||||
and poll() is None
|
||||
):
|
||||
log.cancel(
|
||||
f'Peer IPC broke but subproc is alive?\n\n'
|
||||
|
||||
|
@ -880,11 +940,11 @@ class Actor:
|
|||
)
|
||||
await chan.connect()
|
||||
|
||||
# TODO: move this into a `Channel.handshake()`?
|
||||
# Initial handshake: swap names.
|
||||
await self._do_handshake(chan)
|
||||
|
||||
accept_addr: tuple[str, int] | None = None
|
||||
|
||||
accept_addrs: list[tuple[str, int]] | None = None
|
||||
if self._spawn_method == "trio":
|
||||
# Receive runtime state from our parent
|
||||
parent_data: dict[str, Any]
|
||||
|
@ -897,10 +957,7 @@ class Actor:
|
|||
# if "trace"/"util" mode is enabled?
|
||||
f'{pformat(parent_data)}\n'
|
||||
)
|
||||
accept_addr = (
|
||||
parent_data.pop('bind_host'),
|
||||
parent_data.pop('bind_port'),
|
||||
)
|
||||
accept_addrs: list[tuple[str, int]] = parent_data.pop('bind_addrs')
|
||||
rvs = parent_data.pop('_runtime_vars')
|
||||
|
||||
if rvs['_debug_mode']:
|
||||
|
@ -918,18 +975,23 @@ class Actor:
|
|||
_state._runtime_vars.update(rvs)
|
||||
|
||||
for attr, value in parent_data.items():
|
||||
|
||||
if attr == '_arb_addr':
|
||||
if (
|
||||
attr == 'reg_addrs'
|
||||
and value
|
||||
):
|
||||
# XXX: ``msgspec`` doesn't support serializing tuples
|
||||
# so just cash manually here since it's what our
|
||||
# internals expect.
|
||||
value = tuple(value) if value else None
|
||||
self._arb_addr = value
|
||||
# TODO: we don't really NEED these as
|
||||
# tuples so we can probably drop this
|
||||
# casting since apparently in python lists
|
||||
# are "more efficient"?
|
||||
self.reg_addrs = [tuple(val) for val in value]
|
||||
|
||||
else:
|
||||
setattr(self, attr, value)
|
||||
|
||||
return chan, accept_addr
|
||||
return chan, accept_addrs
|
||||
|
||||
except OSError: # failed to connect
|
||||
log.warning(
|
||||
|
@ -946,9 +1008,9 @@ class Actor:
|
|||
handler_nursery: Nursery,
|
||||
*,
|
||||
# (host, port) to bind for channel server
|
||||
accept_host: tuple[str, int] | None = None,
|
||||
accept_port: int = 0,
|
||||
task_status: TaskStatus[trio.Nursery] = trio.TASK_STATUS_IGNORED,
|
||||
listen_sockaddrs: list[tuple[str, int]] | None = None,
|
||||
|
||||
task_status: TaskStatus[Nursery] = trio.TASK_STATUS_IGNORED,
|
||||
) -> None:
|
||||
'''
|
||||
Start the IPC transport server, begin listening for new connections.
|
||||
|
@ -958,18 +1020,26 @@ class Actor:
|
|||
`.cancel_server()` is called.
|
||||
|
||||
'''
|
||||
if listen_sockaddrs is None:
|
||||
listen_sockaddrs = [(None, 0)]
|
||||
|
||||
self._server_down = trio.Event()
|
||||
try:
|
||||
async with trio.open_nursery() as server_n:
|
||||
|
||||
for host, port in listen_sockaddrs:
|
||||
listeners: list[trio.abc.Listener] = await server_n.start(
|
||||
partial(
|
||||
trio.serve_tcp,
|
||||
self._stream_handler,
|
||||
# new connections will stay alive even if this server
|
||||
# is cancelled
|
||||
|
||||
handler=self._stream_handler,
|
||||
port=port,
|
||||
host=host,
|
||||
|
||||
# NOTE: configured such that new
|
||||
# connections will stay alive even if
|
||||
# this server is cancelled!
|
||||
handler_nursery=handler_nursery,
|
||||
port=accept_port,
|
||||
host=accept_host,
|
||||
)
|
||||
)
|
||||
sockets: list[trio.socket] = [
|
||||
|
@ -981,7 +1051,9 @@ class Actor:
|
|||
f'|_{sockets}\n'
|
||||
)
|
||||
self._listeners.extend(listeners)
|
||||
|
||||
task_status.started(server_n)
|
||||
|
||||
finally:
|
||||
# signal the server is down since nursery above terminated
|
||||
self._server_down.set()
|
||||
|
@ -1319,6 +1391,18 @@ class Actor:
|
|||
self._server_n.cancel_scope.cancel()
|
||||
|
||||
@property
def accept_addrs(self) -> list[tuple[str, int]]:
    '''
    Every address the transport-channel server is bound to and on
    which it listens for new connections, one entry per listener
    in `._listeners`.

    '''
    bound: list[tuple[str, int]] = []
    for lstnr in self._listeners:
        # throws OSError on failure
        bound.append(lstnr.socket.getsockname())  # type: ignore

    return bound
|
||||
|
||||
@property
|
||||
def accept_addr(self) -> tuple[str, int]:
|
||||
'''
|
||||
|
@ -1327,7 +1411,7 @@ class Actor:
|
|||
|
||||
'''
|
||||
# throws OSError on failure
|
||||
return self._listeners[0].socket.getsockname() # type: ignore
|
||||
return self.accept_addrs[0]
|
||||
|
||||
def get_parent(self) -> Portal:
|
||||
'''
|
||||
|
@ -1344,6 +1428,7 @@ class Actor:
|
|||
'''
|
||||
return self._peers[uid]
|
||||
|
||||
# TODO: move to `Channel.handshake(uid)`
|
||||
async def _do_handshake(
|
||||
self,
|
||||
chan: Channel
|
||||
|
@ -1380,7 +1465,7 @@ class Actor:
|
|||
|
||||
async def async_main(
|
||||
actor: Actor,
|
||||
accept_addr: tuple[str, int] | None = None,
|
||||
accept_addrs: tuple[str, int] | None = None,
|
||||
|
||||
# XXX: currently ``parent_addr`` is only needed for the
|
||||
# ``multiprocessing`` backend (which pickles state sent to
|
||||
|
@ -1408,20 +1493,25 @@ async def async_main(
|
|||
# on our debugger lock state.
|
||||
_debug.Lock._trio_handler = signal.getsignal(signal.SIGINT)
|
||||
|
||||
registered_with_arbiter = False
|
||||
is_registered: bool = False
|
||||
try:
|
||||
|
||||
# establish primary connection with immediate parent
|
||||
actor._parent_chan = None
|
||||
actor._parent_chan: Channel | None = None
|
||||
if parent_addr is not None:
|
||||
|
||||
actor._parent_chan, accept_addr_rent = await actor._from_parent(
|
||||
parent_addr)
|
||||
(
|
||||
actor._parent_chan,
|
||||
set_accept_addr_says_rent,
|
||||
) = await actor._from_parent(parent_addr)
|
||||
|
||||
# either it's passed in because we're not a child
|
||||
# or because we're running in mp mode
|
||||
if accept_addr_rent is not None:
|
||||
accept_addr = accept_addr_rent
|
||||
# either it's passed in because we're not a child or
|
||||
# because we're running in mp mode
|
||||
if (
|
||||
set_accept_addr_says_rent
|
||||
and set_accept_addr_says_rent is not None
|
||||
):
|
||||
accept_addrs = set_accept_addr_says_rent
|
||||
|
||||
# The "root" nursery ensures the channel with the immediate
|
||||
# parent is kept alive as a resilient service until
|
||||
|
@ -1461,34 +1551,72 @@ async def async_main(
|
|||
# - subactor: the bind address is sent by our parent
|
||||
# over our established channel
|
||||
# - root actor: the ``accept_addr`` passed to this method
|
||||
assert accept_addr
|
||||
host, port = accept_addr
|
||||
assert accept_addrs
|
||||
|
||||
try:
|
||||
actor._server_n = await service_nursery.start(
|
||||
partial(
|
||||
actor._serve_forever,
|
||||
service_nursery,
|
||||
accept_host=host,
|
||||
accept_port=port
|
||||
listen_sockaddrs=accept_addrs,
|
||||
)
|
||||
)
|
||||
accept_addr = actor.accept_addr
|
||||
except OSError as oserr:
|
||||
# NOTE: always allow runtime hackers to debug
|
||||
# transport address bind errors - normally it's
|
||||
# something silly like the wrong socket-address
|
||||
# passed via a config or CLI Bo
|
||||
entered_debug: bool = await _debug._maybe_enter_pm(oserr)
|
||||
if not entered_debug:
|
||||
log.exception('Failed to init IPC channel server !?\n')
|
||||
raise
|
||||
|
||||
accept_addrs: list[tuple[str, int]] = actor.accept_addrs
|
||||
|
||||
# NOTE: only set the loopback addr for the
|
||||
# process-tree-global "root" mailbox since
|
||||
# all sub-actors should be able to speak to
|
||||
# their root actor over that channel.
|
||||
if _state._runtime_vars['_is_root']:
|
||||
_state._runtime_vars['_root_mailbox'] = accept_addr
|
||||
for addr in accept_addrs:
|
||||
host, _ = addr
|
||||
# TODO: generic 'lo' detector predicate
|
||||
if '127.0.0.1' in host:
|
||||
_state._runtime_vars['_root_mailbox'] = addr
|
||||
|
||||
# Register with the arbiter if we're told its addr
|
||||
log.runtime(f"Registering {actor} for role `{actor.name}`")
|
||||
assert isinstance(actor._arb_addr, tuple)
|
||||
log.runtime(
|
||||
f'Registering `{actor.name}` ->\n'
|
||||
f'{pformat(accept_addrs)}'
|
||||
)
|
||||
|
||||
async with get_arbiter(*actor._arb_addr) as arb_portal:
|
||||
await arb_portal.run_from_ns(
|
||||
# TODO: ideally we don't fan out to all registrars
|
||||
# if addresses point to the same actor..
|
||||
# So we need a way to detect that? maybe iterate
|
||||
# only on unique actor uids?
|
||||
for addr in actor.reg_addrs:
|
||||
try:
|
||||
assert isinstance(addr, tuple)
|
||||
assert addr[1] # non-zero after bind
|
||||
except AssertionError:
|
||||
await _debug.pause()
|
||||
|
||||
async with get_registry(*addr) as reg_portal:
|
||||
for accept_addr in accept_addrs:
|
||||
|
||||
if not accept_addr[1]:
|
||||
await _debug.pause()
|
||||
|
||||
assert accept_addr[1]
|
||||
|
||||
await reg_portal.run_from_ns(
|
||||
'self',
|
||||
'register_actor',
|
||||
uid=actor.uid,
|
||||
sockaddr=accept_addr,
|
||||
)
|
||||
|
||||
registered_with_arbiter = True
|
||||
is_registered: bool = True
|
||||
|
||||
# init steps complete
|
||||
task_status.started()
|
||||
|
@ -1521,18 +1649,20 @@ async def async_main(
|
|||
log.runtime("Closing all actor lifetime contexts")
|
||||
actor.lifetime_stack.close()
|
||||
|
||||
if not registered_with_arbiter:
|
||||
if not is_registered:
|
||||
# TODO: I guess we could try to connect back
|
||||
# to the parent through a channel and engage a debugger
|
||||
# once we have that all working with std streams locking?
|
||||
log.exception(
|
||||
f"Actor errored and failed to register with arbiter "
|
||||
f"@ {actor._arb_addr}?")
|
||||
f"@ {actor.reg_addrs[0]}?")
|
||||
log.error(
|
||||
"\n\n\t^^^ THIS IS PROBABLY A TRACTOR BUGGGGG!!! ^^^\n"
|
||||
"\tCALMLY CALL THE AUTHORITIES AND HIDE YOUR CHILDREN.\n\n"
|
||||
"\tYOUR PARENT CODE IS GOING TO KEEP WORKING FINE!!!\n"
|
||||
"\tTHIS IS HOW RELIABlE SYSTEMS ARE SUPPOSED TO WORK!?!?\n"
|
||||
"\n\n\t^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! ^^^\n\n"
|
||||
"\t>> CALMLY CALL THE AUTHORITIES AND HIDE YOUR CHILDREN <<\n\n"
|
||||
"\tIf this is a sub-actor hopefully its parent will keep running "
|
||||
"correctly presuming this error was safely ignored..\n\n"
|
||||
"\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT: "
|
||||
"https://github.com/goodboy/tractor/issues\n"
|
||||
)
|
||||
|
||||
if actor._parent_chan:
|
||||
|
@ -1572,16 +1702,19 @@ async def async_main(
|
|||
|
||||
# Unregister actor from the registry-sys / registrar.
|
||||
if (
|
||||
registered_with_arbiter
|
||||
and not actor.is_arbiter
|
||||
is_registered
|
||||
and not actor.is_registrar
|
||||
):
|
||||
failed = False
|
||||
assert isinstance(actor._arb_addr, tuple)
|
||||
failed: bool = False
|
||||
for addr in actor.reg_addrs:
|
||||
assert isinstance(addr, tuple)
|
||||
with trio.move_on_after(0.5) as cs:
|
||||
cs.shield = True
|
||||
try:
|
||||
async with get_arbiter(*actor._arb_addr) as arb_portal:
|
||||
await arb_portal.run_from_ns(
|
||||
async with get_registry(
|
||||
*addr,
|
||||
) as reg_portal:
|
||||
await reg_portal.run_from_ns(
|
||||
'self',
|
||||
'unregister_actor',
|
||||
uid=actor.uid
|
||||
|
@ -1590,9 +1723,12 @@ async def async_main(
|
|||
failed = True
|
||||
if cs.cancelled_caught:
|
||||
failed = True
|
||||
|
||||
if failed:
|
||||
log.warning(
|
||||
f"Failed to unregister {actor.name} from arbiter")
|
||||
f'Failed to unregister {actor.name} from '
|
||||
f'registar @ {addr}'
|
||||
)
|
||||
|
||||
# Ensure all peers (actors connected to us as clients) are finished
|
||||
if not actor._no_more_peers.is_set():
|
||||
|
@ -1611,18 +1747,36 @@ async def async_main(
|
|||
# TODO: rename to `Registry` and move to `._discovery`!
|
||||
class Arbiter(Actor):
|
||||
'''
|
||||
A special actor who knows all the other actors and always has
|
||||
access to a top level nursery.
|
||||
A special registrar actor who can contact all other actors
|
||||
within its immediate process tree and possibly keeps a registry
|
||||
of others meant to be discoverable in a distributed
|
||||
application. Normally the registrar is also the "root actor"
|
||||
and thus always has access to the top-most-level actor
|
||||
(process) nursery.
|
||||
|
||||
The arbiter is by default the first actor spawned on each host
|
||||
and is responsible for keeping track of all other actors for
|
||||
coordination purposes. If a new main process is launched and an
|
||||
arbiter is already running that arbiter will be used.
|
||||
By default, the registrar is always initialized when and if no
|
||||
other registrar socket addrs have been specified to runtime
|
||||
init entry-points (such as `open_root_actor()` or
|
||||
`open_nursery()`). Any time a new main process is launched (and
|
||||
thus a new root actor created) and no existing registrar
|
||||
can be contacted at the provided `registry_addr`, then a new
|
||||
one is always created; however, if one can be reached it is
|
||||
used.
|
||||
|
||||
Normally a distributed app requires at least one registrar per
|
||||
logical host where for that given "host space" (aka localhost
|
||||
IPC domain of addresses) it is responsible for making all other
|
||||
host (local address) bound actors *discoverable* to external
|
||||
actor trees running on remote hosts.
|
||||
|
||||
'''
|
||||
is_arbiter = True
|
||||
|
||||
def __init__(self, *args, **kwargs) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
*args,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
|
||||
self._registry: dict[
|
||||
tuple[str, str],
|
||||
|
@ -1664,7 +1818,10 @@ class Arbiter(Actor):
|
|||
# unpacker since we have tuples as keys (note this makes the
|
||||
# arbiter susceptible to hashdos):
|
||||
# https://github.com/msgpack/msgpack-python#major-breaking-changes-in-msgpack-10
|
||||
return {'.'.join(key): val for key, val in self._registry.items()}
|
||||
return {
|
||||
'.'.join(key): val
|
||||
for key, val in self._registry.items()
|
||||
}
|
||||
|
||||
async def wait_for_actor(
|
||||
self,
|
||||
|
@ -1707,8 +1864,15 @@ class Arbiter(Actor):
|
|||
sockaddr: tuple[str, int]
|
||||
|
||||
) -> None:
|
||||
uid = name, _ = (str(uid[0]), str(uid[1]))
|
||||
self._registry[uid] = (str(sockaddr[0]), int(sockaddr[1]))
|
||||
uid = name, hash = (str(uid[0]), str(uid[1]))
|
||||
addr = (host, port) = (
|
||||
str(sockaddr[0]),
|
||||
int(sockaddr[1]),
|
||||
)
|
||||
if port == 0:
|
||||
await _debug.pause()
|
||||
assert port # should never be 0-dynamic-os-alloc
|
||||
self._registry[uid] = addr
|
||||
|
||||
# pop and signal all waiter events
|
||||
events = self._waiters.pop(name, [])
|
||||
|
|
|
@ -34,7 +34,7 @@ from typing import (
|
|||
import trio
|
||||
from trio import TaskStatus
|
||||
|
||||
from ._debug import (
|
||||
from .devx._debug import (
|
||||
maybe_wait_for_debugger,
|
||||
acquire_debug_lock,
|
||||
)
|
||||
|
@ -220,6 +220,10 @@ async def hard_kill(
|
|||
# whilst also hacking on it XD
|
||||
# terminate_after: int = 99999,
|
||||
|
||||
# NOTE: for mucking with `.pause()`-ing inside the runtime
|
||||
# whilst also hacking on it XD
|
||||
# terminate_after: int = 99999,
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
Un-gracefully terminate an OS level `trio.Process` after timeout.
|
||||
|
@ -365,7 +369,7 @@ async def new_proc(
|
|||
errors: dict[tuple[str, str], Exception],
|
||||
|
||||
# passed through to actor main
|
||||
bind_addr: tuple[str, int],
|
||||
bind_addrs: list[tuple[str, int]],
|
||||
parent_addr: tuple[str, int],
|
||||
_runtime_vars: dict[str, Any], # serialized and sent to _child
|
||||
|
||||
|
@ -387,7 +391,7 @@ async def new_proc(
|
|||
actor_nursery,
|
||||
subactor,
|
||||
errors,
|
||||
bind_addr,
|
||||
bind_addrs,
|
||||
parent_addr,
|
||||
_runtime_vars, # run time vars
|
||||
infect_asyncio=infect_asyncio,
|
||||
|
@ -402,7 +406,7 @@ async def trio_proc(
|
|||
errors: dict[tuple[str, str], Exception],
|
||||
|
||||
# passed through to actor main
|
||||
bind_addr: tuple[str, int],
|
||||
bind_addrs: list[tuple[str, int]],
|
||||
parent_addr: tuple[str, int],
|
||||
_runtime_vars: dict[str, Any], # serialized and sent to _child
|
||||
*,
|
||||
|
@ -491,12 +495,11 @@ async def trio_proc(
|
|||
|
||||
# send additional init params
|
||||
await chan.send({
|
||||
"_parent_main_data": subactor._parent_main_data,
|
||||
"enable_modules": subactor.enable_modules,
|
||||
"_arb_addr": subactor._arb_addr,
|
||||
"bind_host": bind_addr[0],
|
||||
"bind_port": bind_addr[1],
|
||||
"_runtime_vars": _runtime_vars,
|
||||
'_parent_main_data': subactor._parent_main_data,
|
||||
'enable_modules': subactor.enable_modules,
|
||||
'reg_addrs': subactor.reg_addrs,
|
||||
'bind_addrs': bind_addrs,
|
||||
'_runtime_vars': _runtime_vars,
|
||||
})
|
||||
|
||||
# track subactor in current nursery
|
||||
|
@ -551,13 +554,14 @@ async def trio_proc(
|
|||
with trio.move_on_after(0.5):
|
||||
await proc.wait()
|
||||
|
||||
log.pdb(
|
||||
'Delaying subproc reaper while debugger locked..'
|
||||
)
|
||||
await maybe_wait_for_debugger(
|
||||
child_in_debug=_runtime_vars.get(
|
||||
'_debug_mode', False
|
||||
),
|
||||
header_msg=(
|
||||
'Delaying subproc reaper while debugger locked..\n'
|
||||
),
|
||||
|
||||
# TODO: need a diff value then default?
|
||||
# poll_steps=9999999,
|
||||
)
|
||||
|
@ -601,7 +605,7 @@ async def mp_proc(
|
|||
subactor: Actor,
|
||||
errors: dict[tuple[str, str], Exception],
|
||||
# passed through to actor main
|
||||
bind_addr: tuple[str, int],
|
||||
bind_addrs: list[tuple[str, int]],
|
||||
parent_addr: tuple[str, int],
|
||||
_runtime_vars: dict[str, Any], # serialized and sent to _child
|
||||
*,
|
||||
|
@ -659,7 +663,7 @@ async def mp_proc(
|
|||
target=_mp_main,
|
||||
args=(
|
||||
subactor,
|
||||
bind_addr,
|
||||
bind_addrs,
|
||||
fs_info,
|
||||
_spawn_method,
|
||||
parent_addr,
|
||||
|
|
|
@ -22,16 +22,13 @@ from contextlib import asynccontextmanager as acm
|
|||
from functools import partial
|
||||
import inspect
|
||||
from pprint import pformat
|
||||
from typing import (
|
||||
Optional,
|
||||
TYPE_CHECKING,
|
||||
)
|
||||
from typing import TYPE_CHECKING
|
||||
import typing
|
||||
import warnings
|
||||
|
||||
import trio
|
||||
|
||||
from ._debug import maybe_wait_for_debugger
|
||||
from .devx._debug import maybe_wait_for_debugger
|
||||
from ._state import current_actor, is_main_process
|
||||
from .log import get_logger, get_loglevel
|
||||
from ._runtime import Actor
|
||||
|
@ -97,7 +94,7 @@ class ActorNursery:
|
|||
tuple[
|
||||
Actor,
|
||||
trio.Process | mp.Process,
|
||||
Optional[Portal],
|
||||
Portal | None,
|
||||
]
|
||||
] = {}
|
||||
# portals spawned with ``run_in_actor()`` are
|
||||
|
@ -121,12 +118,12 @@ class ActorNursery:
|
|||
self,
|
||||
name: str,
|
||||
*,
|
||||
bind_addr: tuple[str, int] = _default_bind_addr,
|
||||
bind_addrs: list[tuple[str, int]] = [_default_bind_addr],
|
||||
rpc_module_paths: list[str] | None = None,
|
||||
enable_modules: list[str] | None = None,
|
||||
loglevel: str | None = None, # set log level per subactor
|
||||
nursery: trio.Nursery | None = None,
|
||||
debug_mode: Optional[bool] | None = None,
|
||||
debug_mode: bool | None = None,
|
||||
infect_asyncio: bool = False,
|
||||
) -> Portal:
|
||||
'''
|
||||
|
@ -161,7 +158,9 @@ class ActorNursery:
|
|||
# modules allowed to invoked funcs from
|
||||
enable_modules=enable_modules,
|
||||
loglevel=loglevel,
|
||||
arbiter_addr=current_actor()._arb_addr,
|
||||
|
||||
# verbatim relay this actor's registrar addresses
|
||||
registry_addrs=current_actor().reg_addrs,
|
||||
)
|
||||
parent_addr = self._actor.accept_addr
|
||||
assert parent_addr
|
||||
|
@ -178,7 +177,7 @@ class ActorNursery:
|
|||
self,
|
||||
subactor,
|
||||
self.errors,
|
||||
bind_addr,
|
||||
bind_addrs,
|
||||
parent_addr,
|
||||
_rtv, # run time vars
|
||||
infect_asyncio=infect_asyncio,
|
||||
|
@ -191,8 +190,8 @@ class ActorNursery:
|
|||
fn: typing.Callable,
|
||||
*,
|
||||
|
||||
name: Optional[str] = None,
|
||||
bind_addr: tuple[str, int] = _default_bind_addr,
|
||||
name: str | None = None,
|
||||
bind_addrs: tuple[str, int] = [_default_bind_addr],
|
||||
rpc_module_paths: list[str] | None = None,
|
||||
enable_modules: list[str] | None = None,
|
||||
loglevel: str | None = None, # set log level per subactor
|
||||
|
@ -221,7 +220,7 @@ class ActorNursery:
|
|||
enable_modules=[mod_path] + (
|
||||
enable_modules or rpc_module_paths or []
|
||||
),
|
||||
bind_addr=bind_addr,
|
||||
bind_addrs=bind_addrs,
|
||||
loglevel=loglevel,
|
||||
# use the run_in_actor nursery
|
||||
nursery=self._ria_nursery,
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright 2018-eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
"""
|
||||
Runtime "developer experience" utils and addons to aid our
|
||||
(advanced) users and core devs in building distributed applications
|
||||
and working with/on the actor runtime.
|
||||
|
||||
"""
|
||||
from ._debug import (
|
||||
maybe_wait_for_debugger as maybe_wait_for_debugger,
|
||||
acquire_debug_lock as acquire_debug_lock,
|
||||
breakpoint as breakpoint,
|
||||
pause as pause,
|
||||
pause_from_sync as pause_from_sync,
|
||||
shield_sigint_handler as shield_sigint_handler,
|
||||
MultiActorPdb as MultiActorPdb,
|
||||
open_crash_handler as open_crash_handler,
|
||||
maybe_open_crash_handler as maybe_open_crash_handler,
|
||||
post_mortem as post_mortem,
|
||||
)
|
||||
from ._stackscope import (
|
||||
enable_stack_on_sig as enable_stack_on_sig,
|
||||
)
|
|
@ -1,18 +1,19 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright 2018-eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
# This program is free software: you can redistribute it and/or
|
||||
# modify it under the terms of the GNU Affero General Public License
|
||||
# as published by the Free Software Foundation, either version 3 of
|
||||
# the License, or (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
# Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
# You should have received a copy of the GNU Affero General Public
|
||||
# License along with this program. If not, see
|
||||
# <https://www.gnu.org/licenses/>.
|
||||
|
||||
"""
|
||||
Multi-core debugging for da peeps!
|
||||
|
@ -20,14 +21,19 @@ Multi-core debugging for da peeps!
|
|||
"""
|
||||
from __future__ import annotations
|
||||
import bdb
|
||||
import os
|
||||
import sys
|
||||
import signal
|
||||
from contextlib import (
|
||||
asynccontextmanager as acm,
|
||||
contextmanager as cm,
|
||||
nullcontext,
|
||||
)
|
||||
from functools import (
|
||||
partial,
|
||||
cached_property,
|
||||
)
|
||||
from contextlib import asynccontextmanager as acm
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
import traceback
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
|
@ -39,27 +45,31 @@ from types import FrameType
|
|||
import pdbp
|
||||
import tractor
|
||||
import trio
|
||||
from trio.lowlevel import current_task
|
||||
from trio_typing import (
|
||||
TaskStatus,
|
||||
# Task,
|
||||
)
|
||||
|
||||
from .log import get_logger
|
||||
from ._discovery import get_root
|
||||
from ._state import (
|
||||
from ..log import get_logger
|
||||
from .._state import (
|
||||
current_actor,
|
||||
is_root_process,
|
||||
debug_mode,
|
||||
)
|
||||
from ._exceptions import (
|
||||
from .._exceptions import (
|
||||
is_multi_cancelled,
|
||||
ContextCancelled,
|
||||
)
|
||||
from ._ipc import Channel
|
||||
from .._ipc import Channel
|
||||
|
||||
log = get_logger(__name__)
|
||||
|
||||
|
||||
__all__ = ['breakpoint', 'post_mortem']
|
||||
__all__ = [
|
||||
'breakpoint',
|
||||
'post_mortem',
|
||||
]
|
||||
|
||||
|
||||
class Lock:
|
||||
|
@ -232,7 +242,7 @@ async def _acquire_debug_lock_from_root_task(
|
|||
to the ``pdb`` repl.
|
||||
|
||||
'''
|
||||
task_name: str = trio.lowlevel.current_task().name
|
||||
task_name: str = current_task().name
|
||||
we_acquired: bool = False
|
||||
|
||||
log.runtime(
|
||||
|
@ -317,14 +327,13 @@ async def lock_tty_for_child(
|
|||
highly reliable at releasing the mutex complete!
|
||||
|
||||
'''
|
||||
task_name = trio.lowlevel.current_task().name
|
||||
|
||||
task_name: str = current_task().name
|
||||
if tuple(subactor_uid) in Lock._blocked:
|
||||
log.warning(
|
||||
f'Actor {subactor_uid} is blocked from acquiring debug lock\n'
|
||||
f"remote task: {task_name}:{subactor_uid}"
|
||||
)
|
||||
ctx._enter_debugger_on_cancel = False
|
||||
ctx._enter_debugger_on_cancel: bool = False
|
||||
await ctx.cancel(f'Debug lock blocked for {subactor_uid}')
|
||||
return 'pdb_lock_blocked'
|
||||
|
||||
|
@ -375,12 +384,14 @@ async def wait_for_parent_stdin_hijack(
|
|||
|
||||
This function is used by any sub-actor to acquire mutex access to
|
||||
the ``pdb`` REPL and thus the root's TTY for interactive debugging
|
||||
(see below inside ``_pause()``). It can be used to ensure that
|
||||
(see below inside ``pause()``). It can be used to ensure that
|
||||
an intermediate nursery-owning actor does not clobber its children
|
||||
if they are in debug (see below inside
|
||||
``maybe_wait_for_debugger()``).
|
||||
|
||||
'''
|
||||
from .._discovery import get_root
|
||||
|
||||
with trio.CancelScope(shield=True) as cs:
|
||||
Lock._debugger_request_cs = cs
|
||||
|
||||
|
@ -390,7 +401,7 @@ async def wait_for_parent_stdin_hijack(
|
|||
# this syncs to child's ``Context.started()`` call.
|
||||
async with portal.open_context(
|
||||
|
||||
tractor._debug.lock_tty_for_child,
|
||||
lock_tty_for_child,
|
||||
subactor_uid=actor_uid,
|
||||
|
||||
) as (ctx, val):
|
||||
|
@ -399,11 +410,13 @@ async def wait_for_parent_stdin_hijack(
|
|||
assert val == 'Locked'
|
||||
|
||||
async with ctx.open_stream() as stream:
|
||||
# unblock local caller
|
||||
|
||||
try:
|
||||
# unblock local caller
|
||||
assert Lock.local_pdb_complete
|
||||
task_status.started(cs)
|
||||
|
||||
# wait for local task to exit and
|
||||
# release the REPL
|
||||
await Lock.local_pdb_complete.wait()
|
||||
|
||||
finally:
|
||||
|
@ -441,171 +454,6 @@ def mk_mpdb() -> tuple[MultiActorPdb, Callable]:
|
|||
return pdb, Lock.unshield_sigint
|
||||
|
||||
|
||||
async def _pause(
|
||||
|
||||
debug_func: Callable | None = None,
|
||||
release_lock_signal: trio.Event | None = None,
|
||||
|
||||
# TODO:
|
||||
# shield: bool = False
|
||||
task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
A pause point (more commonly known as a "breakpoint") interrupt
|
||||
instruction for engaging a blocking debugger instance to
|
||||
conduct manual console-based-REPL-interaction from within
|
||||
`tractor`'s async runtime, normally from some single-threaded
|
||||
and currently executing actor-hosted-`trio`-task in some
|
||||
(remote) process.
|
||||
|
||||
NOTE: we use the semantics "pause" since it better encompasses
|
||||
the entirety of the necessary global-runtime-state-mutation any
|
||||
actor-task must access and lock in order to get full isolated
|
||||
control over the process tree's root TTY:
|
||||
https://en.wikipedia.org/wiki/Breakpoint
|
||||
|
||||
'''
|
||||
__tracebackhide__ = True
|
||||
actor = tractor.current_actor()
|
||||
pdb, undo_sigint = mk_mpdb()
|
||||
task_name = trio.lowlevel.current_task().name
|
||||
|
||||
# TODO: is it possible to debug a trio.Cancelled except block?
|
||||
# right now it seems like we can kinda do with by shielding
|
||||
# around ``tractor.breakpoint()`` but not if we move the shielded
|
||||
# scope here???
|
||||
# with trio.CancelScope(shield=shield):
|
||||
# await trio.lowlevel.checkpoint()
|
||||
|
||||
if (
|
||||
not Lock.local_pdb_complete
|
||||
or Lock.local_pdb_complete.is_set()
|
||||
):
|
||||
Lock.local_pdb_complete = trio.Event()
|
||||
|
||||
# TODO: need a more robust check for the "root" actor
|
||||
if (
|
||||
not is_root_process()
|
||||
and actor._parent_chan # a connected child
|
||||
):
|
||||
|
||||
if Lock.local_task_in_debug:
|
||||
|
||||
# Recurrence entry case: this task already has the lock and
|
||||
# is likely recurrently entering a breakpoint
|
||||
if Lock.local_task_in_debug == task_name:
|
||||
# noop on recurrent entry case but we want to trigger
|
||||
# a checkpoint to allow other actors error-propagate and
|
||||
# potetially avoid infinite re-entries in some subactor.
|
||||
await trio.lowlevel.checkpoint()
|
||||
return
|
||||
|
||||
# if **this** actor is already in debug mode block here
|
||||
# waiting for the control to be released - this allows
|
||||
# support for recursive entries to `tractor.breakpoint()`
|
||||
log.warning(f"{actor.uid} already has a debug lock, waiting...")
|
||||
|
||||
await Lock.local_pdb_complete.wait()
|
||||
await trio.sleep(0.1)
|
||||
|
||||
# mark local actor as "in debug mode" to avoid recurrent
|
||||
# entries/requests to the root process
|
||||
Lock.local_task_in_debug = task_name
|
||||
|
||||
# this **must** be awaited by the caller and is done using the
|
||||
# root nursery so that the debugger can continue to run without
|
||||
# being restricted by the scope of a new task nursery.
|
||||
|
||||
# TODO: if we want to debug a trio.Cancelled triggered exception
|
||||
# we have to figure out how to avoid having the service nursery
|
||||
# cancel on this task start? I *think* this works below:
|
||||
# ```python
|
||||
# actor._service_n.cancel_scope.shield = shield
|
||||
# ```
|
||||
# but not entirely sure if that's a sane way to implement it?
|
||||
try:
|
||||
with trio.CancelScope(shield=True):
|
||||
await actor._service_n.start(
|
||||
wait_for_parent_stdin_hijack,
|
||||
actor.uid,
|
||||
)
|
||||
Lock.repl = pdb
|
||||
except RuntimeError:
|
||||
Lock.release()
|
||||
|
||||
if actor._cancel_called:
|
||||
# service nursery won't be usable and we
|
||||
# don't want to lock up the root either way since
|
||||
# we're in (the midst of) cancellation.
|
||||
return
|
||||
|
||||
raise
|
||||
|
||||
elif is_root_process():
|
||||
|
||||
# we also wait in the root-parent for any child that
|
||||
# may have the tty locked prior
|
||||
# TODO: wait, what about multiple root tasks acquiring it though?
|
||||
if Lock.global_actor_in_debug == actor.uid:
|
||||
# re-entrant root process already has it: noop.
|
||||
return
|
||||
|
||||
# XXX: since we need to enter pdb synchronously below,
|
||||
# we have to release the lock manually from pdb completion
|
||||
# callbacks. Can't think of a nicer way then this atm.
|
||||
if Lock._debug_lock.locked():
|
||||
log.warning(
|
||||
'Root actor attempting to shield-acquire active tty lock'
|
||||
f' owned by {Lock.global_actor_in_debug}')
|
||||
|
||||
# must shield here to avoid hitting a ``Cancelled`` and
|
||||
# a child getting stuck bc we clobbered the tty
|
||||
with trio.CancelScope(shield=True):
|
||||
await Lock._debug_lock.acquire()
|
||||
else:
|
||||
# may be cancelled
|
||||
await Lock._debug_lock.acquire()
|
||||
|
||||
Lock.global_actor_in_debug = actor.uid
|
||||
Lock.local_task_in_debug = task_name
|
||||
Lock.repl = pdb
|
||||
|
||||
try:
|
||||
# breakpoint()
|
||||
if debug_func is None:
|
||||
# assert release_lock_signal, (
|
||||
# 'Must pass `release_lock_signal: trio.Event` if no '
|
||||
# 'trace func provided!'
|
||||
# )
|
||||
print(f"{actor.uid} ENTERING WAIT")
|
||||
task_status.started()
|
||||
|
||||
# with trio.CancelScope(shield=True):
|
||||
# await release_lock_signal.wait()
|
||||
|
||||
else:
|
||||
# block here one (at the appropriate frame *up*) where
|
||||
# ``breakpoint()`` was awaited and begin handling stdio.
|
||||
log.debug("Entering the synchronous world of pdb")
|
||||
debug_func(actor, pdb)
|
||||
|
||||
except bdb.BdbQuit:
|
||||
Lock.release()
|
||||
raise
|
||||
|
||||
# XXX: apparently we can't do this without showing this frame
|
||||
# in the backtrace on first entry to the REPL? Seems like an odd
|
||||
# behaviour that should have been fixed by now. This is also why
|
||||
# we scrapped all the @cm approaches that were tried previously.
|
||||
# finally:
|
||||
# __tracebackhide__ = True
|
||||
# # frame = sys._getframe()
|
||||
# # last_f = frame.f_back
|
||||
# # last_f.f_globals['__tracebackhide__'] = True
|
||||
# # signal.signal = pdbp.hideframe(signal.signal)
|
||||
|
||||
|
||||
def shield_sigint_handler(
|
||||
signum: int,
|
||||
frame: 'frame', # type: ignore # noqa
|
||||
|
@ -625,7 +473,7 @@ def shield_sigint_handler(
|
|||
|
||||
uid_in_debug: tuple[str, str] | None = Lock.global_actor_in_debug
|
||||
|
||||
actor = tractor.current_actor()
|
||||
actor = current_actor()
|
||||
# print(f'{actor.uid} in HANDLER with ')
|
||||
|
||||
def do_cancel():
|
||||
|
@ -764,27 +612,62 @@ def shield_sigint_handler(
|
|||
# https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py
|
||||
|
||||
|
||||
_pause_msg: str = 'Attaching to pdb REPL in actor'
|
||||
|
||||
|
||||
def _set_trace(
|
||||
actor: tractor.Actor | None = None,
|
||||
pdb: MultiActorPdb | None = None,
|
||||
):
|
||||
__tracebackhide__ = True
|
||||
actor: tractor.Actor = actor or tractor.current_actor()
|
||||
shield: bool = False,
|
||||
|
||||
# start 2 levels up in user code
|
||||
frame: FrameType | None = sys._getframe()
|
||||
if frame:
|
||||
extra_frames_up_when_async: int = 1,
|
||||
):
|
||||
__tracebackhide__: bool = True
|
||||
actor: tractor.Actor = actor or current_actor()
|
||||
|
||||
# always start 1 level up from THIS in user code.
|
||||
frame: FrameType|None
|
||||
if frame := sys._getframe():
|
||||
frame: FrameType = frame.f_back # type: ignore
|
||||
|
||||
if (
|
||||
frame
|
||||
and pdb
|
||||
and (
|
||||
pdb
|
||||
and actor is not None
|
||||
)
|
||||
# or shield
|
||||
):
|
||||
log.pdb(f"\nAttaching pdb to actor: {actor.uid}\n")
|
||||
msg: str = _pause_msg
|
||||
if shield:
|
||||
# log.warning(
|
||||
msg = (
|
||||
'\n\n'
|
||||
' ------ - ------\n'
|
||||
'Debugger invoked with `shield=True` so an extra\n'
|
||||
'`trio.CancelScope.__exit__()` frame is shown..\n'
|
||||
'\n'
|
||||
'Try going up one frame to see your pause point!\n'
|
||||
'\n'
|
||||
' SORRY we need to fix this!\n'
|
||||
' ------ - ------\n\n'
|
||||
) + msg
|
||||
|
||||
# pdbp.set_trace()
|
||||
# TODO: maybe print the actor supervision tree up to the
|
||||
# root here? Bo
|
||||
log.pdb(
|
||||
f'{msg}\n'
|
||||
'|\n'
|
||||
f'|_ {actor.uid}\n'
|
||||
)
|
||||
# no f!#$&* idea, but when we're in async land
|
||||
# we need 2x frames up?
|
||||
frame = frame.f_back
|
||||
for i in range(extra_frames_up_when_async):
|
||||
frame: FrameType = frame.f_back
|
||||
log.debug(
|
||||
f'Going up frame {i} -> {frame}\n'
|
||||
)
|
||||
|
||||
else:
|
||||
pdb, undo_sigint = mk_mpdb()
|
||||
|
@ -794,17 +677,278 @@ def _set_trace(
|
|||
Lock.local_task_in_debug = 'sync'
|
||||
|
||||
pdb.set_trace(frame=frame)
|
||||
# undo_
|
||||
|
||||
|
||||
# TODO: allow pausing from sync code, normally by remapping
|
||||
# python's builtin breakpoint() hook to this runtime aware version.
|
||||
async def _pause(
|
||||
|
||||
debug_func: Callable = _set_trace,
|
||||
release_lock_signal: trio.Event | None = None,
|
||||
|
||||
# TODO: allow caller to pause despite task cancellation,
|
||||
# exactly the same as wrapping with:
|
||||
# with CancelScope(shield=True):
|
||||
# await pause()
|
||||
# => the REMAINING ISSUE is that the scope's .__exit__() frame
|
||||
# is always shown in the debugger on entry.. and there seems to
|
||||
# be no way to override it?..
|
||||
# shield: bool = False,
|
||||
|
||||
shield: bool = False,
|
||||
task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
Inner impl for `pause()` to avoid the `trio.CancelScope.__exit__()`
|
||||
stack frame when not shielded (since apparently i can't figure out
|
||||
how to hide it using the normal mechanisms..)
|
||||
|
||||
Hopefully we won't need this in the long run.
|
||||
|
||||
'''
|
||||
__tracebackhide__: bool = True
|
||||
actor = current_actor()
|
||||
pdb, undo_sigint = mk_mpdb()
|
||||
task_name: str = trio.lowlevel.current_task().name
|
||||
|
||||
if (
|
||||
not Lock.local_pdb_complete
|
||||
or Lock.local_pdb_complete.is_set()
|
||||
):
|
||||
Lock.local_pdb_complete = trio.Event()
|
||||
|
||||
debug_func = partial(
|
||||
debug_func,
|
||||
)
|
||||
|
||||
# TODO: need a more robust check for the "root" actor
|
||||
if (
|
||||
not is_root_process()
|
||||
and actor._parent_chan # a connected child
|
||||
):
|
||||
|
||||
if Lock.local_task_in_debug:
|
||||
|
||||
# Recurrence entry case: this task already has the lock and
|
||||
# is likely recurrently entering a breakpoint
|
||||
if Lock.local_task_in_debug == task_name:
|
||||
# noop on recurrent entry case but we want to trigger
|
||||
# a checkpoint to allow other actors error-propagate and
|
||||
# potentially avoid infinite re-entries in some subactor.
|
||||
await trio.lowlevel.checkpoint()
|
||||
return
|
||||
|
||||
# if **this** actor is already in debug mode block here
|
||||
# waiting for the control to be released - this allows
|
||||
# support for recursive entries to `tractor.breakpoint()`
|
||||
log.warning(f"{actor.uid} already has a debug lock, waiting...")
|
||||
|
||||
await Lock.local_pdb_complete.wait()
|
||||
await trio.sleep(0.1)
|
||||
|
||||
# mark local actor as "in debug mode" to avoid recurrent
|
||||
# entries/requests to the root process
|
||||
Lock.local_task_in_debug = task_name
|
||||
|
||||
# this **must** be awaited by the caller and is done using the
|
||||
# root nursery so that the debugger can continue to run without
|
||||
# being restricted by the scope of a new task nursery.
|
||||
|
||||
# TODO: if we want to debug a trio.Cancelled triggered exception
|
||||
# we have to figure out how to avoid having the service nursery
|
||||
# cancel on this task start? I *think* this works below:
|
||||
# ```python
|
||||
# actor._service_n.cancel_scope.shield = shield
|
||||
# ```
|
||||
# but not entirely sure if that's a sane way to implement it?
|
||||
try:
|
||||
with trio.CancelScope(shield=True):
|
||||
await actor._service_n.start(
|
||||
wait_for_parent_stdin_hijack,
|
||||
actor.uid,
|
||||
)
|
||||
Lock.repl = pdb
|
||||
except RuntimeError:
|
||||
Lock.release()
|
||||
|
||||
if actor._cancel_called:
|
||||
# service nursery won't be usable and we
|
||||
# don't want to lock up the root either way since
|
||||
# we're in (the midst of) cancellation.
|
||||
return
|
||||
|
||||
raise
|
||||
|
||||
elif is_root_process():
|
||||
|
||||
# we also wait in the root-parent for any child that
|
||||
# may have the tty locked prior
|
||||
# TODO: wait, what about multiple root tasks acquiring it though?
|
||||
if Lock.global_actor_in_debug == actor.uid:
|
||||
# re-entrant root process already has it: noop.
|
||||
return
|
||||
|
||||
# XXX: since we need to enter pdb synchronously below,
|
||||
# we have to release the lock manually from pdb completion
|
||||
# callbacks. Can't think of a nicer way than this atm.
|
||||
if Lock._debug_lock.locked():
|
||||
log.warning(
|
||||
'Root actor attempting to shield-acquire active tty lock'
|
||||
f' owned by {Lock.global_actor_in_debug}')
|
||||
|
||||
# must shield here to avoid hitting a ``Cancelled`` and
|
||||
# a child getting stuck bc we clobbered the tty
|
||||
with trio.CancelScope(shield=True):
|
||||
await Lock._debug_lock.acquire()
|
||||
else:
|
||||
# may be cancelled
|
||||
await Lock._debug_lock.acquire()
|
||||
|
||||
Lock.global_actor_in_debug = actor.uid
|
||||
Lock.local_task_in_debug = task_name
|
||||
Lock.repl = pdb
|
||||
|
||||
try:
|
||||
# TODO: do we want to support using this **just** for the
|
||||
# locking / common code (prolly to help address #320)?
|
||||
#
|
||||
# if debug_func is None:
|
||||
# assert release_lock_signal, (
|
||||
# 'Must pass `release_lock_signal: trio.Event` if no '
|
||||
# 'trace func provided!'
|
||||
# )
|
||||
# print(f"{actor.uid} ENTERING WAIT")
|
||||
# with trio.CancelScope(shield=True):
|
||||
# await release_lock_signal.wait()
|
||||
|
||||
# else:
|
||||
# block here one (at the appropriate frame *up*) where
|
||||
# ``breakpoint()`` was awaited and begin handling stdio.
|
||||
log.debug('Entering sync world of the `pdb` REPL..')
|
||||
try:
|
||||
debug_func(
|
||||
actor,
|
||||
pdb,
|
||||
extra_frames_up_when_async=2,
|
||||
shield=shield,
|
||||
)
|
||||
except BaseException:
|
||||
log.exception(
|
||||
'Failed to invoke internal `debug_func = '
|
||||
f'{debug_func.func.__name__}`\n'
|
||||
)
|
||||
raise
|
||||
|
||||
except bdb.BdbQuit:
|
||||
Lock.release()
|
||||
raise
|
||||
|
||||
except BaseException:
|
||||
log.exception(
|
||||
'Failed to engage debugger via `_pause()` ??\n'
|
||||
)
|
||||
raise
|
||||
|
||||
# XXX: apparently we can't do this without showing this frame
|
||||
# in the backtrace on first entry to the REPL? Seems like an odd
|
||||
# behaviour that should have been fixed by now. This is also why
|
||||
# we scrapped all the @cm approaches that were tried previously.
|
||||
# finally:
|
||||
# __tracebackhide__ = True
|
||||
# # frame = sys._getframe()
|
||||
# # last_f = frame.f_back
|
||||
# # last_f.f_globals['__tracebackhide__'] = True
|
||||
# # signal.signal = pdbp.hideframe(signal.signal)
|
||||
|
||||
|
||||
async def pause(

    debug_func: Callable = _set_trace,
    release_lock_signal: trio.Event | None = None,

    # TODO: allow caller to pause despite task cancellation,
    # exactly the same as wrapping with:
    # with CancelScope(shield=True):
    #     await pause()
    # => the REMAINING ISSUE is that the scope's .__exit__() frame
    # is always shown in the debugger on entry.. and there seems to
    # be no way to override it?..
    # shield: bool = False,

    shield: bool = False,
    task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED

) -> None:
    '''
    A pause point (more commonly known as a "breakpoint") interrupt
    instruction for engaging a blocking debugger instance to
    conduct manual console-based-REPL-interaction from within
    `tractor`'s async runtime, normally from some single-threaded
    and currently executing actor-hosted-`trio`-task in some
    (remote) process.

    NOTE: we use the semantics "pause" since it better encompasses
    the entirety of the necessary global-runtime-state-mutation any
    actor-task must access and lock in order to get full isolated
    control over the process tree's root TTY:
    https://en.wikipedia.org/wiki/Breakpoint

    This is a thin public wrapper: all of the actual work is
    delegated to `_pause()`, to which every parameter is forwarded.

    Parameters:
    - `debug_func`: the REPL-entry callable handed to `_pause()`;
      defaults to `_set_trace`.
    - `release_lock_signal`: optional event forwarded untouched to
      `_pause()`.
    - `shield`: when set, the `_pause()` call runs inside a
      `trio.CancelScope(shield=True)` and that scope is reported to
      the caller through `task_status.started()`.
    - `task_status`: `trio` task-status handle; a no-op when left at
      the `trio.TASK_STATUS_IGNORED` default.

    Returns whatever `_pause()` returns.

    '''
    # hide this wrapper's frame from the debugger's traceback output.
    __tracebackhide__: bool = True

    if shield:
        # NOTE XXX: even hard coding this inside the `class CancelScope:`
        # doesn't seem to work for me!?
        # ^ XXX ^

        # def _exit(self, *args, **kwargs):
        #     __tracebackhide__: bool = True
        #     super().__exit__(*args, **kwargs)

        # NOTE: these attributes are set on the methods of the
        # `trio.CancelScope` class itself, so the marking is NOT
        # limited to the scope instance created below.
        trio.CancelScope.__enter__.__tracebackhide__ = True
        trio.CancelScope.__exit__.__tracebackhide__ = True

        # import types
        # with trio.CancelScope(shield=shield) as cs:
        #     cs.__exit__ = types.MethodType(_exit, cs)
        #     cs.__exit__.__tracebackhide__ = True

        with trio.CancelScope(shield=shield) as cs:
            # setattr(cs.__exit__.__func__, '__tracebackhide__', True)
            # setattr(cs.__enter__.__func__, '__tracebackhide__', True)

            # NOTE: so the caller can always cancel even if shielded
            # NOTE(review): the value delivered here is the cancel
            # scope, while the signature annotates
            # `TaskStatus[trio.Event]` - confirm which is intended.
            task_status.started(cs)

            # NOTE(review): the same `task_status` is also forwarded
            # below - confirm `_pause()` never calls `.started()` on
            # it a second time in this shielded path.
            return await _pause(
                debug_func=debug_func,
                release_lock_signal=release_lock_signal,
                shield=True,
                task_status=task_status,
            )
    else:
        # unshielded: plain delegation, the caller's own cancellation
        # applies as usual.
        return await _pause(
            debug_func=debug_func,
            release_lock_signal=release_lock_signal,
            shield=False,
            task_status=task_status,
        )
|
||||
|
||||
|
||||
|
||||
|
||||
# TODO: allow pausing from sync code.
|
||||
# normally by remapping python's builtin breakpoint() hook to this
|
||||
# runtime aware version which takes care of all .
|
||||
def pause_from_sync() -> None:
|
||||
print("ENTER SYNC PAUSE")
|
||||
actor: tractor.Actor = current_actor(
|
||||
err_on_no_runtime=False,
|
||||
)
|
||||
if actor:
|
||||
try:
|
||||
import greenback
|
||||
__tracebackhide__ = True
|
||||
# __tracebackhide__ = True
|
||||
|
||||
|
||||
actor: tractor.Actor = tractor.current_actor()
|
||||
# task_can_release_tty_lock = trio.Event()
|
||||
|
||||
# spawn bg task which will lock out the TTY, we poll
|
||||
|
@ -812,12 +956,17 @@ def pause_from_sync() -> None:
|
|||
# waiting.. not the most ideal but works for now ;)
|
||||
greenback.await_(
|
||||
actor._service_n.start(partial(
|
||||
_pause,
|
||||
pause,
|
||||
debug_func=None,
|
||||
# release_lock_signal=task_can_release_tty_lock,
|
||||
))
|
||||
)
|
||||
|
||||
except ModuleNotFoundError:
|
||||
log.warning('NO GREENBACK FOUND')
|
||||
else:
|
||||
log.warning('Not inside actor-runtime')
|
||||
|
||||
db, undo_sigint = mk_mpdb()
|
||||
Lock.local_task_in_debug = 'sync'
|
||||
# db.config.enable_hidden_frames = True
|
||||
|
@ -851,11 +1000,7 @@ def pause_from_sync() -> None:
|
|||
# using the "pause" semantics instead since
|
||||
# that better covers actually somewhat "pausing the runtime"
|
||||
# for this particular parallel task to do debugging B)
|
||||
pause = partial(
|
||||
_pause,
|
||||
_set_trace,
|
||||
)
|
||||
pp = pause # short-hand for "pause point"
|
||||
# pp = pause # short-hand for "pause point"
|
||||
|
||||
|
||||
async def breakpoint(**kwargs):
|
||||
|
@ -866,9 +1011,18 @@ async def breakpoint(**kwargs):
|
|||
await pause(**kwargs)
|
||||
|
||||
|
||||
_crash_msg: str = (
|
||||
'Attaching to pdb REPL in crashed actor'
|
||||
)
|
||||
|
||||
|
||||
def _post_mortem(
|
||||
actor: tractor.Actor,
|
||||
pdb: MultiActorPdb,
|
||||
shield: bool = False,
|
||||
|
||||
# only for compat with `._set_trace()`..
|
||||
extra_frames_up_when_async=0,
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
|
@ -876,20 +1030,28 @@ def _post_mortem(
|
|||
debugger instance.
|
||||
|
||||
'''
|
||||
log.pdb(f"\nAttaching to pdb in crashed actor: {actor.uid}\n")
|
||||
# TODO: print the actor supervision tree up to the root
|
||||
# here! Bo
|
||||
log.pdb(
|
||||
f'{_crash_msg}\n'
|
||||
'|\n'
|
||||
f'|_ {actor.uid}\n'
|
||||
)
|
||||
|
||||
# TODO: you need ``pdbpp`` master (at least this commit
|
||||
# https://github.com/pdbpp/pdbpp/commit/b757794857f98d53e3ebbe70879663d7d843a6c2)
|
||||
# to fix this and avoid the hang it causes. See issue:
|
||||
# https://github.com/pdbpp/pdbpp/issues/480
|
||||
# TODO: help with a 3.10+ major release if/when it arrives.
|
||||
|
||||
pdbp.xpm(Pdb=lambda: pdb)
|
||||
# TODO: only replacing this to add the
|
||||
# `end=''` to the print XD
|
||||
# pdbp.xpm(Pdb=lambda: pdb)
|
||||
info = sys.exc_info()
|
||||
print(traceback.format_exc(), end='')
|
||||
pdbp.post_mortem(
|
||||
t=info[2],
|
||||
Pdb=lambda: pdb,
|
||||
)
|
||||
|
||||
|
||||
post_mortem = partial(
|
||||
_pause,
|
||||
_post_mortem,
|
||||
pause,
|
||||
debug_func=_post_mortem,
|
||||
)
|
||||
|
||||
|
||||
|
@ -930,9 +1092,10 @@ async def acquire_debug_lock(
|
|||
'''
|
||||
Grab root's debug lock on entry, release on exit.
|
||||
|
||||
This helper is for actor's who don't actually need
|
||||
to acquired the debugger but want to wait until the
|
||||
lock is free in the process-tree root.
|
||||
This helper is for actors who don't actually need to acquire
|
||||
the debugger but want to wait until the lock is free in the
|
||||
process-tree root such that they don't clobber an ongoing pdb
|
||||
REPL session in some peer or child!
|
||||
|
||||
'''
|
||||
if not debug_mode():
|
||||
|
@ -953,14 +1116,18 @@ async def maybe_wait_for_debugger(
|
|||
poll_delay: float = 0.1,
|
||||
child_in_debug: bool = False,
|
||||
|
||||
) -> None:
|
||||
header_msg: str = '',
|
||||
|
||||
) -> bool: # was locked and we polled?
|
||||
|
||||
if (
|
||||
not debug_mode()
|
||||
and not child_in_debug
|
||||
):
|
||||
return
|
||||
return False
|
||||
|
||||
|
||||
msg: str = header_msg
|
||||
if (
|
||||
is_root_process()
|
||||
):
|
||||
|
@ -970,41 +1137,147 @@ async def maybe_wait_for_debugger(
|
|||
# will make the pdb repl unusable.
|
||||
# Instead try to wait for pdb to be released before
|
||||
# tearing down.
|
||||
sub_in_debug: tuple[str, str] | None = None
|
||||
in_debug: tuple[str, str]|None = Lock.global_actor_in_debug
|
||||
debug_complete: trio.Event|None = Lock.no_remote_has_tty
|
||||
|
||||
for _ in range(poll_steps):
|
||||
|
||||
if Lock.global_actor_in_debug:
|
||||
sub_in_debug = tuple(Lock.global_actor_in_debug)
|
||||
|
||||
log.debug('Root polling for debug')
|
||||
|
||||
with trio.CancelScope(shield=True):
|
||||
await trio.sleep(poll_delay)
|
||||
|
||||
# TODO: could this make things more deterministic? wait
|
||||
# to see if a sub-actor task will be scheduled and grab
|
||||
# the tty lock on the next tick?
|
||||
# XXX: doesn't seem to work
|
||||
# await trio.testing.wait_all_tasks_blocked(cushion=0)
|
||||
|
||||
debug_complete = Lock.no_remote_has_tty
|
||||
if (
|
||||
debug_complete
|
||||
and sub_in_debug is not None
|
||||
and not debug_complete.is_set()
|
||||
):
|
||||
log.pdb(
|
||||
'Root has errored but pdb is in use by '
|
||||
f'child {sub_in_debug}\n'
|
||||
'Waiting on tty lock to release..'
|
||||
if in_debug == current_actor().uid:
|
||||
log.debug(
|
||||
msg
|
||||
+
|
||||
'Root already owns the TTY LOCK'
|
||||
)
|
||||
return True
|
||||
|
||||
await debug_complete.wait()
|
||||
|
||||
await trio.sleep(poll_delay)
|
||||
continue
|
||||
elif in_debug:
|
||||
msg += (
|
||||
f'Debug `Lock` in use by subactor: {in_debug}\n'
|
||||
)
|
||||
# TODO: could this make things more deterministic?
|
||||
# wait to see if a sub-actor task will be
|
||||
# scheduled and grab the tty lock on the next
|
||||
# tick?
|
||||
# XXX => but it doesn't seem to work..
|
||||
# await trio.testing.wait_all_tasks_blocked(cushion=0)
|
||||
else:
|
||||
log.debug(
|
||||
'Root acquired TTY LOCK'
|
||||
msg
|
||||
+
|
||||
'Root immediately acquired debug TTY LOCK'
|
||||
)
|
||||
return False
|
||||
|
||||
for istep in range(poll_steps):
|
||||
if (
|
||||
debug_complete
|
||||
and not debug_complete.is_set()
|
||||
and in_debug is not None
|
||||
):
|
||||
log.pdb(
|
||||
msg
|
||||
+
|
||||
'Root is waiting on tty lock to release..\n'
|
||||
)
|
||||
with trio.CancelScope(shield=True):
|
||||
await debug_complete.wait()
|
||||
log.pdb(
|
||||
f'Child subactor released debug lock\n'
|
||||
f'|_{in_debug}\n'
|
||||
)
|
||||
|
||||
# is no subactor locking debugger currently?
|
||||
if (
|
||||
in_debug is None
|
||||
and (
|
||||
debug_complete is None
|
||||
or debug_complete.is_set()
|
||||
)
|
||||
):
|
||||
log.pdb(
|
||||
msg
|
||||
+
|
||||
'Root acquired tty lock!'
|
||||
)
|
||||
break
|
||||
|
||||
else:
|
||||
# TODO: don't need this right?
|
||||
# await trio.lowlevel.checkpoint()
|
||||
|
||||
log.debug(
|
||||
'Root polling for debug:\n'
|
||||
f'poll step: {istep}\n'
|
||||
f'poll delya: {poll_delay}'
|
||||
)
|
||||
with trio.CancelScope(shield=True):
|
||||
await trio.sleep(poll_delay)
|
||||
continue
|
||||
|
||||
# fallthrough on failure to acquire..
|
||||
# else:
|
||||
# raise RuntimeError(
|
||||
# msg
|
||||
# +
|
||||
# 'Root actor failed to acquire debug lock?'
|
||||
# )
|
||||
return True
|
||||
|
||||
# else:
|
||||
# # TODO: non-root call for #320?
|
||||
# this_uid: tuple[str, str] = current_actor().uid
|
||||
# async with acquire_debug_lock(
|
||||
# subactor_uid=this_uid,
|
||||
# ):
|
||||
# pass
|
||||
return False
|
||||
|
||||
# TODO: better naming and what additionals?
|
||||
# - [ ] optional runtime plugging?
|
||||
# - [ ] detection for sync vs. async code?
|
||||
# - [ ] specialized REPL entry when in distributed mode?
|
||||
# - [x] allow ignoring kbi Bo
|
||||
@cm
def open_crash_handler(
    # NOTE: these default sets are only ever read, never mutated, so
    # sharing them across calls is safe.
    catch: set[type[BaseException]] = {
        Exception,
        BaseException,
    },
    ignore: set[type[BaseException]] = {
        KeyboardInterrupt,
    },
):
    '''
    Generic "post mortem" crash handler using `pdbp` REPL debugger.

    We expose this as a CLI framework addon to both `click` and
    `typer` users so they can quickly wrap cmd endpoints which get
    automatically wrapped to use the runtime's `debug_mode: bool`
    AND `pdbp.pm()` around any code that is PRE-runtime entry
    - any sync code which runs BEFORE the main call to
    `trio.run()`.

    Parameters:
    - `catch`: exception classes which trigger the handler; matched
      like a normal `except` clause (subclasses included).
    - `ignore`: exception classes which are caught but must NOT open
      the REPL; also matched subclass-inclusive so that, for
      example, a subclass of `KeyboardInterrupt` is skipped just
      like `KeyboardInterrupt` itself.

    The caught exception is always re-raised, whether or not the
    REPL was entered.

    '''
    try:
        yield
    except tuple(catch) as err:

        # use `isinstance()` so `ignore` follows the same matching
        # rule as the `except tuple(catch)` clause above; an exact
        # `type(err)` comparison would let subclasses of an ignored
        # exception fall through into the REPL.
        if not isinstance(err, tuple(ignore)):
            pdbp.xpm()

        raise
|
||||
|
||||
|
||||
@cm
def maybe_open_crash_handler(pdb: bool = False):
    '''
    Flag-gated flavour of `open_crash_handler()`.

    When `pdb` is truthy the wrapped code runs under
    `open_crash_handler()`; otherwise it runs under a no-op
    `nullcontext()`.

    Normally this is used with CLI endpoints such that if the --pdb
    flag is passed the pdb REPL is engaged on any crashes B)

    '''
    # choose the ctx-manager factory first, then enter whichever
    # one was selected.
    handler_factory = open_crash_handler if pdb else nullcontext
    with handler_factory():
        yield
|
|
@ -0,0 +1,84 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
'''
|
||||
On-demand task-tree introspection: dump the current `trio` task
tree (rendered via `stackscope`) to the console log whenever this
process receives a signal, `SIGUSR1` by default.
|
||||
|
||||
'''
|
||||
from signal import (
|
||||
signal,
|
||||
SIGUSR1,
|
||||
)
|
||||
|
||||
import trio
|
||||
|
||||
@trio.lowlevel.disable_ki_protection
def dump_task_tree() -> None:
    '''
    Render the `trio` task tree, starting from the current root task
    and recursing into child tasks, using `stackscope` and emit the
    resulting text via `.pdb()` on the console logger obtained from
    `get_console_log('cancel')`.

    '''
    # imported lazily, at call time, rather than at module import.
    import stackscope
    from tractor.log import get_console_log

    root_task = trio.lowlevel.current_root_task()
    extracted = stackscope.extract(
        root_task,
        recurse_child_tasks=True,
    )
    tree_str: str = str(extracted)

    get_console_log('cancel').pdb(
        f'Dumping `stackscope` tree:\n\n'
        f'{tree_str}\n'
    )
    # alternative (disabled): write the tree straight to the tty,
    #
    # import logging
    # try:
    #     with open("/dev/tty", "w") as tty:
    #         tty.write(tree_str)
    # except BaseException:
    #     logging.getLogger(
    #         "task_tree"
    #     ).exception("Error printing task tree")
|
||||
|
||||
|
||||
def signal_handler(sig: int, frame: object) -> None:
    '''
    Signal callback: schedule `dump_task_tree()` on the running
    `trio` loop through the current trio token.

    If that raises `RuntimeError` fall back to printing a plain
    Python stack trace. Neither `sig` nor `frame` is used.

    '''
    import traceback

    try:
        token = trio.lowlevel.current_trio_token()
        token.run_sync_soon(dump_task_tree)

    except RuntimeError:
        # not in async context -- print a normal traceback
        traceback.print_stack()
|
||||
|
||||
|
||||
|
||||
def enable_stack_on_sig(
    sig: int = SIGUSR1
) -> None:
    '''
    Install `signal_handler()` as the handler for `sig` so that
    receiving it triggers a `stackscope` task-tree dump.

    `sig` defaults to `SIGUSR1`.

    '''
    signal(sig, signal_handler)
    # NOTE: the above can be triggered from
    # a (xonsh) shell using:
    # kill -SIGUSR1 @$(pgrep -f '<cmd>')
|
|
@ -0,0 +1,129 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright 2018-eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
"""
|
||||
CLI framework extensions for hacking on the actor runtime.
|
||||
|
||||
Currently popular frameworks supported are:
|
||||
|
||||
- `typer` via the `@callback` API
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
)
|
||||
from typing_extensions import Annotated
|
||||
|
||||
import typer
|
||||
|
||||
|
||||
_runtime_vars: dict[str, Any] = {}
|
||||
|
||||
|
||||
def load_runtime_vars(
    ctx: typer.Context,
    callback: Callable,
    pdb: bool = False,  # --pdb
    ll: Annotated[
        str,
        typer.Option(
            '--loglevel',
            '-l',
            help='BigD logging level',
        ),
    ] = 'cancel',  # -l info
):
    '''
    Maybe engage crash handling with `pdbp` when code inside
    a `typer` CLI endpoint cmd raises.

    To use this callback simply take your `app = typer.Typer()` instance
    and decorate this function with it like so:

    .. code:: python

        from tractor.devx import cli

        app = typer.Typer()

        # manual decoration to hook into `click`'s context system!
        cli.load_runtime_vars = app.callback(
            invoke_without_command=True,
        )

    And then you can use the now augmented `click` CLI context as so,

    .. code:: python

        @app.command(
            context_settings={
                "allow_extra_args": True,
                "ignore_unknown_options": True,
            }
        )
        def my_cli_cmd(
            ctx: typer.Context,
        ):
            rtvars: dict = ctx.runtime_vars
            pdb: bool = rtvars['pdb']

            with tractor.devx.cli.maybe_open_crash_handler(pdb=pdb):
                trio.run(
                    partial(
                        my_tractor_main_task_func,
                        debug_mode=pdb,
                        loglevel=rtvars['ll'],
                    )
                )

    which will enable log level and debug mode globally for the entire
    `tractor` + `trio` runtime thereafter!

    Bo

    Parameters:
    - `ctx`: the `typer` context; gains a `.runtime_vars` attribute
      and, in the no-sub-command case below, has its `.params`
      overwritten.
    - `callback`: the function to run as the default command when no
      (other) sub-command was invoked.
    - `pdb`: stored under the `'pdb'` key of the runtime vars.
    - `ll`: log level name (`--loglevel` / `-l`), stored under the
      `'ll'` key of the runtime vars.

    '''
    global _runtime_vars
    # in-place update of the module-level dict; keys from any
    # earlier call are kept unless overwritten here.
    _runtime_vars |= {
        'pdb': pdb,
        'll': ll,
    }

    # expose the very same dict object (not a copy) on the context.
    ctx.runtime_vars: dict[str, Any] = _runtime_vars
    print(
        f'`typer` sub-cmd: {ctx.invoked_subcommand}\n'
        f'`tractor` runtime vars: {_runtime_vars}'
    )

    # XXX NOTE XXX: hackzone.. if no sub-cmd is specified (the
    # default if the user just invokes `bigd`) then we simply
    # invoke the sole `_bigd()` cmd passing in the "parent"
    # typer.Context directly to that call since we're treating it
    # as a "non sub-command" or wtv..
    # TODO: ideally typer would have some kinda built-in way to get
    # this behaviour without having to construct and manually
    # invoke our own cmd..
    if (
        ctx.invoked_subcommand is None
        or ctx.invoked_subcommand == callback.__name__
    ):
        # wrap `callback` in an ad-hoc command object; note the
        # command name is hard-coded to 'bigd'.
        cmd: typer.core.TyperCommand = typer.core.TyperCommand(
            name='bigd',
            callback=callback,
        )
        # replace (not merge) the parsed params so the callback is
        # invoked with only the context itself as `ctx`.
        ctx.params = {'ctx': ctx}
        cmd.invoke(ctx)
|
Loading…
Reference in New Issue