Compare commits

..

3 Commits

Author SHA1 Message Date
Tyler Goodlet f7469442e3 Use "sclang"-style syntax in `to_asyncio` task logging
Just like we've started doing throughout the rest of the actor runtime
for reporting (and where "sclang" = "structured conc (s)lang", our
little supervision-focused operations syntax i've been playing with in
log msg content).

Further tweaks:
- report the `trio_done_fute` alongside the `main_outcome` value.
- add a todo list for supporting `greenback` for pause points.
2024-07-11 19:22:40 -04:00
Tyler Goodlet 8363317e11 Pass `infect_asyncio` setting via runtime-vars
The reason for this "duplication" with the `--asyncio` CLI flag (passed
to the child during spawn) is 2-fold:
- allows verifying inside `Actor._from_parent()` that the `trio` runtime was
  started via `.start_guest_run()` as well as if the
  `Actor._infected_aio` spawn-entrypoint value has been set (by the
  `._entry.<spawn-backend>_main()` whenever `--asyncio` is passed)
  such that any mismatch can be signaled via an `InternalError`.
- enables checking the `._state._runtime_vars['_is_infected_aio']` value
  directly (say from a non-actor/`trio`-thread) instead of calling
  `._state.current_actor(err_on_no_runtime=False)` in certain edge
  cases.

Impl/testing deats:
- add `._state._runtime_vars['_is_infected_aio'] = False` default.
- raise `InternalError` on any `--asyncio`-flag-passed vs.
  `_runtime_vars`-value-relayed-from-parent inside
  `Actor._from_parent()` and include a `Runner.is_guest` assert for good
  measure B)
- set and relay `infect_asyncio: bool` via runtime-vars to child in
  `ActorNursery.start_actor()`.
- verify `actor.is_infected_aio()`, `actor._infected_aio` and
  `_state._runtime_vars['_is_infected_aio']` are all set in test suite's
  `asyncio_actor()` endpoint.
2024-07-11 13:22:53 -04:00
Tyler Goodlet a628eabb30 Officially test proto-ed `stackscope` integration
By re-purposing our `pexpect`-based console matching with a new
`debugging/shield_hang_in_sub.py` example, this tests a few "hanging
actor" conditions more formally:

- that despite a hanging actor's task we can dump
  a `stackscope.extract()` tree on relay of `SIGUSR1`.
- the actor tree will terminate despite a shielded forever-sleep by our
  "T-800" zombie reaper machinery activating and hard killing the
  underlying subprocess.

Some test deats:
- simulates the expect actions of a real user by manually using
  `os.kill()` to send both signals to the actor-tree program.
- `pexpect`-matches against `log.devx()` emissions under normal
  `debug_mode == True` usage.
- ensure we get the actual "T-800 deployed" `log.error()` msg and
  that the actor tree eventually terminates!

Surrounding (re-org/impl/test-suite) changes:
- allow disabling usage via a `maybe_enable_greenback: bool` to
  `open_root_actor()` but enable by def.
- pretty up the actual `.devx()` content from `.devx._stackscope`
  including be extra pedantic about the conc-primitives for each signal
  event.
- try to avoid double handles of `SIGUSR1` even though it seems the
  original (what i thought was a) problem was actually just double
  logging in the handler..
  |_ avoid double applying the handler func via `signal.signal()`,
  |_ use a global to avoid double handle func calls and,
  |_ a `threading.RLock` around handling.
- move common fixtures and helper routines from `test_debugger` to
  `tests/devx/conftest.py` and import them for use in both test mods.
2024-07-10 19:58:27 -04:00
7 changed files with 178 additions and 23 deletions

View File

@ -0,0 +1,81 @@
'''
Verify we can dump a `stackscope` tree on a hang.
'''
import os
import signal
import trio
import tractor
@tractor.context
async def start_n_shield_hang(
ctx: tractor.Context,
):
# actor: tractor.Actor = tractor.current_actor()
# sync to parent-side task
await ctx.started(os.getpid())
print('Entering shield sleep..')
with trio.CancelScope(shield=True):
await trio.sleep_forever() # in subactor
# XXX NOTE ^^^ since this shields, we expect
# the zombie reaper (aka T800) to engage on
# SIGINT from the user and eventually hard-kill
# this subprocess!
async def main(
from_test: bool = False,
) -> None:
async with (
tractor.open_nursery(
debug_mode=True,
enable_stack_on_sig=True,
# maybe_enable_greenback=False,
loglevel='devx',
) as an,
):
ptl: tractor.Portal = await an.start_actor(
'hanger',
enable_modules=[__name__],
debug_mode=True,
)
async with ptl.open_context(
start_n_shield_hang,
) as (ctx, cpid):
_, proc, _ = an._children[ptl.chan.uid]
assert cpid == proc.pid
print(
'Yo my child hanging..?\n'
'Sending SIGUSR1 to see a tree-trace!\n'
)
# XXX simulate the wrapping test's "user actions"
# (i.e. if a human didn't run this manually but wants to
# know what they should do to reproduce test behaviour)
if from_test:
os.kill(
cpid,
signal.SIGUSR1,
)
# simulate user cancelling program
await trio.sleep(0.5)
os.kill(
os.getpid(),
signal.SIGINT,
)
else:
# actually let user send the ctl-c
await trio.sleep_forever() # in root
if __name__ == '__main__':
trio.run(main)

View File

@ -21,9 +21,11 @@ import trio
import tractor
from tractor import (
current_actor,
Actor,
to_asyncio,
RemoteActorError,
ContextCancelled,
_state,
)
from tractor.trionics import BroadcastReceiver
from tractor._testing import expect_ctxc
@ -80,7 +82,16 @@ async def asyncio_actor(
) -> None:
assert tractor.current_actor().is_infected_aio()
# ensure internal runtime state is consistent
actor: Actor = tractor.current_actor()
assert (
actor.is_infected_aio()
and
actor._infected_aio
and
_state._runtime_vars['_is_infected_aio']
)
target: Callable = globals()[target]
if '.' in expect_err:
@ -136,7 +147,7 @@ def test_aio_simple_error(reg_addr):
assert err
assert isinstance(err, RemoteActorError)
assert err.boxed_type == AssertionError
assert err.boxed_type is AssertionError
def test_tractor_cancels_aio(reg_addr):

View File

@ -20,6 +20,7 @@ Sub-process entry points.
"""
from __future__ import annotations
from functools import partial
import multiprocessing as mp
import os
import textwrap
from typing import (
@ -64,20 +65,22 @@ def _mp_main(
'''
actor._forkserver_info = forkserver_info
from ._spawn import try_set_start_method
spawn_ctx = try_set_start_method(start_method)
spawn_ctx: mp.context.BaseContext = try_set_start_method(start_method)
assert spawn_ctx
if actor.loglevel is not None:
log.info(
f"Setting loglevel for {actor.uid} to {actor.loglevel}")
f'Setting loglevel for {actor.uid} to {actor.loglevel}'
)
get_console_log(actor.loglevel)
assert spawn_ctx
# TODO: use scops headers like for `trio` below!
# (well after we libify it maybe..)
log.info(
f"Started new {spawn_ctx.current_process()} for {actor.uid}")
_state._current_actor = actor
log.debug(f"parent_addr is {parent_addr}")
f'Started new {spawn_ctx.current_process()} for {actor.uid}'
# f"parent_addr is {parent_addr}"
)
_state._current_actor: Actor = actor
trio_main = partial(
async_main,
actor=actor,
@ -94,7 +97,9 @@ def _mp_main(
pass # handle it the same way trio does?
finally:
log.info(f"Subactor {actor.uid} terminated")
log.info(
f'`mp`-subactor {actor.uid} exited'
)
# TODO: move this func to some kinda `.devx._conc_lang.py` eventually

View File

@ -59,6 +59,7 @@ import os
import warnings
import trio
from trio._core import _run as trio_runtime
from trio import (
CancelScope,
Nursery,
@ -80,6 +81,7 @@ from ._context import (
from .log import get_logger
from ._exceptions import (
ContextCancelled,
InternalError,
ModuleNotExposed,
MsgTypeError,
unpack_error,
@ -102,6 +104,7 @@ from ._rpc import (
if TYPE_CHECKING:
from ._supervise import ActorNursery
from trio._channel import MemoryChannelState
log = get_logger('tractor')
@ -897,11 +900,15 @@ class Actor:
f'peer: {chan.uid}\n'
f'cid:{cid}\n'
)
ctx._allow_overruns = allow_overruns
ctx._allow_overruns: bool = allow_overruns
# adjust buffer size if specified
state = ctx._send_chan._state # type: ignore
if msg_buffer_size and state.max_buffer_size != msg_buffer_size:
state: MemoryChannelState = ctx._send_chan._state # type: ignore
if (
msg_buffer_size
and
state.max_buffer_size != msg_buffer_size
):
state.max_buffer_size = msg_buffer_size
except KeyError:
@ -1095,7 +1102,36 @@ class Actor:
'`tractor.pause_from_sync()` not available!'
)
rvs['_is_root'] = False
# XXX ensure the "infected `asyncio` mode" setting
# passed down from our spawning parent is consistent
# with `trio`-runtime initialization:
# - during sub-proc boot, the entrypoint func
# (`._entry.<spawn_backend>_main()`) should set
# `._infected_aio = True` before calling
# `run_as_asyncio_guest()`,
# - the value of `infect_asyncio: bool = True` as
# passed to `ActorNursery.start_actor()` must be
# the same as `_runtime_vars['_is_infected_aio']`
if (
(aio_rtv := rvs['_is_infected_aio'])
!=
(aio_attr := self._infected_aio)
):
raise InternalError(
'Parent sent runtime-vars that mismatch for the '
'"infected `asyncio` mode" settings ?!?\n\n'
f'rvs["_is_infected_aio"] = {aio_rtv}\n'
f'self._infected_aio = {aio_attr}\n'
)
if aio_rtv:
assert trio_runtime.GLOBAL_RUN_CONTEXT.runner.is_guest
# ^TODO^ possibly add a `sniffio` or
# `trio` pub-API for `is_guest_mode()`?
rvs['_is_root'] = False # obvi XD
# update process-wide globals
_state._runtime_vars.update(rvs)
# XXX: ``msgspec`` doesn't support serializing tuples

View File

@ -44,6 +44,8 @@ _runtime_vars: dict[str, Any] = {
'_root_mailbox': (None, None),
'_registry_addrs': [],
'_is_infected_aio': False,
# for `tractor.pause_from_sync()` & `breakpoint()` support
'use_greenback': False,
}
@ -70,7 +72,8 @@ def current_actor(
'''
if (
err_on_no_runtime
and _current_actor is None
and
_current_actor is None
):
msg: str = 'No local actor has been initialized yet?\n'
from ._exceptions import NoRuntime

View File

@ -158,6 +158,7 @@ class ActorNursery:
# configure and pass runtime state
_rtv = _state._runtime_vars.copy()
_rtv['_is_root'] = False
_rtv['_is_infected_aio'] = infect_asyncio
# allow setting debug policy per actor
if debug_mode is not None:

View File

@ -276,7 +276,10 @@ def _run_asyncio_task(
chan._aio_task: asyncio.Task = task
# XXX TODO XXX get this actually workin.. XD
# maybe setup `greenback` for `asyncio`-side task REPLing
# -[ ] we need logic to setup `greenback` for `asyncio`-side task
# REPLing.. which should normally be nearly the same as for
# `trio`?
# -[ ] add to a new `.devx._greenback.maybe_init_for_asyncio()`?
if (
debug_mode()
and
@ -305,15 +308,22 @@ def _run_asyncio_task(
msg: str = (
'Infected `asyncio` task {etype_str}\n'
f'|_{task}\n'
)
if isinstance(terr, CancelledError):
msg += (
f'c)>\n'
f' |_{task}\n'
)
log.cancel(
msg.format(etype_str='cancelled')
)
else:
msg += (
f'x)>\n'
f' |_{task}\n'
)
log.exception(
msg.format(etype_str='cancelled')
msg.format(etype_str='errored')
)
assert type(terr) is type(aio_err), (
@ -619,9 +629,10 @@ def run_as_asyncio_guest(
# )
def trio_done_callback(main_outcome):
log.info(
f'trio_main finished with\n'
f'|_{main_outcome!r}'
log.runtime(
f'`trio` guest-run finishing with outcome\n'
f'>) {main_outcome}\n'
f'|_{trio_done_fute}\n'
)
if isinstance(main_outcome, Error):
@ -643,6 +654,12 @@ def run_as_asyncio_guest(
else:
trio_done_fute.set_result(main_outcome)
log.info(
f'`trio` guest-run finished with outcome\n'
f')>\n'
f'|_{trio_done_fute}\n'
)
startup_msg += (
f'-> created {trio_done_callback!r}\n'
f'-> scheduling `trio_main`: {trio_main!r}\n'
@ -681,7 +698,8 @@ def run_as_asyncio_guest(
# error path in `asyncio`'s runtime..?
asyncio.CancelledError,
) as fute_err:
) as _fute_err:
fute_err = _fute_err
err_message: str = (
'main `asyncio` task '
)