forked from goodboy/tractor
1
0
Fork 0
tractor/tractor/_root.py

315 lines
9.3 KiB
Python
Raw Normal View History

Re-license code base for distribution under AGPL This commit obviously denotes a re-license of all applicable parts of the code base. Acknowledgement of this change was completed in #274 by the majority of the current set of contributors. From here henceforth all changes will be AGPL licensed and distributed. This is purely an effort to maintain the same copy-left policy whilst closing the (perceived) SaaS loophole the GPL allows for. It is merely for this loophole: to avoid code hiding by any potential "network providers" who are attempting to use the project to make a profit without either compensating the authors or re-distributing their changes. I thought quite a bit about this change and can't see a reason not to close the SaaS loophole in our current license. We still are (hard) copy-left and I plan to keep the code base this way for a couple reasons: - The code base produces income/profit through parent projects and is demonstrably of high value. - I believe firms should not get free lunch for the sake of "contributions from their employees" or "usage as a service" which I have found to be a dubious argument at best. - If a firm who intends to profit from the code base wants to use it they can propose a secondary commercial license to purchase with the proceeds going to the project's authors under some form of well defined contract. - Many successful projects like Qt use this model; I see no reason it can't work in this case until such a time as the authors feel it should be loosened. There has been detailed discussion in #103 on licensing alternatives. The main point of this AGPL change is to protect the code base for the time being from exploitation while it grows and as we move into the next phase of development which will include extension into the multi-host distributed software space.
2021-12-13 18:08:32 +00:00
# tractor: structured concurrent "actors".
# Copyright 2018-eternity Tyler Goodlet.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
'''
Root actor runtime ignition(s).
'''
from contextlib import asynccontextmanager
from functools import partial
import importlib
import logging
import signal
import sys
import os
import typing
2020-12-27 16:51:38 +00:00
import warnings
from exceptiongroup import BaseExceptionGroup
import trio
from ._runtime import (
Actor,
Arbiter,
async_main,
)
from . import _debug
from . import _spawn
from . import _state
from . import log
from ._ipc import _connect_chan
from ._exceptions import is_multi_cancelled
# set at startup and after forks
2021-09-08 00:24:02 +00:00
_default_arbiter_host: str = '127.0.0.1'
_default_arbiter_port: int = 1616
logger = log.get_logger('tractor')
@asynccontextmanager
async def open_root_actor(
2020-12-27 16:51:38 +00:00
*,
2020-12-27 16:51:38 +00:00
# defaults are above
arbiter_addr: tuple[str, int] | None = None,
# defaults are above
registry_addr: tuple[str, int] | None = None,
2020-12-27 16:51:38 +00:00
2023-01-26 17:43:06 +00:00
name: str | None = 'root',
2020-12-27 16:51:38 +00:00
# either the `multiprocessing` start method:
# https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
# OR `trio` (the new default).
2023-01-26 17:43:06 +00:00
start_method: _spawn.SpawnMethodKey | None = None,
2020-12-27 16:51:38 +00:00
# enables the multi-process debugger support
debug_mode: bool = False,
2020-12-27 16:51:38 +00:00
2021-01-03 02:34:39 +00:00
# internal logging
2023-01-26 17:43:06 +00:00
loglevel: str | None = None,
2021-01-03 02:34:39 +00:00
2023-01-26 17:43:06 +00:00
enable_modules: list | None = None,
rpc_module_paths: list | None = None,
2021-01-03 02:34:39 +00:00
) -> typing.Any:
'''
Runtime init entry point for ``tractor``.
'''
# Override the global debugger hook to make it play nice with
# ``trio``, see much discussion in:
# https://github.com/python-trio/trio/issues/1155#issuecomment-742964018
builtin_bp_handler = sys.breakpointhook
orig_bp_path: str | None = os.environ.get('PYTHONBREAKPOINT', None)
os.environ['PYTHONBREAKPOINT'] = 'tractor._debug._set_trace'
# attempt to retreive ``trio``'s sigint handler and stash it
# on our debugger lock state.
2023-01-26 17:43:06 +00:00
_debug.Lock._trio_handler = signal.getsignal(signal.SIGINT)
# mark top most level process as root actor
_state._runtime_vars['_is_root'] = True
2021-01-03 02:34:39 +00:00
# caps based rpc list
2021-01-05 13:28:06 +00:00
enable_modules = enable_modules or []
if rpc_module_paths:
warnings.warn(
"`rpc_module_paths` is now deprecated, use "
" `enable_modules` instead.",
DeprecationWarning,
stacklevel=2,
)
enable_modules.extend(rpc_module_paths)
2021-01-03 02:34:39 +00:00
if start_method is not None:
_spawn.try_set_start_method(start_method)
if arbiter_addr is not None:
warnings.warn(
'`arbiter_addr` is now deprecated and has been renamed to'
'`registry_addr`.\nUse that instead..',
DeprecationWarning,
stacklevel=2,
)
registry_addr = (host, port) = (
registry_addr
or arbiter_addr
or (
_default_arbiter_host,
_default_arbiter_port,
)
)
loglevel = (loglevel or log._default_loglevel).upper()
2021-11-28 17:46:57 +00:00
if debug_mode and _spawn._spawn_method == 'trio':
_state._runtime_vars['_debug_mode'] = True
# expose internal debug module to every actor allowing
# for use of ``await tractor.breakpoint()``
2021-01-05 13:28:06 +00:00
enable_modules.append('tractor._debug')
# if debug mode get's enabled *at least* use that level of
# logging for some informative console prompts.
if (
logging.getLevelName(
# lul, need the upper case for the -> int map?
# sweet "dynamic function behaviour" stdlib...
loglevel,
) > logging.getLevelName('PDB')
):
loglevel = 'PDB'
2021-07-08 17:02:33 +00:00
elif debug_mode:
raise RuntimeError(
"Debug mode is only supported for the `trio` backend!"
)
log.get_console_log(loglevel)
try:
# make a temporary connection to see if an arbiter exists,
# if one can't be made quickly we assume none exists.
arbiter_found = False
# TODO: this connect-and-bail forces us to have to carefully
# rewrap TCP 104-connection-reset errors as EOF so as to avoid
# propagating cancel-causing errors to the channel-msg loop
# machinery. Likely it would be better to eventually have
# a "discovery" protocol with basic handshake instead.
with trio.move_on_after(1):
async with _connect_chan(host, port):
arbiter_found = True
except OSError:
# TODO: make this a "discovery" log level?
2023-01-26 17:43:06 +00:00
logger.warning(f"No actor registry found @ {host}:{port}")
# create a local actor and start up its main routine/task
2020-12-27 16:51:38 +00:00
if arbiter_found:
# we were able to connect to an arbiter
logger.info(f"Arbiter seems to exist @ {host}:{port}")
actor = Actor(
name or 'anonymous',
arbiter_addr=registry_addr,
2021-01-03 02:34:39 +00:00
loglevel=loglevel,
2021-01-05 13:28:06 +00:00
enable_modules=enable_modules,
)
host, port = (host, 0)
else:
2021-01-03 02:34:39 +00:00
# start this local actor as the arbiter (aka a regular actor who
# manages the local registry of "mailboxes")
# Note that if the current actor is the arbiter it is desirable
# for it to stay up indefinitely until a re-election process has
# taken place - which is not implemented yet FYI).
actor = Arbiter(
name or 'arbiter',
arbiter_addr=registry_addr,
2021-01-03 02:34:39 +00:00
loglevel=loglevel,
2021-01-05 13:28:06 +00:00
enable_modules=enable_modules,
)
try:
# assign process-local actor
_state._current_actor = actor
# start local channel-server and fake the portal API
# NOTE: this won't block since we provide the nursery
logger.info(f"Starting local {actor} @ {host}:{port}")
# start the actor runtime in a new task
async with trio.open_nursery() as nursery:
2022-08-03 19:29:34 +00:00
# ``_runtime.async_main()`` creates an internal nursery and
# thus blocks here until the entire underlying actor tree has
# terminated thereby conducting structured concurrency.
await nursery.start(
partial(
2022-08-03 19:29:34 +00:00
async_main,
actor,
accept_addr=(host, port),
parent_addr=None
)
)
try:
yield actor
2021-02-24 17:59:43 +00:00
except (
Exception,
BaseExceptionGroup,
) as err:
2021-08-01 14:43:21 +00:00
entered = await _debug._maybe_enter_pm(err)
if not entered and not is_multi_cancelled(err):
logger.exception("Root actor crashed:")
2021-04-29 02:19:36 +00:00
# always re-raise
raise
2021-04-29 02:19:36 +00:00
finally:
# NOTE: not sure if we'll ever need this but it's
# possibly better for even more determinism?
# logger.cancel(
# f'Waiting on {len(nurseries)} nurseries in root..')
# nurseries = actor._actoruid2nursery.values()
# async with trio.open_nursery() as tempn:
# for an in nurseries:
# tempn.start_soon(an.exited.wait)
logger.cancel("Shutting down root actor")
Remote `Context` cancellation semantics rework B) This adds remote cancellation semantics to our `tractor.Context` machinery to more closely match that of `trio.CancelScope` but with operational differences to handle the nature of parallel tasks interoperating across multiple memory boundaries: - if an actor task cancels some context it has opened via `Context.cancel()`, the remote (scope linked) task will be cancelled using the normal `CancelScope` semantics of `trio` meaning the remote cancel scope surrounding the far side task is cancelled and `trio.Cancelled`s are expected to be raised in that scope as per normal `trio` operation, and in the case where no error is raised in that remote scope, a `ContextCancelled` error is raised inside the runtime machinery and relayed back to the opener/caller side of the context. - if any actor task cancels a full remote actor runtime using `Portal.cancel_actor()` the same semantics as above apply except every other remote actor task which also has an open context with the actor which was cancelled will also be sent a `ContextCancelled` **but** with the `.canceller` field set to the uid of the original cancel requesting actor. This changeset also includes a more "proper" solution to the issue of "allowing overruns" during streaming without attempting to implement any form of IPC streaming backpressure. Implementing task-granularity backpressure cross-process turns out to be more or less impossible without augmenting out streaming protocol (likely at the cost of performance). Further allowing overruns requires special care since any blocking of the runtime RPC msg loop task effectively can block control msgs such as cancels and stream terminations. The implementation details per abstraction layer are as follows. ._streaming.Context: - add a new contructor factor func `mk_context()` which provides a strictly private init-er whilst allowing us to not have to define an `.__init__()` on the type def. - add public `.cancel_called` and `.cancel_called_remote` properties. - general rename of what was the internal `._backpressure` var to `._allow_overruns: bool`. - move the old contents of `Actor._push_result()` into a new `._deliver_msg()` allowing for better encapsulation of per-ctx msg handling. - always check for received 'error' msgs and process them with the new `_maybe_cancel_and_set_remote_error()` **before** any msg delivery to the local task, thus guaranteeing error and cancellation handling despite any overflow handling. - add a new `._drain_overflows()` task-method for use with new `._allow_overruns: bool = True` mode. - add back a `._scope_nursery: trio.Nursery` (allocated in `Portal.open_context()`) who's sole purpose is to spawn a single task which runs the above method; anything else is an error. - augment `._deliver_msg()` to start a task and run the above method when operating in no overrun mode; the task queues overflow msgs and attempts to send them to the underlying mem chan using a blocking `.send()` call. - on context exit, any existing "drainer task" will be cancelled and remaining overflow queued msgs are discarded with a warning. - rename `._error` -> `_remote_error` and set it in a new method `_maybe_cancel_and_set_remote_error()` which is called before processing - adjust `.result()` to always call `._maybe_raise_remote_err()` at its start such that whenever a `ContextCancelled` arrives we do logic for whether or not to immediately raise that error or ignore it due to the current actor being the one who requested the cancel, by checking the error's `.canceller` field. - set the default value of `._result` to be `id(Context()` thus avoiding conflict with any `.result()` actually being `False`.. ._runtime.Actor: - augment `.cancel()` and `._cancel_task()` and `.cancel_rpc_tasks()` to take a `requesting_uid: tuple` indicating the source actor of every cancellation request. - pass through the new `Context._allow_overruns` through `.get_context()` - call the new `Context._deliver_msg()` from `._push_result()` (since the factoring out that method's contents). ._runtime._invoke: - `TastStatus.started()` back a `Context` (unless an error is raised) instead of the cancel scope to make it easy to set/get state on that context for the purposes of cancellation and remote error relay. - always raise any remote error via `Context._maybe_raise_remote_err()` before doing any `ContextCancelled` logic. - assign any `Context._cancel_called_remote` set by the `requesting_uid` cancel methods (mentioned above) to the `ContextCancelled.canceller`. ._runtime.process_messages: - always pass a `requesting_uid: tuple` to `Actor.cancel()` and `._cancel_task` to that any corresponding `ContextCancelled.canceller` can be set inside `._invoke()`.
2023-04-13 20:03:35 +00:00
await actor.cancel(
requesting_uid=actor.uid,
)
finally:
_state._current_actor = None
# restore breakpoint hook state
sys.breakpointhook = builtin_bp_handler
if orig_bp_path is not None:
os.environ['PYTHONBREAKPOINT'] = orig_bp_path
else:
# clear env back to having no entry
os.environ.pop('PYTHONBREAKPOINT')
logger.runtime("Root actor terminated")
def run_daemon(
enable_modules: list[str],
# runtime kwargs
2023-01-26 17:43:06 +00:00
name: str | None = 'root',
registry_addr: tuple[str, int] = (
_default_arbiter_host,
_default_arbiter_port,
),
2023-01-26 17:43:06 +00:00
start_method: str | None = None,
debug_mode: bool = False,
**kwargs
) -> None:
'''
Spawn daemon actor which will respond to RPC; the main task simply
starts the runtime and then sleeps forever.
This is a very minimal convenience wrapper around starting
a "run-until-cancelled" root actor which can be started with a set
of enabled modules for RPC request handling.
'''
kwargs['enable_modules'] = list(enable_modules)
for path in enable_modules:
importlib.import_module(path)
async def _main():
async with open_root_actor(
registry_addr=registry_addr,
name=name,
start_method=start_method,
debug_mode=debug_mode,
**kwargs,
):
return await trio.sleep_forever()
return trio.run(_main)