piker/piker/_daemon.py

# piker: trading gear for hackers
# Copyright (C) Tyler Goodlet (in stewardship for piker0)
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
Structured, daemon tree service management.
"""
from __future__ import annotations
import os
from typing import (
    Optional,
    Callable,
    Any,
    ClassVar,
)
from contextlib import (
    asynccontextmanager as acm,
)
from collections import defaultdict
import tractor
import trio
from trio_typing import TaskStatus
from .log import (
    get_logger,
    get_console_log,
)
from .brokers import get_brokermod
from pprint import pformat
from functools import partial


log = get_logger(__name__)

_root_dname = 'pikerd'
_default_registry_host: str = '127.0.0.1'
_default_registry_port: int = 6116
_default_reg_addr: tuple[str, int] = (
    _default_registry_host,
    _default_registry_port,
)

# NOTE: this value is set as an actor-global once the first endpoint
# capable of doing so spawns a `pikerd` service tree.
_registry: Registry | None = None


class Registry:
    addr: None | tuple[str, int] = None

    # TODO: table of uids to sockaddrs
    peers: dict[
        tuple[str, str],
        tuple[str, int],
    ] = {}


_tractor_kwargs: dict[str, Any] = {}

@acm
async def open_registry(
    addr: None | tuple[str, int] = None,
    ensure_exists: bool = True,

) -> tuple[str, int]:
    '''
    Open the process-global registry-sockaddr state, optionally
    binding it for the first time.

    '''
    global _tractor_kwargs
    actor = tractor.current_actor()
    uid = actor.uid

    if (
        Registry.addr is not None
        and addr
    ):
        raise RuntimeError(
            f'`{uid}` registry addr already bound @ {Registry.addr}'
        )

    was_set: bool = False

    if (
        not tractor.is_root_process()
        and Registry.addr is None
    ):
        Registry.addr = actor._arb_addr

    if (
        ensure_exists
        and Registry.addr is None
    ):
        raise RuntimeError(
            f"`{uid}` registry should already exist but doesn't?"
        )

    if (
        Registry.addr is None
    ):
        was_set = True
        Registry.addr = addr or _default_reg_addr

    _tractor_kwargs['arbiter_addr'] = Registry.addr

    try:
        yield Registry.addr

    finally:
        # XXX: always clear the global addr if we set it so that the
        # next (set of) calls will apply whatever new one is passed
        # in.
        if was_set:
            Registry.addr = None
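
# Usage sketch (illustrative only, not part of the original module): a
# task running inside any piker actor that needs the registry sockaddr
# can just enter the context manager and read the yielded
# `(host, port)` tuple,
#
#   async with open_registry() as reg_addr:
#       host, port = reg_addr
#       log.info(f'registry presumed @ {host}:{port}')
#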


def get_tractor_runtime_kwargs() -> dict[str, Any]:
    '''
    Deliver ``tractor`` related runtime variables in a `dict`.

    '''
    return _tractor_kwargs

_root_modules = [
    __name__,
    'piker.clearing._ems',
    'piker.clearing._client',
    'piker.data._sampling',
]


# TODO: factor this into a ``tractor.highlevel`` extension
# pack for the library.
class Services:

    actor_n: tractor._supervise.ActorNursery
    service_n: trio.Nursery
    debug_mode: bool  # tractor sub-actor debug mode flag
    service_tasks: dict[
        str,
        tuple[
            trio.CancelScope,
            tractor.Portal,
            trio.Event,
        ]
    ] = {}
    locks = defaultdict(trio.Lock)

    @classmethod
    async def start_service_task(
        self,
        name: str,
        portal: tractor.Portal,
        target: Callable,
        **kwargs,

    ) -> (trio.CancelScope, tractor.Context):
        '''
        Open a context in a service sub-actor, add to a stack
        that gets unwound at ``pikerd`` teardown.

        This allows for allocating long-running sub-services in our main
        daemon and explicitly controlling their lifetimes.

        '''
        async def open_context_in_task(
            task_status: TaskStatus[
                tuple[
                    trio.CancelScope,
                    trio.Event,
                    Any,
                ]
            ] = trio.TASK_STATUS_IGNORED,

        ) -> Any:

            with trio.CancelScope() as cs:
                async with portal.open_context(
                    target,
                    **kwargs,

                ) as (ctx, first):

                    # unblock once the remote context has started
                    complete = trio.Event()
                    task_status.started((cs, complete, first))
                    log.info(
                        f'`pikerd` service {name} started with value {first}'
                    )
                    try:
                        # wait on any context's return value
                        # and any final portal result from the
                        # sub-actor.
                        ctx_res = await ctx.result()

                        # NOTE: blocks indefinitely until cancelled
                        # either by error from the target context
                        # function or by being cancelled here by the
                        # surrounding cancel scope.
                        return (await portal.result(), ctx_res)

                    finally:
                        await portal.cancel_actor()
                        complete.set()
                        self.service_tasks.pop(name)

        cs, complete, first = await self.service_n.start(open_context_in_task)

        # store the cancel scope and portal for later cancellation or
        # restart if needed.
        self.service_tasks[name] = (cs, portal, complete)

        return cs, first
    @classmethod
    async def cancel_service(
        self,
        name: str,

    ) -> Any:
        '''
        Cancel the service task and actor for the given ``name``.

        '''
        log.info(f'Cancelling `pikerd` service {name}')
        cs, portal, complete = self.service_tasks[name]
        cs.cancel()
        await complete.wait()
        assert name not in self.service_tasks, \
            f'Service task for {name} not terminated?'
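
# Usage sketch (illustrative only, assuming a `pikerd` tree is already
# up): start a long-lived service task in a freshly spawned sub-actor
# and cancel it later by name. The 'somed' name and
# `some_context_endpoint` target are hypothetical stand-ins,
#
#   portal = await Services.actor_n.start_actor(
#       'somed',
#       enable_modules=['piker.data._sampling'],
#   )
#   cs, first = await Services.start_service_task(
#       'somed',
#       portal,
#       some_context_endpoint,  # hypothetical @tractor.context endpoint
#   )
#   ...
#   await Services.cancel_service('somed')
#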

@acm
async def open_piker_runtime(
    name: str,
    enable_modules: list[str] = [],
    loglevel: Optional[str] = None,

    # XXX NOTE XXX: you should pretty much never want debug mode
    # for data daemons when running in production.
    debug_mode: bool = False,

    registry_addr: None | tuple[str, int] = None,

    # TODO: once we have `rsyscall` support we will read a config
    # and spawn the service tree distributed per that.
    start_method: str = 'trio',

    **tractor_kwargs,

) -> tuple[
    tractor.Actor,
    tuple[str, int],
]:
    '''
    Start a piker actor whose runtime will automatically sync with
    existing piker actors on the local link based on configuration.

    Can be called from a subactor or any program that needs to start
    a root actor.

    '''
    try:
        # check for existing runtime
        actor = tractor.current_actor()

    except tractor._exceptions.NoRuntime:
        registry_addr = registry_addr or _default_reg_addr

        async with (
            tractor.open_root_actor(

                # passed through to ``open_root_actor``
                arbiter_addr=registry_addr,
                name=name,
                loglevel=loglevel,
                debug_mode=debug_mode,
                start_method=start_method,

                # TODO: eventually we should be able to avoid
                # having the root have more than permissions to
                # spawn other specialized daemons I think?
                enable_modules=enable_modules,

                **tractor_kwargs,
            ) as _,

            open_registry(registry_addr, ensure_exists=False) as addr,
        ):
            yield (
                tractor.current_actor(),
                addr,
            )
    else:
        async with open_registry(registry_addr) as addr:
            yield (
                actor,
                addr,
            )
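
# Usage sketch (illustrative only): any standalone piker program can
# bring up (or sync with) the local runtime like so; the 'my_tool'
# name and module list are assumptions for the example,
#
#   async with open_piker_runtime(
#       'my_tool',
#       enable_modules=['piker.data.feed'],
#       loglevel='info',
#   ) as (actor, reg_addr):
#       ...
#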


@acm
async def open_pikerd(
    loglevel: str | None = None,

    # XXX: you should pretty much never want debug mode
    # for data daemons when running in production.
    debug_mode: bool = False,
    registry_addr: None | tuple[str, int] = None,

    # db init flags
    tsdb: bool = False,
    es: bool = False,

) -> Services:
    '''
    Start a root piker daemon whose lifetime extends indefinitely until
    cancelled.

    A root actor nursery is created which can be used to create and keep
    alive underlying services (see below).

    '''
    async with (
        open_piker_runtime(

            name=_root_dname,
            # TODO: eventually we should be able to avoid
            # having the root have more than permissions to
            # spawn other specialized daemons I think?
            enable_modules=_root_modules,

            loglevel=loglevel,
            debug_mode=debug_mode,
            registry_addr=registry_addr,

        ) as (root_actor, reg_addr),

        tractor.open_nursery() as actor_nursery,
        trio.open_nursery() as service_nursery,
    ):
        assert root_actor.accept_addr == reg_addr

        if tsdb:
            from piker.data._ahab import start_ahab
            from piker.data.marketstore import start_marketstore

            log.info('Spawning `marketstore` supervisor')
            ctn_ready, config, (cid, pid) = await service_nursery.start(
                start_ahab,
                'marketstored',
                start_marketstore,
            )
            log.info(
                f'`marketstored` up!\n'
                f'pid: {pid}\n'
                f'container id: {cid[:12]}\n'
                f'config: {pformat(config)}'
            )

        if es:
            from piker.data._ahab import start_ahab
            from piker.data.elastic import start_elasticsearch

            log.info('Spawning `elasticsearch` supervisor')
            ctn_ready, config, (cid, pid) = await service_nursery.start(
                partial(
                    start_ahab,
                    'elasticsearch',
                    start_elasticsearch,
                    start_timeout=240.0,  # high since CI can be slow
                )
            )
            log.info(
                f'`elasticsearch` up!\n'
                f'pid: {pid}\n'
                f'container id: {cid[:12]}\n'
                f'config: {pformat(config)}'
            )

        # assign globally for future daemon/task creation
        Services.actor_n = actor_nursery
        Services.service_n = service_nursery
        Services.debug_mode = debug_mode

        try:
            yield Services

        finally:
            # TODO: is this more clever/efficient?
            # if 'samplerd' in Services.service_tasks:
            #     await Services.cancel_service('samplerd')

            service_nursery.cancel_scope.cancel()
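
# Usage sketch (illustrative only): boot a root `pikerd` tree with the
# optional marketstore time-series db enabled; the loglevel value is an
# assumption for the example,
#
#   async with open_pikerd(
#       loglevel='info',
#       tsdb=True,
#   ) as services:
#       # spawn sub-daemons via `services.actor_n` / `start_service_task()`
#       await trio.sleep_forever()
#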


@acm
async def maybe_open_runtime(
    loglevel: Optional[str] = None,
    **kwargs,

) -> None:
    '''
    Start the ``tractor`` runtime (a root actor) if none exists.

    '''
    name = kwargs.pop('name')

    if not tractor.current_actor(err_on_no_runtime=False):
        async with open_piker_runtime(
            name,
            loglevel=loglevel,
            **kwargs,
        ) as (_, addr):
            yield addr
    else:
        async with open_registry() as addr:
            yield addr


@acm
async def maybe_open_pikerd(
    loglevel: Optional[str] = None,
    registry_addr: None | tuple = None,
    tsdb: bool = False,
    es: bool = False,

    **kwargs,

) -> tractor._portal.Portal | ClassVar[Services]:
    '''
    If no ``pikerd`` daemon-root-actor can be found start it and
    yield up (we should probably figure out returning a portal to self
    though).

    '''
    if loglevel:
        get_console_log(loglevel)

    # subtle, we must have the runtime up here or portal lookup will fail
    query_name = kwargs.pop('name', f'piker_query_{os.getpid()}')

    # TODO: if we need to make the query part faster we could not init
    # an actor runtime and instead just hit the socket?
    # from tractor._ipc import _connect_chan, Channel
    # async with _connect_chan(host, port) as chan:
    #     async with open_portal(chan) as arb_portal:
    #         yield arb_portal

    async with (
        open_piker_runtime(
            name=query_name,
            registry_addr=registry_addr,
            loglevel=loglevel,
            **kwargs,
        ) as _,

        tractor.find_actor(
            _root_dname,
            arbiter_sockaddr=registry_addr,
        ) as portal
    ):
        # connect to any existing daemon presuming
        # its registry socket was selected.
        if (
            portal is not None
        ):
            yield portal
            return
    # presume pikerd role since no daemon could be found at
    # configured address
    async with open_pikerd(
        loglevel=loglevel,
        debug_mode=kwargs.get('debug_mode', False),
        registry_addr=registry_addr,
        tsdb=tsdb,
        es=es,

    ) as service_manager:
        # in the case where we're starting up the
        # tractor-piker runtime stack in **this** process
        # we return no portal to self.
        assert service_manager
        yield service_manager
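
# Usage sketch (illustrative only): a client that wants a `pikerd` tree
# up, whether or not one already exists on the configured registry
# address,
#
#   async with maybe_open_pikerd(loglevel='info') as pikerd:
#       if isinstance(pikerd, tractor.Portal):
#           # found and connected to an existing remote `pikerd`
#           ...
#       else:
#           # this process became the root `pikerd`; `pikerd` here is
#           # the `Services` type from above
#           ...
#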

# `brokerd` enabled modules
# NOTE: keeping this list as small as possible is part of our caps-sec
# model and should be treated with utmost care!
_data_mods = [
    'piker.brokers.core',
    'piker.brokers.data',
    'piker.data',
    'piker.data.feed',
    'piker.data._sampling',
]


@acm
async def find_service(
    service_name: str,
) -> tractor.Portal | None:

    async with open_registry() as reg_addr:
        log.info(f'Scanning for service `{service_name}`')

        # attach to existing daemon by name if possible
        async with tractor.find_actor(
            service_name,
            arbiter_sockaddr=reg_addr,
        ) as maybe_portal:
            yield maybe_portal


async def check_for_service(
    service_name: str,

) -> None | tuple[str, int]:
    '''
    Service daemon "liveness" predicate.

    '''
    async with open_registry(ensure_exists=False) as reg_addr:
        async with tractor.query_actor(
            service_name,
            arbiter_sockaddr=reg_addr,
        ) as sockaddr:
            return sockaddr
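
# Usage sketch (illustrative only): poll for a live `emsd` before
# deciding whether to spawn one,
#
#   sockaddr = await check_for_service('emsd')
#   if sockaddr is None:
#       log.info('No `emsd` found, spawning one..')
#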


@acm
async def maybe_spawn_daemon(

    service_name: str,
    service_task_target: Callable,
    spawn_args: dict[str, Any],
    loglevel: Optional[str] = None,

    singleton: bool = False,
    **kwargs,

) -> tractor.Portal:
    '''
    If no ``service_name`` daemon-actor can be found,
    spawn one in a local subactor and return a portal to it.

    If this function is called from a non-pikerd actor, the
    spawned service will persist as long as pikerd does or
    until it is requested to be cancelled.

    This can be seen as a service-starting api for remote-actor
    clients.

    '''
    if loglevel:
        get_console_log(loglevel)

    # serialize access to this section to avoid
    # 2 or more tasks racing to create a daemon
    lock = Services.locks[service_name]
    await lock.acquire()

    async with find_service(service_name) as portal:
        if portal is not None:
            lock.release()
            yield portal
            return
    log.warning(f"Couldn't find any existing {service_name}")

    # TODO: really shouldn't the actor spawning be part of the service
    # starting method `Services.start_service()` ?

    # ask root ``pikerd`` daemon to spawn the daemon we need; if
    # pikerd is not live we now become the root of the
    # process tree
    async with maybe_open_pikerd(
        loglevel=loglevel,
        **kwargs,

    ) as pikerd_portal:

        # we are the root and thus are `pikerd`
        # so spawn the target service directly by calling
        # the provided target routine.
        # XXX: this assumes that the target is well formed and will
        # do the right things to setup both a sub-actor **and** call
        # the ``_Services`` api from above to start the top level
        # service task for that actor.
        started: bool
        if pikerd_portal is None:
            started = await service_task_target(**spawn_args)

        else:
            # tell the remote `pikerd` to start the target,
            # the target can't return a non-serializable value
            # since it is expected that service starting is
            # non-blocking and the target task will persist running
            # on `pikerd` after the client requesting its start
            # disconnects.
            started = await pikerd_portal.run(
                service_task_target,
                **spawn_args,
            )

        if started:
            log.info(f'Service {service_name} started!')

        async with tractor.wait_for_actor(service_name) as portal:
            lock.release()
            yield portal
            await portal.cancel_actor()
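
# Usage sketch (illustrative only): ensure some `xyzd` daemon exists,
# spawning it via a hypothetical `spawn_xyzd()` starter that follows
# the same pattern as `spawn_brokerd()` below,
#
#   async with maybe_spawn_daemon(
#       'xyzd',
#       service_task_target=spawn_xyzd,
#       spawn_args={'loglevel': 'info'},
#   ) as portal:
#       ...
#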

async def spawn_brokerd(
    brokername: str,
    loglevel: Optional[str] = None,

    **tractor_kwargs,

) -> bool:

    log.info(f'Spawning {brokername} broker daemon')

    brokermod = get_brokermod(brokername)
    dname = f'brokerd.{brokername}'

    extra_tractor_kwargs = getattr(brokermod, '_spawn_kwargs', {})
    tractor_kwargs.update(extra_tractor_kwargs)

    # ask `pikerd` to spawn a new sub-actor and manage it under its
    # actor nursery
    modpath = brokermod.__name__
    broker_enable = [modpath]
    for submodname in getattr(
        brokermod,
        '__enable_modules__',
        [],
    ):
        subpath = f'{modpath}.{submodname}'
        broker_enable.append(subpath)

    portal = await Services.actor_n.start_actor(
        dname,
        enable_modules=_data_mods + broker_enable,
        loglevel=loglevel,
        debug_mode=Services.debug_mode,
        **tractor_kwargs
    )

    # non-blocking setup of brokerd service nursery
    from .data import _setup_persistent_brokerd

    await Services.start_service_task(
        dname,
        portal,
        _setup_persistent_brokerd,
        brokername=brokername,
    )
    return True
2022-03-07 12:18:53 +00:00
@acm
async def maybe_spawn_brokerd(
brokername: str,
loglevel: Optional[str] = None,
2021-04-10 16:15:05 +00:00
**kwargs,
) -> tractor.Portal:
2022-02-17 21:34:22 +00:00
'''
Helper to spawn a brokerd service *from* a client
who wishes to use the sub-actor-daemon.
'''
async with maybe_spawn_daemon(
f'brokerd.{brokername}',
        service_task_target=spawn_brokerd,
        spawn_args={'brokername': brokername, 'loglevel': loglevel},
        loglevel=loglevel,
        **kwargs,
    ) as portal:
        yield portal
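
# NOTE: a purely illustrative sketch, not called anywhere in this module: it
# shows how a client-side task might use `maybe_spawn_brokerd()` to lazily
# get (or spawn) a backend daemon and hold its portal open. The 'kraken'
# backend name, function name and log message are assumptions for the
# example only.
async def _example_use_brokerd() -> None:
    # acquire (or spawn) the `brokerd.kraken` sub-actor and keep it alive
    # for the duration of this block.
    async with maybe_spawn_brokerd(
        'kraken',
        loglevel='info',
    ) as portal:
        # the yielded `tractor.Portal` can be used to open further
        # contexts/streams on the daemon as needed.
        log.info(f'brokerd portal acquired: {portal}')
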
async def spawn_emsd(
    loglevel: Optional[str] = None,
    **extra_tractor_kwargs
) -> bool:
"""
Start the clearing engine under ``pikerd``.
"""
log.info('Spawning emsd')
    portal = await Services.actor_n.start_actor(
        'emsd',
        enable_modules=[
            'piker.clearing._ems',
            'piker.clearing._client',
        ],
        loglevel=loglevel,
        debug_mode=Services.debug_mode,  # set by pikerd flag
        **extra_tractor_kwargs
    )
    # non-blocking setup of clearing service
    from .clearing._ems import _setup_persistent_emsd

    await Services.start_service_task(
        'emsd',
        portal,
        _setup_persistent_emsd,
    )
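
    # the service task holds a long-lived `tractor.Context` open with the
    # freshly spawned 'emsd' actor from inside the root `pikerd` actor, so
    # any error raised by the clearing engine bubbles up through the
    # service nursery rather than being silently dropped.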
    return True
@acm
async def maybe_open_emsd(
    brokername: str,
    loglevel: Optional[str] = None,
    **kwargs,
) -> tractor._portal.Portal: # noqa
    async with maybe_spawn_daemon(
        'emsd',
        service_task_target=spawn_emsd,
        spawn_args={'loglevel': loglevel},
        loglevel=loglevel,
        **kwargs,
    ) as portal:
        yield portal
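
# NOTE: another illustrative-only sketch (not an existing call-site): a
# clearing client can request the EMS daemon in the same lazy fashion and
# talk to it through the yielded portal. The function name, 'kraken'
# backend and log message are assumptions for demonstration.
async def _example_use_emsd() -> None:
    async with maybe_open_emsd(
        'kraken',  # assumed backend name for the example
        loglevel='info',
    ) as portal:
        # `portal` references the single long-lived 'emsd' sub-actor which
        # runs `_setup_persistent_emsd()` as its root service task.
        log.info(f'emsd portal acquired: {portal}')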