# piker: trading gear for hackers
# Copyright (C) Tyler Goodlet (in stewardship for pikers)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

"""
Daemon-actor spawning "endpoint-hooks".

"""
from __future__ import annotations
from typing import (
    Callable,
    Any,
)
from contextlib import (
    asynccontextmanager as acm,
)
from collections import defaultdict

import tractor
import trio

from ._util import (
    log,  # sub-sys logger
)
from ._mngr import (
    get_service_mngr,
    ServiceMngr,
)
from ._actor_runtime import maybe_open_pikerd
from ._registry import find_service


@acm
async def maybe_spawn_daemon(
    service_name: str,
    service_task_target: Callable,
    spawn_args: dict[str, Any],

    loglevel: str | None = None,
    singleton: bool = False,

    # per-service-name locks which serialize concurrent spawn
    # attempts for the same daemon (shared across calls via the
    # default-arg instance).
    _locks = defaultdict(trio.Lock),
    **pikerd_kwargs,

) -> tractor.Portal:
    '''
    If no ``service_name`` daemon-actor can be found,
    spawn one in a local subactor and return a portal to it.

    If this function is called from a non-pikerd actor, the
    spawned service will persist as long as pikerd does or
    until it is requested to be cancelled.

    This can be seen as a service-starting API for remote-actor
    clients.

    '''
    # serialize access to this section to avoid
    # 2 or more tasks racing to create a daemon
    lock = _locks[service_name]
    await lock.acquire()

    async with find_service(
        service_name,
        registry_addrs=[('127.0.0.1', 6116)],
    ) as portal:
        if portal is not None:
            lock.release()
            yield portal
            return

    log.warning(
        f"Couldn't find any existing {service_name}\n"
        'Attempting to spawn a new daemon-service..'
    )

    # ask the root ``pikerd`` daemon to spawn the daemon we need;
    # if pikerd is not live we now become the root of the
    # process tree.
    async with maybe_open_pikerd(
        loglevel=loglevel,
        **pikerd_kwargs,

    ) as pikerd_portal:

        # we are the root and thus are `pikerd`
        # so spawn the target service directly by calling
        # the provided target routine.
        # XXX: this assumes that the target is well formed and will
        # do the right things to setup both a sub-actor **and** call
        # the ``_Services`` api from above to start the top level
        # service task for that actor.
        started: bool
        if pikerd_portal is None:
            started = await service_task_target(
                loglevel=loglevel,
                **spawn_args,
            )

        else:
            # request a remote `pikerd` (service manager) to start the
            # target daemon-task; the target can't return
            # a non-serializable value since it is expected that service
            # starting is non-blocking and the target task will persist
            # running "under" or "within" the `pikerd` actor tree after
            # the requesting client disconnects. in other words this
            # spawns a persistent daemon actor that continues to live
            # for the lifespan of whatever the service manager inside
            # `pikerd` says it should.
            started = await pikerd_portal.run(
                service_task_target,
                loglevel=loglevel,
                **spawn_args,
            )

        if started:
            log.info(f'Service {service_name} started!')

        # block until we can discover (by IPC connection) the newly
        # spawned daemon-actor and then deliver the portal to the
        # caller.
        async with tractor.wait_for_actor(service_name) as portal:
            lock.release()
            yield portal
            # --- ---- ---
            # XXX NOTE XXX
            # --- ---- ---
            # DO NOT PUT A `portal.cancel_actor()` here (as was prior)!
            #
            # Doing so will cause an "out-of-band" ctxc
            # (`tractor.ContextCancelled`) to be raised inside the
            # `ServiceMngr.open_context_in_task()`'s call to
            # `ctx.wait_for_result()` AND the internal self-ctxc
            # "graceful capture" WILL NOT CATCH IT!
            #
            # This can cause certain types of operations to raise
            # that ctxc BEFORE THEY `return`, resulting in
            # a "false-negative" ctxc being raised when really
            # nothing actually failed, other than our semantic
            # "failure" to suppress an expected, graceful,
            # self-cancel scenario..
            #
            # bUt wHy duZ It WorK lIKe dis..
            # ------------------------------
            # from the perspective of the `tractor.Context` this
            # cancel request was conducted "out of band" since
            # `Context.cancel()` was never called and thus the
            # `._cancel_called: bool` was never set. Despite the
            # remote `.canceller` being set to `pikerd` (i.e. the
            # same `Actor.uid` as the raising service-mngr task) the
            # service-task's ctx itself was never marked as having
            # requested cancellation and thus still raises the ctxc
            # bc it was unaware of any such request.
            #
            # How to make grokin these cases easier tho?
            # ------------------------------------------
            # Because `Portal.cancel_actor()` was called it requests
            # "full-`Actor`-runtime-cancellation" of its peer
            # process which IS NOT THE SAME as a single inter-actor
            # RPC task cancelling its local context with a remote
            # peer `Task` in that same peer process.
            #
            # ?TODO? It might be better if we do one (or all) of the
            # following:
            #
            # -[ ] at least set a special message for the
            #   `ContextCancelled` when raised locally by the
            #   unaware ctx task such that we check for the
            #   `.canceller` being *our `Actor`* and in the case
            #   where `Context._cancel_called == False` we specially
            #   note that this is likely an "out-of-band"
            #   runtime-cancel request triggered by some call to
            #   `Portal.cancel_actor()`, possibly even reporting the
            #   exact LOC of that caller by tracking it inside our
            #   portal-type?
            # -[ ] possibly add another field to `ContextCancelled`,
            #   maybe a
            #   `.request_type: Literal['os', 'proc', 'actor', 'ctx']`
            #   type thing which would allow immediately being able
            #   to tell what kind of cancellation caused the
            #   unexpected ctxc?
            # -[ ] REMOVE THIS COMMENT, once we've settled on how to
            #   better augment `tractor` to be more explicit on this!
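

# The note above describes how an "out-of-band" runtime-cancel (eg. a
# stray `Portal.cancel_actor()`) surfaces as a `ContextCancelled` which
# the ctx-local graceful-capture logic can't recognize. A minimal
# sketch of the first TODO item above, using only the attributes that
# note references (`.canceller`, `Context._cancel_called`); the helper
# name and exact check are assumptions, not existing piker/tractor api:
#
#   def _is_oob_runtime_cancel(
#       ctx: tractor.Context,
#       ctxc: tractor.ContextCancelled,
#   ) -> bool:
#       # "out of band": our own actor is named as the canceller even
#       # though this ctx never requested cancellation itself.
#       return (
#           not ctx._cancel_called
#           and ctxc.canceller == tractor.current_actor().uid
#       )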


async def spawn_emsd(

    loglevel: str | None = None,
    **extra_tractor_kwargs

) -> bool:
    """
    Start the clearing engine under ``pikerd``.

    """
    log.info('Spawning emsd')

    smngr: ServiceMngr = get_service_mngr()
    portal = await smngr.actor_n.start_actor(
        'emsd',
        enable_modules=[
            'piker.clearing._ems',
            'piker.clearing._client',
        ],
        loglevel=loglevel,
        debug_mode=smngr.debug_mode,  # set by pikerd flag
        **extra_tractor_kwargs
    )

    # non-blocking setup of clearing service
    from ..clearing._ems import _setup_persistent_emsd

    await smngr.start_service_task(
        'emsd',
        portal,

        # signature of target root-task endpoint
        _setup_persistent_emsd,
        loglevel=loglevel,
    )
    return True
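

# For any new service the client-side "endpoint-hook" is expected to be
# a thin `@acm` wrapper over `maybe_spawn_daemon()` paired with a spawn
# routine like `spawn_emsd()` above; `maybe_open_emsd()` below is the
# in-tree instance of this pattern. A hypothetical sketch for some
# other daemon (the `mydaemond`/`spawn_mydaemond` names are purely
# illustrative, not real piker services):
#
#   @acm
#   async def maybe_open_mydaemond(
#       loglevel: str | None = None,
#       **pikerd_kwargs,
#   ) -> tractor.Portal:
#       async with maybe_spawn_daemon(
#           'mydaemond',
#           service_task_target=spawn_mydaemond,  # mirrors `spawn_emsd`
#           spawn_args={},
#           loglevel=loglevel,
#           **pikerd_kwargs,
#       ) as portal:
#           yield portal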


@acm
async def maybe_open_emsd(

    brokername: str,
    loglevel: str | None = None,

    **pikerd_kwargs,

) -> tractor.Portal:  # noqa

    async with maybe_spawn_daemon(
        'emsd',
        service_task_target=spawn_emsd,
        spawn_args={},
        loglevel=loglevel,
        **pikerd_kwargs,

    ) as portal:
        yield portal
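

# Usage sketch: a minimal, hypothetical client coroutine showing how the
# `maybe_open_emsd()` endpoint-hook above is expected to be driven from
# a non-pikerd actor. It is illustrative only and never called by piker
# itself; real clients conduct their EMS dialogues over the yielded
# portal instead of merely logging it.
async def _example_open_ems_client(
    brokername: str = 'paper',  # assumed broker-backend name
    loglevel: str = 'info',
) -> None:
    async with maybe_open_emsd(
        brokername,
        loglevel=loglevel,
    ) as portal:
        # `emsd` is now discoverable and will keep running under the
        # `pikerd` tree even after this client task exits.
        log.info(f'emsd portal acquired: {portal}')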