# piker: trading gear for hackers
# Copyright (C) Tyler Goodlet (in stewardship for pikers)

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.

# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""
Daemon-actor spawning "endpoint-hooks".

"""
from __future__ import annotations
from typing import (
    Callable,
    Any,
)
from contextlib import (
    asynccontextmanager as acm,
)
from collections import defaultdict

import tractor
import trio

from ._util import (
    log,  # sub-sys logger
)
from ._mngr import (
    get_service_mngr,
    ServiceMngr,
)
from ._actor_runtime import maybe_open_pikerd
from ._registry import find_service


@acm
async def maybe_spawn_daemon(

    service_name: str,
    service_task_target: Callable,
    spawn_args: dict[str, Any],

    loglevel: str | None = None,
    singleton: bool = False,

    _locks = defaultdict(trio.Lock),
    **pikerd_kwargs,

) -> tractor.Portal:
    '''
    If no ``service_name`` daemon-actor can be found, spawn one in
    a local subactor and return a portal to it.

    If this function is called from a non-pikerd actor, the spawned
    service will persist as long as pikerd does, or until it is
    requested to be cancelled.

    This can be seen as a service-starting API for remote-actor
    clients.

    '''
    # serialize access to this section to avoid
    # 2 or more tasks racing to create a daemon
    lock = _locks[service_name]
    await lock.acquire()

    async with find_service(
        service_name,
        registry_addrs=[('127.0.0.1', 6116)],
    ) as portal:
        if portal is not None:
            lock.release()
            yield portal
            return

    log.warning(
        f"Couldn't find any existing {service_name}\n"
        'Attempting to spawn new daemon-service..'
    )

    # ask the root ``pikerd`` daemon to spawn the daemon we need;
    # if pikerd is not live we now become the root of the
    # process tree
    async with maybe_open_pikerd(
        loglevel=loglevel,
        **pikerd_kwargs,

    ) as pikerd_portal:

        # we are the root and thus are `pikerd`, so spawn the target
        # service directly by calling the provided target routine.
        #
        # XXX: this assumes that the target is well formed and will
        # do the right things to setup both a sub-actor **and** call
        # the ``_Services`` api from above to start the top level
        # service task for that actor.
        started: bool
        if pikerd_portal is None:
            started = await service_task_target(
                loglevel=loglevel,
                **spawn_args,
            )

        else:
            # request a remote `pikerd` (service manager) to start the
            # target daemon-task. the target can't return
            # a non-serializable value since it is expected that service
            # starting is non-blocking and the target task will persist
            # running "under" or "within" the `pikerd` actor tree after
            # the requesting client disconnects. in other words this
            # spawns a persistent daemon actor that continues to live
            # for the lifespan of whatever the service manager inside
            # `pikerd` says it should.
            started = await pikerd_portal.run(
                service_task_target,
                loglevel=loglevel,
                **spawn_args,
            )

        if started:
            log.info(f'Service {service_name} started!')

        # block until we can discover (and connect to, via IPC) the
        # newly spawned daemon-actor and then deliver the portal to
        # the caller.
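        #
        # NOTE: a rough, hedged sketch of what the discovery wait
        # below amounts to, written against `tractor.find_actor()`'s
        # yield-`None`-on-miss semantics (the real
        # `tractor.wait_for_actor()` blocks on the registrar rather
        # than polling):
        #
        #   while True:
        #       async with tractor.find_actor(service_name) as portal:
        #           if portal is not None:
        #               break  # actor registered, portal connected
        #       await trio.sleep(0.1)  # not up yet, retry discovery
        #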
        async with tractor.wait_for_actor(service_name) as portal:
            lock.release()
            yield portal

            # --- ---- ---
            # XXX NOTE XXX
            # --- ---- ---
            # DO NOT PUT A `portal.cancel_actor()` here (as was prior)!
            #
            # Doing so will cause an "out-of-band" ctxc
            # (`tractor.ContextCancelled`) to be raised inside the
            # `ServiceMngr.open_context_in_task()`'s call to
            # `ctx.wait_for_result()` AND the internal self-ctxc
            # "graceful capture" WILL NOT CATCH IT!
            #
            # This can cause certain types of operations to raise
            # that ctxc BEFORE THEY `return`, resulting in
            # a "false-negative" ctxc being raised when really
            # nothing actually failed, other than our semantic
            # "failure" to suppress an expected, graceful,
            # self-cancel scenario..
            #
            # bUt wHy duZ It WorK lIKe dis..
            # ------------------------------
            # from the perspective of the `tractor.Context` this
            # cancel request was conducted "out of band" since
            # `Context.cancel()` was never called and thus the
            # `._cancel_called: bool` was never set. Despite the
            # remote `.canceller` being set to `pikerd` (i.e. the
            # same `Actor.uid` of the raising service-mngr task) the
            # service-task's ctx itself was never marked as having
            # requested cancellation and thus still raises the ctxc
            # bc it was unaware of any such request.
            #
            # How to make grokin these cases easier tho?
            # ------------------------------------------
            # Because `Portal.cancel_actor()` was called it requests
            # "full-`Actor`-runtime-cancellation" of its peer
            # process, which IS NOT THE SAME as a single inter-actor
            # RPC task cancelling its local context with a remote
            # peer `Task` in that same peer process.
            #
            # ?TODO? It might be better if we do one (or all) of the
            # following:
            #
            # -[ ] at least set a special message for the
            #    `ContextCancelled` when raised locally by the
            #    unaware ctx task such that we check for the
            #    `.canceller` being *our `Actor`* and in the case
            #    where `Context._cancel_called == False` we specially
            #    note that this is likely an "out-of-band"
            #    runtime-cancel request triggered by some call to
            #    `Portal.cancel_actor()`, possibly even reporting the
            #    exact LOC of that caller by tracking it inside our
            #    portal-type?
            # -[ ] possibly add another field to `ContextCancelled`,
            #    like maybe a
            #    `.request_type: Literal['os', 'proc', 'actor',
            #    'ctx']` type thing which would allow immediately
            #    being able to tell what kind of cancellation caused
            #    the unexpected ctxc?
            # -[ ] REMOVE THIS COMMENT, once we've settled on how to
            #    better augment `tractor` to be more explicit on this!


async def spawn_emsd(
    loglevel: str | None = None,
    **extra_tractor_kwargs

) -> bool:
    """
    Start the clearing engine under ``pikerd``.

    """
    log.info('Spawning emsd')

    smngr: ServiceMngr = get_service_mngr()
    portal = await smngr.actor_n.start_actor(
        'emsd',
        enable_modules=[
            'piker.clearing._ems',
            'piker.clearing._client',
        ],
        loglevel=loglevel,
        debug_mode=smngr.debug_mode,  # set by pikerd flag
        **extra_tractor_kwargs
    )

    # non-blocking setup of clearing service
    from ..clearing._ems import _setup_persistent_emsd

    await smngr.start_service_task(
        'emsd',
        portal,

        # signature of target root-task endpoint
        _setup_persistent_emsd,
        loglevel=loglevel,
    )
    return True


@acm
async def maybe_open_emsd(
    brokername: str,
    loglevel: str | None = None,

    **pikerd_kwargs,

) -> tractor.Portal:  # noqa

    async with maybe_spawn_daemon(
        'emsd',
        service_task_target=spawn_emsd,
        spawn_args={},
        loglevel=loglevel,
        **pikerd_kwargs,

    ) as portal:
        yield portal
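

# ---- usage sketch ----
# A minimal, hedged example of how a client task might acquire the
# clearing engine via the hook above; the broker name 'ib', the
# loglevel, and the surrounding task body are illustrative
# assumptions, not part of this module's tested surface:
#
#   async def my_order_client():
#       async with maybe_open_emsd(
#           'ib',
#           loglevel='info',
#       ) as emsd_portal:
#           # portal is now connected to a (possibly just-spawned)
#           # `emsd` actor running under `pikerd`; open IPC
#           # contexts/streams against it as needed.
#           log.info(f'emsd up @ {emsd_portal.channel.uid}')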