Ensure all sub-services cancel on `pikerd` exit
Previously we were relying on implicit actor termination in `maybe_spawn_daemon()`, but on `pikerd` teardown we should be sure to tear down not only all service tasks in each actor but also the actor runtimes themselves. This adjusts `Services.cancel_service()` to only cancel the service task's scope and then wait on the `complete` event, and reworks the `open_context_in_task()` inner closure body to:

- always cancel the service actor at exit,
- never call `.cancel_service()` from within the task (which could otherwise recurse on cancellation),
- allocate a `complete: trio.Event` to signal full task + actor termination, and
- pop the service task from the `.service_tasks` registry.

Further, add a `maybe_set_global_registry_sockaddr()` helper-cm which does the work of checking whether a registry socket address needs to be (or already has been) set, and use it for discovery calls into the `pikerd` service tree.
parent 6a1bb13feb
commit c8c641a038

piker/_daemon.py | 190
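To make the teardown protocol in the message concrete, here is a minimal, self-contained sketch in plain `trio` (no `tractor`; all names are illustrative, not taken from the diff): the service task owns its cancel scope and a `complete` event, runs teardown in a `finally:` block, and the canceller only fires the scope and then waits on the event for confirmed termination.

    import trio

    async def service(task_status=trio.TASK_STATUS_IGNORED):
        with trio.CancelScope() as cs:
            complete = trio.Event()
            task_status.started((cs, complete))
            try:
                # stand-in for awaiting ``ctx.result()`` / ``portal.result()``
                await trio.sleep_forever()
            finally:
                # actor teardown would go here (e.g. ``portal.cancel_actor()``)
                complete.set()

    async def main():
        async with trio.open_nursery() as n:
            cs, complete = await n.start(service)
            cs.cancel()            # cancel only the task's scope...
            await complete.wait()  # ...then wait on full, confirmed teardown

    trio.run(main)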
@@ -19,7 +19,10 @@ Structured, daemon tree service management.
 
 """
 from typing import Optional, Union, Callable, Any
-from contextlib import asynccontextmanager as acm
+from contextlib import (
+    asynccontextmanager as acm,
+    contextmanager as cm,
+)
 from collections import defaultdict
 
 import tractor
@@ -57,12 +60,21 @@ _root_modules = [
 ]
 
 
+# TODO: factor this into a ``tractor.highlevel`` extension
+# pack for the library.
 class Services:
 
     actor_n: tractor._supervise.ActorNursery
     service_n: trio.Nursery
     debug_mode: bool  # tractor sub-actor debug mode flag
-    service_tasks: dict[str, tuple[trio.CancelScope, tractor.Portal]] = {}
+    service_tasks: dict[
+        str,
+        tuple[
+            trio.CancelScope,
+            tractor.Portal,
+            trio.Event,
+        ]
+    ] = {}
     locks = defaultdict(trio.Lock)
 
     @classmethod
@@ -84,7 +96,12 @@ class Services:
         '''
         async def open_context_in_task(
             task_status: TaskStatus[
-                trio.CancelScope] = trio.TASK_STATUS_IGNORED,
+                tuple[
+                    trio.CancelScope,
+                    trio.Event,
+                    Any,
+                ]
+            ] = trio.TASK_STATUS_IGNORED,
 
         ) -> Any:
 
@@ -96,28 +113,33 @@ class Services:
                 ) as (ctx, first):
 
                     # unblock once the remote context has started
-                    task_status.started((cs, first))
+                    complete = trio.Event()
+                    task_status.started((cs, complete, first))
                     log.info(
                         f'`pikerd` service {name} started with value {first}'
                     )
                     try:
                         # wait on any context's return value
+                        # and any final portal result from the
+                        # sub-actor.
                         ctx_res = await ctx.result()
-                    except tractor.ContextCancelled:
-                        return await self.cancel_service(name)
-                    else:
-                        # wait on any error from the sub-actor
-                        # NOTE: this will block indefinitely until
-                        # cancelled either by error from the target
-                        # context function or by being cancelled here by
-                        # the surrounding cancel scope
+
+                        # NOTE: blocks indefinitely until cancelled
+                        # either by error from the target context
+                        # function or by being cancelled here by the
+                        # surrounding cancel scope.
                         return (await portal.result(), ctx_res)
 
-        cs, first = await self.service_n.start(open_context_in_task)
+                    finally:
+                        await portal.cancel_actor()
+                        complete.set()
+                        self.service_tasks.pop(name)
+
+        cs, complete, first = await self.service_n.start(open_context_in_task)
 
         # store the cancel scope and portal for later cancellation or
         # retstart if needed.
-        self.service_tasks[name] = (cs, portal)
+        self.service_tasks[name] = (cs, portal, complete)
 
         return cs, first
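(Design note: with teardown moved into the `finally:` above, the same cancel-actor + set-event + pop-registry sequence runs whether the context errors, is cancelled externally, or returns normally, and the task never re-enters `Services.cancel_service()`, which is what previously risked recursion on cancellation.)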
@@ -127,13 +149,38 @@ class Services:
         name: str,
 
     ) -> Any:
+        '''
+        Cancel the service task and actor for the given ``name``.
+
+        '''
         log.info(f'Cancelling `pikerd` service {name}')
-        cs, portal = self.service_tasks[name]
-        # XXX: not entirely sure why this is required,
-        # and should probably be better fine tuned in
-        # ``tractor``?
+        cs, portal, complete = self.service_tasks[name]
         cs.cancel()
-        return await portal.cancel_actor()
+        await complete.wait()
+        assert name not in self.service_tasks, \
+            f'Serice task for {name} not terminated?'
+
+
+@cm
+def maybe_set_global_registry_sockaddr(
+    registry_addr: None | tuple[str, int] = None,
+) -> None:
+
+    global _registry_addr
+    was_set: bool = False
+    if (
+        _registry_addr is None
+        or registry_addr
+    ):
+        _registry_addr = registry_addr or _default_reg_addr
+    try:
+        yield _registry_addr
+    finally:
+        # XXX: always clear the global addr if we set it so that the
+        # next (set of) calls will apply whatever new one is passed
+        # in.
+        if was_set:
+            _registry_addr = None
+
+
 @acm
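An aside on the helper-cm just added: the point of the pattern is that a one-shot registry override gets cleared again on exit so the next (set of) calls can apply a fresh address. A standalone sketch of that pattern follows (illustrative only, not the committed code; note it explicitly flips `was_set` when assigning the global so the `finally:` reset actually fires, and the default sockaddr is an assumption mirroring the module's `_default_reg_addr`):

    from contextlib import contextmanager

    _default_reg_addr: tuple[str, int] = ('127.0.0.1', 6116)  # assumed default
    _registry_addr: tuple[str, int] | None = None

    @contextmanager
    def maybe_set_registry(addr: tuple[str, int] | None = None):
        global _registry_addr
        was_set: bool = False
        if (
            _registry_addr is None
            or addr
        ):
            # set the global only when unset or explicitly overridden
            _registry_addr = addr or _default_reg_addr
            was_set = True
        try:
            yield _registry_addr
        finally:
            # clear the global iff we set it so the next (set of)
            # calls can apply whatever new address is passed in
            if was_set:
                _registry_addr = None

The same `_registry_addr` global is what the discovery guards added further below in `find_service()` and `check_for_service()` consult, short-circuiting with `None` when no registry address was ever set.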
@@ -155,40 +202,38 @@ async def open_pikerd(
     alive underling services (see below).
 
     '''
-    global _registry_addr
-
-    if (
-        _registry_addr is None
-        or registry_addr
-    ):
-        _registry_addr = registry_addr or _default_reg_addr
-
-    # XXX: this may open a root actor as well
-    async with (
-        tractor.open_root_actor(
-
-            # passed through to ``open_root_actor``
-            arbiter_addr=_registry_addr,
-            name=_root_dname,
-            loglevel=loglevel,
-            debug_mode=debug_mode,
-            start_method=start_method,
-
-            # TODO: eventually we should be able to avoid
-            # having the root have more then permissions to
-            # spawn other specialized daemons I think?
-            enable_modules=_root_modules,
-        ) as _,
-
-        tractor.open_nursery() as actor_nursery,
-    ):
-        async with trio.open_nursery() as service_nursery:
-
-            # assign globally for future daemon/task creation
-            Services.actor_n = actor_nursery
-            Services.service_n = service_nursery
-            Services.debug_mode = debug_mode
-            yield
+    with maybe_set_global_registry_sockaddr(registry_addr) as reg_addr:
+        async with (
+            tractor.open_root_actor(
+
+                # passed through to ``open_root_actor``
+                arbiter_addr=reg_addr,
+                name=_root_dname,
+                loglevel=loglevel,
+                debug_mode=debug_mode,
+                start_method=start_method,
+
+                # TODO: eventually we should be able to avoid
+                # having the root have more then permissions to
+                # spawn other specialized daemons I think?
+                enable_modules=_root_modules,
+            ) as _,
+
+            tractor.open_nursery() as actor_nursery,
+        ):
+            async with trio.open_nursery() as service_nursery:
+
+                # assign globally for future daemon/task creation
+                Services.actor_n = actor_nursery
+                Services.service_n = service_nursery
+                Services.debug_mode = debug_mode
+                try:
+                    yield
+                finally:
+                    # if 'samplerd' in Services.service_tasks:
+                    #     await Services.cancel_service('samplerd')
+                    service_nursery.cancel_scope.cancel()
 
 
 @acm
@@ -209,32 +254,24 @@ async def open_piker_runtime(
     existing piker actors on the local link based on configuration.
 
     '''
-    global _registry_addr
-
-    if (
-        _registry_addr is None
-        or registry_addr
-    ):
-        _registry_addr = registry_addr or _default_reg_addr
-
-    # XXX: this may open a root actor as well
-    async with (
-        tractor.open_root_actor(
-
-            # passed through to ``open_root_actor``
-            arbiter_addr=_registry_addr,
-            name=name,
-            loglevel=loglevel,
-            debug_mode=debug_mode,
-            start_method=start_method,
-
-            # TODO: eventually we should be able to avoid
-            # having the root have more then permissions to
-            # spawn other specialized daemons I think?
-            enable_modules=_root_modules + enable_modules,
-        ) as _,
-    ):
-        yield tractor.current_actor()
+    with maybe_set_global_registry_sockaddr(registry_addr) as reg_addr:
+        async with (
+            tractor.open_root_actor(
+
+                # passed through to ``open_root_actor``
+                arbiter_addr=reg_addr,
+                name=name,
+                loglevel=loglevel,
+                debug_mode=debug_mode,
+                start_method=start_method,
+
+                # TODO: eventually we should be able to avoid
+                # having the root have more then permissions to
+                # spawn other specialized daemons I think?
+                enable_modules=_root_modules + enable_modules,
+            ) as _,
+        ):
+            yield tractor.current_actor()
 
 
 @acm
@@ -325,6 +362,11 @@ async def find_service(
     service_name: str,
 ) -> Optional[tractor.Portal]:
 
+    global _registry_addr
+    if not _registry_addr:
+        yield None
+        return
+
     log.info(f'Scanning for service `{service_name}`')
     # attach to existing daemon by name if possible
     async with tractor.find_actor(
@@ -342,6 +384,10 @@ async def check_for_service(
     Service daemon "liveness" predicate.
 
     '''
+    global _registry_addr
+    if not _registry_addr:
+        return None
+
     async with tractor.query_actor(
         service_name,
         arbiter_sockaddr=_registry_addr,