More service-mngr clarity notes

Nothing changing functionally here just adding more `tractor` operational notes, tips for debug tooling and typing fixes B) Of particular note is adding further details about the reason we do not need to call `Context.cancel()` inside the `finally:` block of `.open_context_in_task()` thanks to `tractor`'s new and improved inter-actor cancellation semantics Bo
2024-08-21 13:26:05 -04:00 · 2024-08-21 13:26:05 -04:00 · 4ce161e443
parent 4f34797780
commit 4ce161e443
1 changed files with 52 additions and 11 deletions
--- a/piker/service/_mngr.py
+++ b/piker/service/_mngr.py
@ -144,6 +144,9 @@ async def open_service_mngr(
            # TODO: is this more clever/efficient?
            # if 'samplerd' in mngr.service_tasks:
            #     await mngr.cancel_service('samplerd')
+
+            # await tractor.pause(shield=True)
+            # ^XXX, if needed mk sure to shield it ;)
            tn.cancel_scope.cancel()


@ -241,7 +244,11 @@ class ServiceMngr:
                ]
            ] = trio.TASK_STATUS_IGNORED,

-        ) -> Any:
+        ) -> tuple[
+            trio.CancelScope,
+            Context,
+            Any,  # started value from ctx
+        ]:

            # TODO: use the ctx._scope directly here instead?
            # -[ ] actually what semantics do we expect for this
@ -251,6 +258,10 @@ class ServiceMngr:
                    async with portal.open_context(
                        target,
                        allow_overruns=allow_overruns,
+
+                        # hide_tb=False,
+                        # ^XXX^ HAWT TIPZ
+
                        **ctx_kwargs,

                    ) as (ctx, started):
@ -269,7 +280,9 @@ class ServiceMngr:
                        # wait on any context's return value
                        # and any final portal result from the
                        # sub-actor.
-                        ctx_res: Any = await ctx.wait_for_result()
+                        ctx_res: Any = await ctx.wait_for_result(
+                            # hide_tb=False,
+                        )

                        # NOTE: blocks indefinitely until cancelled
                        # either by error from the target context
@ -304,25 +317,53 @@ class ServiceMngr:
                finally:
                    # NOTE: the ctx MUST be cancelled first if we
                    # don't want the above `ctx.wait_for_result()` to
-                    # raise a self-ctxc. WHY, well since from the ctx's
+                    # raise a self-ctxc.
+                    #
+                    # WHY, well since from the ctx's
                    # perspective the cancel request will have
                    # arrived out-out-of-band at the `Actor.cancel()`
-                    # level, thus `Context.cancel_called == False`,
+                    # level (since pikerd will have called
+                    # `Portal.cancel_actor()`), and thus
+                    # `Context.cancel_called == False`,
                    # meaning `ctx._is_self_cancelled() == False`.
-                    # with trio.CancelScope(shield=True):
-                    # await ctx.cancel()
+                    #
+                    # HOWEVER, this should happen implicitly WITHOUT
+                    # a manual `ctx.cancel()` call HERE since,
+                    #
+                    # - in the mngr shutdown case the surrounding
+                    #  `.service_n.cancel_scope` should be
+                    #  `.cancel_called == True` and the
+                    #  `Portal.open_context()` internals should take
+                    #  care of it.
+                    #
+                    # - in the specific-service cancellation case,
+                    #   `.cancel_service()` makes the manual
+                    #   `ctx.cancel()` call for us which SHOULD mean
+                    #   the ctxc is never raised above (since, again,
+                    #   it will be gracefully suppressed by
+                    #   `.open_context()` internals) and thus we only
+                    #   need to shut down the service actor.
                    await portal.cancel_actor()
-                    complete.set()
                    self.service_tasks.pop(name)
+                    complete.set()

-        cs, sub_ctx, complete, started = await self.service_n.start(
+        (
+            cs,  # internally allocated
+            sub_ctx,  # RPC peer-actor ctx
+            complete,  # termination syncing
+            started,  # proxyed from internal `.open_context()` entry.
+        ) = await self.service_n.start(
            open_context_in_task
        )

        # store the cancel scope and portal for later cancellation or
        # retstart if needed.
        self.service_tasks[name] = (cs, sub_ctx, portal, complete)
-        return cs, sub_ctx, started
+        return (
+            cs,
+            sub_ctx,
+            started,
+        )

    async def cancel_service(
        self,
@ -341,11 +382,11 @@ class ServiceMngr:
        await complete.wait()

        if name in self.service_tasks:
-            # TODO: custom err?
-            # raise ServiceError(
            raise RuntimeError(
                f'Serice task for {name} not terminated?'
            )
+            # raise ServiceError(
+            # ^TODO? custom err type?

        # assert name not in self.service_tasks, \
        #     f'Serice task for {name} not terminated?'