Move some infos to runtime level

Add PDB level and make runtime below info but above debug
WIP attempt to relay context error via raised-in-scope-nursery task
2021-06-26 23:40:21 -04:00 · 2021-06-26 23:40:21 -04:00 · 2021-06-24 19:56:05 -04:00 · 2021-06-24 18:49:51 -04:00 · 2021-06-15 17:54:42 -04:00 · 2021-06-14 16:34:44 -04:00
8 changed files with 387 additions and 207 deletions
--- a/tractor/_actor.py
+++ b/tractor/_actor.py
@ -12,7 +12,7 @@ import uuid
 import typing
 from typing import Dict, List, Tuple, Any, Optional, Union
 from types import ModuleType
-import signal
+# import signal
 import sys
 import os
 from contextlib import ExitStack
@ -31,6 +31,7 @@ from ._exceptions import (
    ModuleNotExposed,
    is_multi_cancelled,
    ContextCancelled,
+    TransportClosed,
 )
 from . import _debug
 from ._discovery import get_arbiter
@ -47,6 +48,7 @@ class ActorFailure(Exception):


 async def _invoke(
+
    actor: 'Actor',
    cid: str,
    chan: Channel,
@ -59,10 +61,12 @@ async def _invoke(
    """Invoke local func and deliver result(s) over provided channel.
    """
    treat_as_gen = False
-    cs = None
+
    cancel_scope = trio.CancelScope()
-    ctx = Context(chan, cid, _cancel_scope=cancel_scope)
-    context = False
+    cs: trio.CancelScope = None
+
+    ctx = Context(chan, cid)
+    context: bool = False

    if getattr(func, '_tractor_stream_function', False):
        # handle decorated ``@tractor.stream`` async functions
@ -150,14 +154,22 @@ async def _invoke(
            # context func with support for bi-dir streaming
            await chan.send({'functype': 'context', 'cid': cid})

-            with cancel_scope as cs:
+            async with trio.open_nursery() as scope_nursery:
+                ctx._scope_nursery = scope_nursery
+                cs = scope_nursery.cancel_scope
                task_status.started(cs)
                await chan.send({'return': await coro, 'cid': cid})

            if cs.cancelled_caught:
+                if ctx._cancel_called:
+                    msg = f'{func.__name__} cancelled itself',
+
+                else:
+                    msg = f'{func.__name__} was remotely cancelled',
+
                # task-contex was cancelled so relay to the cancel to caller
                raise ContextCancelled(
-                    f'{func.__name__} cancelled itself',
+                    msg,
                    suberror_type=trio.Cancelled,
                )

@ -170,20 +182,29 @@ async def _invoke(

    except (Exception, trio.MultiError) as err:

-        # TODO: maybe we'll want differnet "levels" of debugging
-        # eventualy such as ('app', 'supervisory', 'runtime') ?
-        if not isinstance(err, trio.ClosedResourceError) and (
-            not is_multi_cancelled(err)) and (
-            not isinstance(err, ContextCancelled)
-        ):
-            # XXX: is there any case where we'll want to debug IPC
-            # disconnects? I can't think of a reason that inspecting
-            # this type of failure will be useful for respawns or
-            # recovery logic - the only case is some kind of strange bug
-            # in `trio` itself?
-            entered = await _debug._maybe_enter_pm(err)
-            if not entered:
-                log.exception("Actor crashed:")
+        if not is_multi_cancelled(err):
+
+            log.exception("Actor crashed:")
+
+            # TODO: maybe we'll want different "levels" of debugging
+            # eventualy such as ('app', 'supervisory', 'runtime') ?
+
+            # if not isinstance(err, trio.ClosedResourceError) and (
+            # if not is_multi_cancelled(err) and (
+
+            if not isinstance(err, ContextCancelled) or (
+                isinstance(err, ContextCancelled) and ctx._cancel_called
+            ):
+                # XXX: is there any case where we'll want to debug IPC
+                # disconnects? I can't think of a reason that inspecting
+                # this type of failure will be useful for respawns or
+                # recovery logic - the only case is some kind of strange bug
+                # in `trio` itself?
+                await _debug._maybe_enter_pm(err)
+
+                # entered = await _debug._maybe_enter_pm(err)
+                # if not entered:
+                #     log.exception("Actor crashed:")

        # always ship errors back to caller
        err_msg = pack_error(err)
@ -192,8 +213,10 @@ async def _invoke(
            await chan.send(err_msg)

        except trio.ClosedResourceError:
-            log.warning(
-                f"Failed to ship error to caller @ {chan.uid}")
+            # if we can't propagate the error that's a big boo boo
+            log.error(
+                f"Failed to ship error to caller @ {chan.uid} !?"
+            )

        if cs is None:
            # error is from above code not from rpc invocation
@ -378,19 +401,34 @@ class Actor:
            raise mne

    async def _stream_handler(
+
        self,
        stream: trio.SocketStream,
+
    ) -> None:
        """Entry point for new inbound connections to the channel server.
+
        """
        self._no_more_peers = trio.Event()  # unset
+
        chan = Channel(stream=stream)
        log.info(f"New connection to us {chan}")

        # send/receive initial handshake response
        try:
            uid = await self._do_handshake(chan)
-        except StopAsyncIteration:
+
+        except (
+            trio.BrokenResourceError,
+            trio.ClosedResourceError,
+            TransportClosed,
+        ):
+            # XXX: This may propagate up from ``Channel._aiter_recv()``
+            # and ``MsgpackStream._inter_packets()`` on a read from the
+            # stream particularly when the runtime is first starting up
+            # inside ``open_root_actor()`` where there is a check for
+            # a bound listener on the "arbiter" addr.  the reset will be
+            # because the handshake was never meant took place.
            log.warning(f"Channel {chan} failed to handshake")
            return

@ -418,10 +456,24 @@ class Actor:
        try:
            await self._process_messages(chan)
        finally:
+
+            # channel cleanup sequence
+
+            # for (channel, cid) in self._rpc_tasks.copy():
+            #     if channel is chan:
+            #         with trio.CancelScope(shield=True):
+            #             await self._cancel_task(cid, channel)
+
+            #             # close all consumer side task mem chans
+            #             send_chan, _ = self._cids2qs[(chan.uid, cid)]
+            #             assert send_chan.cid == cid  # type: ignore
+            #             await send_chan.aclose()
+
            # Drop ref to channel so it can be gc-ed and disconnected
            log.debug(f"Releasing channel {chan} from {chan.uid}")
            chans = self._peers.get(chan.uid)
            chans.remove(chan)
+
            if not chans:
                log.debug(f"No more channels for {chan.uid}")
                self._peers.pop(chan.uid, None)
@ -434,14 +486,22 @@ class Actor:

            # # XXX: is this necessary (GC should do it?)
            if chan.connected():
+                # if the channel is still connected it may mean the far
+                # end has not closed and we may have gotten here due to
+                # an error and so we should at least try to terminate
+                # the channel from this end gracefully.
+
                log.debug(f"Disconnecting channel {chan}")
                try:
-                    # send our msg loop terminate sentinel
+                    # send a msg loop terminate sentinel
                    await chan.send(None)
+
+                    # XXX: do we want this?
+                    # causes "[104] connection reset by peer" on other end
                    # await chan.aclose()
+
                except trio.BrokenResourceError:
-                    log.exception(
-                        f"Channel for {chan.uid} was already zonked..")
+                    log.warning(f"Channel for {chan.uid} was already closed")

    async def _push_result(
        self,
@ -451,18 +511,22 @@ class Actor:
    ) -> None:
        """Push an RPC result to the local consumer's queue.
        """
-        actorid = chan.uid
-        assert actorid, f"`actorid` can't be {actorid}"
-        send_chan, recv_chan = self._cids2qs[(actorid, cid)]
+        # actorid = chan.uid
+        assert chan.uid, f"`chan.uid` can't be {chan.uid}"
+        send_chan, recv_chan = self._cids2qs[(chan.uid, cid)]
        assert send_chan.cid == cid  # type: ignore

-        # if 'stop' in msg:
+        if 'error' in msg:
+            ctx = getattr(recv_chan, '_ctx', None)
+            # if ctx:
+            #     ctx._error_from_remote_msg(msg)
+
        #     log.debug(f"{send_chan} was terminated at remote end")
        #     # indicate to consumer that far end has stopped
        #     return await send_chan.aclose()

        try:
-            log.debug(f"Delivering {msg} from {actorid} to caller {cid}")
+            log.debug(f"Delivering {msg} from {chan.uid} to caller {cid}")
            # maintain backpressure
            await send_chan.send(msg)

@ -481,12 +545,14 @@ class Actor:
        self,
        actorid: Tuple[str, str],
        cid: str
+
    ) -> Tuple[trio.abc.SendChannel, trio.abc.ReceiveChannel]:
+
        log.debug(f"Getting result queue for {actorid} cid {cid}")
        try:
            send_chan, recv_chan = self._cids2qs[(actorid, cid)]
        except KeyError:
-            send_chan, recv_chan = trio.open_memory_channel(1000)
+            send_chan, recv_chan = trio.open_memory_channel(2*10)
            send_chan.cid = cid  # type: ignore
            recv_chan.cid = cid  # type: ignore
            self._cids2qs[(actorid, cid)] = send_chan, recv_chan
@ -544,9 +610,15 @@ class Actor:
                            if channel is chan:
                                await self._cancel_task(cid, channel)

+                                # close all consumer side task mem chans
+                                # send_chan, _ = self._cids2qs[(chan.uid, cid)]
+                                # assert send_chan.cid == cid  # type: ignore
+                                # await send_chan.aclose()
+
                        log.debug(
                                f"Msg loop signalled to terminate for"
                                f" {chan} from {chan.uid}")
+
                        break

                    log.trace(   # type: ignore
@ -641,22 +713,36 @@ class Actor:
                    )
                    await self.cancel_rpc_tasks(chan)

-        except trio.ClosedResourceError:
-            log.error(f"{chan} form {chan.uid} broke")
+        except (
+            TransportClosed,
+            trio.BrokenResourceError,
+            trio.ClosedResourceError
+        ):
+            # channels "breaking" is ok since we don't have a teardown
+            # handshake for them (yet) and instead we simply bail out
+            # of the message loop and expect the teardown sequence
+            # to clean up.
+            log.error(f"{chan} form {chan.uid} closed abruptly")
+            # raise
+
        except (Exception, trio.MultiError) as err:
            # ship any "internal" exception (i.e. one from internal machinery
            # not from an rpc task) to parent
            log.exception("Actor errored:")
            if self._parent_chan:
                await self._parent_chan.send(pack_error(err))
-            raise
+
            # if this is the `MainProcess` we expect the error broadcasting
            # above to trigger an error at consuming portal "checkpoints"
+            raise
+
        except trio.Cancelled:
            # debugging only
            log.debug(f"Msg loop was cancelled for {chan}")
            raise
+
        finally:
+            # msg debugging for when he machinery is brokey
            log.debug(
                f"Exiting msg loop for {chan} from {chan.uid} "
                f"with last msg:\n{msg}")
@ -709,8 +795,8 @@ class Actor:
                # Disable sigint handling in children if NOT running in
                # debug mode; we shouldn't need it thanks to our
                # cancellation machinery.
-                if 'debug_mode' not in rvs:
-                    signal.signal(signal.SIGINT, signal.SIG_IGN)
+                # if 'debug_mode' not in rvs:
+                #     signal.signal(signal.SIGINT, signal.SIG_IGN)

            return chan, accept_addr

@ -1089,6 +1175,7 @@ class Actor:
        parlance.
        """
        await chan.send(self.uid)
+        # breakpoint()
        uid: Tuple[str, str] = tuple(await chan.recv())

        # if not isinstance(uid, tuple):
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@ -41,6 +41,10 @@ class ContextCancelled(RemoteActorError):
    "Inter-actor task context cancelled itself on the callee side."


+class TransportClosed(trio.ClosedResourceError):
+    "Underlying channel transport was closed prior to use"
+
+
 class NoResult(RuntimeError):
    "No final result is expected for this actor"

@ -66,12 +70,15 @@ def pack_error(exc: BaseException) -> Dict[str, Any]:


 def unpack_error(
+
    msg: Dict[str, Any],
    chan=None,
    err_type=RemoteActorError
+
 ) -> Exception:
    """Unpack an 'error' message from the wire
    into a local ``RemoteActorError``.
+
    """
    error = msg['error']

--- a/tractor/_ipc.py
+++ b/tractor/_ipc.py
@ -2,6 +2,7 @@
 Inter-process comms abstractions
 """
 from functools import partial
+import math
 import struct
 import typing
 from typing import Any, Tuple, Optional
@ -13,6 +14,7 @@ import trio
 from async_generator import asynccontextmanager

 from .log import get_logger
+from ._exceptions import TransportClosed
 log = get_logger(__name__)

 # :eyeroll:
@ -24,7 +26,7 @@ except ImportError:
    Unpacker = partial(msgpack.Unpacker, strict_map_key=False)


-class MsgpackStream:
+class MsgpackTCPStream:
    '''A ``trio.SocketStream`` delivering ``msgpack`` formatted data
    using ``msgpack-python``.

@ -47,7 +49,10 @@ class MsgpackStream:
        assert isinstance(rsockname, tuple)
        self._raddr = rsockname[:2]

+        # start and seed first entry to read loop
        self._agen = self._iter_packets()
+        # self._agen.asend(None) is None
+
        self._send_lock = trio.StrictFIFOLock()

    async def _iter_packets(self) -> typing.AsyncGenerator[dict, None]:
@ -58,16 +63,13 @@ class MsgpackStream:
            use_list=False,
        )
        while True:
-            try:
-                data = await self.stream.receive_some(2**10)
-                log.trace(f"received {data}")  # type: ignore
-            except trio.BrokenResourceError:
-                log.warning(f"Stream connection {self.raddr} broke")
-                return
+            data = await self.stream.receive_some(2**10)
+            log.trace(f"received {data}")  # type: ignore

            if data == b'':
-                log.debug(f"Stream connection {self.raddr} was closed")
-                return
+                raise TransportClosed(
+                    f'transport {self} was already closed prior ro read'
+                )

            unpacker.feed(data)
            for packet in unpacker:
@ -98,7 +100,7 @@ class MsgpackStream:
        return self.stream.socket.fileno() != -1


-class MsgspecStream(MsgpackStream):
+class MsgspecTCPStream(MsgpackTCPStream):
    '''A ``trio.SocketStream`` delivering ``msgpack`` formatted data
    using ``msgspec``.

@ -123,24 +125,22 @@ class MsgspecStream(MsgpackStream):
        while True:
            try:
                header = await self.recv_stream.receive_exactly(4)
-                if header is None:
-                    continue

-                if header == b'':
-                    log.debug(f"Stream connection {self.raddr} was closed")
-                    return
+            except (ValueError):
+                raise TransportClosed(
+                    f'transport {self} was already closed prior ro read'
+                )

-                size, = struct.unpack("<I", header)
+            if header == b'':
+                raise TransportClosed(
+                    f'transport {self} was already closed prior ro read'
+                )

-                log.trace(f'received header {size}')
+            size, = struct.unpack("<I", header)

-                msg_bytes = await self.recv_stream.receive_exactly(size)
+            log.trace(f'received header {size}')

-            # the value error here is to catch a connect with immediate
-            # disconnect that will cause an EOF error inside `tricycle`.
-            except (ValueError, trio.BrokenResourceError):
-                log.warning(f"Stream connection {self.raddr} broke")
-                return
+            msg_bytes = await self.recv_stream.receive_exactly(size)

            log.trace(f"received {msg_bytes}")  # type: ignore
            yield decoder.decode(msg_bytes)
@ -169,8 +169,9 @@ class Channel:
        on_reconnect: typing.Callable[..., typing.Awaitable] = None,
        auto_reconnect: bool = False,
        stream: trio.SocketStream = None,  # expected to be active
-        # stream_serializer: type = MsgpackStream,
-        stream_serializer_type: type = MsgspecStream,
+
+        # stream_serializer_type: type = MsgspecTCPStream,
+        stream_serializer_type: type = MsgpackTCPStream,

    ) -> None:

@ -192,6 +193,8 @@ class Channel:
        self._exc: Optional[Exception] = None
        self._agen = self._aiter_recv()

+        self._closed: bool = False
+
    def __repr__(self) -> str:
        if self.msgstream:
            return repr(
@ -208,35 +211,52 @@ class Channel:
        return self.msgstream.raddr if self.msgstream else None

    async def connect(
-        self, destaddr: Tuple[Any, ...] = None,
+        self,
+        destaddr: Tuple[Any, ...] = None,
        **kwargs
+
    ) -> trio.SocketStream:
+
        if self.connected():
            raise RuntimeError("channel is already connected?")
+
        destaddr = destaddr or self._destaddr
        assert isinstance(destaddr, tuple)
-        stream = await trio.open_tcp_stream(*destaddr, **kwargs)
+
+        stream = await trio.open_tcp_stream(
+            *destaddr,
+            happy_eyeballs_delay=math.inf,
+            **kwargs
+        )
        self.msgstream = self.stream_serializer_type(stream)
        return stream

    async def send(self, item: Any) -> None:
+
        log.trace(f"send `{item}`")  # type: ignore
        assert self.msgstream
+
        await self.msgstream.send(item)

    async def recv(self) -> Any:
        assert self.msgstream
+
        try:
            return await self.msgstream.recv()
+
        except trio.BrokenResourceError:
            if self._autorecon:
                await self._reconnect()
                return await self.recv()

+            raise
+
    async def aclose(self) -> None:
        log.debug(f"Closing {self}")
        assert self.msgstream
        await self.msgstream.stream.aclose()
+        self._closed = True
+        log.error(f'CLOSING CHAN {self}')

    async def __aenter__(self):
        await self.connect()
--- a/tractor/_portal.py
+++ b/tractor/_portal.py
@ -177,6 +177,7 @@ class Portal:
                f"Cancelling all streams with {self.channel.uid}")
            for stream in self._streams.copy():
                try:
+                    # with trio.CancelScope(shield=True):
                    await stream.aclose()
                except trio.ClosedResourceError:
                    # don't error the stream having already been closed
@ -332,12 +333,6 @@ class Portal:
            # message right now since there shouldn't be a reason to
            # stop and restart the stream, right?
            try:
-
-                # We are for sure done with this stream and no more
-                # messages are expected to be delivered from the
-                # runtime's msg loop.
-                await recv_chan.aclose()
-
                await ctx.cancel()

            except trio.ClosedResourceError:
@ -375,64 +370,78 @@ class Portal:

        recv_chan: Optional[trio.MemoryReceiveChannel] = None

+        cid, recv_chan, functype, first_msg = await self._submit(
+            fn_mod_path, fn_name, kwargs)
+
+        assert functype == 'context'
+        msg = await recv_chan.receive()
+
        try:
-            cid, recv_chan, functype, first_msg = await self._submit(
-                fn_mod_path, fn_name, kwargs)
+            # the "first" value here is delivered by the callee's
+            # ``Context.started()`` call.
+            first = msg['started']

-            assert functype == 'context'
-            msg = await recv_chan.receive()
+        except KeyError:
+            assert msg.get('cid'), ("Received internal error at context?")

-            try:
-                # the "first" value here is delivered by the callee's
-                # ``Context.started()`` call.
-                first = msg['started']
-
-            except KeyError:
-                assert msg.get('cid'), ("Received internal error at context?")
-
-                if msg.get('error'):
-                    # raise the error message
-                    raise unpack_error(msg, self.channel)
-                else:
-                    raise
-
-            # deliver context instance and .started() msg value in open
-            # tuple.
-            ctx = Context(
-                self.channel,
-                cid,
-                _portal=self,
-                _recv_chan=recv_chan,
-            )
-
-            try:
-                yield ctx, first
-
-                if cancel_on_exit:
-                    await ctx.cancel()
-
-                else:
-                    if not ctx._cancel_called:
-                        await ctx.result()
-
-            except ContextCancelled:
-                # if the context was cancelled by client code
-                # then we don't need to raise since user code
-                # is expecting this.
-                if not ctx._cancel_called:
-                    raise
-
-            except BaseException:
-                # the context cancels itself on any deviation
-                await ctx.cancel()
+            if msg.get('error'):
+                # raise the error message
+                raise unpack_error(msg, self.channel)
+            else:
                raise

-            finally:
-                log.info(f'Context for {func.__name__} completed')
+        # deliver context instance and .started() msg value in open
+        # tuple.
+        try:
+            async with trio.open_nursery() as scope_nursery:
+                ctx = Context(
+                    self.channel,
+                    cid,
+                    _portal=self,
+                    _recv_chan=recv_chan,
+                    _scope_nursery=scope_nursery,
+                )
+                recv_chan._ctx = ctx
+
+                yield ctx, first
+
+            log.info(f'Context for {func.__name__} completed')
+
+            if cancel_on_exit:
+                await ctx.cancel()
+
+            else:
+                if not ctx._cancel_called:
+                    await ctx.result()
+
+            await recv_chan.aclose()
+
+            # except TypeError:
+            #     # if fn_name == '_emsd_main':
+            #     import tractor
+            #     await tractor.breakpoint()
+
+        except ContextCancelled:
+            if not ctx._cancel_called:
+                raise
+
+            # if the context was cancelled by client code
+            # then we don't need to raise since user code
+            # is expecting this and the block should exit.
+            else:
+                log.debug(f'Context {ctx} cancelled gracefully')
+
+        except trio.Cancelled:
+            # the context cancels itself on any deviation
+            await ctx.cancel()
+            raise
+
+        # finally:
+        #     log.info(f'Context for {func.__name__} completed')
+
+        # finally:
+        #     if recv_chan is not None:

-        finally:
-            if recv_chan is not None:
-                await recv_chan.aclose()

@dataclass
 class LocalPortal:
@ -470,6 +479,7 @@ async def open_portal(
    was_connected = False

    async with maybe_open_nursery(nursery, shield=shield) as nursery:
+
        if not channel.connected():
            await channel.connect()
            was_connected = True
@ -491,11 +501,12 @@ async def open_portal(
        portal = Portal(channel)
        try:
            yield portal
+
        finally:
            await portal.aclose()

            if was_connected:
-                # cancel remote channel-msg loop
+                # gracefully signal remote channel-msg loop
                await channel.send(None)

            # cancel background msg loop task
--- a/tractor/_spawn.py
+++ b/tractor/_spawn.py
@ -147,7 +147,7 @@ async def cancel_on_completion(
            )

        else:
-            log.info(
+            log.runtime(
                f"Cancelling {portal.channel.uid} gracefully "
                f"after result {result}")

@ -263,7 +263,7 @@ async def new_proc(
                parent_addr,
                infect_asyncio=infect_asyncio
            ) as proc:
-                log.info(f"Started {proc}")
+                log.runtime(f"Started {proc}")

                # wait for actor to spawn and connect back to us
                # channel should have handshake completed by the
@ -416,7 +416,7 @@ async def mp_new_proc(
        if not proc.is_alive():
            raise ActorFailure("Couldn't start sub-actor?")

-        log.info(f"Started {proc}")
+        log.runtime(f"Started {proc}")

        try:
            # wait for actor to spawn and connect back to us
--- a/tractor/_streaming.py
+++ b/tractor/_streaming.py
@ -7,7 +7,7 @@ from contextlib import contextmanager, asynccontextmanager
 from dataclasses import dataclass
 from typing import (
    Any, Iterator, Optional, Callable,
-    AsyncGenerator,
+    AsyncGenerator, Dict,
 )

 import warnings
@ -61,11 +61,15 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
        return msg['yield']

    async def receive(self):
-        # see ``.aclose()`` for an alt to always checking this
+        # see ``.aclose()`` for notes on the old behaviour prior to
+        # introducing this
        if self._eoc:
            raise trio.EndOfChannel

        try:
+            # if self._ctx.chan.uid[0] == 'brokerd.ib':
+            #     breakpoint()
+
            msg = await self._rx_chan.receive()
            return msg['yield']

@ -81,12 +85,13 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
            if msg.get('stop'):
                log.debug(f"{self} was stopped at remote end")

-                # when the send is closed we assume the stream has
-                # terminated and signal this local iterator to stop
-                await self.aclose()
+                # # when the send is closed we assume the stream has
+                # # terminated and signal this local iterator to stop
+                # await self.aclose()

                # XXX: this causes ``ReceiveChannel.__anext__()`` to
-                # raise a ``StopAsyncIteration``.
+                # raise a ``StopAsyncIteration`` **and** in our catch
+                # block below it will trigger ``.aclose()``.
                raise trio.EndOfChannel

            # TODO: test that shows stream raising an expected error!!!
@ -97,44 +102,50 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
            else:
                raise

-        except trio.ClosedResourceError:
-            # XXX: this indicates that a `stop` message was
-            # sent by the far side of the underlying channel.
-            # Currently this is triggered by calling ``.aclose()`` on
+        except (
+            trio.ClosedResourceError,  # by self._rx_chan
+            trio.EndOfChannel,  # by self._rx_chan or `stop` msg from far end
+            trio.Cancelled,  # by local cancellation
+        ):
+            # XXX: we close the stream on any of these error conditions:
+
+            # a ``ClosedResourceError`` indicates that the internal
+            # feeder memory receive channel was closed likely by the
+            # runtime after the associated transport-channel
+            # disconnected or broke.
+
+            # an ``EndOfChannel`` indicates either the internal recv
+            # memchan exhausted **or** we raisesd it just above after
+            # receiving a `stop` message from the far end of the stream.
+
+            # Previously this was triggered by calling ``.aclose()`` on
            # the send side of the channel inside
-            # ``Actor._push_result()``, but maybe it should be put here?
-            # to avoid exposing the internal mem chan closing mechanism?
-            # in theory we could instead do some flushing of the channel
-            # if needed to ensure all consumers are complete before
-            # triggering closure too early?
+            # ``Actor._push_result()`` (should still be commented code
+            # there - which should eventually get removed), but now the
+            # 'stop' message handling has been put just above.

-            # Locally, we want to close this stream gracefully, by
+            # TODO: Locally, we want to close this stream gracefully, by
            # terminating any local consumers tasks deterministically.
-            # We **don't** want to be closing this send channel and not
-            # relaying a final value to remaining consumers who may not
-            # have been scheduled to receive it yet?
-
-            # lots of testing to do here
+            # One we have broadcast support, we **don't** want to be
+            # closing this stream and not flushing a final value to
+            # remaining (clone) consumers who may not have been
+            # scheduled to receive it yet.

            # when the send is closed we assume the stream has
            # terminated and signal this local iterator to stop
            await self.aclose()

-            raise trio.EndOfChannel
+            raise  # propagate

-            # if not isinstance(self, MsgStream):
-            #     # XXX: this was how we handled this originally for the
-            #     # single direction case?
-            #     raise trio.EndOfChannel
+        # except trio.Cancelled:
+        #     if not self._shielded:
+        #         # if shielded we don't propagate a cancelled
+        #         raise

-            # else:
-            #     # in 2-way case raise the closed error
-            #     raise  # propagate
-
-        except trio.Cancelled:
-            # relay cancels to the remote task
-            await self.aclose()
-            raise
+        # except trio.Cancelled:
+        #     # relay cancels to the remote task
+        #     await self.aclose()
+        #     raise

    @contextmanager
    def shield(
@ -158,13 +169,11 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
        on close.

        """
-        self._eoc = True
-
        # XXX: keep proper adherance to trio's `.aclose()` semantics:
        # https://trio.readthedocs.io/en/stable/reference-io.html#trio.abc.AsyncResource.aclose
        rx_chan = self._rx_chan

-        if rx_chan._closed:
+        if rx_chan._closed:  # or self._eoc:
            log.warning(f"{self} is already closed")

            # this stream has already been closed so silently succeed as
@ -184,6 +193,9 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
            log.warning(f"{self} is shielded, portal channel being kept alive")
            return

+        # XXX: This must be set **AFTER** the shielded test above!
+        self._eoc = True
+
        # NOTE: this is super subtle IPC messaging stuff:
        # Relay stop iteration to far end **iff** we're
        # in bidirectional mode. If we're only streaming
@ -195,59 +207,63 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
        # isn't expecting messages to be sent by the caller.
        # Thus, we must check that this context DOES NOT
        # have a portal reference to ensure this is indeed the callee
-        # side and can relay a 'stop'. In the bidirectional case,
-        # `Context.open_stream()` will create the `Actor._cids2qs`
-        # entry from a call to `Actor.get_memchans()`.
+        # side and can relay a 'stop'.
+
+        # In the bidirectional case, `Context.open_stream()` will create
+        # the `Actor._cids2qs` entry from a call to
+        # `Actor.get_memchans()` and will send the stop message in
+        # ``__aexit__()`` on teardown so it **does not** need to be
+        # called here.
        if not self._ctx._portal:
            try:
                # only for 2 way streams can we can send
                # stop from the caller side
                await self._ctx.send_stop()

-            except trio.BrokenResourceError:
+            except (
+                trio.BrokenResourceError,
+                trio.ClosedResourceError
+            ):
                # the underlying channel may already have been pulled
                # in which case our stop message is meaningless since
                # it can't traverse the transport.
                log.debug(f'Channel for {self} was already closed')

-        # close the local mem chan??!?
+        # close the local mem chan ``self._rx_chan`` ??!?

-        # NOT if we're a ``MsgStream``!
+        # DEFINITELY NOT if we're a bi-dir ``MsgStream``!
        # BECAUSE this same core-msg-loop mem recv-chan is used to deliver
        # the potential final result from the surrounding inter-actor
        # `Context` so we don't want to close it until that context has
        # run to completion.

-        # XXX: Notes on old behaviour.
+        # XXX: Notes on old behaviour:
+        # await rx_chan.aclose()

-        # In the receive-only case, ``Portal.open_stream_from()`` should
-        # call this explicitly on teardown but additionally if for some
-        # reason stream consumer code tries to manually receive a new
+        # In the receive-only case, ``Portal.open_stream_from()`` used
+        # to rely on this call explicitly on teardown such that a new
+        # call to ``.receive()`` after ``rx_chan`` had been closed, would
+        # result in us raising a ``trio.EndOfChannel`` (since we
+        # remapped the ``trio.ClosedResourceError`). However, now if for some
+        # reason the stream's consumer code tries to manually receive a new
        # value before ``.aclose()`` is called **but** the far end has
        # stopped `.receive()` **must** raise ``trio.EndofChannel`` in
-        # order to avoid an infinite hang on ``.__anext__()``. So we can
-        # instead uncomment this check and close the underlying msg-loop
-        # mem chan below and not then **not** check for ``self._eoc`` in
-        # ``.receive()`` (if for some reason we think that check is
-        # a bottle neck - not likely) such that the
-        # ``trio.ClosedResourceError`` would instead trigger the
-        # ``trio.EndOfChannel`` in ``.receive()`` (as it originally was
-        # before bi-dir streaming support).
+        # order to avoid an infinite hang on ``.__anext__()``; this is
+        # why we added ``self._eoc`` to denote stream closure indepedent
+        # of ``rx_chan``.

-        # if not isinstance(self, MsgStream):
-        #     await rx_chan.aclose()
-
-    # TODO: but make it broadcasting to consumers
-    # def clone(self):
-    #     """Clone this receive channel allowing for multi-task
-    #     consumption from the same channel.
-
-    #     """
-    #     return ReceiveStream(
-    #         self._cid,
-    #         self._rx_chan.clone(),
-    #         self._portal,
-    #     )
+        # In theory we could still use this old method and close the
+        # underlying msg-loop mem chan as above and then **not** check
+        # for ``self._eoc`` in ``.receive()`` (if for some reason we
+        # think that check is a bottle neck - not likely) **but** then
+        # we would need to map the resulting
+        # ``trio.ClosedResourceError`` to a ``trio.EndOfChannel`` in
+        # ``.receive()`` (as it originally was before bi-dir streaming
+        # support) in order to trigger stream closure. The old behaviour
+        # is arguably more confusing since we lose detection of the
+        # runtime's closure of ``rx_chan`` in the case where we may
+        # still need to consume msgs that are "in transit" from the far
+        # end (eg. for ``Context.result()``).


 class MsgStream(ReceiveMsgStream, trio.abc.Channel):
@ -260,8 +276,22 @@ class MsgStream(ReceiveMsgStream, trio.abc.Channel):
        self,
        data: Any
    ) -> None:
+        '''Send a message over this stream to the far end.
+
+        '''
        await self._ctx.chan.send({'yield': data, 'cid': self._ctx.cid})

+    # TODO: but make it broadcasting to consumers
+    def clone(self):
+        """Clone this receive channel allowing for multi-task
+        consumption from the same channel.
+
+        """
+        return MsgStream(
+            self._ctx,
+            self._rx_chan.clone(),
+        )
+

@dataclass
 class Context:
@ -288,7 +318,7 @@ class Context:
    _cancel_called: bool = False

    # only set on the callee side
-    _cancel_scope: Optional[trio.CancelScope] = None
+    _scope_nursery: Optional[trio.Nursery] = None

    async def send_yield(self, data: Any) -> None:

@ -303,6 +333,16 @@ class Context:
    async def send_stop(self) -> None:
        await self.chan.send({'stop': True, 'cid': self.cid})

+    def _error_from_remote_msg(
+        self,
+        msg: Dict[str, Any],
+
+    ) -> None:
+        async def raiser():
+            raise unpack_error(msg, self.chan)
+
+        self._scope_nursery.start_soon(raiser)
+
    async def cancel(self) -> None:
        '''Cancel this inter-actor-task context.

@ -341,13 +381,16 @@ class Context:
                        f"{cid} for {self._portal.channel.uid}")
        else:
            # ensure callee side
-            assert self._cancel_scope
+            assert self._scope_nursery
            # TODO: should we have an explicit cancel message
            # or is relaying the local `trio.Cancelled` as an
            # {'error': trio.Cancelled, cid: "blah"} enough?
            # This probably gets into the discussion in
            # https://github.com/goodboy/tractor/issues/36
-            self._cancel_scope.cancel()
+            self._scope_nursery.cancel_scope.cancel()
+
+        if self._recv_chan:
+            await self._recv_chan.aclose()

    @asynccontextmanager
    async def open_stream(
@ -418,7 +461,8 @@ class Context:
                yield rchan

            except trio.EndOfChannel:
-                # stream iteration stop signal
+                # likely the far end sent us a 'stop' message to
+                # terminate the stream.
                raise

            else:
--- a/tractor/log.py
+++ b/tractor/log.py
@ -30,16 +30,17 @@ DATE_FORMAT = '%b %d %H:%M:%S'
 LEVELS = {
    'GARBAGE': 1,
    'TRACE': 5,
-    'PROFILE': 15,
-    'RUNTIME': 500,
+    'RUNTIME': 15,
+    'PDB': 500,
    'QUIET': 1000,
 }
 STD_PALETTE = {
    'CRITICAL': 'red',
    'ERROR': 'red',
-    'RUNTIME': 'white',
+    'PDB': 'white',
    'WARNING': 'yellow',
    'INFO': 'green',
+    'RUNTIME': 'white',
    'DEBUG': 'white',
    'TRACE': 'cyan',
    'GARBAGE': 'blue',
--- a/tractor/to_asyncio.py
+++ b/tractor/to_asyncio.py
@ -101,14 +101,16 @@ def _run_asyncio_task(
        """Cancel the calling ``trio`` task on error.
        """
        nonlocal aio_err
-        aio_err = task.exception()
+        try:
+            aio_err = task.exception()
+        except asyncio.CancelledError as cerr:
+            aio_err = cerr

        if aio_err:
            log.exception(f"asyncio task errorred:\n{aio_err}")

        # cancel_scope.cancel()
        from_aio._err = aio_err
-        to_trio.close()

    task.add_done_callback(cancel_trio)

@ -233,17 +235,25 @@ async def run_task(
        #         raise aio_err

    # Do we need this?
-    except BaseException as err:
+    except Exception as err:
        # await tractor.breakpoint()
        aio_err = from_aio._err
+
+        # try:
        if aio_err is not None:
            # always raise from any captured asyncio error
            raise err from aio_err
        else:
            raise
+        # finally:
+        #     if not task.done():
+        #         task.cancel()

-    finally:
-        task.cancel()
+    except trio.Cancelled:
+        if not task.done():
+            task.cancel()
+
+        raise


 # async def stream_from_task
Author	SHA1	Message	Date
Tyler Goodlet	e2e572ef51	Move some infos to runtime level	2021-06-26 23:40:21 -04:00
Tyler Goodlet	6cf6a0e70b	Add PDB level and make runtime below info but above debug	2021-06-26 23:40:21 -04:00
Tyler Goodlet	0a5cf6bbd2	WIP attempt to relay context error via raised-in-scope-nursery task	2021-06-24 19:56:05 -04:00
Tyler Goodlet	e6c9232b45	Add our own "transport closed" signal This change some super old (and bad) code from the project's very early days. For some redic reason i must have thought masking `trio`'s internal stream / transport errors and a TCP EOF as `StopAsyncIteration` somehow a good idea. The reality is you probably want to know the difference between an unexpected transport error and a simple EOF lol. This begins to resolve that by adding our own special `TransportClosed` error to signal the "graceful" termination of a channel's underlying transport. Oh, and this builds on the `msgspec` integration which helped shed light on the core issues here B)	2021-06-24 18:49:51 -04:00
Tyler Goodlet	62ece4327d	Just drop SIGINT masking; it seems to fix piker crash-hangs	2021-06-15 17:54:42 -04:00
Tyler Goodlet	f7b7c3fb93	Set stream "end of channel" after shielded check! Another face palm that was causing serious issues for code that is using the `.shielded` feature.. Add a bunch more detailed comments for all this subtlety and hopefully get it right once and for all. Also aggregated the `trio` errors that should trigger closure inside `.aclose()`, hopefully that's right too.	2021-06-14 16:34:44 -04:00
Tyler Goodlet	186968d3e7	Don't clobber msg loop mem chan...	2021-06-14 13:27:52 -04:00
Tyler Goodlet	7e2a054dd7	Drop bad .close() call	2021-06-14 09:34:00 -04:00
Tyler Goodlet	b494f29d55	Proxy asyncio cancelleds as well	2021-06-14 09:34:00 -04:00
Tyler Goodlet	9ced2066b9	Power of 2 cuz puters	2021-06-14 09:34:00 -04:00