Merge pull request #358 from goodboy/switch_to_pdbp

Switch to `pdbp` 🏄🏼
Add news file
2023-05-15 09:58:58 -04:00 · 2023-05-15 09:35:59 -04:00 · 2023-05-15 09:14:42 -04:00 · 2023-05-15 09:01:55 -04:00 · 2023-05-15 09:01:27 -04:00 · 2023-05-15 00:47:29 -04:00
89 changed files with 12127 additions and 3927 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -1,8 +1,14 @@
 name: CI

-on: push
+on:
+  # any time someone pushes a new branch to origin
+  push:
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:

 jobs:
+
  mypy:
    name: 'MyPy'
    runs-on: ubuntu-latest
@ -14,32 +20,112 @@ jobs:
      - name: Setup python
        uses: actions/setup-python@v2
        with:
-          python-version: '3.9'
+          python-version: '3.10'

      - name: Install dependencies
        run: pip install -U . --upgrade-strategy eager -r requirements-test.txt

      - name: Run MyPy check
-        run: mypy tractor/ --ignore-missing-imports
+        run: mypy tractor/ --ignore-missing-imports --show-traceback
+
+  # test that we can generate a software distribution and install it
+  # thus avoid missing file issues after packaging.
+  sdist-linux:
+    name: 'sdist'
+    runs-on: ubuntu-latest

-  testing:
-    name: '${{ matrix.os }} Python ${{ matrix.python }} - ${{ matrix.spawn_backend }}'
-    timeout-minutes: 10
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os: [ubuntu-latest, windows-latest]
-        python: ['3.7', '3.8', '3.9']
-        spawn_backend: ['trio', 'mp']
    steps:
      - name: Checkout
        uses: actions/checkout@v2
+
+      - name: Setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.10'
+
+      - name: Build sdist
+        run: python setup.py sdist --formats=zip
+
+      - name: Install sdist from .zips
+        run: python -m pip install dist/*.zip
+
+
+  testing-linux:
+    name: '${{ matrix.os }} Python ${{ matrix.python }} - ${{ matrix.spawn_backend }}'
+    timeout-minutes: 10
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        python: ['3.10']
+        spawn_backend: [
+          'trio',
+          'mp_spawn',
+          'mp_forkserver',
+        ]
+
+    steps:
+
+      - name: Checkout
+        uses: actions/checkout@v2
+
      - name: Setup python
        uses: actions/setup-python@v2
        with:
          python-version: '${{ matrix.python }}'
+
      - name: Install dependencies
        run: pip install -U . -r requirements-test.txt -r requirements-docs.txt --upgrade-strategy eager
+
+      - name: List dependencies
+        run: pip list
+
      - name: Run tests
-        run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rs
+        run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rsx
+
+  # We skip 3.10 on windows for now due to not having any collabs to
+  # debug the CI failures. Anyone wanting to hack and solve them is very
+  # welcome, but our primary user base is not using that OS.
+
+  # TODO: use job filtering to accomplish instead of repeated
+  # boilerplate as is above XD:
+  # - https://docs.github.com/en/actions/learn-github-actions/managing-complex-workflows
+  # - https://docs.github.com/en/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix
+  # - https://docs.github.com/en/actions/learn-github-actions/workflow-syntax-for-github-actions#jobsjob_idif
+  # testing-windows:
+  #   name: '${{ matrix.os }} Python ${{ matrix.python }} - ${{ matrix.spawn_backend }}'
+  #   timeout-minutes: 12
+  #   runs-on: ${{ matrix.os }}
+
+  #   strategy:
+  #     fail-fast: false
+  #     matrix:
+  #       os: [windows-latest]
+  #       python: ['3.10']
+  #       spawn_backend: ['trio', 'mp']
+
+  #   steps:
+
+  #     - name: Checkout
+  #       uses: actions/checkout@v2
+
+  #     - name: Setup python
+  #       uses: actions/setup-python@v2
+  #       with:
+  #         python-version: '${{ matrix.python }}'
+
+  #     - name: Install dependencies
+  #       run: pip install -U . -r requirements-test.txt -r requirements-docs.txt --upgrade-strategy eager
+
+  #     # TODO: pretty sure this solves debugger deps-issues on windows, but it needs to
+  #     # be verified by someone with a native setup.
+  #     # - name: Force pyreadline3
+  #     #   run: pip uninstall pyreadline; pip install -U pyreadline3
+
+  #     - name: List dependencies
+  #       run: pip list
+
+  #     - name: Run tests
+  #       run: pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rsx
--- a/147
+++ b/147
@ -1,23 +1,21 @@
-                    GNU GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007

- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

-  The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.

  The licenses for most software and other practical works are designed
 to take away your freedom to share and change the works.  By contrast,
-the GNU General Public License is intended to guarantee your freedom to
+our General Public Licenses are intended to guarantee your freedom to
 share and change all versions of a program--to make sure it remains free
-software for all its users.  We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors.  You can apply it to
-your programs, too.
+software for all its users.

  When we speak of free software, we are referring to freedom, not
 price.  Our General Public Licenses are designed to make sure that you
@ -26,44 +24,34 @@ them if you wish), that you receive source code or can get it if you
 want it, that you can change the software or use pieces of it in new
 free programs, and that you know you can do these things.

-  To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights.  Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.

-  For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received.  You must make sure that they, too, receive
-or can get the source code.  And you must show them these terms so they
-know their rights.
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.

-  Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.

-  For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software.  For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
-  Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so.  This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software.  The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable.  Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products.  If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
-  Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary.  To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
+  An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.

  The precise terms and conditions for copying, distribution and
 modification follow.
@ -72,7 +60,7 @@ modification follow.

  0. Definitions.

-  "This License" refers to version 3 of the GNU General Public License.
+  "This License" refers to version 3 of the GNU Affero General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
 works, such as semiconductor masks.
@ -549,35 +537,45 @@ to collect a royalty for further conveying from those to whom you convey
 the Program, the only way you could satisfy both those terms and this
 License would be to refrain entirely from conveying the Program.

-  13. Use with the GNU Affero General Public License.
+  13. Remote Network Interaction; Use with the GNU General Public License.
+
+  Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.

  Notwithstanding any other provision of this License, you have
 permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
+under version 3 of the GNU General Public License into a single
 combined work, and to convey the resulting work.  The terms of this
 License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time.  Such new versions will
-be similar in spirit to the present version, but may differ in detail to
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
 address new problems or concerns.

  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU General
+Program specifies that a certain numbered version of the GNU Affero General
 Public License "or any later version" applies to it, you have the
 option of following the terms and conditions either of that numbered
 version or of any later version published by the Free Software
 Foundation.  If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
+GNU Affero General Public License, you may choose any version ever published
 by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
+versions of the GNU Affero General Public License can be used, that proxy's
 public statement of acceptance of a version permanently authorizes you
 to choose that version for the Program.

@ -635,40 +633,29 @@ the "copyright" line and a pointer to where the full notice is found.
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
+    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
+    GNU Affero General Public License for more details.

-    You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.

 Also add information on how to contact you by electronic and paper mail.

-  If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
-    <program>  Copyright (C) <year>  <name of author>
-    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-    This is free software, and you are welcome to redistribute it
-    under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License.  Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.

  You should also get your employer (if you work as a programmer) or school,
 if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
-  The GNU General Public License does not permit incorporating your program
-into proprietary programs.  If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library.  If this is what you want to do, use the GNU Lesser General
-Public License instead of this License.  But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -0,0 +1,2 @@
+# https://packaging.python.org/en/latest/guides/using-manifest-in/#using-manifest-in
+include docs/README.rst
--- a/NEWS.rst
+++ b/NEWS.rst
@ -0,0 +1,528 @@
+=========
+Changelog
+=========
+
+.. towncrier release notes start
+
+tractor 0.1.0a5 (2022-08-03)
+============================
+
+This is our final release supporting Python 3.9 since we will be moving
+internals to the new `match:` syntax from 3.10 going forward and
+further, we have officially dropped usage of the `msgpack` library and
+happily adopted `msgspec`.
+
+Features
+--------
+
+- `#165 <https://github.com/goodboy/tractor/issues/165>`_: Add SIGINT
+  protection to our `pdbpp` based debugger subsystem such that for
+  (single-depth) actor trees in debug mode we ignore interrupts in any
+  actor currently holding the TTY lock thus avoiding clobbering IPC
+  connections and/or task and process state when working in the REPL.
+
+  As a big note currently so called "nested" actor trees (trees with
+  actors having more than one parent/ancestor) are not fully supported
+  since we don't yet have a mechanism to relay the debug mode knowledge
+  "up" the actor tree (for eg. when handling a crash in a leaf actor).
+  As such currently there is a set of tests and known scenarios which will
+  result in process clobbering by the zombie reaping machinery and these
+  have been documented in https://github.com/goodboy/tractor/issues/320.
+
+  The implementation details include:
+
+  - utilizing a custom SIGINT handler which we apply whenever an actor's
+    runtime enters the debug machinery, which we also make sure the
+    stdlib's `pdb` configuration doesn't override (which it does by
+    default without special instance config).
+  - litter the runtime with `maybe_wait_for_debugger()` mostly in spots
+    where the root actor should block before doing embedded nursery
+    teardown ops which both cancel potential-children-in-debug as well
+    as eventually trigger zombie reaping machinery.
+  - hardening of the TTY locking semantics/API both in terms of IPC
+    terminations and cancellation and lock release determinism from
+    sync debugger instance methods.
+  - factoring of locking infrastructure into a new `._debug.Lock` global
+    which encapsulates all details of the ``trio`` sync primitives and
+    task/actor uid management and tracking.
+
+  We also add `ctrl-c` cases throughout the test suite though these are
+  disabled for py3.9 (`pdbpp` UX differences that don't seem worth
+  compensating for, especially since this will be our last 3.9 supported
+  release) and there are a slew of marked cases that aren't expected to
+  work in CI more generally (as mentioned in the "nested" tree note
+  above) despite seemingly working  when run manually on linux.
+
+- `#304 <https://github.com/goodboy/tractor/issues/304>`_: Add a new
+  ``to_asyncio.LinkedTaskChannel.subscribe()`` which gives task-oriented
+  broadcast functionality semantically equivalent to
+  ``tractor.MsgStream.subscribe()`` this makes it possible for multiple
+  ``trio``-side tasks to consume ``asyncio``-side task msgs in tandem.
+
+  Further Improvements to the test suite were added in this patch set
+  including a new scenario test for a sub-actor managed "service nursery"
+  (implementing the basics of a "service manager") including use of
+  *infected asyncio* mode. Further we added a lower level
+  ``test_trioisms.py`` to start to track issues we need to work around in
+  ``trio`` itself which in this case included a bug we were trying to
+  solve related to https://github.com/python-trio/trio/issues/2258.
+
+
+Bug Fixes
+---------
+
+- `#318 <https://github.com/goodboy/tractor/issues/318>`_: Fix
+  a previously undetected ``trio``-``asyncio`` task lifetime linking
+  issue with the ``to_asyncio.open_channel_from()`` api where both sides
+  were not properly waiting/signalling termination and it was possible
+  for ``asyncio``-side errors to not propagate due to a race condition.
+
+  The implementation fix summary is:
+  - add state to signal the end of the ``trio`` side task to be
+    read by the ``asyncio`` side and always cancel any ongoing
+    task in such cases.
+  - always wait on the ``asyncio`` task termination from the ``trio``
+    side on error before maybe raising said error.
+  - always close the ``trio`` mem chan on exit to ensure the other
+    side can detect it and follow.
+
+
+Trivial/Internal Changes
+------------------------
+
+- `#248 <https://github.com/goodboy/tractor/issues/248>`_: Adjust the
+  `tractor._spawn.soft_wait()` strategy to avoid sending an actor cancel
+  request (via `Portal.cancel_actor()`) if either the child process is
+  detected as having terminated or the IPC channel is detected to be
+  closed.
+
+  This ensures (even) more deterministic inter-actor cancellation by
+  avoiding the timeout condition where possible when a child never
+  successfully spawned, crashed, or became un-contactable over IPC.
+
+- `#295 <https://github.com/goodboy/tractor/issues/295>`_: Add an
+  experimental ``tractor.msg.NamespacePath`` type for passing Python
+  objects by "reference" through a ``str``-subtype message and using the
+  new ``pkgutil.resolve_name()`` for reference loading.
+
+- `#298 <https://github.com/goodboy/tractor/issues/298>`_: Add a new
+  `tractor.experimental` subpackage for staging new high level APIs and
+  subsystems that we might eventually make built-ins.
+
+- `#300 <https://github.com/goodboy/tractor/issues/300>`_: Update to and
+  pin latest ``msgpack`` (1.0.3) and ``msgspec`` (0.4.0) both of which
+  required adjustments for backwards incompatible API tweaks.
+
+- `#303 <https://github.com/goodboy/tractor/issues/303>`_: Fence off
+  ``multiprocessing`` imports until absolutely necessary in an effort to
+  avoid "resource tracker" spawning side effects that seem to have
+  varying degrees of unreliability per Python release. Port to new
+  ``msgspec.DecodeError``.
+
+- `#305 <https://github.com/goodboy/tractor/issues/305>`_: Add
+  ``tractor.query_actor()`` an addr looker-upper which doesn't deliver
+  a ``Portal`` instance and instead just a socket address ``tuple``.
+
+  Sometimes it's handy to just have a simple way to figure out if
+  a "service" actor is up, so add this discovery helper for that. We'll
+  prolly just leave it undocumented for now until we figure out
+  a longer-term/better discovery system.
+
+- `#316 <https://github.com/goodboy/tractor/issues/316>`_: Run windows
+  CI jobs on python 3.10 after some hacks for ``pdbpp`` dependency
+  issues.
+
+  Issue was to do with the now deprecated `pyreadline` project which
+  should be changed over to `pyreadline3`.
+
+- `#317 <https://github.com/goodboy/tractor/issues/317>`_: Drop use of
+  the ``msgpack`` package and instead move fully to the ``msgspec``
+  codec library.
+
+  We've now used ``msgspec`` extensively in production and there's no
+  reason to not use it as default. Further this change preps us for the up
+  and coming typed messaging semantics (#196), dialog-unprotocol system
+  (#297), and caps-based messaging-protocols (#299) planned before our
+  first beta.
+
+
+tractor 0.1.0a4 (2021-12-18)
+============================
+
+Features
+--------
+- `#275 <https://github.com/goodboy/tractor/issues/275>`_: Re-license
+  code base under AGPLv3. Also see `#274
+  <https://github.com/goodboy/tractor/pull/274>`_ for majority
+  contributor consensus on this decision.
+
+- `#121 <https://github.com/goodboy/tractor/issues/121>`_: Add
+  "infected ``asyncio``" mode; a sub-system to spawn and control
+  ``asyncio`` actors using ``trio``'s guest-mode.
+
+  This gets us the following very interesting functionality:
+
+  - ability to spawn an actor that has a process entry point of
+    ``asyncio.run()`` by passing ``infect_asyncio=True`` to
+    ``Portal.start_actor()`` (and friends).
+  - the ``asyncio`` actor embeds ``trio`` using guest-mode and starts
+    a main ``trio`` task which runs the ``tractor.Actor._async_main()``
+    entry point engages all the normal ``tractor`` runtime IPC/messaging
+    machinery; for all purposes the actor is now running normally on
+    a ``trio.run()``.
+  - the actor can now make one-to-one task spawning requests to the
+    underlying ``asyncio`` event loop using either of:
+
+    * ``to_asyncio.run_task()`` to spawn and run an ``asyncio`` task to
+      completion and block until a return value is delivered.
+    * ``async with to_asyncio.open_channel_from():`` which spawns a task
+      and hands it a pair of "memory channels" to allow for bi-directional
+      streaming between the now SC-linked ``trio`` and ``asyncio`` tasks.
+
+  The output from any call(s) to ``asyncio`` can be handled as normal in
+  ``trio``/``tractor`` task operation with the caveat of the overhead due
+  to guest-mode use.
+
+  For more details see the `original PR
+  <https://github.com/goodboy/tractor/pull/121>`_ and `issue
+  <https://github.com/goodboy/tractor/issues/120>`_.
+
+- `#257 <https://github.com/goodboy/tractor/issues/257>`_: Add
+  ``trionics.maybe_open_context()`` an actor-scoped async multi-task
+  context manager resource caching API.
+
+  Adds an SC-safe caching async context manager api that only enters on
+  the *first* task entry and only exits on the *last* task exit while in
+  between delivering the same cached value per input key. Keys can be
+  either an explicit ``key`` named arg provided by the user or a
+  hashable ``kwargs`` dict (will be converted to a ``list[tuple]``) which
+  is passed to the underlying manager function as input.
+
+- `#261 <https://github.com/goodboy/tractor/issues/261>`_: Add
+  cross-actor-task ``Context`` oriented error relay, a new stream
+  overrun error-signal ``StreamOverrun``, and support disabling
+  ``MsgStream`` backpressure as the default before a stream is opened or
+  by choice of the user.
+
+  We added stricter semantics around ``tractor.Context.open_stream():``
+  particularly to do with streams which are only opened at one end.
+  Previously, if only one end opened a stream there was no way for that
+  sender to know if msgs are being received until first, the feeder mem
+  chan on the receiver side hit a backpressure state and then that
+  condition delayed its msg loop processing task to eventually create
+  backpressure on the associated IPC transport. This is non-ideal in the
+  case where the receiver side never opened a stream by mistake since it
+  results in silent block of the sender and no adherence to the underlying
+  mem chan buffer size settings (which is still unsolved btw).
+
+  To solve this we add non-backpressure style message pushing inside
+  ``Actor._push_result()`` by default and only use the backpressure
+  ``trio.MemorySendChannel.send()`` call **iff** the local end of the
+  context has entered ``Context.open_stream():``. This way if the stream
+  was never opened but the mem chan is overrun, we relay back to the
+  sender a (new exception) ``StreamOverrun`` error which is raised in the
+  sender's scope with a special error message about the stream never
+  having been opened. Further, this behaviour (non-backpressure style
+  where senders can expect an error on overruns) can now be enabled with
+  ``.open_stream(backpressure=False)`` and the underlying mem chan size
+  can be specified with a kwarg ``msg_buffer_size: int``.
+
+  Further bug fixes and enhancements in this changeset include:
+
+  - fix a race we were ignoring where if the callee task opened a context
+    it could enter ``Context.open_stream()`` before calling
+    ``.started()``.
+  - Disallow calling ``Context.started()`` more than once.
+  - Enable ``Context`` linked tasks error relaying via the new
+    ``Context._maybe_raise_from_remote_msg()`` which (for now) uses
+    a simple ``trio.Nursery.start_soon()`` to raise the error via closure
+    in the local scope.
+
+- `#267 <https://github.com/goodboy/tractor/issues/267>`_: This
+  (finally) adds fully acknowledged remote cancellation messaging
+  support for both explicit ``Portal.cancel_actor()`` calls as well as
+  when there is a "runtime-wide" cancellation (eg. during KBI or
+  general actor nursery exception handling which causes a full actor
+  "crash"/termination).
+
+  You can think of this as the most ideal case in 2-generals where the
+  actor requesting the cancel of its child is able to always receive back
+  the ACK to that request. This leads to a more deterministic shutdown of
+  the child where the parent is able to wait for the child to fully
+  respond to the request. On a localhost setup, where the parent can
+  monitor the state of the child through process or other OS APIs instead
+  of solely through IPC messaging, the parent can know whether or not the
+  child decided to cancel with more certainty. In the case of separate
+  hosts, we still rely on a simple timeout approach until such a time
+  where we prefer to get "fancier".
+
+- `#271 <https://github.com/goodboy/tractor/issues/271>`_: Add a per
+  actor ``debug_mode: bool`` control to our nursery.
+
+  This allows spawning actors via ``ActorNursery.start_actor()`` (and
+  other dependent methods) with a ``debug_mode=True`` flag much like
+  ``tractor.open_nursery():`` such that per process crash handling
+  can be toggled for cases where a user does not need/want all child actors
+  to drop into the debugger on error. This is often useful when you have
+  actor-tasks which are expected to error often (and be re-run) but want
+  to specifically interact with some (problematic) child.
+
+
+Bugfixes
+--------
+
+- `#239 <https://github.com/goodboy/tractor/issues/239>`_: Fix
+  keyboard interrupt handling in ``Portal.open_context()`` blocks.
+
+  Previously this was not triggering cancellation of the remote task
+  context and could result in hangs if a stream was also opened. This
+  fix is to accept `BaseException` since it is likely any other top
+  level exception other than KBI (even though not expected) should also
+  get this result.
+
+- `#264 <https://github.com/goodboy/tractor/issues/264>`_: Fix
+  ``Portal.run_in_actor()`` returns ``None`` result.
+
+  ``None`` was being used as the cached result flag and obviously breaks
+  on a ``None`` returned from the remote target task. This would cause an
+  infinite hang if user code ever called ``Portal.result()`` *before* the
+  nursery exit. The simple fix is to use the *return message* as the
+  initial "no-result-received-yet" flag value and, once received, the
+  return value is read from the message to avoid the cache logic error.
+
+- `#266 <https://github.com/goodboy/tractor/issues/266>`_: Fix
+  graceful cancellation of daemon actors
+
+  Previously, there was a bug where if the soft wait on a sub-process (the
+  ``await .proc.wait()``) in the reaper task teardown was cancelled we
+  would fail over to the hard reaping sequence (meant for culling off any
+  potential zombies via system kill signals). The hard reap has a timeout
+  of 3s (currently though in theory we could make it shorter?) before
+  system signalling kicks in. This means that any daemon actor still
+  running during nursery exit would get hard reaped (3s later) instead of
+  cancelled via IPC message. Now we catch the ``trio.Cancelled``, call
+  ``Portal.cancel_actor()`` on the daemon and expect the child to
+  self-terminate after the runtime cancels and shuts down the process.
+
+- `#278 <https://github.com/goodboy/tractor/issues/278>`_: Repair
+  inter-actor stream closure semantics to work correctly with
+  ``tractor.trionics.BroadcastReceiver`` task fan out usage.
+
+  A set of previously unknown bugs discovered in `#257
+  <https://github.com/goodboy/tractor/pull/257>`_ let graceful stream
+  closure result in hanging consumer tasks that use the broadcast APIs.
+  This adds better internal closure state tracking to the broadcast
+  receiver and message stream APIs and in particular ensures that when an
+  underlying stream/receive-channel (a broadcast receiver is receiving
+  from) is closed, all consumer tasks waiting on that underlying channel
+  are woken so they can receive the ``trio.EndOfChannel`` signal and
+  promptly terminate.
+
+
+tractor 0.1.0a3 (2021-11-02)
+============================
+
+Features
+--------
+
+- Switch to using the ``trio`` process spawner by default on windows. (#166)
+
+  This gets windows users debugger support (manually tested) and in
+  general a more resilient (nested) actor tree implementation.
+
+- Add optional `msgspec <https://jcristharif.com/msgspec/>`_ support
+  as an alternative, faster MessagePack codec. (#214)
+
+  Provides us with a path toward supporting typed IPC message contracts. Further,
+  ``msgspec`` structs may be a valid tool to start for formalizing our
+  "SC dialog un-protocol" messages as described in `#36
+  <https://github.com/goodboy/tractor/issues/36>`_.
+
+- Introduce a new ``tractor.trionics`` `sub-package`_ that exposes
+  a selection of our relevant high(er) level trio primitives and
+  goodies. (#241)
+
+  At outset we offer a ``gather_contexts()`` context manager for
+  concurrently entering a sequence of async context managers (much like
+  a version of ``asyncio.gather()`` but for context managers) and use it
+  in a new ``tractor.open_actor_cluster()`` manager-helper that can be
+  entered to concurrently spawn a flat actor pool. We also now publicly
+  expose our "broadcast channel" APIs (``open_broadcast_receiver()``)
+  from here.
+
+.. _sub-package: ../tractor/trionics
+
+- Change the core message loop to handle task and actor-runtime cancel
+  requests immediately instead of scheduling them as is done for rpc-task
+  requests. (#245)
+
+  In order to obtain more reliable teardown mechanics for (complex) actor
+  trees it's important that we specially treat cancel requests as having
+  higher priority. Previously, it was possible that task cancel requests
+  could actually also themselves be cancelled if an "actor-runtime" cancel
+  request was received (can happen during messy multi actor crashes that
+  propagate). Instead cancels now block the msg loop until serviced and
+  a response is relayed back to the requester. This also allows for
+  improved debugger support since we have determinism guarantees about
+  which processes must wait before hard killing their children.
+
+- (`#248 <https://github.com/goodboy/tractor/pull/248>`_) Drop Python
+  3.8 support in favour of rolling with two latest releases for the time
+  being.
+
+
+Misc
+----
+
+- (`#243 <https://github.com/goodboy/tractor/pull/243>`_) add a distinct
+  ``'CANCEL'`` log level to allow the runtime to emit details about
+  cancellation machinery statuses.
+
+
+tractor 0.1.0a2 (2021-09-07)
+============================
+
+Features
+--------
+
+- Add `tokio-style broadcast channels
+  <https://docs.rs/tokio/1.11.0/tokio/sync/broadcast/index.html>`_ as
+  a solution for `#204 <https://github.com/goodboy/tractor/pull/204>`_ and
+  discussed thoroughly in `trio/#987
+  <https://github.com/python-trio/trio/issues/987>`_.
+
+  This gives us local task broadcast functionality using a new
+  ``BroadcastReceiver`` type which can wrap ``trio.ReceiveChannel``  and
+  provide fan-out copies of a stream of data to every subscribed consumer.
+  We use this new machinery to provide a ``ReceiveMsgStream.subscribe()``
+  async context manager which can be used by actor-local consumer tasks
+  to easily pull from a shared and dynamic IPC stream. (`#229
+  <https://github.com/goodboy/tractor/pull/229>`_)
+
+
+Bugfixes
+--------
+
+- Handle broken channel/stream faults where the root's tty lock is left
+  acquired by some child actor who went MIA and the root ends up hanging
+  indefinitely. (`#234 <https://github.com/goodboy/tractor/pull/234>`_)
+
+  There are two parts here: we no longer shield wait on the lock and
+  we now always do our best to release the lock on the expected worst
+  case connection faults.
+
+
+Deprecations and Removals
+-------------------------
+
+- Drop stream "shielding" support which was originally added to sidestep
+  a cancelled call to ``.receive()``
+
+  In the original api design a stream instance was returned directly from
+  a call to ``Portal.run()`` and thus there was no "exit phase" to handle
+  cancellations and errors which would trigger implicit closure. Now that
+  we have said enter/exit semantics with ``Portal.open_stream_from()`` and
+  ``Context.open_stream()`` we can drop this implicit (and arguably
+  confusing) behavior. (`#230 <https://github.com/goodboy/tractor/pull/230>`_)
+
+- Drop Python 3.7 support in preparation for supporting 3.9+ syntax.
+  (`#232 <https://github.com/goodboy/tractor/pull/232>`_)
+
+
+tractor 0.1.0a1 (2021-08-01)
+============================
+
+Features
+--------
+- Updated our uni-directional streaming API (`#206
+  <https://github.com/goodboy/tractor/pull/206>`_) to require a context
+  manager style ``async with Portal.open_stream_from(target) as stream:``
+  which explicitly determines when to stop a stream in the calling (aka
+  portal opening) actor much like ``async_generator.aclosing()``
+  enforcement.
+
+- Improved the ``multiprocessing`` backend sub-actor reaping (`#208
+  <https://github.com/goodboy/tractor/pull/208>`_) during actor nursery
+  exit, particularly during cancellation scenarios that previously might
+  result in hard to debug hangs.
+
+- Added initial bi-directional streaming support in `#219
+  <https://github.com/goodboy/tractor/pull/219>`_ with follow up debugger
+  improvements via `#220 <https://github.com/goodboy/tractor/pull/220>`_
+  using the new ``tractor.Context`` cross-actor task syncing system.
+  The debugger upgrades add an edge triggered last-in-tty-lock semaphore
+  which allows the root process for a tree to avoid clobbering children
+  who have queued to acquire the ``pdb`` repl by waiting to cancel
+  sub-actors until the lock is known to be released **and** has no
+  pending waiters.
+
+
+Experiments and WIPs
+--------------------
+- Initial optional ``msgspec`` serialization support in `#214
+  <https://github.com/goodboy/tractor/pull/214>`_ which should hopefully
+  land by next release.
+
+- Improved "infect ``asyncio``" cross-loop task cancellation and error
+  propagation by vastly simplifying the cross-loop-task streaming approach. 
+  We may end up just going with a use of ``anyio`` in the medium term to
+  avoid re-doing work done by their cross-event-loop portals.  See the
+  ``infect_asyncio`` for details.
+
+
+Improved Documentation
+----------------------
+- `Updated our readme <https://github.com/goodboy/tractor/pull/211>`_ to
+  include more (and better) `examples
+  <https://github.com/goodboy/tractor#run-a-func-in-a-process>`_ (with
+  matching multi-terminal process monitoring shell commands) as well as
+  added many more examples to the `repo set
+  <https://github.com/goodboy/tractor/tree/master/examples>`_.
+
+- Added a readme `"actors under the hood" section
+  <https://github.com/goodboy/tractor#under-the-hood>`_ in an effort to
+  guard against suggestions for changing the API away from ``trio``'s
+  *tasks-as-functions* style.
+
+- Moved to using the `sphinx book theme
+  <https://sphinx-book-theme.readthedocs.io/en/latest/index.html>`_
+  though it needs some heavy tweaking and doesn't seem to show our logo
+  on rtd :(
+
+
+Trivial/Internal Changes
+------------------------
+- Added a new ``TransportClosed`` internal exception/signal (`#215
+  <https://github.com/goodboy/tractor/pull/215>`_) for catching TCP
+  channel gentle closes instead of silently falling through the message
+  handler loop via an async generator ``return``.
+
+
+Deprecations and Removals
+-------------------------
+- Dropped support for invoking sync functions (`#205
+  <https://github.com/goodboy/tractor/pull/205>`_) in other
+  actors/processes since you can always wrap a sync function from an
+  async one.  Users can instead consider using ``trio-parallel`` which
+  is a project specifically geared for purely synchronous calls in
+  sub-processes.
+
+- Deprecated our ``tractor.run()`` entrypoint `#197
+  <https://github.com/goodboy/tractor/pull/197>`_; the runtime is now
+  either started implicitly in first actor nursery use or via an
+  explicit call to ``tractor.open_root_actor()``. Full removal of
+  ``tractor.run()`` will come by beta release.
+
+
+tractor 0.1.0a0 (2021-02-28)
+============================
+
+..
+    TODO: fill out more of the details of the initial feature set in some TLDR form
+
+Summary
+-------
+- ``trio`` based process spawner (using ``subprocess``)
+- initial multi-process debugging with ``pdb++``
+- windows support using both ``trio`` and ``multiprocessing`` spawners
+- "portal" api for cross-process, structured concurrent, (streaming) IPC
--- a/docs/README.rst
+++ b/docs/README.rst
@ -3,13 +3,20 @@
 |gh_actions|
 |docs|

-``tractor`` is a `structured concurrent`_, multi-processing_ runtime built on trio_.
+``tractor`` is a `structured concurrent`_, multi-processing_ runtime
+built on trio_.

-Fundamentally ``tractor`` gives you parallelism via ``trio``-"*actors*":
-our nurseries_ let you spawn new Python processes which each run a ``trio``
+Fundamentally, ``tractor`` gives you parallelism via
+``trio``-"*actors*": independent Python processes (aka
+non-shared-memory threads) which maintain structured
+concurrency (SC) *end-to-end* inside a *supervision tree*.
+
+Cross-process (and thus cross-host) SC is accomplished through the
+combined use of our "actor nurseries_" and an "SC-transitive IPC
+protocol" constructed on top of multiple Pythons each running a ``trio``
 scheduled runtime - a call to ``trio.run()``.

-We believe the system adhere's to the `3 axioms`_ of an "`actor model`_"
+We believe the system adheres to the `3 axioms`_ of an "`actor model`_"
 but likely *does not* look like what *you* probably think an "actor
 model" looks like, and that's *intentional*.

@ -21,12 +28,16 @@ Features
 --------
 - **It's just** a ``trio`` API
 - *Infinitely nesteable* process trees
- Built-in inter-process streaming APIs
- A (first ever?) "native" multi-core debugger UX for Python using `pdb++`_
+- Builtin IPC streaming APIs with task fan-out broadcasting
+- A "native" multi-core debugger REPL using `pdbp`_ (a fork & fix of
+  `pdb++`_ thanks to @mdmintz!)
 - Support for a swappable, OS specific, process spawning layer
- A modular transport stack, allowing for custom serialization,
-  communications protocols, and environment specific IPC primitives
- `structured concurrency`_ from the ground up
+- A modular transport stack, allowing for custom serialization (eg. with
+  `msgspec`_), communications protocols, and environment specific IPC
+  primitives
+- Support for spawning process-level-SC, inter-loop one-to-one-task oriented
+  ``asyncio`` actors via "infected ``asyncio``" mode
+- `structured chadcurrency`_ from the ground up


 Run a func in a process
@ -114,7 +125,7 @@ Zombie safe: self-destruct a process tree
            f"running in pid {os.getpid()}"
        )

-       await trio.sleep_forever()
+        await trio.sleep_forever()


    async def main():
@ -145,7 +156,7 @@ it **is a bug**.

 "Native" multi-process debugging
 --------------------------------
-Using the magic of `pdb++`_ and our internal IPC, we've
+Using the magic of `pdbp`_ and our internal IPC, we've
 been able to create a native feeling debugging experience for
 any (sub-)process in your ``tractor`` tree.

@ -312,9 +323,173 @@ real time::
 This uses no extra threads, fancy semaphores or futures; all we need
 is ``tractor``'s IPC!

+"Infected ``asyncio``" mode
+---------------------------
+Have a bunch of ``asyncio`` code you want to force to be SC at the process level?
+
+Check out our experimental system for `guest-mode`_ controlled
+``asyncio`` actors:
+
+.. code:: python
+
+    import asyncio
+    from statistics import mean
+    import time
+
+    import trio
+    import tractor
+
+
+    async def aio_echo_server(
+        to_trio: trio.MemorySendChannel,
+        from_trio: asyncio.Queue,
+    ) -> None:
+
+        # a first message must be sent **from** this ``asyncio``
+        # task or the ``trio`` side will never unblock from
+        # ``tractor.to_asyncio.open_channel_from():``
+        to_trio.send_nowait('start')
+
+        # XXX: this uses an ``from_trio: asyncio.Queue`` currently but we
+        # should probably offer something better.
+        while True:
+            # echo the msg back
+            to_trio.send_nowait(await from_trio.get())
+            await asyncio.sleep(0)
+
+
+    @tractor.context
+    async def trio_to_aio_echo_server(
+        ctx: tractor.Context,
+    ):
+        # this will block until the ``asyncio`` task sends a "first"
+        # message.
+        async with tractor.to_asyncio.open_channel_from(
+            aio_echo_server,
+        ) as (first, chan):
+
+            assert first == 'start'
+            await ctx.started(first)
+
+            async with ctx.open_stream() as stream:
+
+                async for msg in stream:
+                    await chan.send(msg)
+
+                    out = await chan.receive()
+                    # echo back to parent actor-task
+                    await stream.send(out)
+
+
+    async def main():
+
+        async with tractor.open_nursery() as n:
+            p = await n.start_actor(
+                'aio_server',
+                enable_modules=[__name__],
+                infect_asyncio=True,
+            )
+            async with p.open_context(
+                trio_to_aio_echo_server,
+            ) as (ctx, first):
+
+                assert first == 'start'
+
+                count = 0
+                async with ctx.open_stream() as stream:
+
+                    delays = []
+                    send = time.time()
+
+                    await stream.send(count)
+                    async for msg in stream:
+                        recv = time.time()
+                        delays.append(recv - send)
+                        assert msg == count
+                        count += 1
+                        send = time.time()
+                        await stream.send(count)
+
+                        if count >= 1e3:
+                            break
+
+            print(f'mean round trip rate (Hz): {1/mean(delays)}')
+            await p.cancel_actor()
+
+
+    if __name__ == '__main__':
+        trio.run(main)
+
+
+Yes, we spawn a python process, run ``asyncio``, start ``trio`` on the
+``asyncio`` loop, then send commands to the ``trio`` scheduled tasks to
+tell ``asyncio`` tasks what to do XD
+
+We need help refining the ``asyncio``-side channel API to be more
+``trio``-like. Feel free to sling your opinion in `#273`_!
+
+
+.. _#273: https://github.com/goodboy/tractor/issues/273
+
+
+Higher level "cluster" APIs
+---------------------------
+To be extra terse the ``tractor`` devs have started hacking some "higher
+level" APIs for managing actor trees/clusters. These interfaces should
+generally be considered provisional for now but we encourage you to try
+them and provide feedback. Here's a new API that lets you quickly
+spawn a flat cluster:
+
+.. code:: python
+
+    import trio
+    import tractor
+
+
+    async def sleepy_jane():
+        uid = tractor.current_actor().uid
+        print(f'Yo i am actor {uid}')
+        await trio.sleep_forever()
+
+
+    async def main():
+        '''
+        Spawn a flat actor cluster, with one process per
+        detected core.
+
+        '''
+        portal_map: dict[str, tractor.Portal]
+        results: dict[str, str]
+
+        # look at this hip new syntax!
+        async with (
+
+            tractor.open_actor_cluster(
+                modules=[__name__]
+            ) as portal_map,
+
+            trio.open_nursery() as n,
+        ):
+
+            for (name, portal) in portal_map.items():
+                n.start_soon(portal.run, sleepy_jane)
+
+            await trio.sleep(0.5)
+
+            # kill the cluster with a cancel
+            raise KeyboardInterrupt
+
+
+    if __name__ == '__main__':
+        try:
+            trio.run(main)
+        except KeyboardInterrupt:
+            pass
+

 .. _full worker pool re-implementation: https://github.com/goodboy/tractor/blob/master/examples/parallelism/concurrent_actors_primes.py

+
 Install
 -------
 From PyPi::
@ -390,12 +565,22 @@ properties of the system.

 What's on the TODO:
 -------------------
-Help us push toward the future.
+Help us push toward the future of distributed `Python`.

- (Soon to land) ``asyncio`` support allowing for "infected" actors where
-  `trio` drives the `asyncio` scheduler via the astounding "`guest mode`_"
- Typed messaging protocols (ex. via ``msgspec``)
- Erlang-style supervisors via composed context managers
+- Erlang-style supervisors via composed context managers (see `#22
+  <https://github.com/goodboy/tractor/issues/22>`_)
+- Typed messaging protocols (ex. via ``msgspec.Struct``, see `#36
+  <https://github.com/goodboy/tractor/issues/36>`_)
+- Typed capability-based (dialog) protocols (see `#196
+  <https://github.com/goodboy/tractor/issues/196>`_ with draft work
+  started in `#311 <https://github.com/goodboy/tractor/pull/311>`_)
+- We **recently disabled CI-testing on windows** and need help getting
+  it running again! (see `#327
+  <https://github.com/goodboy/tractor/pull/327>`_). **We do have windows
+  support** (and have for quite a while) but since no active hacker
+  exists in the user-base to help test on that OS, for now we're not
+  actively maintaining testing due to the added hassle and general
+  latency..


 Feel like saying hi?
@ -407,26 +592,32 @@ say hi, please feel free to reach us in our `matrix channel`_.  If
 matrix seems too hip, we're also mostly all in the the `trio gitter
 channel`_!

+.. _structured concurrent: https://trio.discourse.group/t/concise-definition-of-structured-concurrency/228
+.. _multi-processing: https://en.wikipedia.org/wiki/Multiprocessing
+.. _trio: https://github.com/python-trio/trio
 .. _nurseries: https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/#nurseries-a-structured-replacement-for-go-statements
 .. _actor model: https://en.wikipedia.org/wiki/Actor_model
-.. _trio: https://github.com/python-trio/trio
-.. _multi-processing: https://en.wikipedia.org/wiki/Multiprocessing
 .. _trionic: https://trio.readthedocs.io/en/latest/design.html#high-level-design-principles
 .. _async sandwich: https://trio.readthedocs.io/en/latest/tutorial.html#async-sandwich
-.. _structured concurrent: https://trio.discourse.group/t/concise-definition-of-structured-concurrency/228
 .. _3 axioms: https://www.youtube.com/watch?v=7erJ1DV_Tlo&t=162s
+.. .. _3 axioms: https://en.wikipedia.org/wiki/Actor_model#Fundamental_concepts
 .. _adherance to: https://www.youtube.com/watch?v=7erJ1DV_Tlo&t=1821s
 .. _trio gitter channel: https://gitter.im/python-trio/general
 .. _matrix channel: https://matrix.to/#/!tractor:matrix.org
+.. _pdbp: https://github.com/mdmintz/pdbp
 .. _pdb++: https://github.com/pdbpp/pdbpp
 .. _guest mode: https://trio.readthedocs.io/en/stable/reference-lowlevel.html?highlight=guest%20mode#using-guest-mode-to-run-trio-on-top-of-other-event-loops
 .. _messages: https://en.wikipedia.org/wiki/Message_passing
 .. _trio docs: https://trio.readthedocs.io/en/latest/
 .. _blog post: https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/
-.. _structured concurrency: https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/
+.. _structured concurrency: https://en.wikipedia.org/wiki/Structured_concurrency
+.. _structured chadcurrency: https://en.wikipedia.org/wiki/Structured_concurrency
+.. _structured concurrency: https://en.wikipedia.org/wiki/Structured_concurrency
 .. _unrequirements: https://en.wikipedia.org/wiki/Actor_model#Direct_communication_and_asynchrony
 .. _async generators: https://www.python.org/dev/peps/pep-0525/
 .. _trio-parallel: https://github.com/richardsheridan/trio-parallel
+.. _msgspec: https://jcristharif.com/msgspec/
+.. _guest-mode: https://trio.readthedocs.io/en/stable/reference-lowlevel.html?highlight=guest%20mode#using-guest-mode-to-run-trio-on-top-of-other-event-loops


 .. |gh_actions| image:: https://img.shields.io/endpoint.svg?url=https%3A%2F%2Factions-badge.atrox.dev%2Fgoodboy%2Ftractor%2Fbadge&style=popout-square
--- a/docs/dev_tips.rst
+++ b/docs/dev_tips.rst
@ -0,0 +1,51 @@
+Hot tips for ``tractor`` hackers
+================================
+
+This is a WIP guide for newcomers to the project mostly to do with
+dev, testing, CI and release gotchas, reminders and best practices.
+
+``tractor`` is a fairly novel project compared to most since it is
+effectively a new way of doing distributed computing in Python and is
+much closer to working with an "application level runtime" (like erlang
+OTP or scala's akka project) than it is to a traditional Python library.
+As such, having an arsenal of tools and recipes for figuring out the
+right way to debug problems when they do arise is somewhat of
+a necessity.
+
+
+Making a Release
+----------------
+We currently do nothing special here except the traditional
+PyPA release recipe as `documented by twine`_. I personally
+create sub-dirs within the generated ``dist/`` with an explicit
+release name such as ``alpha3/`` when there's been a sequence of
+releases I've made, but it really is up to you how you like to
+organize generated sdists locally.
+
+The resulting build cmds are approximately:
+
+.. code:: bash
+
+    python setup.py sdist -d ./dist/XXX.X/
+
+    twine upload -r testpypi dist/XXX.X/*
+
+    twine upload dist/XXX.X/*
+
+
+
+.. _documented by twine: https://twine.readthedocs.io/en/latest/#using-twine
+
+
+Debugging and monitoring actor trees
+------------------------------------
+TODO: but there are tips in the readme for some terminal commands
+which can be used to see the process trees easily on Linux.
+
+
+Using the log system to trace `trio` task flow
+----------------------------------------------
+TODO: the logging system is meant to be oriented around
+stack "layers" of the runtime such that you can track
+"logical abstraction layers" in the code such as errors, cancellation,
+IPC and streaming, and the low level transport and wire protocols.
--- a/docs/index.rst
+++ b/docs/index.rst
@ -396,7 +396,7 @@ tasks spawned via multiple RPC calls to an actor can modify


        # a per process cache
-        _actor_cache: Dict[str, bool] = {}
+        _actor_cache: dict[str, bool] = {}


        def ping_endpoints(endpoints: List[str]):
--- a/examples/init.py
+++ b/examples/init.py
--- a/examples/advanced_faults/ipc_failure_during_stream.py
+++ b/examples/advanced_faults/ipc_failure_during_stream.py
@ -0,0 +1,151 @@
+'''
+Complex edge case where during real-time streaming the IPC transport
+channels are wiped out (purposely in this example though it could have
+been an outage) and we want to ensure that despite being in debug mode
+(or not) the user can send SIGINT once they notice the hang and the
+actor tree will eventually be cancelled without leaving any zombies.
+
+'''
+import trio
+from tractor import (
+    open_nursery,
+    context,
+    Context,
+    MsgStream,
+)
+
+
+async def break_channel_silently_then_error(
+    stream: MsgStream,
+):
+    '''
+    Echo the first msg received on ``stream``, send ``None`` over the
+    stream's underlying channel and then fail with an
+    ``AssertionError``.
+
+    '''
+    async for msg in stream:
+        await stream.send(msg)
+
+        # XXX: close the channel right after an error is raised
+        # purposely breaking the IPC transport to make sure the parent
+        # doesn't get stuck in debug or hang on the connection join.
+        # this more or less simulates an infinite msg-receive hang on
+        # the other end.
+        await stream._ctx.chan.send(None)
+
+        # unconditional failure: only the first msg is ever handled
+        assert 0
+
+
+async def close_stream_and_error(
+    stream: MsgStream,
+):
+    '''
+    Echo the first msg received on ``stream``, send ``None`` over the
+    stream's underlying channel, close the stream and then fail with
+    an ``AssertionError``.
+
+    '''
+    async for msg in stream:
+        await stream.send(msg)
+
+        # wipe out channel right before raising
+        await stream._ctx.chan.send(None)
+        await stream.aclose()
+
+        # unconditional failure: only the first msg is ever handled
+        assert 0
+
+
+@context
+async def recv_and_spawn_net_killers(
+
+    ctx: Context,
+    break_ipc_after: bool | int = False,
+
+) -> None:
+    '''
+    Receive stream msgs and spawn some IPC killers mid-stream.
+
+    Every received msg is echoed back to the caller. When
+    ``break_ipc_after`` is truthy, each msg value greater than it
+    also starts the two channel-breaking tasks defined earlier in
+    this module on the local ``trio`` nursery.
+
+    '''
+    await ctx.started()
+    async with (
+        ctx.open_stream() as stream,
+        trio.open_nursery() as n,
+    ):
+        async for i in stream:
+            print(f'child echoing {i}')
+            await stream.send(i)
+            if (
+                break_ipc_after
+                and i > break_ipc_after
+            ):
+                # actually emit the banner (it was previously a bare
+                # string expression with no effect), matching the
+                # parent-side banner printed in ``main()``.
+                print(
+                    '#################################\n'
+                    'Simulating child-side IPC BREAK!\n'
+                    '#################################'
+                )
+                n.start_soon(break_channel_silently_then_error, stream)
+                n.start_soon(close_stream_and_error, stream)
+
+
+async def main(
+    debug_mode: bool = False,
+    start_method: str = 'trio',
+
+    # by default we break the parent IPC first (if configured to break
+    # at all), but this can be changed so the child does first (even if
+    # both are set to break).
+    break_parent_ipc_after: int | bool = False,
+    break_child_ipc_after: int | bool = False,
+
+) -> None:
+    '''
+    Spawn one child actor, open a context and stream with it and run
+    up to 1000 send/receive echo rounds (each receive bounded by a 2s
+    timeout), then raise ``KeyboardInterrupt`` to emulate a user
+    hitting ctl-c.
+
+    When ``break_parent_ipc_after`` is truthy, every round whose index
+    is greater than it first sends ``None`` over the parent side
+    channel. ``break_child_ipc_after`` is forwarded to the child task
+    as its ``break_ipc_after`` argument.
+
+    '''
+
+    async with (
+        open_nursery(
+            start_method=start_method,
+
+            # NOTE: even if the debugger is used we shouldn't get
+            # a hang since it never engages due to broken IPC
+            debug_mode=debug_mode,
+            loglevel='warning',
+
+        ) as an,
+    ):
+        portal = await an.start_actor(
+            'chitty_hijo',
+            enable_modules=[__name__],
+        )
+
+        async with portal.open_context(
+            recv_and_spawn_net_killers,
+            break_ipc_after=break_child_ipc_after,
+
+        ) as (ctx, sent):
+            async with ctx.open_stream() as stream:
+                for i in range(1000):
+
+                    if (
+                        break_parent_ipc_after
+                        and i > break_parent_ipc_after
+                    ):
+                        print(
+                            '#################################\n'
+                            'Simulating parent-side IPC BREAK!\n'
+                            '#################################'
+                        )
+                        await stream._ctx.chan.send(None)
+
+                    # it actually breaks right here in the
+                    # mp_spawn/forkserver backends and thus the zombie
+                    # reaper never even kicks in?
+                    print(f'parent sending {i}')
+                    await stream.send(i)
+
+                    with trio.move_on_after(2) as cs:
+
+                        # NOTE: in the parent side IPC failure case this
+                        # will raise an ``EndOfChannel`` after the child
+                        # is killed and sends a stop msg back to its
+                        # caller/this-parent.
+                        rx = await stream.receive()
+
+                        print(f"I'm a happy user and echoed to me is {rx}")
+
+                    if cs.cancelled_caught:
+                        # pretend to be a user seeing no streaming action
+                        # thinking it's a hang, and then hitting ctl-c..
+                        print("YOO i'm a user anddd thingz hangin..")
+
+                print(
+                    "YOO i'm mad send side dun but thingz hangin..\n"
+                    'MASHING CTlR-C Ctl-c..'
+                )
+                raise KeyboardInterrupt
+
+
+if __name__ == '__main__':
+    trio.run(main)
--- a/examples/asynchronous_generators.py
+++ b/examples/asynchronous_generators.py
@ -1,39 +1,41 @@
+from typing import AsyncIterator
 from itertools import repeat
+
 import trio
 import tractor

-tractor.log.get_console_log("INFO")

+async def stream_forever() -> AsyncIterator[int]:

-async def stream_forever():
    for i in repeat("I can see these little future bubble things"):
-        # each yielded value is sent over the ``Channel`` to the
-        # parent actor
+        # each yielded value is sent over the ``Channel`` to the parent actor
        yield i
        await trio.sleep(0.01)


 async def main():

-    # stream for at most 1 seconds
-    with trio.move_on_after(1) as cancel_scope:
+    async with tractor.open_nursery() as n:

-        async with tractor.open_nursery() as n:
+        portal = await n.start_actor(
+            'donny',
+            enable_modules=[__name__],
+        )

-            portal = await n.start_actor(
-                'donny',
-                enable_modules=[__name__],
-            )
+        # this async for loop streams values from the above
+        # async generator running in a separate process
+        async with portal.open_stream_from(stream_forever) as stream:
+            count = 0
+            async for letter in stream:
+                print(letter)
+                count += 1

-            # this async for loop streams values from the above
-            # async generator running in a separate process
-            async with portal.open_stream_from(stream_forever) as stream:
-                async for letter in stream:
-                    print(letter)
+                if count > 50:
+                    break

-    # we support trio's cancellation system
-    assert cancel_scope.cancelled_caught
-    assert n.cancelled
+        print('stream terminated')
+
+        await portal.cancel_actor()


 if __name__ == '__main__':
--- a/examples/debugging/fast_error_in_root_after_spawn.py
+++ b/examples/debugging/fast_error_in_root_after_spawn.py
@ -20,7 +20,7 @@ async def sleep(


 async def open_ctx(
-    n: tractor._trionics.ActorNursery
+    n: tractor._supervise.ActorNursery
 ):

    # spawn both actors
--- a/examples/debugging/multi_daemon_subactors.py
+++ b/examples/debugging/multi_daemon_subactors.py
@ -27,7 +27,18 @@ async def main():

        # retreive results
        async with p0.open_stream_from(breakpoint_forever) as stream:
-            await p1.run(name_error)
+
+            # triggers the first name error
+            try:
+                await p1.run(name_error)
+            except tractor.RemoteActorError as rae:
+                assert rae.type is NameError
+
+            async for i in stream:
+
+                # a second time try the failing subactor and this time
+                # let error propagate up to the parent/nursery.
+                await p1.run(name_error)


 if __name__ == '__main__':
--- a/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py
+++ b/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py
@ -12,18 +12,31 @@ async def breakpoint_forever():
    while True:
        await tractor.breakpoint()

+        # NOTE: if the test never sent 'q'/'quit' commands
+        # on the pdb repl, without this checkpoint line the
+        # repl would spin in this actor forever.
+        # await trio.sleep(0)
+

 async def spawn_until(depth=0):
    """"A nested nursery that triggers another ``NameError``.
    """
    async with tractor.open_nursery() as n:
        if depth < 1:
-            # await n.run_in_actor('breakpoint_forever', breakpoint_forever)
-            await n.run_in_actor(
+
+            await n.run_in_actor(breakpoint_forever)
+
+            p = await n.run_in_actor(
                name_error,
                name='name_error'
            )
+            await trio.sleep(0.5)
+            # rx and propagate error from child
+            await p.result()
+
        else:
+            # recursive call to spawn another process branching layer of
+            # the tree
            depth -= 1
            await n.run_in_actor(
                spawn_until,
@ -53,6 +66,7 @@ async def main():
    """
    async with tractor.open_nursery(
        debug_mode=True,
+        # loglevel='cancel',
    ) as n:

        # spawn both actors
@ -67,8 +81,16 @@ async def main():
            name='spawner1',
        )

+        # TODO: test this case as well where the parent doesn't see
+        # the sub-actor errors by default and instead expects a user
+        # ctrl-c to kill the root.
+        with trio.move_on_after(3):
+            await trio.sleep_forever()
+
        # gah still an issue here.
        await portal.result()
+
+        # should never get here
        await portal1.result()


--- a/examples/debugging/multi_subactor_root_errors.py
+++ b/examples/debugging/multi_subactor_root_errors.py
@ -1,3 +1,8 @@
+'''
+Test that a nested nursery will avoid clobbering
+the debugger latched by a broken child.
+
+'''
 import trio
 import tractor

@ -35,6 +40,7 @@ async def main():
    """
    async with tractor.open_nursery(
        debug_mode=True,
+        # loglevel='cancel',
    ) as n:

        # spawn both actors
--- a/examples/debugging/open_ctx_modnofound.py
+++ b/examples/debugging/open_ctx_modnofound.py
@ -0,0 +1,40 @@
+import trio
+import tractor
+
+
+@tractor.context
+async def just_sleep(
+
+    ctx: tractor.Context,
+    **kwargs,
+
+) -> None:
+    '''
+    Start and sleep.
+
+    '''
+    await ctx.started()
+    await trio.sleep_forever()
+
+
+async def main() -> None:
+
+    async with tractor.open_nursery(
+        debug_mode=True,
+    ) as n:
+        portal = await n.start_actor(
+            'ctx_child',
+
+            # XXX: we don't enable the current module in order
+            # to trigger `ModuleNotFound`.
+            enable_modules=[],
+        )
+
+        async with portal.open_context(
+            just_sleep,  # taken from pytest parameterization
+        ) as (ctx, sent):
+            raise KeyboardInterrupt
+
+
+if __name__ == '__main__':
+    trio.run(main)
--- a/examples/debugging/per_actor_debug.py
+++ b/examples/debugging/per_actor_debug.py
@ -0,0 +1,27 @@
+import trio
+import tractor
+
+async def die():
+    raise RuntimeError
+
+
+async def main():
+    async with tractor.open_nursery() as tn:
+
+        debug_actor = await tn.start_actor(
+            'debugged_boi',
+            enable_modules=[__name__],
+            debug_mode=True,
+        )
+        crash_boi = await tn.start_actor(
+            'crash_boi',
+            enable_modules=[__name__],
+            # debug_mode=True,
+        )
+
+        async with trio.open_nursery() as n:
+            n.start_soon(debug_actor.run, die)
+            n.start_soon(crash_boi.run, die)
+
+if __name__ == '__main__':
+    trio.run(main)
--- a/examples/debugging/restore_builtin_breakpoint.py
+++ b/examples/debugging/restore_builtin_breakpoint.py
@ -0,0 +1,24 @@
+import os
+import sys
+
+import trio
+import tractor
+
+
+async def main() -> None:
+    async with tractor.open_nursery(debug_mode=True) as an:
+
+        assert os.environ['PYTHONBREAKPOINT'] == 'tractor._debug._set_trace'
+
+        # TODO: an assert that verifies the hook has indeed been, hooked
+        # XD
+        assert sys.breakpointhook is not tractor._debug._set_trace
+
+        breakpoint()
+
+    # TODO: an assert that verifies the hook is unhooked..
+    assert sys.breakpointhook
+    breakpoint()
+
+if __name__ == '__main__':
+    trio.run(main)
--- a/examples/debugging/subactor_bp_in_ctx.py
+++ b/examples/debugging/subactor_bp_in_ctx.py
@ -0,0 +1,50 @@
+import tractor
+import trio
+
+
+async def gen():
+    yield 'yo'
+    await tractor.breakpoint()
+    yield 'yo'
+    await tractor.breakpoint()
+
+
+@tractor.context
+async def just_bp(
+    ctx: tractor.Context,
+) -> None:
+
+    await ctx.started()
+    await tractor.breakpoint()
+
+    # TODO: bps and errors in this call..
+    async for val in gen():
+        print(val)
+
+    # await trio.sleep(0.5)
+
+    # prematurely destroy the connection
+    await ctx.chan.aclose()
+
+    # THIS CAUSES AN UNRECOVERABLE HANG
+    # without latest ``pdbpp``:
+    assert 0
+
+
+
+async def main():
+    async with tractor.open_nursery(
+        debug_mode=True,
+    ) as n:
+        p = await n.start_actor(
+            'bp_boi',
+            enable_modules=[__name__],
+        )
+        async with p.open_context(
+            just_bp,
+        ) as (ctx, first):
+            await trio.sleep_forever()
+
+
+if __name__ == '__main__':
+    trio.run(main)
--- a/examples/full_fledged_streaming_service.py
+++ b/examples/full_fledged_streaming_service.py
@ -7,7 +7,7 @@ import tractor
 async def stream_data(seed):
    for i in range(seed):
        yield i
-        await trio.sleep(0)  # trigger scheduler
+        await trio.sleep(0.0001)  # trigger scheduler


 # this is the third actor; the aggregator
--- a/examples/infected_asyncio_echo_server.py
+++ b/examples/infected_asyncio_echo_server.py
@ -0,0 +1,92 @@
+'''
+An SC compliant infected ``asyncio`` echo server.
+
+'''
+import asyncio
+from statistics import mean
+import time
+
+import trio
+import tractor
+
+
+async def aio_echo_server(
+    to_trio: trio.MemorySendChannel,
+    from_trio: asyncio.Queue,
+
+) -> None:
+
+    # a first message must be sent **from** this ``asyncio``
+    # task or the ``trio`` side will never unblock from
+    # ``tractor.to_asyncio.open_channel_from():``
+    to_trio.send_nowait('start')
+
+    # XXX: this uses an ``from_trio: asyncio.Queue`` currently but we
+    # should probably offer something better.
+    while True:
+        # echo the msg back
+        to_trio.send_nowait(await from_trio.get())
+        await asyncio.sleep(0)
+
+
+@tractor.context
+async def trio_to_aio_echo_server(
+    ctx: tractor.Context,
+):
+    # this will block until the ``asyncio`` task sends a "first"
+    # message.
+    async with tractor.to_asyncio.open_channel_from(
+        aio_echo_server,
+    ) as (first, chan):
+
+        assert first == 'start'
+        await ctx.started(first)
+
+        async with ctx.open_stream() as stream:
+
+            async for msg in stream:
+                await chan.send(msg)
+
+                out = await chan.receive()
+                # echo back to parent actor-task
+                await stream.send(out)
+
+
+async def main():
+
+    async with tractor.open_nursery() as n:
+        p = await n.start_actor(
+            'aio_server',
+            enable_modules=[__name__],
+            infect_asyncio=True,
+        )
+        async with p.open_context(
+            trio_to_aio_echo_server,
+        ) as (ctx, first):
+
+            assert first == 'start'
+
+            count = 0
+            async with ctx.open_stream() as stream:
+
+                delays = []
+                send = time.time()
+
+                await stream.send(count)
+                async for msg in stream:
+                    recv = time.time()
+                    delays.append(recv - send)
+                    assert msg == count
+                    count += 1
+                    send = time.time()
+                    await stream.send(count)
+
+                    if count >= 1e3:
+                        break
+
+        print(f'mean round trip rate (Hz): {1/mean(delays)}')
+        await p.cancel_actor()
+
+
+if __name__ == '__main__':
+    trio.run(main)
--- a/examples/integration/open_context_and_sleep.py
+++ b/examples/integration/open_context_and_sleep.py
@ -0,0 +1,49 @@
+import trio
+import click
+import tractor
+import pydantic
+# from multiprocessing import shared_memory
+
+
+@tractor.context
+async def just_sleep(
+
+    ctx: tractor.Context,
+    **kwargs,
+
+) -> None:
+    '''
+    Signal that the context has started, then sleep forever.
+
+    '''
+    await ctx.started()
+    await trio.sleep_forever()
+
+
+async def main() -> None:
+
+    proc = await trio.open_process( (
+        'python',
+        '-c',
+        'import trio; trio.run(trio.sleep_forever)',
+    ))
+    await proc.wait()
+    # await trio.sleep_forever()
+    # async with tractor.open_nursery() as n:
+
+    #     portal = await n.start_actor(
+    #         'rpc_server',
+    #         enable_modules=[__name__],
+    #     )
+
+    #     async with portal.open_context(
+    #         just_sleep,  # taken from pytest parameterization
+    #     ) as (ctx, sent):
+    #         await trio.sleep_forever()
+
+
+
+if __name__ == '__main__':
+    import time
+    # time.sleep(999)
+    trio.run(main)
--- a/examples/parallelism/concurrent_actors_primes.py
+++ b/examples/parallelism/concurrent_actors_primes.py
@ -9,7 +9,7 @@ is ``tractor``'s channels.

 """
 from contextlib import asynccontextmanager
-from typing import List, Callable
+from typing import Callable
 import itertools
 import math
 import time
@ -71,8 +71,8 @@ async def worker_pool(workers=4):

        async def _map(
            worker_func: Callable[[int], bool],
-            sequence: List[int]
-        ) -> List[bool]:
+            sequence: list[int]
+        ) -> list[bool]:

            # define an async (local) task to collect results from workers
            async def send_result(func, value, portal):
--- a/examples/quick_cluster.py
+++ b/examples/quick_cluster.py
@ -0,0 +1,44 @@
+
+import trio
+import tractor
+
+
+async def sleepy_jane():
+    uid = tractor.current_actor().uid
+    print(f'Yo i am actor {uid}')
+    await trio.sleep_forever()
+
+
+async def main():
+    '''
+    Spawn a flat actor cluster, with one process per
+    detected core.
+
+    '''
+    portal_map: dict[str, tractor.Portal]
+    results: dict[str, str]
+
+    # look at this hip new syntax!
+    async with (
+
+        tractor.open_actor_cluster(
+            modules=[__name__]
+        ) as portal_map,
+
+        trio.open_nursery() as n,
+    ):
+
+        for (name, portal) in portal_map.items():
+            n.start_soon(portal.run, sleepy_jane)
+
+        await trio.sleep(0.5)
+
+        # kill the cluster with a cancel
+        raise KeyboardInterrupt
+
+
+if __name__ == '__main__':
+    try:
+        trio.run(main)
+    except KeyboardInterrupt:
+        pass
--- a/nooz/.gitignore
+++ b/nooz/.gitignore
@ -0,0 +1 @@
+!.gitignore
--- a/nooz/322.trivial.rst
+++ b/nooz/322.trivial.rst
@ -0,0 +1,16 @@
+Strictly support Python 3.10+, start runtime machinery reorg
+
+Since we want to push forward using the new `match:` syntax for our
+internal RPC-msg loops, we officially drop 3.9 support for the next
+release which should coincide well with the first release of 3.11.
+
+This patch set also officially removes the ``tractor.run()`` API (which
+has been deprecated for some time) as well as starts an initial re-org
+of the internal runtime core by:
+- renaming ``tractor._actor`` -> ``._runtime``
+- moving the ``._runtime.Actor._process_messages()`` and
+  ``._async_main()`` to be module level singleton-task-functions since
+  they are only started once for each connection and actor spawn
+  respectively; this internal API thus looks more similar to (at the
+  time of writing) the ``trio``-internals in ``trio._core._run``.
+- officially remove ``tractor.run()``, now deprecated for some time.
--- a/nooz/324.bugfix.rst
+++ b/nooz/324.bugfix.rst
@ -0,0 +1,4 @@
+Only set `._debug.Lock.local_pdb_complete` if it has been created.
+
+This can be triggered by a very rare race condition (and thus we have no
+working test yet) but it is known to exist in (a) consumer project(s).
--- a/nooz/333.feature.rst
+++ b/nooz/333.feature.rst
@ -0,0 +1,25 @@
+Add support for ``trio >= 0.22`` and support for the new Python 3.11
+``[Base]ExceptionGroup`` from `pep 654`_ via the backported
+`exceptiongroup`_ package and some final fixes to the debug mode
+subsystem.
+
+This port ended up driving some (hopefully) final fixes to our debugger
+subsystem including the solution to all lingering stdstreams locking
+race-conditions and deadlock scenarios. This includes extending the
+debugger test suite as well as cancellation and ``asyncio`` mode cases.
+Some of the notable details:
+
+- always reverting to the ``trio`` SIGINT handler when leaving debug
+  mode.
+- bypassing child attempts to acquire the debug lock when detected
+  to be amidst actor-runtime-cancellation.
+- allowing the root actor to cancel local but IPC-stale subactor
+  requests-tasks for the debug lock when in a "no IPC peers" state.
+
+Further we refined our ``ActorNursery`` semantics to be more similar to
+``trio`` in the sense that parent task errors are always packed into the
+actor-nursery emitted exception group and adjusted all tests and
+examples accordingly.
+
+.. _pep 654: https://peps.python.org/pep-0654/#handling-exception-groups
+.. _exceptiongroup: https://github.com/python-trio/exceptiongroup
--- a/nooz/335.trivial.rst
+++ b/nooz/335.trivial.rst
@ -0,0 +1,5 @@
+Establish an explicit "backend spawning" method table; use it from CI
+
+More clearly lays out the current set of (3) backends: ``['trio',
+'mp_spawn', 'mp_forkserver']`` and adjusts the ``._spawn.py`` internals
+as well as the test suite to accommodate.
--- a/nooz/336.trivial.rst
+++ b/nooz/336.trivial.rst
@ -0,0 +1,4 @@
+Add ``key: Callable[..., Hashable]`` support to ``.trionics.maybe_open_context()``
+
+Gives users finer grained control over cache hit behaviour using
+a callable which receives the input ``kwargs: dict``.
--- a/nooz/337.feature.rst
+++ b/nooz/337.feature.rst
@ -0,0 +1,41 @@
+Add support for debug-lock blocking using a ``._debug.Lock._blocked:
+set[tuple]`` and add ids when no-more IPC connections with the
+root actor are detected.
+
+This is an enhancement which (mostly) solves a lingering debugger
+locking race case we needed to handle:
+
+- child crashes, acquires TTY lock in root and attaches to ``pdb``
+- child IPC goes down such that all channels to the root are broken
+  / non-functional.
+- root is stuck thinking the child is still in debug even though it
+  can't be contacted and the child actor machinery hasn't been
+  cancelled by its parent.
+- root gets stuck in deadlock with child since it won't send a cancel
+  request until the child is finished debugging (to avoid clobbering
+  a child that is actually using the debugger), but the child can't
+  unlock the debugger bc IPC is down and it can't contact the root.
+
+To avoid this scenario add a debug lock blocking list via
+`._debug.Lock._blocked: set[tuple]` which holds actor uids for any actor
+that is detected by the root as having no transport channel connections
+(of which at least one should exist if this sub-actor at some point
+acquired the debug lock). The root consequently checks this list for any
+actor that tries to (re)acquire the lock and blocks with
+a ``ContextCancelled``. Further, when a debug condition is tested in
+``._runtime._invoke``, the context's ``._enter_debugger_on_cancel`` is
+set to `False` if the actor was put on the block list, so that all
+post-mortem / crash handling will be bypassed for that task.
+
+In theory this approach to block list management may cause problems
+where some nested child actor acquires and releases the lock multiple
+times and it gets stuck on the block list after the first use? If this
+turns out to be an issue we can try changing the strat so blocks are
+only added when the root has zero IPC peers left?
+
+Further, this adds a root-locking-task side cancel scope,
+``Lock._root_local_task_cs_in_debug``, which can be ``.cancel()``-ed by the root
+runtime when a stale lock is detected during the IPC channel testing.
+However, right now we're NOT using this since it seems to cause test
+failures likely due to causing pre-mature cancellation and maybe needs
+a bit more experimenting?
--- a/nooz/343.trivial.rst
+++ b/nooz/343.trivial.rst
@ -0,0 +1,19 @@
+Rework our ``.trionics.BroadcastReceiver`` internals to avoid method
+recursion and approach a design and interface closer to ``trio``'s
+``MemoryReceiveChannel``.
+
+The details of the internal changes include:
+
+- implementing a ``BroadcastReceiver.receive_nowait()`` and using it
+  within the async ``.receive()`` thus avoiding recursion from
+  ``.receive()``.
+- failing over to an internal ``._receive_from_underlying()`` when the
+  ``_nowait()`` call raises ``trio.WouldBlock``
+- adding ``BroadcastState.statistics()`` for debugging and testing both
+  internals and by users.
+- add an internal ``BroadcastReceiver._raise_on_lag: bool`` which can be
+  set to avoid ``Lagged`` raising for possible use cases where a user
+  wants to choose between a `cheap or nasty
+  pattern <https://zguide.zeromq.org/docs/chapter7/#The-Cheap-or-Nasty-Pattern>`_
+  for the particular stream (we use this in ``piker``'s dark clearing
+  engine to avoid fast feeds breaking during HFT periods).
--- a/nooz/344.bugfix.rst
+++ b/nooz/344.bugfix.rst
@ -0,0 +1,11 @@
+Always ``list``-cast the ``mngrs`` input to
+``.trionics.gather_contexts()`` and ensure its size otherwise raise
+a ``ValueError``.
+
+Turns out that trying to pass an inline-style generator comprehension
+doesn't seem to work inside the ``async with`` expression? Further, in
+such a case we can get a hang waiting on the all-entered event
+completion when the internal mngrs iteration is a noop. Instead we
+always greedily check a size and error on empty input; the lazy
+iteration of a generator input is not beneficial anyway since we're
+entering all manager instances in concurrent tasks.
--- a/nooz/346.bugfix.rst
+++ b/nooz/346.bugfix.rst
@ -0,0 +1,15 @@
+Fixes to ensure IPC (channel) breakage doesn't result in hung actor
+trees; the zombie reaping and general supervision machinery will always
+clean up and terminate.
+
+This includes not only the (mostly minor) fixes to solve these cases but
+also a new extensive test suite in `test_advanced_faults.py` with an
+accompanying highly configurable example module-script in
+`examples/advanced_faults/ipc_failure_during_stream.py`. Tests ensure we
+never get hangs or zombies despite operating in debug mode and attempt to
+simulate all possible IPC transport failure cases for a local-host actor
+tree.
+
+Further we simplify `Context.open_stream.__aexit__()` to just call
+`MsgStream.aclose()` directly more or less avoiding a pure duplicate
+code path.
--- a/nooz/349.trivial.rst
+++ b/nooz/349.trivial.rst
@ -0,0 +1,10 @@
+Always redraw the `pdbpp` prompt on `SIGINT` during REPL use.
+
+There were recent changes to do with Python 3.10 that required us to pin
+to a specific commit in `pdbpp` which have recently been fixed minus
+this last issue with `SIGINT` shielding: not clobbering or not
+showing the `(Pdb++)` prompt on ctrl-c by the user. This repairs all
+that by firstly removing the standard KBI intercepting of the std lib's
+`pdb.Pdb._cmdloop()` as well as ensuring that only the actor with REPL
+control ever reports `SIGINT` handler log msgs and prompt redraws. With
+this we move back to using pypi `pdbpp` release.
--- a/nooz/356.trivial.rst
+++ b/nooz/356.trivial.rst
@ -0,0 +1,7 @@
+Drop `trio.Process.aclose()` usage, copy into our spawning code.
+
+The details are laid out in https://github.com/goodboy/tractor/issues/330.
+`trio` changed its process running quite some time ago, this just copies
+out the small bit we needed (from the old `.aclose()`) for hard kills
+where a soft runtime cancel request fails and our "zombie killer"
+implementation kicks in.
--- a/nooz/358.feature.rst
+++ b/nooz/358.feature.rst
@ -0,0 +1,15 @@
+Switch to using the fork & fix of `pdb++`, `pdbp`:
+https://github.com/mdmintz/pdbp
+
+Allows us to sidestep a variety of issues that aren't being maintained
+in the upstream project thanks to the hard work of @mdmintz!
+
+We also include some default settings adjustments as per recent
+development on the fork:
+
+- sticky mode is still turned on by default but now activates when
+  using the `ll` repl command.
+- turn off line truncation by default to avoid inter-line gaps when
+  resizing the terminal during use.
+- when using the backtrace cmd either by `w` or `bt`, the config
+  automatically switches to non-sticky mode.
--- a/nooz/HOWTO.rst
+++ b/nooz/HOWTO.rst
@ -0,0 +1,8 @@
+See both the `towncrier docs`_ and the `pluggy release readme`_ for hot
+tips. We basically have the most minimal setup and release process right
+now and use the default `fragment set`_.
+
+
+.. _towncrier docs: https://github.com/twisted/towncrier#quick-start
+.. _pluggy release readme: https://github.com/pytest-dev/pluggy/blob/main/changelog/README.rst
+.. _fragment set: https://github.com/twisted/towncrier#news-fragments
--- a/nooz/_template.rst
+++ b/nooz/_template.rst
@ -0,0 +1,37 @@
+{% for section in sections %}
+{% set underline = "-" %}
+{% if section %}
+{{section}}
+{{ underline * section|length }}{% set underline = "~" %}
+
+{% endif %}
+{% if sections[section] %}
+{% for category, val in definitions.items() if category in sections[section] %}
+
+{{ definitions[category]['name'] }}
+{{ underline * definitions[category]['name']|length }}
+
+{% if definitions[category]['showcontent'] %}
+{% for text, values in sections[section][category]|dictsort(by='value') %}
+{% set issue_joiner = joiner(', ') %}
+- {% for value in values|sort %}{{ issue_joiner() }}`{{ value }} <https://github.com/goodboy/tractor/issues/{{ value[1:] }}>`_{% endfor %}: {{ text }}
+
+{% endfor %}
+{% else %}
+- {{ sections[section][category]['']|sort|join(', ') }}
+
+
+{% endif %}
+{% if sections[section][category]|length == 0 %}
+
+No significant changes.
+
+{% else %}
+{% endif %}
+{% endfor %}
+{% else %}
+
+No significant changes.
+
+{% endif %}
+{% endfor %}
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,28 @@
+[tool.towncrier]
+package = "tractor"
+filename = "NEWS.rst"
+directory = "nooz/"
+version = "0.1.0a6"
+title_format = "tractor {version} ({project_date})"
+template = "nooz/_template.rst"
+all_bullets = true
+
+  [[tool.towncrier.type]]
+  directory = "feature"
+  name = "Features"
+  showcontent = true
+
+  [[tool.towncrier.type]]
+  directory = "bugfix"
+  name = "Bug Fixes"
+  showcontent = true
+
+  [[tool.towncrier.type]]
+  directory = "doc"
+  name = "Improved Documentation"
+  showcontent = true
+
+  [[tool.towncrier.type]]
+  directory = "trivial"
+  name = "Trivial/Internal Changes"
+  showcontent = true
--- a/requirements-test.txt
+++ b/requirements-test.txt
@ -1,6 +1,8 @@
 pytest
 pytest-trio
-pdbpp
+pytest-timeout
+pdbp
 mypy
 trio_typing
 pexpect
+towncrier
--- a/setup.py
+++ b/setup.py
@ -1,21 +1,22 @@
 #!/usr/bin/env python
 #
-# tractor: a trionic actor model built on `multiprocessing` and `trio`
+# tractor: structured concurrent "actors".
 #
-# Copyright (C) 2018-2020  Tyler Goodlet
+# Copyright 2018-eternity Tyler Goodlet.

 # This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
+# it under the terms of the GNU Affero General Public License as published by
 # the Free Software Foundation, either version 3 of the License, or
 # (at your option) any later version.

 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.

-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from setuptools import setup

 with open('docs/README.rst', encoding='utf-8') as f:
@ -24,43 +25,62 @@ with open('docs/README.rst', encoding='utf-8') as f:

 setup(
    name="tractor",
-    version='0.1.0a1',  # first ever alpha
-    description='structured concurrrent "actors"',
+    version='0.1.0a6dev0',  # alpha zone
+    description='structured concurrrent `trio`-"actors"',
    long_description=readme,
-    license='GPLv3',
+    license='AGPLv3',
    author='Tyler Goodlet',
    maintainer='Tyler Goodlet',
-    maintainer_email='jgbt@protonmail.com',
+    maintainer_email='goodboy_foss@protonmail.com',
    url='https://github.com/goodboy/tractor',
    platforms=['linux', 'windows'],
    packages=[
        'tractor',
-        'tractor.testing',
+        'tractor.experimental',
+        'tractor.trionics',
    ],
    install_requires=[

        # trio related
-        'trio>0.8',
+        # proper range spec:
+        # https://packaging.python.org/en/latest/discussions/install-requires-vs-requirements/#id5
+        'trio >= 0.22',
        'async_generator',
        'trio_typing',
+        'exceptiongroup',

        # tooling
+        'tricycle',
+        'trio_typing',
        'colorlog',
        'wrapt',
-        'pdbpp',

-        # serialization
-        'msgpack',
+        # IPC serialization
+        'msgspec',
+
+        # debug mode REPL
+        'pdbp',
+
+        # pip ref docs on these specs:
+        # https://pip.pypa.io/en/stable/reference/requirement-specifiers/#examples
+        # and pep:
+        # https://peps.python.org/pep-0440/#version-specifiers
+
+        # windows deps workaround for ``pdbpp``
+        # https://github.com/pdbpp/pdbpp/issues/498
+        # https://github.com/pdbpp/fancycompleter/issues/37
+        'pyreadline3 ; platform_system == "Windows"',

    ],
    tests_require=['pytest'],
-    python_requires=">=3.7",
+    python_requires=">=3.10",
    keywords=[
        'trio',
-        "async",
-        "concurrency",
-        "actor model",
-        "distributed",
+        'async',
+        'concurrency',
+        'structured concurrency',
+        'actor model',
+        'distributed',
        'multiprocessing'
    ],
    classifiers=[
@ -68,12 +88,10 @@ setup(
        "Operating System :: POSIX :: Linux",
        "Operating System :: Microsoft :: Windows",
        "Framework :: Trio",
-        "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
+        "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
        "Intended Audience :: Science/Research",
        "Intended Audience :: Developers",
        "Topic :: System :: Distributed Computing",
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -7,16 +7,91 @@ import os
 import random
 import signal
 import platform
+import pathlib
 import time
+import inspect
+from functools import partial, wraps

 import pytest
+import trio
 import tractor

-# export for tests
-from tractor.testing import tractor_test  # noqa
-
-
 pytest_plugins = ['pytester']
+
+
+def tractor_test(fn):
+    """
+    Use:
+
+    @tractor_test
+    async def test_whatever():
+        await ...
+
+    If fixtures:
+
+        - ``arb_addr`` (a socket addr tuple where arbiter is listening)
+        - ``loglevel`` (logging level passed to tractor internals)
+        - ``start_method`` (subprocess spawning backend)
+
+    are defined in the `pytest` fixture space they will be automatically
+    injected to tests declaring these funcargs.
+    """
+    @wraps(fn)
+    def wrapper(
+        *args,
+        loglevel=None,
+        arb_addr=None,
+        start_method=None,
+        **kwargs
+    ):
+        # __tracebackhide__ = True
+
+        if 'arb_addr' in inspect.signature(fn).parameters:
+            # injects test suite fixture value to test as well
+            # as `run()`
+            kwargs['arb_addr'] = arb_addr
+
+        if 'loglevel' in inspect.signature(fn).parameters:
+            # allows test suites to define a 'loglevel' fixture
+            # that activates the internal logging
+            kwargs['loglevel'] = loglevel
+
+        if start_method is None:
+            if platform.system() == "Windows":
+                start_method = 'trio'
+
+        if 'start_method' in inspect.signature(fn).parameters:
+            # set of subprocess spawning backends
+            kwargs['start_method'] = start_method
+
+        if kwargs:
+
+            # use explicit root actor start
+
+            async def _main():
+                async with tractor.open_root_actor(
+                    # **kwargs,
+                    arbiter_addr=arb_addr,
+                    loglevel=loglevel,
+                    start_method=start_method,
+
+                    # TODO: only enable when pytest is passed --pdb
+                    # debug_mode=True,
+
+                ):
+                    await fn(*args, **kwargs)
+
+            main = _main
+
+        else:
+            # use implicit root actor start
+            main = partial(fn, *args, **kwargs)
+
+        return trio.run(main)
+
+    return wrapper
+
+
 _arb_addr = '127.0.0.1', random.randint(1000, 9999)


@ -39,20 +114,27 @@ no_windows = pytest.mark.skipif(
 )


-def repodir():
-    """Return the abspath to the repo directory.
-    """
-    dirname = os.path.dirname
-    dirpath = os.path.abspath(
-        dirname(dirname(os.path.realpath(__file__)))
-        )
-    return dirpath
+def repodir() -> pathlib.Path:
+    '''
+    Return the abspath to the repo directory.
+
+    '''
+    # 2 parents up to step up through tests/<repo_dir>
+    return pathlib.Path(__file__).parent.parent.absolute()
+
+
+def examples_dir() -> pathlib.Path:
+    '''
+    Return the abspath to the examples directory as `pathlib.Path`.
+
+    '''
+    return repodir() / 'examples'


 def pytest_addoption(parser):
    parser.addoption(
        "--ll", action="store", dest='loglevel',
-        default=None, help="logging level to set when testing"
+        default='ERROR', help="logging level to set when testing"
    )

    parser.addoption(
@ -64,31 +146,31 @@ def pytest_addoption(parser):

 def pytest_configure(config):
    backend = config.option.spawn_backend
-
-    if backend == 'mp':
-        tractor._spawn.try_set_start_method('spawn')
-    elif backend == 'trio':
-        tractor._spawn.try_set_start_method(backend)
+    tractor._spawn.try_set_start_method(backend)


@pytest.fixture(scope='session', autouse=True)
 def loglevel(request):
    orig = tractor.log._default_loglevel
    level = tractor.log._default_loglevel = request.config.option.loglevel
+    tractor.log.get_console_log(level)
    yield level
    tractor.log._default_loglevel = orig


@pytest.fixture(scope='session')
-def spawn_backend(request):
+def spawn_backend(request) -> str:
    return request.config.option.spawn_backend


+_ci_env: bool = os.environ.get('CI', False)
+
+
@pytest.fixture(scope='session')
 def ci_env() -> bool:
    """Detect CI envoirment.
    """
-    return os.environ.get('TRAVIS', False) or os.environ.get('CI', False)
+    return _ci_env


@pytest.fixture(scope='session')
@ -98,24 +180,24 @@ def arb_addr():

 def pytest_generate_tests(metafunc):
    spawn_backend = metafunc.config.option.spawn_backend
+
    if not spawn_backend:
        # XXX some weird windows bug with `pytest`?
-        spawn_backend = 'mp'
-    assert spawn_backend in ('mp', 'trio')
+        spawn_backend = 'trio'

+    # TODO: maybe just use the literal `._spawn.SpawnMethodKey`?
+    assert spawn_backend in (
+        'mp_spawn',
+        'mp_forkserver',
+        'trio',
+    )
+
+    # NOTE: used to be used to dynamically parametrize tests for when
+    # you just passed --spawn-backend=`mp` on the cli, but now we expect
+    # that cli input to be manually specified, BUT, maybe we'll do
+    # something like this again in the future?
    if 'start_method' in metafunc.fixturenames:
-        if spawn_backend == 'mp':
-            from multiprocessing import get_all_start_methods
-            methods = get_all_start_methods()
-            if 'fork' in methods:
-                # fork not available on windows, so check before
-                # removing XXX: the fork method is in general
-                # incompatible with trio's global scheduler state
-                methods.remove('fork')
-        elif spawn_backend == 'trio':
-            methods = ['trio']
-
-        metafunc.parametrize("start_method", methods, scope='module')
+        metafunc.parametrize("start_method", [spawn_backend], scope='module')


 def sig_prog(proc, sig):
@ -131,16 +213,22 @@ def sig_prog(proc, sig):


@pytest.fixture
-def daemon(loglevel, testdir, arb_addr):
-    """Run a daemon actor as a "remote arbiter".
-    """
+def daemon(
+    loglevel: str,
+    testdir,
+    arb_addr: tuple[str, int],
+):
+    '''
+    Run a daemon actor as a "remote arbiter".
+
+    '''
    if loglevel in ('trace', 'debug'):
        # too much logging will lock up the subproc (smh)
        loglevel = 'info'

    cmdargs = [
        sys.executable, '-c',
-        "import tractor; tractor.run_daemon([], arbiter_addr={}, loglevel={})"
+        "import tractor; tractor.run_daemon([], registry_addr={}, loglevel={})"
        .format(
            arb_addr,
            "'{}'".format(loglevel) if loglevel else None)
--- a/tests/test_2way.py
+++ b/tests/test_2way.py
@ -1,382 +1,11 @@
 """
-Bidirectional streaming and context API.
+Bidirectional streaming.

 """
 import pytest
 import trio
 import tractor

-from conftest import tractor_test
-
-# the general stream semantics are
-# - normal termination: far end relays a stop message which
-# terminates an ongoing ``MsgStream`` iteration
-# - cancel termination: context is cancelled on either side cancelling
-#  the "linked" inter-actor task context
-
-
-_state: bool = False
-
-
-@tractor.context
-async def simple_setup_teardown(
-
-    ctx: tractor.Context,
-    data: int,
-    block_forever: bool = False,
-
-) -> None:
-
-    # startup phase
-    global _state
-    _state = True
-
-    # signal to parent that we're up
-    await ctx.started(data + 1)
-
-    try:
-        if block_forever:
-            # block until cancelled
-            await trio.sleep_forever()
-        else:
-            return 'yo'
-    finally:
-        _state = False
-
-
-async def assert_state(value: bool):
-    global _state
-    assert _state == value
-
-
-@pytest.mark.parametrize(
-    'error_parent',
-    [False, True],
-)
-@pytest.mark.parametrize(
-    'callee_blocks_forever',
-    [False, True],
-)
-def test_simple_context(
-    error_parent,
-    callee_blocks_forever,
-):
-
-    async def main():
-
-        async with tractor.open_nursery() as n:
-
-            portal = await n.start_actor(
-                'simple_context',
-                enable_modules=[__name__],
-            )
-
-            async with portal.open_context(
-                simple_setup_teardown,
-                data=10,
-                block_forever=callee_blocks_forever,
-            ) as (ctx, sent):
-
-                assert sent == 11
-
-                if callee_blocks_forever:
-                    await portal.run(assert_state, value=True)
-                    await ctx.cancel()
-                else:
-                    assert await ctx.result() == 'yo'
-
-            # after cancellation
-            await portal.run(assert_state, value=False)
-
-            if error_parent:
-                raise ValueError
-
-            # shut down daemon
-            await portal.cancel_actor()
-
-    if error_parent:
-        try:
-            trio.run(main)
-        except ValueError:
-            pass
-    else:
-        trio.run(main)
-
-
-# basic stream terminations:
-# - callee context closes without using stream
-# - caller context closes without using stream
-# - caller context calls `Context.cancel()` while streaming
-#   is ongoing resulting in callee being cancelled
-# - callee calls `Context.cancel()` while streaming and caller
-#   sees stream terminated in `RemoteActorError`
-
-# TODO: future possible features
-# - restart request: far end raises `ContextRestart`
-
-
-@tractor.context
-async def close_ctx_immediately(
-
-    ctx: tractor.Context,
-
-) -> None:
-
-    await ctx.started()
-    global _state
-
-    async with ctx.open_stream():
-        pass
-
-
-@tractor_test
-async def test_callee_closes_ctx_after_stream_open():
-    'callee context closes without using stream'
-
-    async with tractor.open_nursery() as n:
-
-        portal = await n.start_actor(
-            'fast_stream_closer',
-            enable_modules=[__name__],
-        )
-
-        async with portal.open_context(
-            close_ctx_immediately,
-
-            # flag to avoid waiting the final result
-            # cancel_on_exit=True,
-
-        ) as (ctx, sent):
-
-            assert sent is None
-
-            with trio.fail_after(0.5):
-                async with ctx.open_stream() as stream:
-
-                    # should fall through since ``StopAsyncIteration``
-                    # should be raised through translation of
-                    # a ``trio.EndOfChannel`` by
-                    # ``trio.abc.ReceiveChannel.__anext__()``
-                    async for _ in stream:
-                        assert 0
-                    else:
-
-                        # verify stream is now closed
-                        try:
-                            await stream.receive()
-                        except trio.EndOfChannel:
-                            pass
-
-            # TODO: should be just raise the closed resource err
-            # directly here to enforce not allowing a re-open
-            # of a stream to the context (at least until a time of
-            # if/when we decide that's a good idea?)
-            try:
-                async with ctx.open_stream() as stream:
-                    pass
-            except trio.ClosedResourceError:
-                pass
-
-        await portal.cancel_actor()
-
-
-@tractor.context
-async def expect_cancelled(
-
-    ctx: tractor.Context,
-
-) -> None:
-    global _state
-    _state = True
-
-    await ctx.started()
-
-    try:
-        async with ctx.open_stream() as stream:
-            async for msg in stream:
-                await stream.send(msg)  # echo server
-
-    except trio.Cancelled:
-        # expected case
-        _state = False
-        raise
-
-    else:
-        assert 0, "Wasn't cancelled!?"
-
-
-@pytest.mark.parametrize(
-    'use_ctx_cancel_method',
-    [False, True],
-)
-@tractor_test
-async def test_caller_closes_ctx_after_callee_opens_stream(
-    use_ctx_cancel_method: bool,
-):
-    'caller context closes without using stream'
-
-    async with tractor.open_nursery() as n:
-
-        portal = await n.start_actor(
-            'ctx_cancelled',
-            enable_modules=[__name__],
-        )
-
-        async with portal.open_context(
-            expect_cancelled,
-        ) as (ctx, sent):
-            await portal.run(assert_state, value=True)
-
-            assert sent is None
-
-            # call cancel explicitly
-            if use_ctx_cancel_method:
-
-                await ctx.cancel()
-
-                try:
-                    async with ctx.open_stream() as stream:
-                        async for msg in stream:
-                            pass
-
-                except tractor.ContextCancelled:
-                    raise  # XXX: must be propagated to __aexit__
-
-                else:
-                    assert 0, "Should have context cancelled?"
-
-                # channel should still be up
-                assert portal.channel.connected()
-
-                # ctx is closed here
-                await portal.run(assert_state, value=False)
-
-            else:
-                try:
-                    with trio.fail_after(0.2):
-                        await ctx.result()
-                        assert 0, "Callee should have blocked!?"
-                except trio.TooSlowError:
-                    await ctx.cancel()
-        try:
-            async with ctx.open_stream() as stream:
-                async for msg in stream:
-                    pass
-        except tractor.ContextCancelled:
-            pass
-        else:
-            assert 0, "Should have received closed resource error?"
-
-        # ctx is closed here
-        await portal.run(assert_state, value=False)
-
-        # channel should not have been destroyed yet, only the
-        # inter-actor-task context
-        assert portal.channel.connected()
-
-        # teardown the actor
-        await portal.cancel_actor()
-
-
-@tractor_test
-async def test_multitask_caller_cancels_from_nonroot_task():
-
-    async with tractor.open_nursery() as n:
-
-        portal = await n.start_actor(
-            'ctx_cancelled',
-            enable_modules=[__name__],
-        )
-
-        async with portal.open_context(
-            expect_cancelled,
-        ) as (ctx, sent):
-
-            await portal.run(assert_state, value=True)
-            assert sent is None
-
-            async with ctx.open_stream() as stream:
-
-                async def send_msg_then_cancel():
-                    await stream.send('yo')
-                    await portal.run(assert_state, value=True)
-                    await ctx.cancel()
-                    await portal.run(assert_state, value=False)
-
-                async with trio.open_nursery() as n:
-                    n.start_soon(send_msg_then_cancel)
-
-                    try:
-                        async for msg in stream:
-                            assert msg == 'yo'
-
-                    except tractor.ContextCancelled:
-                        raise  # XXX: must be propagated to __aexit__
-
-                # channel should still be up
-                assert portal.channel.connected()
-
-                # ctx is closed here
-                await portal.run(assert_state, value=False)
-
-        # channel should not have been destroyed yet, only the
-        # inter-actor-task context
-        assert portal.channel.connected()
-
-        # teardown the actor
-        await portal.cancel_actor()
-
-
-@tractor.context
-async def cancel_self(
-
-    ctx: tractor.Context,
-
-) -> None:
-    global _state
-    _state = True
-
-    await ctx.cancel()
-    try:
-        with trio.fail_after(0.1):
-            await trio.sleep_forever()
-
-    except trio.Cancelled:
-        raise
-
-    except trio.TooSlowError:
-        # should never get here
-        assert 0
-
-
-@tractor_test
-async def test_callee_cancels_before_started():
-    '''callee calls `Context.cancel()` while streaming and caller
-    sees stream terminated in `ContextCancelled`.
-
-    '''
-    async with tractor.open_nursery() as n:
-
-        portal = await n.start_actor(
-            'cancels_self',
-            enable_modules=[__name__],
-        )
-        try:
-
-            async with portal.open_context(
-                cancel_self,
-            ) as (ctx, sent):
-                async with ctx.open_stream():
-
-                    await trio.sleep_forever()
-
-        # raises a special cancel signal
-        except tractor.ContextCancelled as ce:
-            ce.type == trio.Cancelled
-
-        # teardown the actor
-        await portal.cancel_actor()
-

@tractor.context
 async def simple_rpc(
@ -385,9 +14,10 @@ async def simple_rpc(
    data: int,

 ) -> None:
-    """Test a small ping-pong server.
+    '''
+    Test a small ping-pong server.

-    """
+    '''
    # signal to parent that we're up
    await ctx.started(data + 1)

@ -445,9 +75,10 @@ async def simple_rpc_with_forloop(
    [simple_rpc, simple_rpc_with_forloop],
 )
 def test_simple_rpc(server_func, use_async_for):
-    """The simplest request response pattern.
+    '''
+    The simplest request response pattern.

-    """
+    '''
    async def main():
        async with tractor.open_nursery() as n:

--- a/tests/test_advanced_faults.py
+++ b/tests/test_advanced_faults.py
@ -0,0 +1,193 @@
+'''
+Sketchy network blackoutz, ugly byzantine gens, puedes eschuchar la
+cancelacion?..
+
+'''
+from functools import partial
+
+import pytest
+from _pytest.pathlib import import_path
+import trio
+import tractor
+
+from conftest import (
+    examples_dir,
+)
+
+
+@pytest.mark.parametrize(
+    'debug_mode',
+    [False, True],
+    ids=['no_debug_mode', 'debug_mode'],
+)
+@pytest.mark.parametrize(
+    'ipc_break',
+    [
+        # no breaks
+        {
+            'break_parent_ipc_after': False,
+            'break_child_ipc_after': False,
+        },
+
+        # only parent breaks
+        {
+            'break_parent_ipc_after': 500,
+            'break_child_ipc_after': False,
+        },
+
+        # only child breaks
+        {
+            'break_parent_ipc_after': False,
+            'break_child_ipc_after': 500,
+        },
+
+        # both: break parent first
+        {
+            'break_parent_ipc_after': 500,
+            'break_child_ipc_after': 800,
+        },
+        # both: break child first
+        {
+            'break_parent_ipc_after': 800,
+            'break_child_ipc_after': 500,
+        },
+
+    ],
+    ids=[
+        'no_break',
+        'break_parent',
+        'break_child',
+        'break_both_parent_first',
+        'break_both_child_first',
+    ],
+)
+def test_ipc_channel_break_during_stream(
+    debug_mode: bool,
+    spawn_backend: str,
+    ipc_break: dict | None,
+):
+    '''
+    Ensure we can have an IPC channel break its connection during
+    streaming and it's still possible for the (simulated) user to kill
+    the actor tree using SIGINT.
+
+    We also verify the type of connection error expected in the parent
+    depending on which side of the IPC breaks first.
+
+    '''
+    if spawn_backend != 'trio':
+        if debug_mode:
+            pytest.skip('`debug_mode` only supported on `trio` spawner')
+
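+        # non-`trio` spawners should never hit the hang condition that
+        # requires the user to do ctl-c to cancel the actor tree.
+        # NOTE(review): this assignment is unconditionally overwritten by
+        # the `KeyboardInterrupt` default set right after the example
+        # module is imported below, so it currently has no effect --
+        # confirm which default is intended for non-`trio` spawners.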
+        expect_final_exc = trio.ClosedResourceError
+
+    mod = import_path(
+        examples_dir() / 'advanced_faults' / 'ipc_failure_during_stream.py',
+        root=examples_dir(),
+    )
+
+    expect_final_exc = KeyboardInterrupt
+
+    # when ONLY the child breaks we expect the parent to get a closed
+    # resource error on the next `MsgStream.receive()` and then fail out
+    # and cancel the child from there.
+    if (
+
+        # only child breaks
+        (
+            ipc_break['break_child_ipc_after']
+            and ipc_break['break_parent_ipc_after'] is False
+        )
+
+        # both break, but the child breaks first (lower `*_after` count)
+        or (
+            ipc_break['break_child_ipc_after'] is not False
+            and (
+                ipc_break['break_parent_ipc_after']
+                > ipc_break['break_child_ipc_after']
+            )
+        )
+
+    ):
+        expect_final_exc = trio.ClosedResourceError
+
+    # when the parent IPC side dies (even if the child's does as well,
+    # so long as the parent fails BEFORE the child) we expect the channel
+    # to be sent a stop msg from the child at some point which will
+    # signal the parent that the stream has been terminated.
+    # NOTE: when the parent breaks "after" the child you get this same
+    # case as well, the child breaks the IPC channel with a stop msg
+    # before any closure takes place.
+    elif (
+        # only parent breaks
+        (
+            ipc_break['break_parent_ipc_after']
+            and ipc_break['break_child_ipc_after'] is False
+        )
+
+        # both break, but the parent breaks first (lower `*_after` count)
+        or (
+            ipc_break['break_parent_ipc_after'] is not False
+            and (
+                ipc_break['break_child_ipc_after']
+                > ipc_break['break_parent_ipc_after']
+            )
+        )
+    ):
+        expect_final_exc = trio.EndOfChannel
+
+    with pytest.raises(expect_final_exc):
+        trio.run(
+            partial(
+                mod.main,
+                debug_mode=debug_mode,
+                start_method=spawn_backend,
+                **ipc_break,
+            )
+        )
+
+
+@tractor.context
+async def break_ipc_after_started(
+    ctx: tractor.Context,
+) -> None:
+    await ctx.started()
+    async with ctx.open_stream() as stream:
+        await stream.aclose()
+        await trio.sleep(0.2)
+        await ctx.chan.send(None)
+        print('child broke IPC and terminating')
+
+
+def test_stream_closed_right_after_ipc_break_and_zombie_lord_engages():
+    '''
+    Verify that if a subactor's IPC goes down just after bringing up a stream
+    the parent can trigger a SIGINT and the child will be reaped out-of-IPC by
+    the localhost process supervision machinery: aka "zombie lord".
+
+    '''
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.start_actor(
+                'ipc_breaker',
+                enable_modules=[__name__],
+            )
+
+            with trio.move_on_after(1):
+                async with (
+                    portal.open_context(
+                        break_ipc_after_started
+                    ) as (ctx, sent),
+                ):
+                    async with ctx.open_stream():
+                        await trio.sleep(0.5)
+
+                    print('parent waiting on context')
+
+            print('parent exited context')
+            raise KeyboardInterrupt
+
+    with pytest.raises(KeyboardInterrupt):
+        trio.run(main)
--- a/tests/test_advanced_streaming.py
+++ b/tests/test_advanced_streaming.py
@ -1,15 +1,20 @@
-"""
+'''
 Advanced streaming patterns using bidirectional streams and contexts.

-"""
+'''
+from collections import Counter
 import itertools
-from typing import Set, Dict, List
+import platform

 import trio
 import tractor


-_registry: Dict[str, Set[tractor.ReceiveMsgStream]] = {
+def is_win():
+    return platform.system() == 'Windows'
+
+
+_registry: dict[str, set[tractor.MsgStream]] = {
    'even': set(),
    'odd': set(),
 }
@ -71,7 +76,7 @@ async def subscribe(

 async def consumer(

-    subs: List[str],
+    subs: list[str],

 ) -> None:

@ -172,14 +177,22 @@ async def one_task_streams_and_one_handles_reqresp(


 def test_reqresp_ontopof_streaming():
-    '''Test a subactor that both streams with one task and
+    '''
+    Test a subactor that both streams with one task and
    spawns another which handles a small requests-response
    dialogue over the same bidir-stream.

    '''
    async def main():

-        with trio.move_on_after(2):
+        # flag to make sure we get at least one pong
+        got_pong: bool = False
+        timeout: int = 2
+
+        if is_win():  # smh
+            timeout = 4
+
+        with trio.move_on_after(timeout):
            async with tractor.open_nursery() as n:

                # name of this actor will be same as target func
@ -188,9 +201,6 @@ def test_reqresp_ontopof_streaming():
                    enable_modules=[__name__]
                )

-                # flat to make sure we get at least one pong
-                got_pong: bool = False
-
                async with portal.open_context(
                    one_task_streams_and_one_handles_reqresp,

@ -218,3 +228,153 @@ def test_reqresp_ontopof_streaming():
        trio.run(main)
    except trio.TooSlowError:
        pass
+
+
+async def async_gen_stream(sequence):
+    for i in sequence:
+        yield i
+        await trio.sleep(0.1)
+
+
+@tractor.context
+async def echo_ctx_stream(
+    ctx: tractor.Context,
+) -> None:
+    await ctx.started()
+
+    async with ctx.open_stream() as stream:
+        async for msg in stream:
+            await stream.send(msg)
+
+
+def test_sigint_both_stream_types():
+    '''Verify that running a bi-directional and recv only stream
+    side-by-side will cancel correctly from SIGINT.
+
+    '''
+    timeout: float = 2
+    if is_win():  # smh
+        timeout += 1
+
+    async def main():
+        with trio.fail_after(timeout):
+            async with tractor.open_nursery() as n:
+                # name of this actor will be same as target func
+                portal = await n.start_actor(
+                    '2_way',
+                    enable_modules=[__name__]
+                )
+
+                async with portal.open_context(echo_ctx_stream) as (ctx, _):
+                    async with ctx.open_stream() as stream:
+                        async with portal.open_stream_from(
+                            async_gen_stream,
+                            sequence=list(range(1)),
+                        ) as gen_stream:
+
+                            msg = await gen_stream.receive()
+                            await stream.send(msg)
+                            resp = await stream.receive()
+                            assert resp == msg
+                            raise KeyboardInterrupt
+
+    try:
+        trio.run(main)
+        assert 0, "Didn't receive KBI!?"
+    except KeyboardInterrupt:
+        pass
+
+
+@tractor.context
+async def inf_streamer(
+    ctx: tractor.Context,
+
+) -> None:
+    '''
+    Stream increasing ints until terminated with a 'done' msg.
+
+    '''
+    await ctx.started()
+
+    async with (
+        ctx.open_stream() as stream,
+        trio.open_nursery() as n,
+    ):
+        async def bail_on_sentinel():
+            async for msg in stream:
+                if msg == 'done':
+                    await stream.aclose()
+                else:
+                    print(f'streamer received {msg}')
+
+        # start termination detector
+        n.start_soon(bail_on_sentinel)
+
+        for val in itertools.count():
+            try:
+                await stream.send(val)
+            except trio.ClosedResourceError:
+                # close out the stream gracefully
+                break
+
+    print('terminating streamer')
+
+
+def test_local_task_fanout_from_stream():
+    '''
+    Single stream with multiple local consumer tasks using the
+    ``MsgStream.subscribe()`` api.
+
+    Ensure all tasks receive all values after stream completes sending.
+
+    '''
+    consumers = 22
+
+    async def main():
+
+        counts = Counter()
+
+        async with tractor.open_nursery() as tn:
+            p = await tn.start_actor(
+                'inf_streamer',
+                enable_modules=[__name__],
+            )
+            async with (
+                p.open_context(inf_streamer) as (ctx, _),
+                ctx.open_stream() as stream,
+            ):
+
+                async def pull_and_count(name: str):
+                    # name = trio.lowlevel.current_task().name
+                    async with stream.subscribe() as recver:
+                        assert isinstance(
+                            recver,
+                            tractor.trionics.BroadcastReceiver
+                        )
+                        async for val in recver:
+                            # print(f'{name}: {val}')
+                            counts[name] += 1
+
+                        print(f'{name} bcaster ended')
+
+                    print(f'{name} completed')
+
+                with trio.fail_after(3):
+                    async with trio.open_nursery() as nurse:
+                        for i in range(consumers):
+                            nurse.start_soon(pull_and_count, i)
+
+                        await trio.sleep(0.5)
+                        print('\nterminating')
+                        await stream.send('done')
+
+            print('closed stream connection')
+
+            assert len(counts) == consumers
+            mx = max(counts.values())
+            # make sure each task received all stream values
+            assert all(val == mx for val in counts.values())
+
+            await p.cancel_actor()
+
+    trio.run(main)
--- a/tests/test_cancellation.py
+++ b/tests/test_cancellation.py
@ -1,5 +1,6 @@
 """
 Cancellation and error propagation
+
 """
 import os
 import signal
@ -7,6 +8,10 @@ import platform
 import time
 from itertools import repeat

+from exceptiongroup import (
+    BaseExceptionGroup,
+    ExceptionGroup,
+)
 import pytest
 import trio
 import tractor
@ -14,6 +19,10 @@ import tractor
 from conftest import tractor_test, no_windows


+def is_win():
+    return platform.system() == 'Windows'
+
+
 async def assert_err(delay=0):
    await trio.sleep(delay)
    assert 0
@ -51,29 +60,49 @@ def test_remote_error(arb_addr, args_err):
            arbiter_addr=arb_addr,
        ) as nursery:

+            # on a remote type error caused by bad input args
+            # this should raise directly which means we **don't** get
+            # an exception group outside the nursery since the error
+            # here and the far end task error are one and the same?
            portal = await nursery.run_in_actor(
                assert_err, name='errorer', **args
            )

            # get result(s) from main task
            try:
+                # this means the root actor will also raise a local
+                # parent task error and thus an eg will propagate out
+                # of this actor nursery.
                await portal.result()
            except tractor.RemoteActorError as err:
                assert err.type == errtype
                print("Look Maa that actor failed hard, hehh")
                raise

-    with pytest.raises(tractor.RemoteActorError) as excinfo:
-        trio.run(main)
+    # ensure boxed errors
+    if args:
+        with pytest.raises(tractor.RemoteActorError) as excinfo:
+            trio.run(main)

-    # ensure boxed error is correct
-    assert excinfo.value.type == errtype
+        assert excinfo.value.type == errtype
+
+    else:
+        # the root task will also error on the `.result()` call
+        # so we expect an error from there AND the child.
+        with pytest.raises(BaseExceptionGroup) as excinfo:
+            trio.run(main)
+
+        # ensure boxed errors
+        for exc in excinfo.value.exceptions:
+            assert exc.type == errtype


 def test_multierror(arb_addr):
-    """Verify we raise a ``trio.MultiError`` out of a nursery where
+    '''
+    Verify we raise a ``BaseExceptionGroup`` out of a nursery where
    more then one actor errors.
-    """
+
+    '''
    async def main():
        async with tractor.open_nursery(
            arbiter_addr=arb_addr,
@ -90,10 +119,10 @@ def test_multierror(arb_addr):
                print("Look Maa that first actor failed hard, hehh")
                raise

-        # here we should get a `trio.MultiError` containing exceptions
+        # here we should get a ``BaseExceptionGroup`` containing exceptions
        # from both subactors

-    with pytest.raises(trio.MultiError):
+    with pytest.raises(BaseExceptionGroup):
        trio.run(main)


@ -102,7 +131,7 @@ def test_multierror(arb_addr):
    'num_subactors', range(25, 26),
 )
 def test_multierror_fast_nursery(arb_addr, start_method, num_subactors, delay):
-    """Verify we raise a ``trio.MultiError`` out of a nursery where
+    """Verify we raise a ``BaseExceptionGroup`` out of a nursery where
    more then one actor errors and also with a delay before failure
    to test failure during an ongoing spawning.
    """
@ -118,16 +147,21 @@ def test_multierror_fast_nursery(arb_addr, start_method, num_subactors, delay):
                    delay=delay
                )

-    with pytest.raises(trio.MultiError) as exc_info:
+    # with pytest.raises(trio.MultiError) as exc_info:
+    with pytest.raises(BaseExceptionGroup) as exc_info:
        trio.run(main)

-    assert exc_info.type == tractor.MultiError
+    assert exc_info.type == ExceptionGroup
    err = exc_info.value
    exceptions = err.exceptions

    if len(exceptions) == 2:
        # sometimes oddly now there's an embedded BrokenResourceError ?
-        exceptions = exceptions[1].exceptions
+        for exc in exceptions:
+            excs = getattr(exc, 'exceptions', None)
+            if excs:
+                exceptions = excs
+                break

    assert len(exceptions) == num_subactors

@ -205,8 +239,8 @@ async def test_cancel_infinite_streamer(start_method):
    [
        # daemon actors sit idle while single task actors error out
        (1, tractor.RemoteActorError, AssertionError, (assert_err, {}), None),
-        (2, tractor.MultiError, AssertionError, (assert_err, {}), None),
-        (3, tractor.MultiError, AssertionError, (assert_err, {}), None),
+        (2, BaseExceptionGroup, AssertionError, (assert_err, {}), None),
+        (3, BaseExceptionGroup, AssertionError, (assert_err, {}), None),

        # 1 daemon actor errors out while single task actors sleep forever
        (3, tractor.RemoteActorError, AssertionError, (sleep_forever, {}),
@ -217,7 +251,7 @@ async def test_cancel_infinite_streamer(start_method):
         (do_nuthin, {}), (assert_err, {'delay': 1}, True)),
        # daemon complete quickly delay while single task
        # actors error after brief delay
-        (3, tractor.MultiError, AssertionError,
+        (3, BaseExceptionGroup, AssertionError,
         (assert_err, {'delay': 1}), (do_nuthin, {}, False)),
    ],
    ids=[
@ -284,7 +318,7 @@ async def test_some_cancels_all(num_actors_and_errs, start_method, loglevel):
        # should error here with a ``RemoteActorError`` or ``MultiError``

    except first_err as err:
-        if isinstance(err, tractor.MultiError):
+        if isinstance(err, BaseExceptionGroup):
            assert len(err.exceptions) == num_actors
            for exc in err.exceptions:
                if isinstance(exc, tractor.RemoteActorError):
@ -327,10 +361,12 @@ async def spawn_and_error(breadth, depth) -> None:

@tractor_test
 async def test_nested_multierrors(loglevel, start_method):
-    """Test that failed actor sets are wrapped in `trio.MultiError`s.
-    This test goes only 2 nurseries deep but we should eventually have tests
+    '''
+    Test that failed actor sets are wrapped in `BaseExceptionGroup`s. This
+    test goes only 2 nurseries deep but we should eventually have tests
    for arbitrary n-depth actor trees.
-    """
+
+    '''
    if start_method == 'trio':
        depth = 3
        subactor_breadth = 2
@ -354,25 +390,37 @@ async def test_nested_multierrors(loglevel, start_method):
                        breadth=subactor_breadth,
                        depth=depth,
                    )
-        except trio.MultiError as err:
+        except BaseExceptionGroup as err:
            assert len(err.exceptions) == subactor_breadth
            for subexc in err.exceptions:

                # verify first level actor errors are wrapped as remote
-                if platform.system() == 'Windows':
+                if is_win():

                    # windows is often too slow and cancellation seems
                    # to happen before an actor is spawned
                    if isinstance(subexc, trio.Cancelled):
                        continue
-                    else:
+
+                    elif isinstance(subexc, tractor.RemoteActorError):
                        # on windows it seems we can't exactly be sure wtf
                        # will happen..
                        assert subexc.type in (
                            tractor.RemoteActorError,
                            trio.Cancelled,
-                            trio.MultiError
+                            BaseExceptionGroup,
                        )
+
+                    elif isinstance(subexc, BaseExceptionGroup):
+                        for subsub in subexc.exceptions:
+
+                            if subsub in (tractor.RemoteActorError,):
+                                subsub = subsub.type
+
+                            assert type(subsub) in (
+                                trio.Cancelled,
+                                BaseExceptionGroup,
+                            )
                else:
                    assert isinstance(subexc, tractor.RemoteActorError)

@ -380,14 +428,21 @@ async def test_nested_multierrors(loglevel, start_method):
                    # XXX not sure what's up with this..
                    # on windows sometimes spawning is just too slow and
                    # we get back the (sent) cancel signal instead
-                    if platform.system() == 'Windows':
-                        assert (subexc.type is trio.MultiError) or (
-                            subexc.type is tractor.RemoteActorError)
+                    if is_win():
+                        if isinstance(subexc, tractor.RemoteActorError):
+                            assert subexc.type in (
+                                BaseExceptionGroup,
+                                tractor.RemoteActorError
+                            )
+                        else:
+                            assert isinstance(subexc, BaseExceptionGroup)
                    else:
-                        assert subexc.type is trio.MultiError
+                        assert subexc.type is ExceptionGroup
                else:
-                    assert (subexc.type is tractor.RemoteActorError) or (
-                        subexc.type is trio.Cancelled)
+                    assert subexc.type in (
+                        tractor.RemoteActorError,
+                        trio.Cancelled
+                    )


@no_windows
@ -405,7 +460,7 @@ def test_cancel_via_SIGINT(
        with trio.fail_after(2):
            async with tractor.open_nursery() as tn:
                await tn.start_actor('sucka')
-                if spawn_backend == 'mp':
+                if 'mp' in spawn_backend:
                    time.sleep(0.1)
                os.kill(pid, signal.SIGINT)
                await trio.sleep_forever()
@ -425,6 +480,9 @@ def test_cancel_via_SIGINT_other_task(
    from a seperate ``trio`` child  task.
    """
    pid = os.getpid()
+    timeout: float = 2
+    if is_win():  # smh
+        timeout += 1

    async def spawn_and_sleep_forever(task_status=trio.TASK_STATUS_IGNORED):
        async with tractor.open_nursery() as tn:
@ -438,16 +496,17 @@ def test_cancel_via_SIGINT_other_task(

    async def main():
        # should never timeout since SIGINT should cancel the current program
-        with trio.fail_after(2):
+        with trio.fail_after(timeout):
            async with trio.open_nursery() as n:
                await n.start(spawn_and_sleep_forever)
-                if spawn_backend == 'mp':
+                if 'mp' in spawn_backend:
                    time.sleep(0.1)
                os.kill(pid, signal.SIGINT)

    with pytest.raises(KeyboardInterrupt):
        trio.run(main)

+
 async def spin_for(period=3):
    "Sync sleep."
    time.sleep(period)
@ -486,3 +545,57 @@ def test_cancel_while_childs_child_in_sync_sleep(

    with pytest.raises(AssertionError):
        trio.run(main)
+
+
+def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon(
+    start_method,
+):
+    '''
+    This is a very subtle test which demonstrates how cancellation
+    during process collection can result in non-optimal teardown
+    performance on daemon actors. The fix for this test was to handle
+    ``trio.Cancelled`` specially in the spawn task waiting in
+    `proc.wait()` such that ``Portal.cancel_actor()`` is called before
+    executing the "hard reap" sequence (which has an up to 3 second
+    delay currently).
+
+    In other words, if we can cancel the actor using a graceful remote
+    cancellation, and it's faster, we might as well do it.
+
+    '''
+    # seconds the background task sleeps before raising the KBI
+    kbi_delay = 0.5
+    # max wall-clock seconds ``main()`` may take before the test fails
+    timeout: float = 2.9
+
+    if is_win():  # smh
+        timeout += 1
+
+    async def main():
+        start = time.time()
+        try:
+            async with trio.open_nursery() as nurse:
+                async with tractor.open_nursery() as tn:
+                    p = await tn.start_actor(
+                        'fast_boi',
+                        enable_modules=[__name__],
+                    )
+
+                    async def delayed_kbi():
+                        await trio.sleep(kbi_delay)
+                        print(f'RAISING KBI after {kbi_delay} s')
+                        raise KeyboardInterrupt
+
+                    # start task which raises a kbi **after**
+                    # the actor nursery ``__aexit__()`` has
+                    # been run.
+                    nurse.start_soon(delayed_kbi)
+
+                    await p.run(do_nuthin)
+        finally:
+            # runs on every exit path; raising here replaces whatever
+            # exception was propagating, so a slow teardown makes the
+            # ``pytest.raises(KeyboardInterrupt)`` below fail.
+            duration = time.time() - start
+            if duration > timeout:
+                raise trio.TooSlowError(
+                    'daemon cancel was slower then necessary..'
+                )
+
+    with pytest.raises(KeyboardInterrupt):
+        trio.run(main)
--- a/tests/test_child_manages_service_nursery.py
+++ b/tests/test_child_manages_service_nursery.py
@ -0,0 +1,173 @@
+'''
+Test a service style daemon that maintains a nursery for spawning
+"remote async tasks" including both spawning other long living
+sub-sub-actor daemons.
+
+'''
+from typing import Optional
+import asyncio
+from contextlib import asynccontextmanager as acm
+
+import pytest
+import trio
+from trio_typing import TaskStatus
+import tractor
+from tractor import RemoteActorError
+from async_generator import aclosing
+
+
+async def aio_streamer(
+    from_trio: asyncio.Queue,
+    to_trio: trio.abc.SendChannel,
+) -> trio.abc.ReceiveChannel:
+    # ``asyncio``-side producer: pushes ``None`` once, then cycles the
+    # ints 0-9 into ``to_trio`` forever, sleeping 10ms between sends.
+    # NOTE(review): the loop never exits so nothing is ever returned;
+    # the return annotation looks inaccurate - confirm.
+
+    # required first msg to sync caller
+    to_trio.send_nowait(None)
+
+    from itertools import cycle
+    for i in cycle(range(10)):
+        to_trio.send_nowait(i)
+        await asyncio.sleep(0.01)
+
+
+async def trio_streamer():
+    from itertools import cycle
+    for i in cycle(range(10)):
+        yield i
+        await trio.sleep(0.01)
+
+
+async def trio_sleep_and_err(delay: float = 0.5):
+    '''
+    Sleep for ``delay`` seconds then deliberately raise a ``NameError``
+    by calling an undefined name.
+
+    '''
+    await trio.sleep(delay)
+    # name error
+    doggy()  # noqa
+
+
+_cached_stream: Optional[
+    trio.abc.ReceiveChannel
+] = None
+
+
+@acm
+async def wrapper_mngr(
+):
+    '''
+    Async context manager yielding a stream of ints.
+
+    In an "infected asyncio" actor the first entrant opens a channel
+    from ``aio_streamer`` and caches it in the module global
+    ``_cached_stream``; later entrants get a ``broadcast_receiver``
+    wrapping that cached stream. Otherwise a ``trio_streamer()``
+    generator is opened (and also stored in the global).
+
+    '''
+    from tractor.trionics import broadcast_receiver
+    global _cached_stream
+    in_aio = tractor.current_actor().is_infected_aio()
+
+    if in_aio:
+        if _cached_stream:
+
+            from_aio = _cached_stream
+
+            # if we already have a cached feed deliver a rx side clone
+            # to consumer
+            async with broadcast_receiver(from_aio, 6) as from_aio:
+                yield from_aio
+                return
+        else:
+            async with tractor.to_asyncio.open_channel_from(
+                aio_streamer,
+            ) as (first, from_aio):
+                # ``aio_streamer`` sends ``None`` as its first msg
+                assert not first
+
+                # cache it so next task uses broadcast receiver
+                _cached_stream = from_aio
+
+                yield from_aio
+    else:
+        async with aclosing(trio_streamer()) as stream:
+            # cache it so next task uses broadcast receiver
+            # NOTE(review): the cached value is only consulted in the
+            # ``in_aio`` branch above, so each entrant on this path
+            # opens its own generator - confirm that is intended.
+            _cached_stream = stream
+            yield stream
+
+
+# actor-local "service nursery"; set by ``open_actor_local_nursery()``
+# and read by ``trio_main()``.
+_nursery: Optional[trio.Nursery] = None
+
+
+@tractor.context
+async def trio_main(
+    ctx: tractor.Context,
+):
+    '''
+    Context endpoint which schedules two stream-consumer tasks and one
+    erroring task into the module-global service nursery, then blocks
+    forever.
+
+    '''
+    # sync
+    await ctx.started()
+
+    # grab the "service nursery" previously stashed as "actor local"
+    # state (aka a Python global); it must already be set.
+    global _nursery
+    n = _nursery
+    assert n
+
+    async def consume_stream():
+        async with wrapper_mngr() as stream:
+            async for msg in stream:
+                print(msg)
+
+    # run 2 tasks to ensure broadcaster chan use
+    n.start_soon(consume_stream)
+    n.start_soon(consume_stream)
+
+    # this task raises a ``NameError`` after its default delay
+    n.start_soon(trio_sleep_and_err)
+
+    await trio.sleep_forever()
+
+
+@tractor.context
+async def open_actor_local_nursery(
+    ctx: tractor.Context,
+):
+    '''
+    Context endpoint which opens a ``trio`` nursery, publishes it via
+    the module global ``_nursery``, signals started, then sleeps 10
+    seconds before cancelling the nursery's scope.
+
+    '''
+    global _nursery
+    async with trio.open_nursery() as n:
+        _nursery = n
+        await ctx.started()
+        await trio.sleep(10)
+        # await trio.sleep(1)
+
+        # XXX: this causes the hang since
+        # the caller does not unblock from its own
+        # ``trio.sleep_forever()``.
+
+        # TODO: we need to test a simple ctx task starting remote tasks
+        # that error and then blocking on a ``Nursery.start()`` which
+        # never yields back.. aka a scenario where the
+        # ``tractor.context`` task IS NOT in the service n's cancel
+        # scope.
+        n.cancel_scope.cancel()
+
+
+@pytest.mark.parametrize(
+    'asyncio_mode',
+    [True, False],
+    ids='asyncio_mode={}'.format,
+)
+def test_actor_managed_trio_nursery_task_error_cancels_aio(
+    asyncio_mode: bool,
+    arb_addr
+):
+    '''
+    Verify that a ``trio`` nursery created managed in a child actor
+    correctly relays errors to the parent actor when one of its spawned
+    tasks errors even when running in infected asyncio mode and using
+    broadcast receivers for multi-task-per-actor subscription.
+
+    '''
+    async def main():
+
+        # cancel the nursery shortly after boot
+        async with tractor.open_nursery() as n:
+            p = await n.start_actor(
+                'nursery_mngr',
+                infect_asyncio=asyncio_mode,
+                enable_modules=[__name__],
+            )
+            # order matters: the first context creates the global
+            # nursery that ``trio_main`` asserts is already set.
+            # (both entries bind the same ``ctx``/``first`` names; the
+            # values are unused here.)
+            async with (
+                p.open_context(open_actor_local_nursery) as (ctx, first),
+                p.open_context(trio_main) as (ctx, first),
+            ):
+                await trio.sleep_forever()
+
+    with pytest.raises(RemoteActorError) as excinfo:
+        trio.run(main)
+
+    # verify boxed error
+    err = excinfo.value
+    # ``err.type`` is instantiated here to check the boxed error class
+    assert isinstance(err.type(), NameError)
--- a/tests/test_clustering.py
+++ b/tests/test_clustering.py
@ -0,0 +1,84 @@
+import itertools
+
+import pytest
+import trio
+import tractor
+from tractor import open_actor_cluster
+from tractor.trionics import gather_contexts
+
+from conftest import tractor_test
+
+
+MESSAGE = 'tractoring at full speed'
+
+
+def test_empty_mngrs_input_raises() -> None:
+    '''
+    Passing a generator expression as ``mngrs`` to ``gather_contexts()``
+    is expected to surface a ``ValueError`` from ``trio.run()``.
+
+    '''
+
+    async def main():
+        with trio.fail_after(1):
+            async with (
+                open_actor_cluster(
+                    modules=[__name__],
+
+                    # NOTE: ensure we can passthrough runtime opts
+                    loglevel='info',
+                    # debug_mode=True,
+
+                ) as portals,
+
+                gather_contexts(
+                    # NOTE: it's the use of inline-generator syntax
+                    # here that causes the empty input.
+                    mngrs=(
+                        p.open_context(worker) for p in portals.values()
+                    ),
+                ),
+            ):
+                # the body must never be entered
+                assert 0
+
+    with pytest.raises(ValueError):
+        trio.run(main)
+
+
+@tractor.context
+async def worker(
+    ctx: tractor.Context,
+
+) -> None:
+    '''
+    Context endpoint which signals started, opens a stream and asserts
+    that every received msg equals the module-level ``MESSAGE``.
+
+    '''
+
+    await ctx.started()
+
+    async with ctx.open_stream(
+        backpressure=True,
+    ) as stream:
+
+        # TODO: this with the below assert causes a hang bug?
+        # with trio.move_on_after(1):
+
+        async for msg in stream:
+            # do something with msg
+            print(msg)
+            assert msg == MESSAGE
+
+        # TODO: does this ever cause a hang
+        # assert 0
+
+
+@tractor_test
+async def test_streaming_to_actor_cluster() -> None:
+    '''
+    Open an actor cluster, a ``worker`` context per portal and a stream
+    per context, then send ``MESSAGE`` round-robin to the streams for
+    up to one second.
+
+    '''
+
+    async with (
+        open_actor_cluster(modules=[__name__]) as portals,
+
+        gather_contexts(
+            mngrs=[p.open_context(worker) for p in portals.values()],
+        ) as contexts,
+
+        gather_contexts(
+            # each entry of ``contexts`` is indexed; item 0 is the ctx
+            mngrs=[ctx[0].open_stream() for ctx in contexts],
+        ) as streams,
+
+    ):
+        # ``itertools.cycle`` never ends; the timeout scope is what
+        # terminates the send loop.
+        with trio.move_on_after(1):
+            for stream in itertools.cycle(streams):
+                await stream.send(MESSAGE)
--- a/tests/test_context_stream_semantics.py
+++ b/tests/test_context_stream_semantics.py
@ -0,0 +1,798 @@
+'''
+``async with ():`` inlined context-stream cancellation testing.
+
+Verify that we raise errors when streams are opened prior to sync-opening
+a ``tractor.Context`` beforehand.
+
+'''
+from contextlib import asynccontextmanager as acm
+from itertools import count
+import platform
+from typing import Optional
+
+import pytest
+import trio
+import tractor
+from tractor._exceptions import StreamOverrun
+
+from conftest import tractor_test
+
+# ``Context`` semantics are as follows,
+#  ------------------------------------
+
+# - standard setup/teardown:
+#   ``Portal.open_context()`` starts a new
+#   remote task context in another actor. The target actor's task must
+#   call ``Context.started()`` to unblock this entry on the caller side.
+#   the callee task executes until complete and returns a final value
+#   which is delivered to the caller side and retrieved via
+#   ``Context.result()``.
+
+# - cancel termination:
+#   context can be cancelled on either side where either end's task can
+#   call ``Context.cancel()`` which raises a local ``trio.Cancelled``
+#   and sends a task cancel request to the remote task which in turn
+#   raises a ``trio.Cancelled`` in that scope, catches it, and re-raises
+#   as ``ContextCancelled``. This is then caught by
+#   ``Portal.open_context()``'s exit and we get a graceful termination
+#   of the linked tasks.
+
+# - error termination:
+#   error is caught after all context-cancel-scope tasks are cancelled
+#   via regular ``trio`` cancel scope semantics, error is sent to other
+#   side and unpacked as a `RemoteActorError`.
+
+
+# ``Context.open_stream() as stream: MsgStream:`` msg semantics are:
+#  -----------------------------------------------------------------
+
+# - either side can ``.send()`` which emits a 'yield' msg and delivers
+#   a value to a ``MsgStream.receive()`` call.
+
+# - stream closure: one end relays a 'stop' message which terminates an
+#   ongoing ``MsgStream`` iteration.
+
+# - cancel/error termination: as per the context semantics above but
+#   with implicit stream closure on the cancelling end.
+
+
+_state: bool = False
+
+
+@tractor.context
+async def too_many_starteds(
+    ctx: tractor.Context,
+) -> None:
+    '''
+    Call ``Context.started()`` more than once (an error).
+
+    '''
+    await ctx.started()
+    try:
+        await ctx.started()
+    except RuntimeError:
+        # re-raised unchanged; the handler only makes the expected
+        # error type explicit.
+        raise
+
+
+@tractor.context
+async def not_started_but_stream_opened(
+    ctx: tractor.Context,
+) -> None:
+    '''
+    Enter ``Context.open_stream()`` without calling ``.started()``.
+
+    '''
+    try:
+        async with ctx.open_stream():
+            # the stream block must never be entered
+            assert 0
+    except RuntimeError:
+        # re-raised unchanged; the handler only makes the expected
+        # error type explicit.
+        raise
+
+
+@pytest.mark.parametrize(
+    'target',
+    [too_many_starteds, not_started_but_stream_opened],
+    ids='misuse_type={}'.format,
+)
+def test_started_misuse(target):
+    '''
+    Run each misbehaving context endpoint in a subactor and expect the
+    failure to arrive in the parent as a ``tractor.RemoteActorError``.
+
+    '''
+
+    async def main():
+        async with tractor.open_nursery() as n:
+            # the actor is named after the endpoint under test
+            portal = await n.start_actor(
+                target.__name__,
+                enable_modules=[__name__],
+            )
+
+            async with portal.open_context(target) as (ctx, sent):
+                await trio.sleep(1)
+
+    with pytest.raises(tractor.RemoteActorError):
+        trio.run(main)
+
+
+@tractor.context
+async def simple_setup_teardown(
+
+    ctx: tractor.Context,
+    data: int,
+    block_forever: bool = False,
+
+) -> None:
+    '''
+    Set the module global ``_state`` to ``True``, report
+    ``data + 1`` via ``Context.started()`` and then either block
+    forever or return ``'yo'``; ``_state`` is reset to ``False`` on
+    the way out in both cases.
+
+    '''
+    # NOTE(review): annotated ``-> None`` yet one path returns a
+    # ``str`` - confirm the intended annotation.
+
+    # startup phase
+    global _state
+    _state = True
+
+    # signal to parent that we're up
+    await ctx.started(data + 1)
+
+    try:
+        if block_forever:
+            # block until cancelled
+            await trio.sleep_forever()
+        else:
+            return 'yo'
+    finally:
+        _state = False
+
+
+async def assert_state(value: bool):
+    global _state
+    assert _state == value
+
+
+@pytest.mark.parametrize(
+    'error_parent',
+    [False, ValueError, KeyboardInterrupt],
+)
+@pytest.mark.parametrize(
+    'callee_blocks_forever',
+    [False, True],
+    ids=lambda item: f'callee_blocks_forever={item}'
+)
+@pytest.mark.parametrize(
+    'pointlessly_open_stream',
+    [False, True],
+    ids=lambda item: f'open_stream={item}'
+)
+def test_simple_context(
+    error_parent,
+    callee_blocks_forever,
+    pointlessly_open_stream,
+):
+    '''
+    Exercise basic context setup/teardown against
+    ``simple_setup_teardown`` across combinations of: the parent
+    raising an error (or not), the callee blocking forever (or
+    returning) and the parent opening a stream it never uses.
+
+    '''
+
+    # longer budget on windows
+    timeout = 1.5 if not platform.system() == 'Windows' else 4
+
+    async def main():
+
+        with trio.fail_after(timeout):
+            async with tractor.open_nursery() as nursery:
+
+                portal = await nursery.start_actor(
+                    'simple_context',
+                    enable_modules=[__name__],
+                )
+
+                try:
+                    async with portal.open_context(
+                        simple_setup_teardown,
+                        data=10,
+                        block_forever=callee_blocks_forever,
+                    ) as (ctx, sent):
+
+                        # the callee reports ``data + 1``
+                        assert sent == 11
+
+                        if callee_blocks_forever:
+                            await portal.run(assert_state, value=True)
+                        else:
+                            assert await ctx.result() == 'yo'
+
+                        if not error_parent:
+                            await ctx.cancel()
+
+                        if pointlessly_open_stream:
+                            async with ctx.open_stream():
+                                if error_parent:
+                                    raise error_parent
+
+                                if callee_blocks_forever:
+                                    await ctx.cancel()
+                                else:
+                                    # in this case the stream will send a
+                                    # 'stop' msg to the far end which needs
+                                    # to be ignored
+                                    pass
+                        else:
+                            if error_parent:
+                                raise error_parent
+
+                finally:
+
+                    # after cancellation
+                    if not error_parent:
+                        await portal.run(assert_state, value=False)
+
+                    # shut down daemon
+                    await portal.cancel_actor()
+
+    if error_parent:
+        try:
+            trio.run(main)
+        except error_parent:
+            pass
+        # NOTE(review): other tests in this change set were migrated
+        # from ``MultiError`` to ``BaseExceptionGroup``; this handler
+        # probably needs the same treatment - confirm.
+        except trio.MultiError as me:
+            # XXX: on windows it seems we may have to expect the group error
+            from tractor._exceptions import is_multi_cancelled
+            assert is_multi_cancelled(me)
+    else:
+        trio.run(main)
+
+
+# basic stream terminations:
+# - callee context closes without using stream
+# - caller context closes without using stream
+# - caller context calls `Context.cancel()` while streaming
+#   is ongoing resulting in callee being cancelled
+# - callee calls `Context.cancel()` while streaming and caller
+#   sees stream terminated in `RemoteActorError`
+
+# TODO: future possible features
+# - restart request: far end raises `ContextRestart`
+
+
+@tractor.context
+async def close_ctx_immediately(
+
+    ctx: tractor.Context,
+
+) -> None:
+
+    await ctx.started()
+    global _state
+
+    async with ctx.open_stream():
+        pass
+
+
+@tractor_test
+async def test_callee_closes_ctx_after_stream_open():
+    'callee context closes without using stream'
+
+    async with tractor.open_nursery() as n:
+
+        portal = await n.start_actor(
+            'fast_stream_closer',
+            enable_modules=[__name__],
+        )
+
+        with trio.fail_after(2):
+            async with portal.open_context(
+                close_ctx_immediately,
+
+                # flag to avoid waiting the final result
+                # cancel_on_exit=True,
+
+            ) as (ctx, sent):
+
+                # the callee calls ``.started()`` with no value
+                assert sent is None
+
+                with trio.fail_after(0.5):
+                    async with ctx.open_stream() as stream:
+
+                        # should fall through since ``StopAsyncIteration``
+                        # should be raised through translation of
+                        # a ``trio.EndOfChannel`` by
+                        # ``trio.abc.ReceiveChannel.__anext__()``
+                        async for _ in stream:
+                            assert 0
+                        else:
+
+                            # verify stream is now closed
+                            # NOTE(review): a ``receive()`` that returns
+                            # normally also passes here - confirm.
+                            try:
+                                await stream.receive()
+                            except trio.EndOfChannel:
+                                pass
+
+                # TODO: should be just raise the closed resource err
+                # directly here to enforce not allowing a re-open
+                # of a stream to the context (at least until a time of
+                # if/when we decide that's a good idea?)
+                try:
+                    with trio.fail_after(0.5):
+                        async with ctx.open_stream() as stream:
+                            pass
+                except trio.ClosedResourceError:
+                    pass
+
+        await portal.cancel_actor()
+
+
+@tractor.context
+async def expect_cancelled(
+
+    ctx: tractor.Context,
+
+) -> None:
+    '''
+    Echo-server endpoint which expects to be torn down by
+    cancellation: ``_state`` is ``True`` while running and is reset to
+    ``False`` only when a ``trio.Cancelled`` is caught.
+
+    '''
+    global _state
+    _state = True
+
+    await ctx.started()
+
+    try:
+        async with ctx.open_stream() as stream:
+            async for msg in stream:
+                await stream.send(msg)  # echo server
+
+    except trio.Cancelled:
+        # expected case
+        _state = False
+        raise
+
+    else:
+        assert 0, "Wasn't cancelled!?"
+
+
+@pytest.mark.parametrize(
+    'use_ctx_cancel_method',
+    [False, True],
+)
+@tractor_test
+async def test_caller_closes_ctx_after_callee_opens_stream(
+    use_ctx_cancel_method: bool,
+):
+    'caller context closes without using stream'
+
+    async with tractor.open_nursery() as n:
+
+        portal = await n.start_actor(
+            'ctx_cancelled',
+            enable_modules=[__name__],
+        )
+
+        async with portal.open_context(
+            expect_cancelled,
+        ) as (ctx, sent):
+            await portal.run(assert_state, value=True)
+
+            assert sent is None
+
+            # call cancel explicitly
+            if use_ctx_cancel_method:
+
+                await ctx.cancel()
+
+                try:
+                    async with ctx.open_stream() as stream:
+                        async for msg in stream:
+                            pass
+
+                except tractor.ContextCancelled:
+                    raise  # XXX: must be propagated to __aexit__
+
+                else:
+                    assert 0, "Should have context cancelled?"
+
+                # channel should still be up
+                assert portal.channel.connected()
+
+                # ctx is closed here
+                await portal.run(assert_state, value=False)
+
+            else:
+                try:
+                    with trio.fail_after(0.2):
+                        await ctx.result()
+                        assert 0, "Callee should have blocked!?"
+                except trio.TooSlowError:
+                    await ctx.cancel()
+        try:
+            async with ctx.open_stream() as stream:
+                async for msg in stream:
+                    pass
+        except tractor.ContextCancelled:
+            pass
+        else:
+            assert 0, "Should have received closed resource error?"
+
+        # ctx is closed here
+        await portal.run(assert_state, value=False)
+
+        # channel should not have been destroyed yet, only the
+        # inter-actor-task context
+        assert portal.channel.connected()
+
+        # teardown the actor
+        await portal.cancel_actor()
+
+
+@tractor_test
+async def test_multitask_caller_cancels_from_nonroot_task():
+    '''
+    Cancel a context from a child ``trio`` task (not the task which
+    opened it) while the opening task is iterating the stream.
+
+    '''
+
+    async with tractor.open_nursery() as n:
+
+        portal = await n.start_actor(
+            'ctx_cancelled',
+            enable_modules=[__name__],
+        )
+
+        async with portal.open_context(
+            expect_cancelled,
+        ) as (ctx, sent):
+
+            await portal.run(assert_state, value=True)
+            assert sent is None
+
+            async with ctx.open_stream() as stream:
+
+                async def send_msg_then_cancel():
+                    await stream.send('yo')
+                    await portal.run(assert_state, value=True)
+                    await ctx.cancel()
+                    await portal.run(assert_state, value=False)
+
+                # NOTE: this rebinds ``n``, shadowing the actor nursery
+                # opened above for the rest of the function.
+                async with trio.open_nursery() as n:
+                    n.start_soon(send_msg_then_cancel)
+
+                    try:
+                        # the callee echoes back whatever it was sent
+                        async for msg in stream:
+                            assert msg == 'yo'
+
+                    except tractor.ContextCancelled:
+                        raise  # XXX: must be propagated to __aexit__
+
+                # channel should still be up
+                assert portal.channel.connected()
+
+                # ctx is closed here
+                await portal.run(assert_state, value=False)
+
+        # channel should not have been destroyed yet, only the
+        # inter-actor-task context
+        assert portal.channel.connected()
+
+        # teardown the actor
+        await portal.cancel_actor()
+
+
+@tractor.context
+async def cancel_self(
+
+    ctx: tractor.Context,
+
+) -> None:
+    '''
+    Context endpoint which cancels its own context without ever
+    calling ``Context.started()``, then checks that opening a stream
+    raises ``ContextCancelled`` and that the next checkpoint raises
+    ``trio.Cancelled``.
+
+    '''
+    global _state
+    _state = True
+
+    await ctx.cancel()
+
+    # should inline raise immediately
+    try:
+        async with ctx.open_stream():
+            pass
+    except tractor.ContextCancelled:
+        # suppress for now so we can do checkpoint tests below
+        pass
+    else:
+        raise RuntimeError('Context didnt cancel itself?!')
+
+    # check a real ``trio.Cancelled`` is raised on a checkpoint
+    try:
+        with trio.fail_after(0.1):
+            await trio.sleep_forever()
+    except trio.Cancelled:
+        raise
+
+    except trio.TooSlowError:
+        # should never get here
+        assert 0
+
+
+@tractor_test
+async def test_callee_cancels_before_started():
+    '''
+    Callee calls `Context.cancel()` while streaming and caller
+    sees stream terminated in `ContextCancelled`.
+
+    '''
+    async with tractor.open_nursery() as n:
+
+        portal = await n.start_actor(
+            'cancels_self',
+            enable_modules=[__name__],
+        )
+        try:
+
+            async with portal.open_context(
+                cancel_self,
+            ) as (ctx, sent):
+                async with ctx.open_stream():
+
+                    await trio.sleep_forever()
+
+        # raises a special cancel signal
+        except tractor.ContextCancelled as ce:
+            ce.type == trio.Cancelled
+
+            # the traceback should be informative
+            assert 'cancelled itself' in ce.msgdata['tb_str']
+
+        # teardown the actor
+        await portal.cancel_actor()
+
+
+@tractor.context
+async def never_open_stream(
+
+    ctx:  tractor.Context,
+
+) -> None:
+    '''
+    Context which never opens a stream and blocks.
+
+    '''
+    await ctx.started()
+    # block until cancelled from outside
+    await trio.sleep_forever()
+
+
+@tractor.context
+async def keep_sending_from_callee(
+
+    ctx:  tractor.Context,
+    msg_buffer_size: Optional[int] = None,
+
+) -> None:
+    '''
+    Send endlessly on the callee stream.
+
+    '''
+    await ctx.started()
+    async with ctx.open_stream(
+        msg_buffer_size=msg_buffer_size,
+    ) as stream:
+        # ``count()`` is unbounded: send 0, 1, 2, .. with a 10ms pause
+        # after each msg.
+        for msg in count():
+            print(f'callee sending {msg}')
+            await stream.send(msg)
+            await trio.sleep(0.01)
+
+
+@pytest.mark.parametrize(
+    'overrun_by',
+    [
+        ('caller', 1, never_open_stream),
+        ('cancel_caller_during_overrun', 1, never_open_stream),
+        ('callee', 0, keep_sending_from_callee),
+    ],
+    ids='overrun_condition={}'.format,
+)
+def test_one_end_stream_not_opened(overrun_by):
+    '''
+    This should exemplify the bug from:
+    https://github.com/goodboy/tractor/issues/265
+
+    '''
+    overrunner, buf_size_increase, entrypoint = overrun_by
+    from tractor._runtime import Actor
+    buf_size = buf_size_increase + Actor.msg_buffer_size
+
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.start_actor(
+                entrypoint.__name__,
+                enable_modules=[__name__],
+            )
+
+            async with portal.open_context(
+                entrypoint,
+            ) as (ctx, sent):
+                assert sent is None
+
+                if 'caller' in overrunner:
+
+                    async with ctx.open_stream() as stream:
+                        for i in range(buf_size):
+                            print(f'sending {i}')
+                            await stream.send(i)
+
+                        if 'cancel' in overrunner:
+                            # without this we block waiting on the child side
+                            await ctx.cancel()
+
+                        else:
+                            # expect overrun error to be relayed back
+                            # and this sleep interrupted
+                            await trio.sleep_forever()
+
+                else:
+                    # callee overruns caller case so we do nothing here
+                    await trio.sleep_forever()
+
+            await portal.cancel_actor()
+
+    # 2 overrun cases and the no overrun case (which pushes right up to
+    # the msg limit)
+    if overrunner == 'caller' or 'cance' in overrunner:
+        with pytest.raises(tractor.RemoteActorError) as excinfo:
+            trio.run(main)
+
+        assert excinfo.value.type == StreamOverrun
+
+    elif overrunner == 'callee':
+        with pytest.raises(tractor.RemoteActorError) as excinfo:
+            trio.run(main)
+
+        # TODO: embedded remote errors so that we can verify the source
+        # error? the callee delivers an error which is an overrun
+        # wrapped in a remote actor error.
+        assert excinfo.value.type == tractor.RemoteActorError
+
+    else:
+        trio.run(main)
+
+
+@tractor.context
+async def echo_back_sequence(
+
+    ctx:  tractor.Context,
+    seq: list[int],
+    msg_buffer_size: Optional[int] = None,
+
+) -> None:
+    '''
+    For 3 rounds: receive msgs from the stream until the collected
+    batch equals ``seq``, then send the batch back one msg at a time.
+    Returns ``'yo'`` once all rounds are done.
+
+    '''
+    # NOTE(review): annotated ``-> None`` yet a ``str`` is returned -
+    # confirm the intended annotation.
+    await ctx.started()
+    async with ctx.open_stream(
+        msg_buffer_size=msg_buffer_size,
+    ) as stream:
+
+        seq = list(seq)  # bleh, `msgpack`...
+        count = 0
+        while count < 3:
+            batch = []
+            async for msg in stream:
+                batch.append(msg)
+                if batch == seq:
+                    break
+
+            for msg in batch:
+                print(f'callee sending {msg}')
+                await stream.send(msg)
+
+            count += 1
+
+        return 'yo'
+
+
+def test_stream_backpressure():
+    '''
+    Demonstrate small overruns of each task back and forth
+    on a stream not raising any errors by default.
+
+    '''
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.start_actor(
+                'callee_sends_forever',
+                enable_modules=[__name__],
+            )
+            seq = list(range(3))
+            async with portal.open_context(
+                echo_back_sequence,
+                seq=seq,
+                msg_buffer_size=1,
+            ) as (ctx, sent):
+                assert sent is None
+
+                async with ctx.open_stream(msg_buffer_size=1) as stream:
+                    count = 0
+                    while count < 3:
+                        for msg in seq:
+                            print(f'caller sending {msg}')
+                            await stream.send(msg)
+                            await trio.sleep(0.1)
+
+                        batch = []
+                        async for msg in stream:
+                            batch.append(msg)
+                            if batch == seq:
+                                break
+
+                        count += 1
+
+            # here the context should return
+            assert await ctx.result() == 'yo'
+
+            # cancel the daemon
+            await portal.cancel_actor()
+
+    trio.run(main)
+
+
+@tractor.context
+async def sleep_forever(
+    ctx: tractor.Context,
+) -> None:
+    await ctx.started()
+    async with ctx.open_stream():
+        await trio.sleep_forever()
+
+
+@acm
+async def attach_to_sleep_forever():
+    '''
+    Cancel a context **before** any underlying error is raised in order
+    to trigger a local reception of a ``ContextCancelled`` which **should not**
+    be re-raised in the local surrounding ``Context`` *iff* the cancel was
+    requested by **this** side of the context.
+
+    '''
+    async with tractor.wait_for_actor('sleeper') as p2:
+        async with (
+            p2.open_context(sleep_forever) as (peer_ctx, first),
+            peer_ctx.open_stream(),
+        ):
+            try:
+                yield
+            finally:
+                # XXX: previously this would trigger local
+                # ``ContextCancelled`` to be received and raised in the
+                # local context overriding any local error due to
+                # logic inside ``_invoke()`` which checked for
+                # an error set on ``Context._error`` and raised it
+                # under a cancellation scenario.
+
+                # The problem is you can have a remote cancellation
+                # that is part of a local error and we shouldn't raise
+                # ``ContextCancelled`` **iff** we weren't the side of
+                # the context to initiate it, i.e.
+                # ``Context._cancel_called`` should **NOT** have been
+                # set. The special logic to handle this case is now
+                # inside ``Context._may_raise_from_remote_msg()`` XD
+                await peer_ctx.cancel()
+
+
+@tractor.context
+async def error_before_started(
+    ctx: tractor.Context,
+) -> None:
+    '''
+    This simulates exactly an original bug discovered in:
+    https://github.com/pikers/piker/issues/244
+
+    '''
+    async with attach_to_sleep_forever():
+        # send an unserializable type which should raise a type error
+        # here and **NOT BE SWALLOWED** by the surrounding acm!!?!
+        await ctx.started(object())
+
+
+def test_do_not_swallow_error_before_started_by_remote_contextcancelled():
+    '''
+    Verify that an error raised in a remote context which itself opens another
+    remote context, which it cancels, does not override the original error that
+    caused the cancellation of the secondary context.
+
+    '''
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.start_actor(
+                'errorer',
+                enable_modules=[__name__],
+            )
+            await n.start_actor(
+                'sleeper',
+                enable_modules=[__name__],
+            )
+
+            async with (
+                portal.open_context(
+                    error_before_started
+                ) as (ctx, sent),
+            ):
+                await trio.sleep_forever()
+
+    with pytest.raises(tractor.RemoteActorError) as excinfo:
+        trio.run(main)
+
+    assert excinfo.value.type == TypeError
--- a/tests/test_debugger.py
+++ b/tests/test_debugger.py
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@ -42,7 +42,7 @@ async def test_reg_then_unreg(arb_addr):

        await trio.sleep(0.1)
        assert uid not in aportal.actor._registry
-        sockaddrs = actor._registry[uid]
+        sockaddrs = actor._registry.get(uid)
        assert not sockaddrs


@ -116,11 +116,26 @@ async def stream_from(portal):
            print(value)


+async def unpack_reg(actor_or_portal):
+    '''
+    Get and unpack a "registry" RPC request from the "arbiter" registry
+    system.
+
+    '''
+    if getattr(actor_or_portal, 'get_registry', None):
+        msg = await actor_or_portal.get_registry()
+    else:
+        msg = await actor_or_portal.run_from_ns('self', 'get_registry')
+
+    return {tuple(key.split('.')): val for key, val in msg.items()}
+
+
 async def spawn_and_check_registry(
    arb_addr: tuple,
    use_signal: bool,
    remote_arbiter: bool = False,
    with_streaming: bool = False,
+
 ) -> None:

    async with tractor.open_root_actor(
@ -134,13 +149,11 @@ async def spawn_and_check_registry(
                assert not actor.is_arbiter

            if actor.is_arbiter:
-
-                async def get_reg():
-                    return actor._registry
-
                extra = 1  # arbiter is local root actor
+                get_reg = partial(unpack_reg, actor)
+
            else:
-                get_reg = partial(portal.run_from_ns, 'self', 'get_registry')
+                get_reg = partial(unpack_reg, portal)
                extra = 2  # local root actor + remote arbiter

            # ensure current actor is registered
@ -187,13 +200,12 @@ async def spawn_and_check_registry(
                            await cancel(use_signal)

            finally:
-                with trio.CancelScope(shield=True):
-                    await trio.sleep(0.5)
+                await trio.sleep(0.5)

-                    # all subactors should have de-registered
-                    registry = await get_reg()
-                    assert len(registry) == extra
-                    assert actor.uid in registry
+                # all subactors should have de-registered
+                registry = await get_reg()
+                assert len(registry) == extra
+                assert actor.uid in registry


@pytest.mark.parametrize('use_signal', [False, True])
@ -267,7 +279,7 @@ async def close_chans_before_nursery(
    ):
        async with tractor.get_arbiter(*arb_addr) as aportal:
            try:
-                get_reg = partial(aportal.run_from_ns, 'self', 'get_registry')
+                get_reg = partial(unpack_reg, aportal)

                async with tractor.open_nursery() as tn:
                    portal1 = await tn.start_actor(
@ -277,7 +289,9 @@ async def close_chans_before_nursery(

                    # TODO: compact this back as was in last commit once
                    # 3.9+, see https://github.com/goodboy/tractor/issues/207
-                    async with portal1.open_stream_from(stream_forever) as agen1:
+                    async with portal1.open_stream_from(
+                        stream_forever
+                    ) as agen1:
                        async with portal2.open_stream_from(
                            stream_forever
                        ) as agen2:
@ -293,8 +307,9 @@ async def close_chans_before_nursery(
                                    # reliably triggered by an external SIGINT.
                                    # tractor.current_actor()._root_nursery.cancel_scope.cancel()

-                                    # XXX: THIS IS THE KEY THING that happens
-                                    # **before** exiting the actor nursery block
+                                    # XXX: THIS IS THE KEY THING that
+                                    # happens **before** exiting the
+                                    # actor nursery block

                                    # also kill off channels cuz why not
                                    await agen1.aclose()
--- a/tests/test_docs_examples.py
+++ b/tests/test_docs_examples.py
@ -1,6 +1,7 @@
-"""
+'''
 Let's make sure them docs work yah?
-"""
+
+'''
 from contextlib import contextmanager
 import itertools
 import os
@ -11,17 +12,17 @@ import shutil

 import pytest

-from conftest import repodir
-
-
-def examples_dir():
-    """Return the abspath to the examples directory.
-    """
-    return os.path.join(repodir(), 'examples')
+from conftest import (
+    examples_dir,
+)


@pytest.fixture
-def run_example_in_subproc(loglevel, testdir, arb_addr):
+def run_example_in_subproc(
+    loglevel: str,
+    testdir,
+    arb_addr: tuple[str, int],
+):

    @contextmanager
    def run(script_code):
@ -31,8 +32,8 @@ def run_example_in_subproc(loglevel, testdir, arb_addr):
            # on windows we need to create a special __main__.py which will
            # be executed with ``python -m <modulename>`` on windows..
            shutil.copyfile(
-                os.path.join(examples_dir(), '__main__.py'),
-                os.path.join(str(testdir), '__main__.py')
+                examples_dir() / '__main__.py',
+                str(testdir / '__main__.py'),
            )

            # drop the ``if __name__ == '__main__'`` guard onwards from
@ -80,11 +81,15 @@ def run_example_in_subproc(loglevel, testdir, arb_addr):
    'example_script',

    # walk yields: (dirpath, dirnames, filenames)
-    [(p[0], f) for p in os.walk(examples_dir()) for f in p[2]
+    [
+        (p[0], f) for p in os.walk(examples_dir()) for f in p[2]

        if '__' not in f
        and f[0] != '_'
-        and 'debugging' not in p[0]],
+        and 'debugging' not in p[0]
+        and 'integration' not in p[0]
+        and 'advanced_faults' not in p[0]
+    ],

    ids=lambda t: t[1],
 )
@ -112,9 +117,19 @@ def test_example(run_example_in_subproc, example_script):
            # print(f'STDOUT: {out}')

            # if we get some gnarly output let's aggregate and raise
-            errmsg = err.decode()
-            errlines = errmsg.splitlines()
-            if err and 'Error' in errlines[-1]:
-                raise Exception(errmsg)
+            if err:
+                errmsg = err.decode()
+                errlines = errmsg.splitlines()
+                last_error = errlines[-1]
+                if (
+                    'Error' in last_error
+
+                    # XXX: currently we print this to console, but maybe
+                    # shouldn't eventually once we figure out what's
+                    # a better way to be explicit about aio side
+                    # cancels?
+                    and 'asyncio.exceptions.CancelledError' not in last_error
+                ):
+                    raise Exception(errmsg)

            assert proc.returncode == 0
--- a/tests/test_infected_asyncio.py
+++ b/tests/test_infected_asyncio.py
@ -0,0 +1,564 @@
+'''
+The hipster way to force SC onto the stdlib's "async": 'infection mode'.
+
+'''
+from typing import Optional, Iterable, Union
+import asyncio
+import builtins
+import itertools
+import importlib
+
+from exceptiongroup import BaseExceptionGroup
+import pytest
+import trio
+import tractor
+from tractor import (
+    to_asyncio,
+    RemoteActorError,
+)
+from tractor.trionics import BroadcastReceiver
+
+
+async def sleep_and_err(
+    sleep_for: float = 0.1,
+
+    # just signature placeholders for compat with
+    # ``to_asyncio.open_channel_from()``
+    to_trio: Optional[trio.MemorySendChannel] = None,
+    from_trio: Optional[asyncio.Queue] = None,
+
+):
+    if to_trio:
+        to_trio.send_nowait('start')
+
+    await asyncio.sleep(sleep_for)
+    assert 0
+
+
+async def sleep_forever():
+    await asyncio.sleep(float('inf'))
+
+
+async def trio_cancels_single_aio_task():
+
+    # spawn an ``asyncio`` task to run a func and return result
+    with trio.move_on_after(.2):
+        await tractor.to_asyncio.run_task(sleep_forever)
+
+
+def test_trio_cancels_aio_on_actor_side(arb_addr):
+    '''
+    Spawn an infected actor that is cancelled by the ``trio`` side
+    task using std cancel scope apis.
+
+    '''
+    async def main():
+        async with tractor.open_nursery(
+            arbiter_addr=arb_addr
+        ) as n:
+            await n.run_in_actor(
+                trio_cancels_single_aio_task,
+                infect_asyncio=True,
+            )
+
+    trio.run(main)
+
+
+async def asyncio_actor(
+
+    target: str,
+    expect_err: Optional[Exception] = None
+
+) -> None:
+
+    assert tractor.current_actor().is_infected_aio()
+    target = globals()[target]
+
+    if '.' in expect_err:
+        modpath, _, name = expect_err.rpartition('.')
+        mod = importlib.import_module(modpath)
+        error_type = getattr(mod, name)
+
+    else:  # toplevel builtin error type
+        error_type = builtins.__dict__.get(expect_err)
+
+    try:
+        # spawn an ``asyncio`` task to run a func and return result
+        await tractor.to_asyncio.run_task(target)
+
+    except BaseException as err:
+        if expect_err:
+            assert isinstance(err, error_type)
+
+        raise
+
+
+def test_aio_simple_error(arb_addr):
+    '''
+    Verify a simple remote asyncio error propagates back through trio
+    to the parent actor.
+
+
+    '''
+    async def main():
+        async with tractor.open_nursery(
+            arbiter_addr=arb_addr
+        ) as n:
+            await n.run_in_actor(
+                asyncio_actor,
+                target='sleep_and_err',
+                expect_err='AssertionError',
+                infect_asyncio=True,
+            )
+
+    with pytest.raises(RemoteActorError) as excinfo:
+        trio.run(main)
+
+    err = excinfo.value
+    assert isinstance(err, RemoteActorError)
+    assert err.type == AssertionError
+
+
+def test_tractor_cancels_aio(arb_addr):
+    '''
+    Verify we can cancel a spawned asyncio task gracefully.
+
+    '''
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.run_in_actor(
+                asyncio_actor,
+                target='sleep_forever',
+                expect_err='trio.Cancelled',
+                infect_asyncio=True,
+            )
+            # cancel the entire remote runtime
+            await portal.cancel_actor()
+
+    trio.run(main)
+
+
+def test_trio_cancels_aio(arb_addr):
+    '''
+    Much like the above test with ``tractor.Portal.cancel_actor()``
+    except we just use a standard ``trio`` cancellation api.
+
+    '''
+    async def main():
+
+        with trio.move_on_after(1):
+            # cancel the nursery shortly after boot
+
+            async with tractor.open_nursery() as n:
+                await n.run_in_actor(
+                    asyncio_actor,
+                    target='sleep_forever',
+                    expect_err='trio.Cancelled',
+                    infect_asyncio=True,
+                )
+
+    trio.run(main)
+
+
+@tractor.context
+async def trio_ctx(
+    ctx: tractor.Context,
+):
+
+    await ctx.started('start')
+
+    # this will block until the ``asyncio`` task sends a "first"
+    # message.
+    with trio.fail_after(2):
+        async with (
+            trio.open_nursery() as n,
+
+            tractor.to_asyncio.open_channel_from(
+                sleep_and_err,
+            ) as (first, chan),
+        ):
+
+            assert first == 'start'
+
+            # spawn another asyncio task for the cuck of it.
+            n.start_soon(
+                tractor.to_asyncio.run_task,
+                sleep_forever,
+            )
+            await trio.sleep_forever()
+
+
+@pytest.mark.parametrize(
+    'parent_cancels', [False, True],
+    ids='parent_actor_cancels_child={}'.format
+)
+def test_context_spawns_aio_task_that_errors(
+    arb_addr,
+    parent_cancels: bool,
+):
+    '''
+    Verify that spawning a task via an intertask channel ctx mngr that
+    errors correctly propagates the error back from the `asyncio`-side
+    task.
+
+    '''
+    async def main():
+
+        with trio.fail_after(2):
+            async with tractor.open_nursery() as n:
+                p = await n.start_actor(
+                    'aio_daemon',
+                    enable_modules=[__name__],
+                    infect_asyncio=True,
+                    # debug_mode=True,
+                    loglevel='cancel',
+                )
+                async with p.open_context(
+                    trio_ctx,
+                ) as (ctx, first):
+
+                    assert first == 'start'
+
+                    if parent_cancels:
+                        await p.cancel_actor()
+
+                    await trio.sleep_forever()
+
+    with pytest.raises(RemoteActorError) as excinfo:
+        trio.run(main)
+
+    err = excinfo.value
+    assert isinstance(err, RemoteActorError)
+    if parent_cancels:
+        assert err.type == trio.Cancelled
+    else:
+        assert err.type == AssertionError
+
+
+async def aio_cancel():
+    '''
+    Cancel urself boi.
+
+    '''
+    await asyncio.sleep(0.5)
+    task = asyncio.current_task()
+
+    # cancel and enter sleep
+    task.cancel()
+    await sleep_forever()
+
+
+def test_aio_cancelled_from_aio_causes_trio_cancelled(arb_addr):
+
+    async def main():
+        async with tractor.open_nursery() as n:
+            await n.run_in_actor(
+                asyncio_actor,
+                target='aio_cancel',
+                expect_err='tractor.to_asyncio.AsyncioCancelled',
+                infect_asyncio=True,
+            )
+
+    with pytest.raises(RemoteActorError) as excinfo:
+        trio.run(main)
+
+    # ensure boxed error is correct
+    assert excinfo.value.type == to_asyncio.AsyncioCancelled
+
+
+# TODO: verify open_channel_from will fail on this..
+async def no_to_trio_in_args():
+    pass
+
+
+async def push_from_aio_task(
+
+    sequence: Iterable,
+    to_trio: trio.abc.SendChannel,
+    expect_cancel: False,
+    fail_early: bool,
+
+) -> None:
+
+    try:
+        # sync caller ctx manager
+        to_trio.send_nowait(True)
+
+        for i in sequence:
+            print(f'asyncio sending {i}')
+            to_trio.send_nowait(i)
+            await asyncio.sleep(0.001)
+
+            if i == 50 and fail_early:
+                raise Exception
+
+        print('asyncio streamer complete!')
+
+    except asyncio.CancelledError:
+        if not expect_cancel:
+            pytest.fail("aio task was cancelled unexpectedly")
+        raise
+    else:
+        if expect_cancel:
+            pytest.fail("aio task wasn't cancelled as expected!?")
+
+
+async def stream_from_aio(
+
+    exit_early: bool = False,
+    raise_err: bool = False,
+    aio_raise_err: bool = False,
+    fan_out: bool = False,
+
+) -> None:
+    seq = range(100)
+    expect = list(seq)
+
+    try:
+        pulled = []
+
+        async with to_asyncio.open_channel_from(
+            push_from_aio_task,
+            sequence=seq,
+            expect_cancel=raise_err or exit_early,
+            fail_early=aio_raise_err,
+        ) as (first, chan):
+
+            assert first is True
+
+            async def consume(
+                chan: Union[
+                    to_asyncio.LinkedTaskChannel,
+                    BroadcastReceiver,
+                ],
+            ):
+                async for value in chan:
+                    print(f'trio received {value}')
+                    pulled.append(value)
+
+                    if value == 50:
+                        if raise_err:
+                            raise Exception
+                        elif exit_early:
+                            break
+
+            if fan_out:
+                # start second task that gets the same stream value set.
+                async with (
+
+                    # NOTE: this has to come first to avoid
+                    # the channel being closed before the nursery
+                    # tasks are joined..
+                    chan.subscribe() as br,
+
+                    trio.open_nursery() as n,
+                ):
+                    n.start_soon(consume, br)
+                    await consume(chan)
+
+            else:
+                await consume(chan)
+    finally:
+
+        if (
+            not raise_err and
+            not exit_early and
+            not aio_raise_err
+        ):
+            if fan_out:
+                # we get double the pulled values in the
+                # ``.subscribe()`` fan out case.
+                doubled = list(itertools.chain(*zip(expect, expect)))
+                expect = doubled[:len(pulled)]
+                assert list(sorted(pulled)) == expect
+
+            else:
+                assert pulled == expect
+        else:
+            assert not fan_out
+            assert pulled == expect[:51]
+
+        print('trio guest mode task completed!')
+
+
+@pytest.mark.parametrize(
+    'fan_out', [False, True],
+    ids='fan_out_w_chan_subscribe={}'.format
+)
+def test_basic_interloop_channel_stream(arb_addr, fan_out):
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.run_in_actor(
+                stream_from_aio,
+                infect_asyncio=True,
+                fan_out=fan_out,
+            )
+            await portal.result()
+
+    trio.run(main)
+
+
+# TODO: parametrize the above test and avoid the duplication here?
+def test_trio_error_cancels_intertask_chan(arb_addr):
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.run_in_actor(
+                stream_from_aio,
+                raise_err=True,
+                infect_asyncio=True,
+            )
+            # should trigger remote actor error
+            await portal.result()
+
+    with pytest.raises(BaseExceptionGroup) as excinfo:
+        trio.run(main)
+
+    # ensure boxed errors
+    for exc in excinfo.value.exceptions:
+        assert exc.type == Exception
+
+
+def test_trio_closes_early_and_channel_exits(arb_addr):
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.run_in_actor(
+                stream_from_aio,
+                exit_early=True,
+                infect_asyncio=True,
+            )
+            # should return cleanly: an early exit raises no remote error
+            await portal.result()
+
+    # should be a quiet exit on a simple channel exit
+    trio.run(main)
+
+
+def test_aio_errors_and_channel_propagates_and_closes(arb_addr):
+    async def main():
+        async with tractor.open_nursery() as n:
+            portal = await n.run_in_actor(
+                stream_from_aio,
+                aio_raise_err=True,
+                infect_asyncio=True,
+            )
+            # should trigger remote actor error
+            await portal.result()
+
+    with pytest.raises(BaseExceptionGroup) as excinfo:
+        trio.run(main)
+
+    # ensure boxed errors
+    for exc in excinfo.value.exceptions:
+        assert exc.type == Exception
+
+
+@tractor.context
+async def trio_to_aio_echo_server(
+    ctx: tractor.Context,
+):
+
+    async def aio_echo_server(
+        to_trio: trio.MemorySendChannel,
+        from_trio: asyncio.Queue,
+    ) -> None:
+
+        to_trio.send_nowait('start')
+
+        while True:
+            msg = await from_trio.get()
+
+            # echo the msg back
+            to_trio.send_nowait(msg)
+
+            # if we get the terminate sentinel
+            # break the echo loop
+            if msg is None:
+                print('breaking aio echo loop')
+                break
+
+        print('exiting asyncio task')
+
+    async with to_asyncio.open_channel_from(
+        aio_echo_server,
+    ) as (first, chan):
+
+        assert first == 'start'
+        await ctx.started(first)
+
+        async with ctx.open_stream() as stream:
+
+            async for msg in stream:
+                print(f'asyncio echoing {msg}')
+                await chan.send(msg)
+
+                out = await chan.receive()
+                # echo back to parent actor-task
+                await stream.send(out)
+
+                if out is None:
+                    try:
+                        out = await chan.receive()
+                    except trio.EndOfChannel:
+                        break
+                    else:
+                        raise RuntimeError('aio channel never stopped?')
+
+
+@pytest.mark.parametrize(
+    'raise_error_mid_stream',
+    [False, Exception, KeyboardInterrupt],
+    ids='raise_error={}'.format,
+)
+def test_echoserver_detailed_mechanics(
+    arb_addr,
+    raise_error_mid_stream,
+):
+
+    async def main():
+        async with tractor.open_nursery() as n:
+            p = await n.start_actor(
+                'aio_server',
+                enable_modules=[__name__],
+                infect_asyncio=True,
+            )
+            async with p.open_context(
+                trio_to_aio_echo_server,
+            ) as (ctx, first):
+
+                assert first == 'start'
+
+                async with ctx.open_stream() as stream:
+                    for i in range(100):
+                        await stream.send(i)
+                        out = await stream.receive()
+                        assert i == out
+
+                        if raise_error_mid_stream and i == 50:
+                            raise raise_error_mid_stream
+
+                    # send terminate msg
+                    await stream.send(None)
+                    out = await stream.receive()
+                    assert out is None
+
+                    if out is None:
+                        # ensure the stream is stopped
+                        # with trio.fail_after(0.1):
+                        try:
+                            await stream.receive()
+                        except trio.EndOfChannel:
+                            pass
+                        else:
+                            pytest.fail(
+                                "stream wasn't stopped after sentinel?!")
+
+            # TODO: the case where this blocks and
+            # is cancelled by kbi or out of task cancellation
+            await p.cancel_actor()
+
+    if raise_error_mid_stream:
+        with pytest.raises(raise_error_mid_stream):
+            trio.run(main)
+
+    else:
+        trio.run(main)
--- a/tests/test_legacy_one_way_streaming.py
+++ b/tests/test_legacy_one_way_streaming.py
@ -7,9 +7,10 @@ import platform

 import trio
 import tractor
-from tractor.testing import tractor_test
 import pytest

+from conftest import tractor_test
+

 def test_must_define_ctx():

@ -79,33 +80,36 @@ async def stream_from_single_subactor(

                seq = range(10)

-                async with portal.open_stream_from(
-                    stream_func,
-                    sequence=list(seq),  # has to be msgpack serializable
-                ) as stream:
+                with trio.fail_after(5):
+                    async with portal.open_stream_from(
+                        stream_func,
+                        sequence=list(seq),  # has to be msgpack serializable
+                    ) as stream:

-                    # it'd sure be nice to have an asyncitertools here...
-                    iseq = iter(seq)
-                    ival = next(iseq)
+                        # it'd sure be nice to have an asyncitertools here...
+                        iseq = iter(seq)
+                        ival = next(iseq)

-                    async for val in stream:
-                        assert val == ival
+                        async for val in stream:
+                            assert val == ival

+                            try:
+                                ival = next(iseq)
+                            except StopIteration:
+                                # should cancel far end task which will be
+                                # caught and no error is raised
+                                await stream.aclose()
+
+                        await trio.sleep(0.3)
+
+                        # ensure EOC signalled-state translates
+                        # XXX: not really sure this is correct,
+                        # shouldn't it be a `ClosedResourceError`?
                        try:
-                            ival = next(iseq)
-                        except StopIteration:
-                            # should cancel far end task which will be
-                            # caught and no error is raised
-                            await stream.aclose()
-
-                    await trio.sleep(0.3)
-
-                    try:
-                        await stream.__anext__()
-                    except StopAsyncIteration:
-                        # stop all spawned subactors
-                        await portal.cancel_actor()
-                    # await nursery.cancel()
+                            await stream.__anext__()
+                        except StopAsyncIteration:
+                            # stop all spawned subactors
+                            await portal.cancel_actor()


@pytest.mark.parametrize(
@ -132,7 +136,7 @@ async def stream_data(seed):
        yield i

        # trigger scheduler to simulate practical usage
-        await trio.sleep(0)
+        await trio.sleep(0.0001)


 # this is the third actor; the aggregator
@ -247,7 +251,7 @@ def test_a_quadruple_example(time_quad_ex, ci_env, spawn_backend):

    results, diff = time_quad_ex
    assert results
-    this_fast = 6 if platform.system() in ('Windows', 'Darwin') else 2.5
+    this_fast = 6 if platform.system() in ('Windows', 'Darwin') else 3
    assert diff < this_fast


@ -313,12 +317,12 @@ async def test_respawn_consumer_task(
                    task_status.started(cs)

                    # shield stream's underlying channel from cancellation
-                    with stream.shield():
+                    # with stream.shield():

-                        async for v in stream:
-                            print(f'from stream: {v}')
-                            expect.remove(v)
-                            received.append(v)
+                    async for v in stream:
+                        print(f'from stream: {v}')
+                        expect.remove(v)
+                        received.append(v)

                    print('exited consume')

--- a/tests/test_local.py
+++ b/tests/test_local.py
@ -11,25 +11,18 @@ from conftest import tractor_test


@pytest.mark.trio
-async def test_no_arbitter():
+async def test_no_runtime():
    """An arbitter must be established before any nurseries
    can be created.

    (In other words ``tractor.open_root_actor()`` must be engaged at
    some point?)
    """
-    with pytest.raises(RuntimeError):
-        with tractor.open_nursery():
+    with pytest.raises(RuntimeError) :
+        async with tractor.find_actor('doggy'):
            pass


-def test_no_main():
-    """An async function **must** be passed to ``tractor.run()``.
-    """
-    with pytest.raises(TypeError):
-        tractor.run(None)
-
-
@tractor_test
 async def test_self_is_registered(arb_addr):
    "Verify waiting on the arbiter to register itself using the standard api."
--- a/tests/test_pubsub.py
+++ b/tests/test_pubsub.py
@ -4,20 +4,22 @@ from itertools import cycle
 import pytest
 import trio
 import tractor
-from tractor.testing import tractor_test
+from tractor.experimental import msgpub
+
+from conftest import tractor_test


 def test_type_checks():

    with pytest.raises(TypeError) as err:
-        @tractor.msg.pub
+        @msgpub
        async def no_get_topics(yo):
            yield

    assert "must define a `get_topics`" in str(err.value)

    with pytest.raises(TypeError) as err:
-        @tractor.msg.pub
+        @msgpub
        def not_async_gen(yo):
            pass

@ -32,7 +34,7 @@ def is_even(i):
 _get_topics = None


-@tractor.msg.pub
+@msgpub
 async def pubber(get_topics, seed=10):

    # ensure topic subscriptions are as expected
@ -103,7 +105,7 @@ async def subs(
                await stream.aclose()


-@tractor.msg.pub(tasks=['one', 'two'])
+@msgpub(tasks=['one', 'two'])
 async def multilock_pubber(get_topics):
    yield {'doggy': 10}

@ -180,6 +182,7 @@ def test_multi_actor_subs_arbiter_pub(
                    'streamer',
                    enable_modules=[__name__],
                )
+                name = 'streamer'

            even_portal = await n.run_in_actor(
                subs,
--- a/tests/test_resource_cache.py
+++ b/tests/test_resource_cache.py
@ -0,0 +1,182 @@
+'''
+Async context manager cache api testing: ``trionics.maybe_open_context():``
+
+'''
+from contextlib import asynccontextmanager as acm
+import platform
+from typing import Awaitable
+
+import pytest
+import trio
+import tractor
+
+
+_resource: int = 0
+
+
+@acm
+async def maybe_increment_counter(task_name: str):
+    '''
+    Bump the module-level ``_resource`` counter on entry, yield its
+    new value, and decrement it again once the ``with`` body exits
+    normally.
+
+    ``task_name`` is not read in the body; the test below only varies
+    it to change the ``kwargs`` handed to ``maybe_open_context()``.
+
+    '''
+    global _resource
+
+    _resource += 1
+    await trio.lowlevel.checkpoint()
+    yield _resource
+    await trio.lowlevel.checkpoint()
+    _resource -= 1
+
+
+@pytest.mark.parametrize(
+    'key_on',
+    ['key_value', 'kwargs'],
+    ids="key_on={}".format,
+)
+def test_resource_only_entered_once(key_on):
+    '''
+    Spawn 10 local tasks which all enter ``maybe_open_context()``
+    around ``maybe_increment_counter()`` and verify that every one of
+    them observes the counter at ``1``.
+
+    Depending on ``key_on`` the tasks share either an explicit ``key``
+    value or an identical ``kwargs`` dict.
+
+    '''
+    global _resource
+    _resource = 0
+
+    key = None
+    if key_on == 'key_value':
+        key = 'some_common_key'
+
+    async def main():
+        cache_active: bool = False
+
+        async def enter_cached_mngr(name: str):
+            nonlocal cache_active
+
+            if key_on == 'kwargs':
+                # make a common kwargs input to key on it
+                kwargs = {'task_name': 'same_task_name'}
+                assert key is None
+            else:
+                # different task names per task will be used
+                kwargs = {'task_name': name}
+
+            async with tractor.trionics.maybe_open_context(
+                maybe_increment_counter,
+                kwargs=kwargs,
+                key=key,
+
+            ) as (cache_hit, resource):
+                if cache_hit:
+                    try:
+                        cache_active = True
+                        assert resource == 1
+                        await trio.sleep_forever()
+                    finally:
+                        cache_active = False
+                else:
+                    assert resource == 1
+                    await trio.sleep_forever()
+
+        # every task parks in ``sleep_forever()``, so bound the run
+        # and let the timeout cancel them all.
+        with trio.move_on_after(0.5):
+            async with (
+                tractor.open_root_actor(),
+                trio.open_nursery() as n,
+            ):
+
+                for i in range(10):
+                    n.start_soon(enter_cached_mngr, f'task_{i}')
+                    await trio.sleep(0.001)
+
+    trio.run(main)
+
+
+@tractor.context
+async def streamer(
+    ctx: tractor.Context,
+    seq: list[int] = list(range(1000)),
+) -> None:
+    '''
+    Context endpoint which sends every value of ``seq`` over the
+    context's stream, sleeping 1ms between sends.
+
+    The default ``seq`` list is built once at definition time; it is
+    only iterated here, never mutated.
+
+    '''
+
+    await ctx.started()
+    async with ctx.open_stream() as stream:
+        for val in seq:
+            await stream.send(val)
+            await trio.sleep(0.001)
+
+    print('producer finished')
+
+
+@acm
+# NOTE(review): as an ``@acm`` generator this yields a stream rather
+# than returning an awaitable; the annotation probably wants to be an
+# async iterator/generator type - confirm.
+async def open_stream() -> Awaitable[tractor.MsgStream]:
+    '''
+    Start a ``'streamer'`` actor exposing this module, open a context
+    to ``streamer()`` and yield the stream opened on that context.
+
+    Once the caller's block (and the context) exits, the actor is
+    explicitly cancelled.
+
+    '''
+
+    async with tractor.open_nursery() as tn:
+        portal = await tn.start_actor('streamer', enable_modules=[__name__])
+        async with (
+            portal.open_context(streamer) as (ctx, first),
+            ctx.open_stream() as stream,
+        ):
+            yield stream
+
+        await portal.cancel_actor()
+    print('CANCELLED STREAMER')
+
+
+@acm
+async def maybe_open_stream(taskname: str):
+    '''
+    Enter ``open_stream()`` through ``maybe_open_context()`` and yield
+    either the stream itself (cache miss) or a fresh subscription
+    obtained from ``stream.subscribe()`` (cache hit).
+
+    ``taskname`` is only used for the log line on a cache hit.
+
+    '''
+    async with tractor.trionics.maybe_open_context(
+        # NOTE: all secondary tasks should cache hit on the same key
+        acm_func=open_stream,
+    ) as (cache_hit, stream):
+
+        if cache_hit:
+            print(f'{taskname} loaded from cache')
+
+            # add a new broadcast subscription for the quote stream
+            # if this feed is already allocated by the first
+            # task that entered
+            async with stream.subscribe() as bstream:
+                yield bstream
+        else:
+            # yield the actual stream
+            yield stream
+
+
+def test_open_local_sub_to_stream():
+    '''
+    Verify a single inter-actor stream can be fanned-out and shared to
+    N local tasks using ``trionics.maybe_open_context():``.
+
+    '''
+    timeout = 3 if platform.system() != "Windows" else 10
+
+    async def main():
+
+        full = list(range(1000))
+
+        async def get_sub_and_pull(taskname: str):
+            async with (
+                maybe_open_stream(taskname) as stream,
+            ):
+                # of ``task_0`` .. ``task_9`` only the first started
+                # task has a '0' in its name; it is expected to hold
+                # the real stream, all others a broadcast receiver.
+                if '0' in taskname:
+                    assert isinstance(stream, tractor.MsgStream)
+                else:
+                    assert isinstance(
+                        stream,
+                        tractor.trionics.BroadcastReceiver
+                    )
+
+                first = await stream.receive()
+                print(f'{taskname} started with value {first}')
+                seq = []
+                async for msg in stream:
+                    seq.append(msg)
+
+                # only membership is checked, not order or completeness
+                assert set(seq).issubset(set(full))
+            print(f'{taskname} finished')
+
+        with trio.fail_after(timeout):
+            # TODO: turns out this isn't multi-task entrant XD
+            # We probably need an idempotent entry semantic?
+            async with tractor.open_root_actor():
+                async with (
+                    trio.open_nursery() as nurse,
+                ):
+                    for i in range(10):
+                        nurse.start_soon(get_sub_and_pull, f'task_{i}')
+                        await trio.sleep(0.001)
+
+                print('all consumer tasks finished')
+
+    trio.run(main)
--- a/tests/test_runtime.py
+++ b/tests/test_runtime.py
@ -0,0 +1,73 @@
+"""
+Verifying internal runtime state and undocumented extras.
+
+"""
+import os
+
+import pytest
+import trio
+import tractor
+
+from conftest import tractor_test
+
+
+_file_path: str = ''
+
+
+def unlink_file():
+    '''
+    Delete the file at the module-level ``_file_path``.
+
+    '''
+    print('Removing tmp file!')
+    os.remove(_file_path)
+
+
+async def crash_and_clean_tmpdir(
+    tmp_file_path: str,
+    error: bool = True,
+):
+    '''
+    Register ``unlink_file()`` on the current actor's
+    ``lifetime_stack`` for ``tmp_file_path``, then either fail with an
+    ``AssertionError`` (``error=True``) or call ``actor.cancel_soon()``.
+
+    '''
+    # ``unlink_file()`` takes no args, so hand it the path via the
+    # module-level global.
+    global _file_path
+    _file_path = tmp_file_path
+
+    actor = tractor.current_actor()
+    actor.lifetime_stack.callback(unlink_file)
+
+    assert os.path.isfile(tmp_file_path)
+    await trio.sleep(0.1)
+    if error:
+        assert 0
+    else:
+        actor.cancel_soon()
+
+
+@pytest.mark.parametrize(
+    'error_in_child',
+    [True, False],
+)
+@tractor_test
+async def test_lifetime_stack_wipes_tmpfile(
+    tmp_path,
+    error_in_child: bool,
+):
+    '''
+    Create a tmp file, have a child actor register its removal on the
+    actor's ``lifetime_stack`` and then either crash or cancel itself;
+    in both cases the file must be gone afterwards.
+
+    '''
+    child_tmp_file = tmp_path / "child.txt"
+    child_tmp_file.touch()
+    assert child_tmp_file.exists()
+    path = str(child_tmp_file)
+
+    try:
+        with trio.move_on_after(0.5):
+            async with tractor.open_nursery() as n:
+                await (  # inlined portal
+                    await n.run_in_actor(
+                        crash_and_clean_tmpdir,
+                        tmp_file_path=path,
+                        error=error_in_child,
+                    )
+                ).result()
+
+    # the child's failure may surface bare or wrapped in a group;
+    # either is fine, only the file cleanup is under test.
+    except (
+        tractor.RemoteActorError,
+        tractor.BaseExceptionGroup,
+    ):
+        pass
+
+    # tmp file should have been wiped by
+    # teardown stack.
+    assert not child_tmp_file.exists()
--- a/tests/test_spawning.py
+++ b/tests/test_spawning.py
@ -1,6 +1,8 @@
 """
 Spawning basics
+
 """
+from typing import Optional

 import pytest
 import trio
@ -11,7 +13,11 @@ from conftest import tractor_test
 data_to_pass_down = {'doggy': 10, 'kitty': 4}


-async def spawn(is_arbiter, data, arb_addr):
+async def spawn(
+    is_arbiter: bool,
+    data: dict,
+    arb_addr: tuple[str, int],
+):
    namespaces = [__name__]

    await trio.sleep(0.1)
@ -88,24 +94,38 @@ async def test_movie_theatre_convo(start_method):
        await portal.cancel_actor()


-async def cellar_door():
-    return "Dang that's beautiful"
+async def cellar_door(return_value: Optional[str]):
+    return return_value


+@pytest.mark.parametrize(
+    'return_value', ["Dang that's beautiful", None],
+    ids=['return_str', 'return_None'],
+)
@tractor_test
-async def test_most_beautiful_word(start_method):
-    """The main ``tractor`` routine.
-    """
-    async with tractor.open_nursery() as n:
+async def test_most_beautiful_word(
+    start_method,
+    return_value
+):
+    '''
+    The main ``tractor`` routine.

-        portal = await n.run_in_actor(
-            cellar_door,
-            name='some_linguist',
-        )
+    '''
+    with trio.fail_after(1):
+        async with tractor.open_nursery() as n:

+            portal = await n.run_in_actor(
+                cellar_door,
+                return_value=return_value,
+                name='some_linguist',
+            )
+
+            print(await portal.result())
    # The ``async with`` will unblock here since the 'some_linguist'
    # actor has completed its main task ``cellar_door``.

+    # this should pull the cached final result already captured during
+    # the nursery block exit.
    print(await portal.result())


@ -122,7 +142,7 @@ def test_loglevel_propagated_to_subactor(
    capfd,
    arb_addr,
 ):
-    if start_method == 'forkserver':
+    if start_method == 'mp_forkserver':
        pytest.skip(
            "a bug with `capfd` seems to make forkserver capture not work?")

@ -131,13 +151,13 @@ def test_loglevel_propagated_to_subactor(
    async def main():
        async with tractor.open_nursery(
            name='arbiter',
-            loglevel=level,
            start_method=start_method,
            arbiter_addr=arb_addr,

        ) as tn:
            await tn.run_in_actor(
                check_loglevel,
+                loglevel=level,
                level=level,
            )

--- a/tests/test_task_broadcasting.py
+++ b/tests/test_task_broadcasting.py
@ -0,0 +1,514 @@
+"""
+Broadcast channels for fan-out to local tasks.
+
+"""
+from contextlib import asynccontextmanager
+from functools import partial
+from itertools import cycle
+import time
+from typing import Optional
+
+import pytest
+import trio
+from trio.lowlevel import current_task
+import tractor
+from tractor.trionics import (
+    broadcast_receiver,
+    Lagged,
+)
+
+
+@tractor.context
+async def echo_sequences(
+
+    ctx:  tractor.Context,
+
+) -> None:
+    '''Bidir streaming endpoint which will stream
+    back any sequence it is sent item-wise.
+
+    '''
+    await ctx.started()
+
+    async with ctx.open_stream() as stream:
+        async for sequence in stream:
+            # materialize whatever iterable arrived, then echo it
+            # back one value per message.
+            seq = list(sequence)
+            for value in seq:
+                await stream.send(value)
+                print(f'producer sent {value}')
+
+
+async def ensure_sequence(
+
+    stream: tractor.MsgStream,
+    sequence: list,
+    delay: Optional[float] = None,
+
+) -> None:
+    '''
+    Subscribe to ``stream`` and assert that values arrive in exactly
+    the order given by ``sequence``, optionally sleeping ``delay``
+    seconds after each one.
+
+    ``sequence`` is consumed in place (callers in this module pass
+    a copy); the loop exits once it is empty.
+
+    '''
+
+    name = current_task().name
+    async with stream.subscribe() as bcaster:
+        # the subscription must be a different type than the stream
+        assert not isinstance(bcaster, type(stream))
+        async for value in bcaster:
+            print(f'{name} rx: {value}')
+            assert value == sequence[0]
+            sequence.remove(value)
+
+            # a ``delay`` of ``None`` or ``0`` means no sleep at all
+            if delay:
+                await trio.sleep(delay)
+
+            if not sequence:
+                # fully consumed
+                break
+
+
+@asynccontextmanager
+async def open_sequence_streamer(
+
+    sequence: list[int],
+    arb_addr: tuple[str, int],
+    start_method: str,
+
+) -> tractor.MsgStream:
+    '''
+    Start a ``'sequence_echoer'`` actor, open a context to
+    ``echo_sequences()`` and yield a stream opened on it with
+    ``backpressure=True``; the actor is cancelled on exit.
+
+    ``sequence`` is accepted but not read in this body.
+
+    '''
+
+    async with tractor.open_nursery(
+        arbiter_addr=arb_addr,
+        start_method=start_method,
+    ) as tn:
+
+        portal = await tn.start_actor(
+            'sequence_echoer',
+            enable_modules=[__name__],
+        )
+
+        async with portal.open_context(
+            echo_sequences,
+        ) as (ctx, first):
+
+            # ``echo_sequences()`` calls ``ctx.started()`` with no value
+            assert first is None
+            async with ctx.open_stream(backpressure=True) as stream:
+                yield stream
+
+        await portal.cancel_actor()
+
+
+def test_stream_fan_out_to_local_subscriptions(
+    arb_addr,
+    start_method,
+):
+    '''
+    Send a 1000 item sequence to the echo actor and verify that 10
+    subscriber tasks plus the parent (reading the source stream
+    directly) each receive every value in order.
+
+    '''
+
+    sequence = list(range(1000))
+
+    async def main():
+
+        async with open_sequence_streamer(
+            sequence,
+            arb_addr,
+            start_method,
+        ) as stream:
+
+            async with trio.open_nursery() as n:
+                for i in range(10):
+                    n.start_soon(
+                        ensure_sequence,
+                        stream,
+                        # each consumer checks off its own copy
+                        sequence.copy(),
+                        name=f'consumer_{i}',
+                    )
+
+                await stream.send(tuple(sequence))
+
+                async for value in stream:
+                    print(f'source stream rx: {value}')
+                    assert value == sequence[0]
+                    sequence.remove(value)
+
+                    if not sequence:
+                        # fully consumed
+                        break
+
+    trio.run(main)
+
+
+@pytest.mark.parametrize(
+    'task_delays',
+    [
+        (0.01, 0.001),
+        (0.001, 0.01),
+    ]
+)
+def test_consumer_and_parent_maybe_lag(
+    arb_addr,
+    start_method,
+    task_delays,
+):
+    '''
+    Stream a 300 item sequence to both the parent task and one
+    subscriber task which sleep for different delays per value.
+
+    When the parent is the slower side it must observe at least one
+    ``Lagged``; a ``Lagged`` escaping the nursery (raised in the
+    child) is only accepted when the child is the slower side.
+
+    '''
+    async def main():
+
+        sequence = list(range(300))
+        parent_delay, sub_delay = task_delays
+
+        async with open_sequence_streamer(
+            sequence,
+            arb_addr,
+            start_method,
+        ) as stream:
+
+            try:
+                async with trio.open_nursery() as n:
+
+                    n.start_soon(
+                        ensure_sequence,
+                        stream,
+                        sequence.copy(),
+                        sub_delay,
+                        name='consumer_task',
+                    )
+
+                    await stream.send(tuple(sequence))
+
+                    # async for value in stream:
+                    lagged = False
+                    lag_count = 0
+
+                    # pre-bind so the ``Lagged`` handler below can
+                    # always report the last seen value, even when the
+                    # very first receive lags.
+                    value = None
+
+                    while True:
+                        try:
+                            value = await stream.receive()
+                            print(f'source stream rx: {value}')
+
+                            if lagged:
+                                # re set the sequence starting at our last
+                                # value
+                                sequence = sequence[sequence.index(value) + 1:]
+                            else:
+                                assert value == sequence[0]
+                                sequence.remove(value)
+
+                            lagged = False
+
+                        except Lagged:
+                            lagged = True
+                            print(f'source stream lagged after {value}')
+                            lag_count += 1
+                            continue
+
+                        # lag the parent
+                        await trio.sleep(parent_delay)
+
+                        if not sequence:
+                            # fully consumed
+                            break
+                    print(f'parent + source stream lagged: {lag_count}')
+
+                    if parent_delay > sub_delay:
+                        assert lag_count > 0
+
+            except Lagged:
+                # child was lagged
+                assert parent_delay < sub_delay
+
+    trio.run(main)
+
+
+def test_faster_task_to_recv_is_cancelled_by_slower(
+    arb_addr,
+    start_method,
+):
+    '''
+    Ensure that if a faster task consuming from a stream is cancelled
+    the slower task can continue to receive all expected values.
+
+    '''
+    async def main():
+
+        sequence = list(range(1000))
+
+        async with open_sequence_streamer(
+            sequence,
+            arb_addr,
+            start_method,
+
+        ) as stream:
+
+            # pre-bind so the ``Lagged`` handlers below can always
+            # report the last seen value, even on a first-receive lag.
+            value = None
+
+            async with trio.open_nursery() as n:
+                n.start_soon(
+                    ensure_sequence,
+                    stream,
+                    sequence.copy(),
+                    0,
+                    name='consumer_task',
+                )
+
+                await stream.send(tuple(sequence))
+
+                # make 20 (slow) receive attempts, tolerating
+                # overruns, cancel the subtask, then expect to be
+                # able to pull all values still
+                for i in range(20):
+                    try:
+                        value = await stream.receive()
+                        print(f'source stream rx: {value}')
+                        await trio.sleep(0.01)
+                    except Lagged:
+                        print(f'parent overrun after {value}')
+                        continue
+
+                print('cancelling faster subtask')
+                n.cancel_scope.cancel()
+
+            try:
+                value = await stream.receive()
+                print(f'source stream after cancel: {value}')
+            except Lagged:
+                print(f'parent overrun after {value}')
+
+            # expect to see all remaining values
+            with trio.fail_after(0.5):
+                async for value in stream:
+                    assert stream._broadcaster._state.recv_ready is None
+                    print(f'source stream rx: {value}')
+                    if value == 999:
+                        # fully consumed and we missed no values once
+                        # the faster subtask was cancelled
+                        break
+
+                # await tractor.breakpoint()
+                # await stream.receive()
+                print(f'final value: {value}')
+
+    trio.run(main)
+
+
+def test_subscribe_errors_after_close():
+    '''
+    Subscribing on a broadcast receiver which was already closed (by
+    exiting its ``async with`` block) must raise
+    ``trio.ClosedResourceError``.
+
+    '''
+
+    async def main():
+
+        size = 1
+        tx, rx = trio.open_memory_channel(size)
+        async with broadcast_receiver(rx, size) as brx:
+            pass
+
+        try:
+            # open and close
+            async with brx.subscribe():
+                pass
+
+        except trio.ClosedResourceError:
+            assert brx.key not in brx._state.subs
+
+        else:
+            # no error was raised: fail the test
+            assert 0
+
+    trio.run(main)
+
+
+def test_ensure_slow_consumers_lag_out(
+    arb_addr,
+    start_method,
+):
+    '''This is a pure local task test; no tractor
+    machinery is really required.
+
+    Three subscriber tasks (``sub_1`` .. ``sub_3``) consume from one
+    broadcast receiver with increasing per-value delays; the slower
+    ones are expected to retry after each ``Lagged`` until their retry
+    budget is spent while ``sub_1`` never lags.
+
+    '''
+    async def main():
+
+        # make sure it all works within the runtime
+        async with tractor.open_root_actor():
+
+            num_laggers = 4
+            laggers: dict[str, int] = {}
+            retries = 3
+            size = 100
+            tx, rx = trio.open_memory_channel(size)
+            brx = broadcast_receiver(rx, size)
+
+            async def sub_and_print(
+                delay: float,
+            ) -> None:
+
+                task = current_task()
+                start = time.time()
+
+                async with brx.subscribe() as lbrx:
+                    while True:
+                        print(f'{task.name}: starting consume loop')
+                        try:
+                            async for value in lbrx:
+                                print(f'{task.name}: {value}')
+                                await trio.sleep(delay)
+
+                            if task.name == 'sub_1':
+                                # trigger checkpoint to clean out other subs
+                                await trio.sleep(0.01)
+
+                                # the non-lagger got
+                                # a ``trio.EndOfChannel``
+                                # because the ``tx`` below was closed
+                                assert len(lbrx._state.subs) == 1
+
+                                await lbrx.aclose()
+
+                                assert len(lbrx._state.subs) == 0
+
+                        except trio.ClosedResourceError:
+                            # only the fast sub will try to re-enter
+                            # iteration on the now closed bcaster
+                            assert task.name == 'sub_1'
+                            return
+
+                        except Lagged:
+                            # NOTE(review): ``value`` is unbound here if
+                            # the lag hits before any value was received
+                            # by this task - confirm that can't happen.
+                            lag_time = time.time() - start
+                            lags = laggers[task.name]
+                            print(
+                                f'restarting slow task {task.name} '
+                                f'that bailed out on {lags}:{value} '
+                                f'after {lag_time:.3f}')
+                            # the counter is bumped while ``lags`` is
+                            # 0..3, so a terminated task ends at 4.
+                            if lags <= retries:
+                                laggers[task.name] += 1
+                                continue
+                            else:
+                                print(
+                                    f'{task.name} was too slow and terminated '
+                                    f'on {lags}:{value}')
+                                return
+
+            async with trio.open_nursery() as nursery:
+
+                # ``range(1, 4)``: spawns ``sub_1`` .. ``sub_3``
+                for i in range(1, num_laggers):
+
+                    task_name = f'sub_{i}'
+                    laggers[task_name] = 0
+                    nursery.start_soon(
+                        partial(
+                            sub_and_print,
+                            delay=i*0.001,
+                        ),
+                        name=task_name,
+                    )
+
+                # allow subs to sched
+                await trio.sleep(0.1)
+
+                async with tx:
+                    for i in cycle(range(size)):
+                        await tx.send(i)
+                        # presumably the second remaining entry is
+                        # ``brx`` itself (it is looked up in ``subs``
+                        # further below) - TODO confirm
+                        if len(brx._state.subs) == 2:
+                            # only one, the non lagger, sub is left
+                            break
+
+                # the non-lagger
+                assert laggers.pop('sub_1') == 0
+
+                # every remaining (slow) task used up all its retries
+                for n, v in laggers.items():
+                    assert v == 4
+
+                assert tx._closed
+                assert not tx._state.open_send_channels
+
+                # check that "first" bcaster that we created
+                # above, never was iterated and is thus overrun
+                try:
+                    await brx.receive()
+                except Lagged:
+                    # expect tokio style index truncation
+                    seq = brx._state.subs[brx.key]
+                    assert seq == len(brx._state.queue) - 1
+
+                # all backpressured entries in the underlying
+                # channel should have been copied into the caster
+                # queue trailing-window
+                async for i in rx:
+                    print(f'bped: {i}')
+                    assert i in brx._state.queue
+
+                # should be noop
+                await brx.aclose()
+
+    trio.run(main)
+
+
+def test_first_recver_is_cancelled():
+    '''
+    Cancel the subscriber task that is blocked receiving first, then
+    send a value and verify the parent's own ``brx.receive()`` still
+    gets it within the timeout.
+
+    '''
+
+    async def main():
+
+        # make sure it all works within the runtime
+        async with tractor.open_root_actor():
+
+            tx, rx = trio.open_memory_channel(1)
+            brx = broadcast_receiver(rx, 1)
+            cs = trio.CancelScope()
+
+            async def sub_and_recv():
+                with cs:
+                    async with brx.subscribe() as bc:
+                        async for value in bc:
+                            print(value)
+
+            async def cancel_and_send():
+                await trio.sleep(0.2)
+                cs.cancel()
+                await tx.send(1)
+
+            async with trio.open_nursery() as n:
+
+                n.start_soon(sub_and_recv)
+                await trio.sleep(0.1)
+                # by now the subscriber task should be parked in its
+                # receive
+                assert brx._state.recv_ready
+
+                n.start_soon(cancel_and_send)
+
+                # ensure that we don't hang because no-task is now
+                # waiting on the underlying receive..
+                with trio.fail_after(0.5):
+                    value = await brx.receive()
+                    print(f'parent: {value}')
+                    assert value == 1
+
+    trio.run(main)
+
+
+def test_no_raise_on_lag():
+    '''
+    Run a simple 2-task broadcast where one task is slow but configured
+    so that it does not raise `Lagged` on overruns using
+    `raise_on_lag=False` and verify that the task does not raise.
+
+    '''
+    size = 100
+    tx, rx = trio.open_memory_channel(size)
+    brx = broadcast_receiver(rx, size)
+
+    async def slow():
+        async with brx.subscribe(
+            raise_on_lag=False,
+        ) as br:
+            async for msg in br:
+                print(f'slow task got: {msg}')
+                await trio.sleep(0.1)
+
+    async def fast():
+        async with brx.subscribe() as br:
+            async for msg in br:
+                print(f'fast task got: {msg}')
+
+    async def main():
+        async with (
+            tractor.open_root_actor(
+                # NOTE: so we see the warning msg emitted by the bcaster
+                # internals when the no raise flag is set.
+                loglevel='warning',
+            ),
+            trio.open_nursery() as n,
+        ):
+            n.start_soon(slow)
+            n.start_soon(fast)
+
+            for i in range(1000):
+                await tx.send(i)
+
+            # simulate user nailing ctl-c after realizing
+            # there's a lag in the slow task.
+            await trio.sleep(1)
+            raise KeyboardInterrupt
+
+    # only the simulated ctl-c may escape the run; a ``Lagged`` from
+    # the slow task would fail this check.
+    with pytest.raises(KeyboardInterrupt):
+        trio.run(main)
--- a/tests/test_trioisms.py
+++ b/tests/test_trioisms.py
@ -0,0 +1,82 @@
+'''
+Reminders for oddities in `trio` that we need to stay aware of and/or
+want to see changed.
+
+'''
+import pytest
+import trio
+from trio_typing import TaskStatus
+
+
+@pytest.mark.parametrize(
+    'use_start_soon', [
+        pytest.param(
+            True,
+            marks=pytest.mark.xfail(reason="see python-trio/trio#2258")
+        ),
+        False,
+    ]
+)
+def test_stashed_child_nursery(use_start_soon):
+    '''
+    Run an erroring task (``NameError`` from calling the undefined
+    ``doggy``) inside a child nursery handed back via ``pn.start()``,
+    followed by a ``cn.start()`` of a task waiting on an event that
+    never gets set.
+
+    The ``NameError`` is expected to propagate out of ``trio.run()``;
+    the ``start_soon()`` variant is marked xfail, see
+    python-trio/trio#2258.
+
+    '''
+
+    _child_nursery = None
+
+    async def waits_on_signal(
+        ev: trio.Event,
+        task_status: TaskStatus[trio.Nursery] = trio.TASK_STATUS_IGNORED,
+    ):
+        '''
+        Do some stuff, then signal other tasks, then yield back to "starter".
+
+        '''
+        await ev.wait()
+        task_status.started()
+
+    async def mk_child_nursery(
+        task_status: TaskStatus = trio.TASK_STATUS_IGNORED,
+    ):
+        '''
+        Allocate a child sub-nursery and stash it in the enclosing
+        test function's scope.
+
+        '''
+        nonlocal _child_nursery
+
+        async with trio.open_nursery() as cn:
+            _child_nursery = cn
+            task_status.started(cn)
+
+            # block until cancelled by parent.
+            await trio.sleep_forever()
+
+    async def sleep_and_err(
+        ev: trio.Event,
+        task_status: TaskStatus = trio.TASK_STATUS_IGNORED,
+    ):
+        await trio.sleep(0.5)
+        # deliberately undefined name: raises ``NameError`` so the
+        # lines below never run.
+        doggy()  # noqa
+        ev.set()
+        task_status.started()
+
+    async def main():
+
+        async with (
+            trio.open_nursery() as pn,
+        ):
+            cn = await pn.start(mk_child_nursery)
+            assert cn
+
+            ev = trio.Event()
+
+            if use_start_soon:
+                # this causes inf hang
+                cn.start_soon(sleep_and_err, ev)
+
+            else:
+                # this does not.
+                await cn.start(sleep_and_err, ev)
+
+            with trio.fail_after(1):
+                await cn.start(waits_on_signal, ev)
+
+    with pytest.raises(NameError):
+        trio.run(main)
--- a/tractor/__init__.py
+++ b/tractor/__init__.py
@ -1,52 +1,86 @@
-"""
-tractor: An actor model micro-framework built on
-         ``trio`` and ``multiprocessing``.
-"""
-from trio import MultiError
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.

+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+"""
+tractor: structured concurrent "actors".
+
+"""
+from exceptiongroup import BaseExceptionGroup
+
+from ._clustering import open_actor_cluster
 from ._ipc import Channel
 from ._streaming import (
    Context,
-    ReceiveMsgStream,
    MsgStream,
    stream,
    context,
 )
-from ._discovery import get_arbiter, find_actor, wait_for_actor
-from ._trionics import open_nursery
-from ._state import current_actor, is_root_process
+from ._discovery import (
+    get_arbiter,
+    find_actor,
+    wait_for_actor,
+    query_actor,
+)
+from ._supervise import open_nursery
+from ._state import (
+    current_actor,
+    is_root_process,
+)
 from ._exceptions import (
    RemoteActorError,
    ModuleNotExposed,
    ContextCancelled,
 )
-from ._debug import breakpoint, post_mortem
+from ._debug import (
+    breakpoint,
+    post_mortem,
+)
 from . import msg
-from ._root import run, run_daemon, open_root_actor
+from ._root import (
+    run_daemon,
+    open_root_actor,
+)
+from ._portal import Portal
+from ._runtime import Actor


 __all__ = [
+    'Actor',
    'Channel',
    'Context',
-    'ModuleNotExposed',
-    'MultiError',
-    'RemoteActorError',
    'ContextCancelled',
+    'ModuleNotExposed',
+    'MsgStream',
+    'BaseExceptionGroup',
+    'Portal',
+    'RemoteActorError',
    'breakpoint',
+    'context',
    'current_actor',
    'find_actor',
    'get_arbiter',
    'is_root_process',
    'msg',
+    'open_actor_cluster',
    'open_nursery',
    'open_root_actor',
    'post_mortem',
-    'run',
+    'query_actor',
    'run_daemon',
    'stream',
-    'context',
-    'ReceiveMsgStream',
-    'MsgStream',
    'to_asyncio',
    'wait_for_actor',
 ]
--- a/tractor/_actor.py
+++ b/tractor/_actor.py
--- a/tractor/_child.py
+++ b/tractor/_child.py
@ -1,4 +1,22 @@
-"""This is the "bootloader" for actors started using the native trio backend.
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+"""
+This is the "bootloader" for actors started using the native trio backend.
+
 """
 import sys
 import trio
@ -6,7 +24,7 @@ import argparse

 from ast import literal_eval

-from ._actor import Actor
+from ._runtime import Actor
 from ._entry import _trio_main


@ -19,12 +37,15 @@ def parse_ipaddr(arg):
    return (str(host), int(port))


+from ._entry import _trio_main
+
 if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--uid", type=parse_uid)
    parser.add_argument("--loglevel", type=str)
    parser.add_argument("--parent_addr", type=parse_ipaddr)
+    parser.add_argument("--asyncio", action='store_true')
    args = parser.parse_args()

    subactor = Actor(
@ -36,5 +57,6 @@ if __name__ == "__main__":

    _trio_main(
        subactor,
-        parent_addr=args.parent_addr
-    )
+        parent_addr=args.parent_addr,
+        infect_asyncio=args.asyncio,
+    )
--- a/tractor/_clustering.py
+++ b/tractor/_clustering.py
@ -0,0 +1,74 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+Actor cluster helpers.
+
+'''
+from __future__ import annotations
+
+from contextlib import asynccontextmanager as acm
+from multiprocessing import cpu_count
+from typing import AsyncGenerator, Optional
+
+import trio
+import tractor
+
+
+@acm
+async def open_actor_cluster(
+    modules: list[str],
+    count: int = cpu_count(),
+    names: list[str] | None = None,
+    hard_kill: bool = False,
+
+    # passed through verbatim to ``open_root_actor()``
+    **runtime_kwargs,
+
+) -> AsyncGenerator[dict[str, tractor.Portal], None]:
+    '''
+    Spawn ``count`` subactors enabling ``modules`` and yield a table
+    mapping each (uid-prefixed) name to its portal; cancel them on exit.
+    Raises ``ValueError`` if ``len(names) != count``.
+
+    '''
+    portals: dict[str, tractor.Portal] = {}
+
+    if not names:
+        names = [f'worker_{i}' for i in range(count)]
+
+    if not len(names) == count:
+        raise ValueError(
+            f'Number of names is {len(names)} but count is {count}')
+
+    async with tractor.open_nursery(**runtime_kwargs) as an:
+        async with trio.open_nursery() as n:
+            uid = tractor.current_actor().uid
+
+            async def _start(name: str) -> None:
+                name = f'{uid[0]}.{name}'
+                portals[name] = await an.start_actor(
+                    enable_modules=modules,
+                    name=name,
+                )
+
+            for name in names:
+                n.start_soon(_start, name)
+
+        assert len(portals) == count
+        yield portals
+
+        await an.cancel(hard_kill=hard_kill)
--- a/tractor/_debug.py
+++ b/tractor/_debug.py
--- a/tractor/_discovery.py
+++ b/tractor/_discovery.py
@ -1,9 +1,29 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Actor discovery API.
+
 """
-import typing
-from typing import Tuple, Optional, Union
-from async_generator import asynccontextmanager
+from typing import (
+    Optional,
+    Union,
+    AsyncGenerator,
+)
+from contextlib import asynccontextmanager as acm

 from ._ipc import _connect_chan, Channel
 from ._portal import (
@ -14,13 +34,13 @@ from ._portal import (
 from ._state import current_actor, _runtime_vars


-@asynccontextmanager
+@acm
 async def get_arbiter(

    host: str,
    port: int,

-) -> typing.AsyncGenerator[Union[Portal, LocalPortal], None]:
+) -> AsyncGenerator[Union[Portal, LocalPortal], None]:
    '''Return a portal instance connected to a local or remote
    arbiter.
    '''
@ -41,10 +61,10 @@ async def get_arbiter(
                yield arb_portal


-@asynccontextmanager
+@acm
 async def get_root(
    **kwargs,
-) -> typing.AsyncGenerator[Union[Portal, LocalPortal], None]:
+) -> AsyncGenerator[Portal, None]:

    host, port = _runtime_vars['_root_mailbox']
    assert host is not None
@ -54,28 +74,56 @@ async def get_root(
            yield portal


-@asynccontextmanager
-async def find_actor(
+@acm
+async def query_actor(
    name: str,
-    arbiter_sockaddr: Tuple[str, int] = None
-) -> typing.AsyncGenerator[Optional[Portal], None]:
-    """Ask the arbiter to find actor(s) by name.
+    arbiter_sockaddr: Optional[tuple[str, int]] = None,

-    Returns a connected portal to the last registered matching actor
-    known to the arbiter.
-    """
+) -> AsyncGenerator[Optional[tuple[str, int]], None]:
+    '''
+    Simple address lookup for a given actor name.
+
+    Returns the (socket) address or ``None``.
+
+    '''
    actor = current_actor()
-    async with get_arbiter(*arbiter_sockaddr or actor._arb_addr) as arb_portal:
+    async with get_arbiter(
+        *arbiter_sockaddr or actor._arb_addr
+    ) as arb_portal:

-        sockaddr = await arb_portal.run_from_ns('self', 'find_actor', name=name)
+        sockaddr = await arb_portal.run_from_ns(
+            'self',
+            'find_actor',
+            name=name,
+        )

        # TODO: return portals to all available actors - for now just
        # the last one that registered
        if name == 'arbiter' and actor.is_arbiter:
            raise RuntimeError("The current actor is the arbiter")

-        elif sockaddr:
+        yield sockaddr if sockaddr else None

+
+@acm
+async def find_actor(
+    name: str,
+    arbiter_sockaddr: tuple[str, int] | None = None
+
+) -> AsyncGenerator[Optional[Portal], None]:
+    '''
+    Ask the arbiter to find actor(s) by name.
+
+    Returns a connected portal to the last registered matching actor
+    known to the arbiter.
+
+    '''
+    async with query_actor(
+        name=name,
+        arbiter_sockaddr=arbiter_sockaddr,
+    ) as sockaddr:
+
+        if sockaddr:
            async with _connect_chan(*sockaddr) as chan:
                async with open_portal(chan) as portal:
                    yield portal
@ -83,20 +131,25 @@ async def find_actor(
            yield None


-@asynccontextmanager
+@acm
 async def wait_for_actor(
    name: str,
-    arbiter_sockaddr: Tuple[str, int] = None
-) -> typing.AsyncGenerator[Portal, None]:
+    arbiter_sockaddr: tuple[str, int] | None = None
+) -> AsyncGenerator[Portal, None]:
    """Wait on an actor to register with the arbiter.

    A portal to the first registered actor is returned.
    """
    actor = current_actor()

-    async with get_arbiter(*arbiter_sockaddr or actor._arb_addr) as arb_portal:
-
-        sockaddrs = await arb_portal.run_from_ns('self', 'wait_for_actor', name=name)
+    async with get_arbiter(
+        *arbiter_sockaddr or actor._arb_addr,
+    ) as arb_portal:
+        sockaddrs = await arb_portal.run_from_ns(
+            'self',
+            'wait_for_actor',
+            name=name,
+        )
        sockaddr = sockaddrs[-1]

        async with _connect_chan(*sockaddr) as chan:
--- a/tractor/_entry.py
+++ b/tractor/_entry.py
@ -1,28 +1,64 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Sub-process entry points.
+
 """
+from __future__ import annotations
 from functools import partial
-from typing import Tuple, Any
-import signal
+from typing import (
+    Any,
+    TYPE_CHECKING,
+)

 import trio  # type: ignore

-from .log import get_console_log, get_logger
+from .log import (
+    get_console_log,
+    get_logger,
+)
 from . import _state
+from .to_asyncio import run_as_asyncio_guest
+from ._runtime import (
+    async_main,
+    Actor,
+)
+
+if TYPE_CHECKING:
+    from ._spawn import SpawnMethodKey


 log = get_logger(__name__)


 def _mp_main(
-    actor: 'Actor',  # type: ignore
-    accept_addr: Tuple[str, int],
-    forkserver_info: Tuple[Any, Any, Any, Any, Any],
-    start_method: str,
-    parent_addr: Tuple[str, int] = None,
+
+    actor: Actor,  # type: ignore
+    accept_addr: tuple[str, int],
+    forkserver_info: tuple[Any, Any, Any, Any, Any],
+    start_method: SpawnMethodKey,
+    parent_addr: tuple[str, int] | None = None,
+    infect_asyncio: bool = False,
+
 ) -> None:
-    """The routine called *after fork* which invokes a fresh ``trio.run``
-    """
+    '''
+    The routine called *after fork* which invokes a fresh ``trio.run``
+
+    '''
    actor._forkserver_info = forkserver_info
    from ._spawn import try_set_start_method
    spawn_ctx = try_set_start_method(start_method)
@ -40,12 +76,17 @@ def _mp_main(

    log.debug(f"parent_addr is {parent_addr}")
    trio_main = partial(
-        actor._async_main,
+        async_main,
+        actor,
        accept_addr,
        parent_addr=parent_addr
    )
    try:
-        trio.run(trio_main)
+        if infect_asyncio:
+            actor._infected_aio = True
+            run_as_asyncio_guest(trio_main)
+        else:
+            trio.run(trio_main)
    except KeyboardInterrupt:
        pass  # handle it the same way trio does?

@ -54,16 +95,17 @@ def _mp_main(


 def _trio_main(
-    actor: 'Actor',  # type: ignore
-    *,
-    parent_addr: Tuple[str, int] = None,
-) -> None:
-    """Entry point for a `trio_run_in_process` subactor.
-    """
-    # Disable sigint handling in children;
-    # we don't need it thanks to our cancellation machinery.
-    signal.signal(signal.SIGINT, signal.SIG_IGN)

+    actor: Actor,  # type: ignore
+    *,
+    parent_addr: tuple[str, int] | None = None,
+    infect_asyncio: bool = False,
+
+) -> None:
+    '''
+    Entry point for a `trio_run_in_process` subactor.
+
+    '''
    log.info(f"Started new trio process for {actor.uid}")

    if actor.loglevel is not None:
@ -78,12 +120,17 @@ def _trio_main(

    log.debug(f"parent_addr is {parent_addr}")
    trio_main = partial(
-        actor._async_main,
+        async_main,
+        actor,
        parent_addr=parent_addr
    )

    try:
-        trio.run(trio_main)
+        if infect_asyncio:
+            actor._infected_aio = True
+            run_as_asyncio_guest(trio_main)
+        else:
+            trio.run(trio_main)
    except KeyboardInterrupt:
        log.warning(f"Actor {actor.uid} received KBI")

--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@ -1,11 +1,33 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Our classy exception set.
+
 """
-from typing import Dict, Any, Optional, Type
+from typing import (
+    Any,
+    Optional,
+    Type,
+)
 import importlib
 import builtins
 import traceback

+import exceptiongroup as eg
 import trio


@ -31,9 +53,6 @@ class RemoteActorError(Exception):
        self.type = suberror_type
        self.msgdata = msgdata

-    # TODO: a trio.MultiError.catch like context manager
-    # for catching underlying remote errors of a particular type
-

 class InternalActorError(RemoteActorError):
    """Remote internal ``tractor`` error indicating
@ -61,11 +80,24 @@ class NoRuntime(RuntimeError):
    "The root actor has not been initialized yet"


+class StreamOverrun(trio.TooSlowError):
+    "This stream was overrun by sender"
+
+
+class AsyncioCancelled(Exception):
+    '''
+    Asyncio cancelled translation (non-base) error
+    for use with the ``to_asyncio`` module
+    to be raised in the ``trio`` side task
+
+    '''
+
+
 def pack_error(
    exc: BaseException,
    tb=None,

-) -> Dict[str, Any]:
+) -> dict[str, Any]:
    """Create an "error message" for transmission over
    a channel (aka the wire).
    """
@ -84,15 +116,17 @@ def pack_error(

 def unpack_error(

-    msg: Dict[str, Any],
+    msg: dict[str, Any],
    chan=None,
    err_type=RemoteActorError

 ) -> Exception:
-    """Unpack an 'error' message from the wire
+    '''
+    Unpack an 'error' message from the wire
    into a local ``RemoteActorError``.

-    """
+    '''
+    __tracebackhide__ = True
    error = msg['error']

    tb_str = error.get('tb_str', '')
@ -105,7 +139,12 @@ def unpack_error(
        suberror_type = trio.Cancelled

    else:  # try to lookup a suitable local error type
-        for ns in [builtins, _this_mod, trio]:
+        for ns in [
+            builtins,
+            _this_mod,
+            eg,
+            trio,
+        ]:
            try:
                suberror_type = getattr(ns, type_name)
                break
@ -124,12 +163,15 @@ def unpack_error(


 def is_multi_cancelled(exc: BaseException) -> bool:
-    """Predicate to determine if a ``trio.MultiError`` contains only
-    ``trio.Cancelled`` sub-exceptions (and is likely the result of
+    '''
+    Predicate to determine if a possible ``eg.BaseExceptionGroup`` contains
+    only ``trio.Cancelled`` sub-exceptions (and is likely the result of
    cancelling a collection of subtasks.

-    """
-    return not trio.MultiError.filter(
-        lambda exc: exc if not isinstance(exc, trio.Cancelled) else None,
-        exc,
-    )
+    '''
+    if isinstance(exc, eg.BaseExceptionGroup):
+        return exc.split(
+            lambda exc: isinstance(exc, trio.Cancelled)
+        )[1] is None  # no non-cancel remainder: every leaf was a cancel
+
+    return False
--- a/tractor/_forkserver_override.py
+++ b/tractor/_forkserver_override.py
@ -1,3 +1,19 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 This is near-copy of the 3.8 stdlib's ``multiprocessing.forkserver.py``
 with some hackery to prevent any more then a single forkserver and
--- a/tractor/_ipc.py
+++ b/tractor/_ipc.py
@ -1,13 +1,42 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Inter-process comms abstractions

 """
+from __future__ import annotations
 import platform
+import struct
 import typing
-from typing import Any, Tuple, Optional
-from functools import partial
+from collections.abc import (
+    AsyncGenerator,
+    AsyncIterator,
+)
+from typing import (
+    Any,
+    runtime_checkable,
+    Optional,
+    Protocol,
+    Type,
+    TypeVar,
+)

-import msgpack
+from tricycle import BufferedReceiveStream
+import msgspec
 import trio
 from async_generator import asynccontextmanager

@ -17,105 +46,195 @@ log = get_logger(__name__)


 _is_windows = platform.system() == 'Windows'
-
-# :eyeroll:
-try:
-    import msgpack_numpy
-    Unpacker = msgpack_numpy.Unpacker
-except ImportError:
-    # just plain ``msgpack`` requires tweaking key settings
-    Unpacker = partial(msgpack.Unpacker, strict_map_key=False)
+log = get_logger(__name__)


-class MsgpackTCPStream:
-    '''A ``trio.SocketStream`` delivering ``msgpack`` formatted data
-    using ``msgpack-python``.
+def get_stream_addrs(stream: trio.SocketStream) -> tuple:
+    # should both be IP sockets
+    lsockname = stream.socket.getsockname()
+    rsockname = stream.socket.getpeername()
+    return (
+        tuple(lsockname[:2]),
+        tuple(rsockname[:2]),
+    )
+
+
+MsgType = TypeVar("MsgType")
+
+# TODO: consider using a generic def and indexing with our eventual
+# msg definition/types?
+# - https://docs.python.org/3/library/typing.html#typing.Protocol
+# - https://jcristharif.com/msgspec/usage.html#structs
+
+
+@runtime_checkable
+class MsgTransport(Protocol[MsgType]):
+
+    stream: trio.SocketStream
+    drained: list[MsgType]
+
+    def __init__(self, stream: trio.SocketStream) -> None:
+        ...
+
+    # XXX: should this instead be called `.sendall()`?
+    async def send(self, msg: MsgType) -> None:
+        ...
+
+    async def recv(self) -> MsgType:
+        ...
+
+    def __aiter__(self) -> MsgType:
+        ...
+
+    def connected(self) -> bool:
+        ...
+
+    # defining this sync otherwise it causes a mypy error because it
+    # can't figure out it's a generator i guess?..?
+    def drain(self) -> AsyncIterator[dict]:
+        ...
+
+    @property
+    def laddr(self) -> tuple[str, int]:
+        ...
+
+    @property
+    def raddr(self) -> tuple[str, int]:
+        ...
+
+
+# TODO: not sure why we have to inherit here, but it seems to be an
+# issue with ``get_msg_transport()`` returning a ``Type[Protocol]``;
+# probably should make a `mypy` issue?
+class MsgpackTCPStream(MsgTransport):
+    '''
+    A ``trio.SocketStream`` delivering ``msgpack`` formatted data
+    using the ``msgspec`` codec lib.

    '''
    def __init__(
        self,
        stream: trio.SocketStream,
+        prefix_size: int = 4,

    ) -> None:

        self.stream = stream
        assert self.stream.socket
+
        # should both be IP sockets
-        lsockname = stream.socket.getsockname()
-        assert isinstance(lsockname, tuple)
-        self._laddr = lsockname[:2]
-        rsockname = stream.socket.getpeername()
-        assert isinstance(rsockname, tuple)
-        self._raddr = rsockname[:2]
+        self._laddr, self._raddr = get_stream_addrs(stream)

-        # start and seed first entry to read loop
+        # create read loop instance
        self._agen = self._iter_packets()
-        # self._agen.asend(None) is None
-
        self._send_lock = trio.StrictFIFOLock()

-    async def _iter_packets(self) -> typing.AsyncGenerator[dict, None]:
-        """Yield packets from the underlying stream.
-        """
-        unpacker = Unpacker(
-            raw=False,
-            use_list=False,
-        )
+        # public i guess?
+        self.drained: list[dict] = []
+
+        self.recv_stream = BufferedReceiveStream(transport_stream=stream)
+        self.prefix_size = prefix_size
+
+        # TODO: struct aware messaging coders
+        self.encode = msgspec.msgpack.Encoder().encode
+        self.decode = msgspec.msgpack.Decoder().decode  # dict[str, Any])
+
+    async def _iter_packets(self) -> AsyncGenerator[dict, None]:
+        '''Yield packets from the underlying stream.
+
+        '''
+        import msgspec  # noqa
+        decodes_failed: int = 0
+
        while True:
            try:
-                data = await self.stream.receive_some(2**10)
+                header = await self.recv_stream.receive_exactly(4)

-            except trio.BrokenResourceError as err:
-                msg = err.args[0]
+            except (
+                ValueError,
+                ConnectionResetError,

-                # XXX: handle connection-reset-by-peer the same as a EOF.
-                # we're currently remapping this since we allow
-                # a quick connect then drop for root actors when
-                # checking to see if there exists an "arbiter"
-                # on the chosen sockaddr (``_root.py:108`` or thereabouts)
-                if (
-                    # nix
-                    '[Errno 104]' in msg or
-
-                    # on windows it seems there are a variety of errors
-                    # to handle..
-                    _is_windows
-                ):
-                    raise TransportClosed(
-                        f'{self} was broken with {msg}'
-                    )
-
-                else:
-                    raise
-
-            log.transport(f"received {data}")  # type: ignore
-
-            if data == b'':
+                # not sure entirely why we need this but without it we
+                # seem to be getting racy failures here on
+                # arbiter/registry name subs..
+                trio.BrokenResourceError,
+            ):
                raise TransportClosed(
                    f'transport {self} was already closed prior ro read'
                )

-            unpacker.feed(data)
-            for packet in unpacker:
-                yield packet
+            if header == b'':
+                raise TransportClosed(
+                    f'transport {self} was already closed prior ro read'
+                )
+
+            size, = struct.unpack("<I", header)
+
+            log.transport(f'received header {size}')  # type: ignore
+
+            msg_bytes = await self.recv_stream.receive_exactly(size)
+
+            log.transport(f"received {msg_bytes}")  # type: ignore
+            try:
+                yield self.decode(msg_bytes)
+            except (
+                msgspec.DecodeError,
+                UnicodeDecodeError,
+            ):
+                if decodes_failed < 4:
+                    # ignore decoding errors for now and assume they have to
+                    # do with a channel drop - hope that receiving from the
+                    # channel will raise an expected error and bubble up.
+                    try:
+                        msg_str: str | bytes = msg_bytes.decode()
+                    except UnicodeDecodeError:
+                        msg_str = msg_bytes
+
+                    log.error(
+                        '`msgspec` failed to decode!?\n'
+                        'dumping bytes:\n'
+                        f'{msg_str!r}'
+                    )
+                    decodes_failed += 1
+                else:
+                    raise
+
+    async def send(self, msg: Any) -> None:
+        async with self._send_lock:
+
+            bytes_data: bytes = self.encode(msg)
+
+            # supposedly the fastest says,
+            # https://stackoverflow.com/a/54027962
+            size: bytes = struct.pack("<I", len(bytes_data))
+
+            return await self.stream.send_all(size + bytes_data)

    @property
-    def laddr(self) -> Tuple[Any, ...]:
+    def laddr(self) -> tuple[str, int]:
        return self._laddr

    @property
-    def raddr(self) -> Tuple[Any, ...]:
+    def raddr(self) -> tuple[str, int]:
        return self._raddr

-    # XXX: should this instead be called `.sendall()`?
-    async def send(self, data: Any) -> None:
-        async with self._send_lock:
-            return await self.stream.send_all(
-                msgpack.dumps(data, use_bin_type=True))
-
    async def recv(self) -> Any:
        return await self._agen.asend(None)

+    async def drain(self) -> AsyncIterator[dict]:
+        '''
+        Drain the stream's remaining messages sent from
+        the far end until the connection is closed by
+        the peer.
+
+        '''
+        try:
+            async for msg in self._iter_packets():
+                self.drained.append(msg)
+        except TransportClosed:
+            for msg in self.drained:
+                yield msg
+
    def __aiter__(self):
        return self._agen

@ -123,35 +242,87 @@ class MsgpackTCPStream:
        return self.stream.socket.fileno() != -1


-class Channel:
-    """An inter-process channel for communication between (remote) actors.
+def get_msg_transport(

-    Currently the only supported transport is a ``trio.SocketStream``.
-    """
+    key: tuple[str, str],
+
+) -> Type[MsgTransport]:
+
+    return {
+        ('msgpack', 'tcp'): MsgpackTCPStream,
+    }[key]
+
+
+class Channel:
+    '''
+    An inter-process channel for communication between (remote) actors.
+
+    Wraps a ``MsgStream``: transport + encoding IPC connection.
+
+    Currently we only support ``trio.SocketStream`` for transport
+    (aka TCP) and the ``msgpack`` interchange format via the ``msgspec``
+    codec library.
+
+    '''
    def __init__(
+
        self,
-        destaddr: Optional[Tuple[str, int]] = None,
-        on_reconnect: typing.Callable[..., typing.Awaitable] = None,
-        auto_reconnect: bool = False,
-        stream: trio.SocketStream = None,  # expected to be active
+        destaddr: Optional[tuple[str, int]],
+
+        msg_transport_type_key: tuple[str, str] = ('msgpack', 'tcp'),
+
+        # TODO: optional reconnection support?
+        # auto_reconnect: bool = False,
+        # on_reconnect: typing.Callable[..., typing.Awaitable] = None,

    ) -> None:
-        self._recon_seq = on_reconnect
-        self._autorecon = auto_reconnect
-        self.msgstream: Optional[MsgpackTCPStream] = MsgpackTCPStream(
-            stream) if stream else None
-        if self.msgstream and destaddr:
-            raise ValueError(
-                f"A stream was provided with local addr {self.laddr}"
-            )
-        self._destaddr = self.msgstream.raddr if self.msgstream else destaddr
-        # set after handshake - always uid of far end
-        self.uid: Optional[Tuple[str, str]] = None
-        # set if far end actor errors internally
-        self._exc: Optional[Exception] = None
-        self._agen = self._aiter_recv()

+        # self._recon_seq = on_reconnect
+        # self._autorecon = auto_reconnect
+
+        self._destaddr = destaddr
+        self._transport_key = msg_transport_type_key
+
+        # Either created in ``.connect()`` or passed in by
+        # user in ``.from_stream()``.
+        self._stream: Optional[trio.SocketStream] = None
+        self.msgstream: Optional[MsgTransport] = None
+
+        # set after handshake - always uid of far end
+        self.uid: Optional[tuple[str, str]] = None
+
+        self._agen = self._aiter_recv()
+        self._exc: Optional[Exception] = None  # set if far end actor errors
        self._closed: bool = False
+        # flag set on ``Portal.cancel_actor()`` indicating
+        # remote (peer) cancellation of the far end actor runtime.
+        self._cancel_called: bool = False  # set on ``Portal.cancel_actor()``
+
+    @classmethod
+    def from_stream(
+        cls,
+        stream: trio.SocketStream,
+        **kwargs,
+
+    ) -> Channel:
+        '''Alternate constructor: wrap an existing ``stream`` in a channel.'''
+        # use ``cls`` (not ``Channel``) so subclasses build their own type
+        _, dst = get_stream_addrs(stream)
+        chan = cls(destaddr=dst, **kwargs)
+        # set immediately here from provided instance
+        chan._stream = stream
+        chan.set_msg_transport(stream)
+        return chan
+
+    def set_msg_transport(
+        self,
+        stream: trio.SocketStream,
+        type_key: Optional[tuple[str, str]] = None,
+
+    ) -> MsgTransport:
+        type_key = type_key or self._transport_key
+        self.msgstream = get_msg_transport(type_key)(stream)
+        return self.msgstream

    def __repr__(self) -> str:
        if self.msgstream:
@ -161,20 +332,19 @@ class Channel:
        return object.__repr__(self)

    @property
-    def laddr(self) -> Optional[Tuple[Any, ...]]:
+    def laddr(self) -> Optional[tuple[str, int]]:
        return self.msgstream.laddr if self.msgstream else None

    @property
-    def raddr(self) -> Optional[Tuple[Any, ...]]:
+    def raddr(self) -> Optional[tuple[str, int]]:
        return self.msgstream.raddr if self.msgstream else None

    async def connect(
-
        self,
-        destaddr: Tuple[Any, ...] = None,
+        destaddr: tuple[Any, ...] | None = None,
        **kwargs

-    ) -> trio.SocketStream:
+    ) -> MsgTransport:

        if self.connected():
            raise RuntimeError("channel is already connected?")
@ -186,12 +356,12 @@ class Channel:
            *destaddr,
            **kwargs
        )
-        self.msgstream = MsgpackTCPStream(stream)
+        msgstream = self.set_msg_transport(stream)

        log.transport(
-            f'Opened channel to peer {self.laddr} -> {self.raddr}'
+            f'Opened channel[{type(msgstream)}]: {self.laddr} -> {self.raddr}'
        )
-        return stream
+        return msgstream

    async def send(self, item: Any) -> None:

@ -202,16 +372,15 @@ class Channel:

    async def recv(self) -> Any:
        assert self.msgstream
+        return await self.msgstream.recv()

-        try:
-            return await self.msgstream.recv()
-
-        except trio.BrokenResourceError:
-            if self._autorecon:
-                await self._reconnect()
-                return await self.recv()
-
-            raise
+        # try:
+        #     return await self.msgstream.recv()
+        # except trio.BrokenResourceError:
+        #     if self._autorecon:
+        #         await self._reconnect()
+        #         return await self.recv()
+        #     raise

    async def aclose(self) -> None:

@ -233,40 +402,44 @@ class Channel:
    def __aiter__(self):
        return self._agen

-    async def _reconnect(self) -> None:
-        """Handle connection failures by polling until a reconnect can be
-        established.
-        """
-        down = False
-        while True:
-            try:
-                with trio.move_on_after(3) as cancel_scope:
-                    await self.connect()
-                cancelled = cancel_scope.cancelled_caught
-                if cancelled:
-                    log.transport(
-                        "Reconnect timed out after 3 seconds, retrying...")
-                    continue
-                else:
-                    log.transport("Stream connection re-established!")
-                    # run any reconnection sequence
-                    on_recon = self._recon_seq
-                    if on_recon:
-                        await on_recon(self)
-                    break
-            except (OSError, ConnectionRefusedError):
-                if not down:
-                    down = True
-                    log.transport(
-                        f"Connection to {self.raddr} went down, waiting"
-                        " for re-establishment")
-                await trio.sleep(1)
+    # async def _reconnect(self) -> None:
+    #     """Handle connection failures by polling until a reconnect can be
+    #     established.
+    #     """
+    #     down = False
+    #     while True:
+    #         try:
+    #             with trio.move_on_after(3) as cancel_scope:
+    #                 await self.connect()
+    #             cancelled = cancel_scope.cancelled_caught
+    #             if cancelled:
+    #                 log.transport(
+    #                     "Reconnect timed out after 3 seconds, retrying...")
+    #                 continue
+    #             else:
+    #                 log.transport("Stream connection re-established!")
+
+    #                 # TODO: run any reconnection sequence
+    #                 # on_recon = self._recon_seq
+    #                 # if on_recon:
+    #                 #     await on_recon(self)
+
+    #                 break
+    #         except (OSError, ConnectionRefusedError):
+    #             if not down:
+    #                 down = True
+    #                 log.transport(
+    #                     f"Connection to {self.raddr} went down, waiting"
+    #                     " for re-establishment")
+    #             await trio.sleep(1)

    async def _aiter_recv(
        self
-    ) -> typing.AsyncGenerator[Any, None]:
-        """Async iterate items from underlying stream.
-        """
+    ) -> AsyncGenerator[Any, None]:
+        '''
+        Async iterate items from underlying stream.
+
+        '''
        assert self.msgstream
        while True:
            try:
@ -279,16 +452,14 @@ class Channel:
                    #     await self.msgstream.send(sent)
            except trio.BrokenResourceError:

-                if not self._autorecon:
-                    raise
+                # if not self._autorecon:
+                raise

            await self.aclose()

-            if self._autorecon:  # attempt reconnect
-                await self._reconnect()
-                continue
-            else:
-                return
+            # if self._autorecon:  # attempt reconnect
+            #     await self._reconnect()
+            #     continue

    def connected(self) -> bool:
        return self.msgstream.connected() if self.msgstream else False
@ -298,9 +469,11 @@ class Channel:
 async def _connect_chan(
    host: str, port: int
 ) -> typing.AsyncGenerator[Channel, None]:
-    """Create and connect a channel with disconnect on context manager
+    '''
+    Create and connect a channel with disconnect on context manager
    teardown.
-    """
+
+    '''
    chan = Channel((host, port))
    await chan.connect()
    yield chan
--- a/tractor/_mp_fixup_main.py
+++ b/tractor/_mp_fixup_main.py
@ -1,23 +1,39 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Helpers pulled mostly verbatim from ``multiprocessing.spawn``
 to aid with "fixing up" the ``__main__`` module in subprocesses.

-These helpers are needed for any spawing backend that doesn't already handle this.
-For example when using ``trio_run_in_process`` it is needed but obviously not when
-we're already using ``multiprocessing``.
+These helpers are needed for any spawning backend that doesn't already
+handle this. For example when using ``trio_run_in_process`` it is needed
+but obviously not when we're already using ``multiprocessing``.
+
 """
 import os
 import sys
 import platform
 import types
 import runpy
-from typing import Dict


 ORIGINAL_DIR = os.path.abspath(os.getcwd())


-def _mp_figure_out_main() -> Dict[str, str]:
+def _mp_figure_out_main() -> dict[str, str]:
    """Taken from ``multiprocessing.spawn.get_preparation_data()``.

    Retrieve parent actor `__main__` module data.
--- a/tractor/_portal.py
+++ b/tractor/_portal.py
@ -1,60 +1,80 @@
-"""
-Portal api
-"""
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+Memory boundary "Portals": an API for structured
+concurrency linked tasks running in disparate memory domains.
+
+'''
+from __future__ import annotations
 import importlib
 import inspect
 from typing import (
-    Tuple, Any, Dict, Optional, Set,
-    Callable, AsyncGenerator
+    Any, Optional,
+    Callable, AsyncGenerator,
+    Type,
 )
 from functools import partial
 from dataclasses import dataclass
+from pprint import pformat
 import warnings

 import trio
 from async_generator import asynccontextmanager

+from .trionics import maybe_open_nursery
 from ._state import current_actor
 from ._ipc import Channel
 from .log import get_logger
+from .msg import NamespacePath
 from ._exceptions import (
    unpack_error,
    NoResult,
-    RemoteActorError,
    ContextCancelled,
 )
-from ._streaming import Context, ReceiveMsgStream
+from ._streaming import (
+    Context,
+    MsgStream,
+)


 log = get_logger(__name__)


-@asynccontextmanager
-async def maybe_open_nursery(
-    nursery: trio.Nursery = None,
-    shield: bool = False,
-) -> AsyncGenerator[trio.Nursery, Any]:
-    """Create a new nursery if None provided.
+def _unwrap_msg(
+    msg: dict[str, Any],
+    channel: Channel

-    Blocks on exit as expected if no input nursery is provided.
-    """
-    if nursery is not None:
-        yield nursery
-    else:
-        async with trio.open_nursery() as nursery:
-            nursery.cancel_scope.shield = shield
-            yield nursery
+) -> Any:
+    __tracebackhide__ = True
+    try:
+        return msg['return']
+    except KeyError:
+        # internal error should never get here
+        assert msg.get('cid'), "Received internal error at portal?"
+        raise unpack_error(msg, channel) from None


-def func_deats(func: Callable) -> Tuple[str, str]:
-    return (
-        func.__module__,
-        func.__name__,
-    )
+class MessagingError(Exception):
+    'Some kind of unexpected SC messaging dialog issue'


 class Portal:
-    """A 'portal' to a(n) (remote) ``Actor``.
+    '''
+    A 'portal' to a(n) (remote) ``Actor``.

    A portal is "opened" (and eventually closed) by one side of an
    inter-actor communication context. The side which opens the portal
@ -70,77 +90,56 @@ class Portal:
    function calling semantics are supported transparently; hence it is
    like having a "portal" between the seperate actor memory spaces.

-    """
+    '''
+    # the timeout for a remote cancel request sent to
+    # a(n) (peer) actor.
+    cancel_timeout = 0.5
+
    def __init__(self, channel: Channel) -> None:
        self.channel = channel
-        # when this is set to a tuple returned from ``_submit()`` then
-        # it is expected that ``result()`` will be awaited at some point
        # during the portal's lifetime
-        self._result: Optional[Any] = None
-        # set when _submit_for_result is called
-        self._expect_result: Optional[
-            Tuple[str, Any, str, Dict[str, Any]]
-        ] = None
-        self._streams: Set[ReceiveMsgStream] = set()
+        self._result_msg: Optional[dict] = None
+
+        # When set to a ``Context`` (when _submit_for_result is called)
+        # it is expected that ``result()`` will be awaited at some
+        # point.
+        self._expect_result: Optional[Context] = None
+        self._streams: set[MsgStream] = set()
        self.actor = current_actor()

-    async def _submit(
+    async def _submit_for_result(
        self,
        ns: str,
        func: str,
-        kwargs,
-    ) -> Tuple[str, trio.MemoryReceiveChannel, str, Dict[str, Any]]:
-        """Submit a function to be scheduled and run by actor, return the
-        associated caller id, response queue, response type str,
-        first message packet as a tuple.
-
-        This is an async call.
-        """
-        # ship a function call request to the remote actor
-        cid, recv_chan = await self.actor.send_cmd(
-            self.channel, ns, func, kwargs)
-
-        # wait on first response msg and handle (this should be
-        # in an immediate response)
-
-        first_msg = await recv_chan.receive()
-        functype = first_msg.get('functype')
-
-        if 'error' in first_msg:
-            raise unpack_error(first_msg, self.channel)
-
-        elif functype not in ('asyncfunc', 'asyncgen', 'context'):
-            raise ValueError(f"{first_msg} is an invalid response packet?")
-
-        return cid, recv_chan, functype, first_msg
-
-    async def _submit_for_result(self, ns: str, func: str, **kwargs) -> None:
+        **kwargs
+    ) -> None:

        assert self._expect_result is None, \
                "A pending main result has already been submitted"

-        self._expect_result = await self._submit(ns, func, kwargs)
+        self._expect_result = await self.actor.start_remote_task(
+            self.channel,
+            ns,
+            func,
+            kwargs
+        )

    async def _return_once(
        self,
-        cid: str,
-        recv_chan: trio.abc.ReceiveChannel,
-        resptype: str,
-        first_msg: dict
-    ) -> Any:
-        assert resptype == 'asyncfunc'  # single response
+        ctx: Context,

-        msg = await recv_chan.receive()
-        try:
-            return msg['return']
-        except KeyError:
-            # internal error should never get here
-            assert msg.get('cid'), "Received internal error at portal?"
-            raise unpack_error(msg, self.channel)
+    ) -> dict[str, Any]:
+
+        assert ctx._remote_func_type == 'asyncfunc'  # single response
+        msg = await ctx._recv_chan.receive()
+        return msg

    async def result(self) -> Any:
-        """Return the result(s) from the remote actor's "main" task.
-        """
+        '''
+        Return the result(s) from the remote actor's "main" task.
+
+        '''
+        # __tracebackhide__ = True
        # Check for non-rpc errors slapped on the
        # channel for which we always raise
        exc = self.channel._exc
@ -157,27 +156,22 @@ class Portal:

        # expecting a "main" result
        assert self._expect_result
-        if self._result is None:
-            try:
-                self._result = await self._return_once(*self._expect_result)
-            except RemoteActorError as err:
-                self._result = err

-        # re-raise error on every call
-        if isinstance(self._result, RemoteActorError):
-            raise self._result
+        if self._result_msg is None:
+            self._result_msg = await self._return_once(
+                self._expect_result
+            )

-        return self._result
+        return _unwrap_msg(self._result_msg, self.channel)

    async def _cancel_streams(self):
        # terminate all locally running async generator
        # IPC calls
        if self._streams:
-            log.warning(
+            log.cancel(
                f"Cancelling all streams with {self.channel.uid}")
            for stream in self._streams.copy():
                try:
-                    # with trio.CancelScope(shield=True):
                    await stream.aclose()
                except trio.ClosedResourceError:
                    # don't error the stream having already been closed
@ -193,38 +187,47 @@ class Portal:
        # we'll need to .aclose all those channels here
        await self._cancel_streams()

-    async def cancel_actor(self):
-        """Cancel the actor on the other end of this portal.
-        """
+    async def cancel_actor(
+        self,
+        timeout: float | None = None,
+
+    ) -> bool:
+        '''
+        Cancel the actor on the other end of this portal.
+
+        '''
        if not self.channel.connected():
-            log.warning("This portal is already closed can't cancel")
+            log.cancel("This channel is already closed can't cancel")
            return False

-        await self._cancel_streams()
-
-        log.warning(
+        log.cancel(
            f"Sending actor cancel request to {self.channel.uid} on "
            f"{self.channel}")
+
+        self.channel._cancel_called = True
+
        try:
            # send cancel cmd - might not get response
            # XXX: sure would be nice to make this work with a proper shield
-            # with trio.CancelScope() as cancel_scope:
-            # with trio.CancelScope(shield=True) as cancel_scope:
-            with trio.move_on_after(0.5) as cancel_scope:
-                cancel_scope.shield = True
+            with trio.move_on_after(timeout or self.cancel_timeout) as cs:
+                cs.shield = True

                await self.run_from_ns('self', 'cancel')
                return True

-            if cancel_scope.cancelled_caught:
-                log.warning(f"May have failed to cancel {self.channel.uid}")
+            if cs.cancelled_caught:
+                log.cancel(f"May have failed to cancel {self.channel.uid}")

            # if we get here some weird cancellation case happened
            return False

-        except trio.ClosedResourceError:
-            log.warning(
-                f"{self.channel} for {self.channel.uid} was already closed?")
+        except (
+            trio.ClosedResourceError,
+            trio.BrokenResourceError,
+        ):
+            log.cancel(
+                f"{self.channel} for {self.channel.uid} was already "
+                "closed or broken?")
            return False

    async def run_from_ns(
@ -233,7 +236,9 @@ class Portal:
        function_name: str,
        **kwargs,
    ) -> Any:
-        """Run a function from a (remote) namespace in a new task on the far-end actor.
+        '''
+        Run a function from a (remote) namespace in a new task on the
+        far-end actor.

        This is a more explitcit way to run tasks in a remote-process
        actor using explicit object-path syntax. Hint: this is how
@ -242,12 +247,20 @@ class Portal:
        Note::

            A special namespace `self` can be used to invoke `Actor`
-            instance methods in the remote runtime. Currently this should only
-            be used for `tractor` internals.
-        """
-        return await self._return_once(
-            *(await self._submit(namespace_path, function_name, kwargs))
+            instance methods in the remote runtime. Currently this
+            should be used solely for ``tractor`` runtime
+            internals.
+
+        '''
+        ctx = await self.actor.start_remote_task(
+            self.channel,
+            namespace_path,
+            function_name,
+            kwargs,
        )
+        ctx._portal = self
+        msg = await self._return_once(ctx)
+        return _unwrap_msg(msg, self.channel)

    async def run(
        self,
@ -255,12 +268,14 @@ class Portal:
        fn_name: Optional[str] = None,
        **kwargs
    ) -> Any:
-        """Submit a remote function to be scheduled and run by actor, in
+        '''
+        Submit a remote function to be scheduled and run by actor, in
        a new task, wrap and return its (stream of) result(s).

        This is a blocking call and returns either a value from the
        remote rpc task or a local async generator instance.
-        """
+
+        '''
        if isinstance(func, str):
            warnings.warn(
                "`Portal.run(namespace: str, funcname: str)` is now"
@ -284,10 +299,18 @@ class Portal:
                raise TypeError(
                    f'{func} must be a non-streaming async function!')

-            fn_mod_path, fn_name = func_deats(func)
+            fn_mod_path, fn_name = NamespacePath.from_ref(func).to_tuple()

-        return await self._return_once(
-            *(await self._submit(fn_mod_path, fn_name, kwargs))
+        ctx = await self.actor.start_remote_task(
+            self.channel,
+            fn_mod_path,
+            fn_name,
+            kwargs,
+        )
+        ctx._portal = self
+        return _unwrap_msg(
+            await self._return_once(ctx),
+            self.channel,
        )

    @asynccontextmanager
@ -296,7 +319,7 @@ class Portal:
        async_gen_func: Callable,  # typing: ignore
        **kwargs,

-    ) -> AsyncGenerator[ReceiveMsgStream, None]:
+    ) -> AsyncGenerator[MsgStream, None]:

        if not inspect.isasyncgenfunction(async_gen_func):
            if not (
@ -306,21 +329,24 @@ class Portal:
                raise TypeError(
                    f'{async_gen_func} must be an async generator function!')

-        fn_mod_path, fn_name = func_deats(async_gen_func)
-        (
-            cid,
-            recv_chan,
-            functype,
-            first_msg
-        ) = await self._submit(fn_mod_path, fn_name, kwargs)
+        fn_mod_path, fn_name = NamespacePath.from_ref(
+            async_gen_func).to_tuple()
+        ctx = await self.actor.start_remote_task(
+            self.channel,
+            fn_mod_path,
+            fn_name,
+            kwargs
+        )
+        ctx._portal = self

-        # receive only stream
-        assert functype == 'asyncgen'
+        # ensure receive-only stream entrypoint
+        assert ctx._remote_func_type == 'asyncgen'

-        ctx = Context(self.channel, cid, _portal=self)
        try:
            # deliver receive only stream
-            async with ReceiveMsgStream(ctx, recv_chan) as rchan:
+            async with MsgStream(
+                ctx, ctx._recv_chan,
+            ) as rchan:
                self._streams.add(rchan)
                yield rchan

@ -334,13 +360,16 @@ class Portal:
            # message right now since there shouldn't be a reason to
            # stop and restart the stream, right?
            try:
-                await ctx.cancel()
+                with trio.CancelScope(shield=True):
+                    await ctx.cancel()

            except trio.ClosedResourceError:
                # if the far end terminates before we send a cancel the
                # underlying transport-channel may already be closed.
-                log.debug(f'Context {ctx} was already closed?')
+                log.cancel(f'Context {ctx} was already closed?')

+            # XXX: should this always be done?
+            # await recv_chan.aclose()
            self._streams.remove(rchan)

    @asynccontextmanager
@ -350,8 +379,9 @@ class Portal:
        func: Callable,
        **kwargs,

-    ) -> AsyncGenerator[Tuple[Context, Any], None]:
-        '''Open an inter-actor task context.
+    ) -> AsyncGenerator[tuple[Context, Any], None]:
+        '''
+        Open an inter-actor task context.

        This is a synchronous API which allows for deterministic
        setup/teardown of a remote task. The yielded ``Context`` further
@ -359,7 +389,6 @@ class Portal:
        and synchronized final result collection. See ``tractor.Context``.

        '''
-
        # conduct target func method structural checks
        if not inspect.iscoroutinefunction(func) and (
            getattr(func, '_tractor_contex_function', False)
@ -367,46 +396,51 @@ class Portal:
            raise TypeError(
                f'{func} must be an async generator function!')

-        fn_mod_path, fn_name = func_deats(func)
+        fn_mod_path, fn_name = NamespacePath.from_ref(func).to_tuple()

-        recv_chan: Optional[trio.MemoryReceiveChannel] = None
+        ctx = await self.actor.start_remote_task(
+            self.channel,
+            fn_mod_path,
+            fn_name,
+            kwargs
+        )

-        cid, recv_chan, functype, first_msg = await self._submit(
-            fn_mod_path, fn_name, kwargs)
-
-        assert functype == 'context'
-        msg = await recv_chan.receive()
+        assert ctx._remote_func_type == 'context'
+        msg = await ctx._recv_chan.receive()

        try:
            # the "first" value here is delivered by the callee's
            # ``Context.started()`` call.
            first = msg['started']
+            ctx._started_called = True

        except KeyError:
            assert msg.get('cid'), ("Received internal error at context?")

            if msg.get('error'):
-                # raise the error message
-                raise unpack_error(msg, self.channel)
+                # raise kerr from unpack_error(msg, self.channel)
+                raise unpack_error(msg, self.channel) from None
            else:
-                raise
+                raise MessagingError(
+                    f'Context for {ctx.cid} was expecting a `started` message'
+                    f' but received a non-error msg:\n{pformat(msg)}'
+                )

        _err: Optional[BaseException] = None
+        ctx._portal = self
+
+        uid = self.channel.uid
+        cid = ctx.cid
+        etype: Optional[Type[BaseException]] = None
+
        # deliver context instance and .started() msg value in open tuple.
        try:
            async with trio.open_nursery() as scope_nursery:
-                ctx = Context(
-                    self.channel,
-                    cid,
-                    _portal=self,
-                    _recv_chan=recv_chan,
-                    _scope_nursery=scope_nursery,
-                )
-
-                # pairs with handling in ``Actor._push_result()``
-                # recv_chan._ctx = ctx
+                ctx._scope_nursery = scope_nursery

+                # do we need this?
                # await trio.lowlevel.checkpoint()
+
                yield ctx, first

        except ContextCancelled as err:
@ -424,59 +458,109 @@ class Portal:
                log.debug(f'Context {ctx} cancelled gracefully')

        except (
-            trio.Cancelled,
-            trio.MultiError,
-            Exception,
+            BaseException,
+
+            # more specifically, we need to handle these but not
+            # sure it's worth being pedantic:
+            # Exception,
+            # trio.Cancelled,
+            # KeyboardInterrupt,
+
        ) as err:
-            _err = err
+            etype = type(err)
            # the context cancels itself on any cancel
            # causing error.
-            log.error(f'Context {ctx} sending cancel to far end')
-            with trio.CancelScope(shield=True):
+
+            if ctx.chan.connected():
+                log.cancel(
+                    'Context cancelled for task, sending cancel request..\n'
+                    f'task:{cid}\n'
+                    f'actor:{uid}'
+                )
                await ctx.cancel()
+            else:
+                log.warning(
+                    'IPC connection for context is broken?\n'
+                    f'task:{cid}\n'
+                    f'actor:{uid}'
+                )
+
            raise

        finally:
-            result = await ctx.result()
+            # in the case where a runtime nursery (due to internal bug)
+            # or a remote actor transmits an error we want to be
+            # sure we get the error from the underlying feeder mem chan.
+            # if it's not raised here it *should* be raised from the
+            # msg loop nursery right?
+            if ctx.chan.connected():
+                log.info(
+                    'Waiting on final context-task result for\n'
+                    f'task: {cid}\n'
+                    f'actor: {uid}'
+                )
+                result = await ctx.result()
+                log.runtime(
+                    f'Context {fn_name} returned '
+                    f'value from callee `{result}`'
+                )

            # though it should be impossible for any tasks
            # operating *in* this scope to have survived
            # we tear down the runtime feeder chan last
            # to avoid premature stream clobbers.
-            if recv_chan is not None:
-                await recv_chan.aclose()
+            if ctx._recv_chan is not None:
+                # should we encapsulate this in the context api?
+                await ctx._recv_chan.aclose()

-            if _err:
+            if etype:
                if ctx._cancel_called:
-                    log.warning(
-                        f'Context {fn_name} cancelled by caller with\n{_err}'
+                    log.cancel(
+                        f'Context {fn_name} cancelled by caller with\n{etype}'
                    )
                elif _err is not None:
-                    log.warning(
-                        f'Context {fn_name} cancelled by callee with\n{_err}'
+                    log.cancel(
+                        f'Context for task cancelled by callee with {etype}\n'
+                        f'target: `{fn_name}`\n'
+                        f'task:{cid}\n'
+                        f'actor:{uid}'
                    )
-            else:
-                log.info(
-                    f'Context {fn_name} returned '
-                    f'value from callee `{result}`'
-                )
+            # XXX: (MEGA IMPORTANT) if this is a root opened process we
+            # wait for any immediate child in debug before popping the
+            # context from the runtime msg loop otherwise inside
+            # ``Actor._push_result()`` the msg will be discarded and in
+            # the case where that msg is global debugger unlock (via
+            # a "stop" msg for a stream), this can result in a deadlock
+            # where the root is waiting on the lock to clear but the
+            # child has already cleared it and clobbered IPC.
+            from ._debug import maybe_wait_for_debugger
+            await maybe_wait_for_debugger()
+
+            # remove the context from runtime tracking
+            self.actor._contexts.pop(
+                (self.channel.uid, ctx.cid),
+                None,
+            )


@dataclass
 class LocalPortal:
-    """A 'portal' to a local ``Actor``.
+    '''
+    A 'portal' to a local ``Actor``.

    A compatibility shim for normal portals but for invoking functions
    using an in process actor instance.
-    """
+
+    '''
    actor: 'Actor'  # type: ignore # noqa
    channel: Channel

    async def run_from_ns(self, ns: str, func_name: str, **kwargs) -> Any:
-        """Run a requested local function from a namespace path and
+        '''
+        Run a requested local function from a namespace path and
        return it's result.

-        """
+        '''
        obj = self.actor if ns == 'self' else importlib.import_module(ns)
        func = getattr(obj, func_name)
        return await func(**kwargs)
@ -491,10 +575,13 @@ async def open_portal(
    shield: bool = False,

 ) -> AsyncGenerator[Portal, None]:
-    """Open a ``Portal`` through the provided ``channel``.
+    '''
+    Open a ``Portal`` through the provided ``channel``.

-    Spawns a background task to handle message processing.
-    """
+    Spawns a background task to handle message processing (normally
+    done by the actor-runtime implicitly).
+
+    '''
    actor = current_actor()
    assert actor
    was_connected = False
@ -510,9 +597,11 @@ async def open_portal(

        msg_loop_cs: Optional[trio.CancelScope] = None
        if start_msg_loop:
+            from ._runtime import process_messages
            msg_loop_cs = await nursery.start(
                partial(
-                    actor._process_messages,
+                    process_messages,
+                    actor,
                    channel,
                    # if the local task is cancelled we want to keep
                    # the msg loop running until our block ends
@ -522,7 +611,6 @@ async def open_portal(
        portal = Portal(channel)
        try:
            yield portal
-
        finally:
            await portal.aclose()

--- a/tractor/_root.py
+++ b/tractor/_root.py
@ -1,27 +1,53 @@
-"""
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
 Root actor runtime ignition(s).
-"""
+
+'''
 from contextlib import asynccontextmanager
 from functools import partial
 import importlib
+import logging
+import signal
+import sys
 import os
-from typing import Tuple, Optional, List, Any
 import typing
 import warnings

+
+from exceptiongroup import BaseExceptionGroup
 import trio

-from ._actor import Actor, Arbiter
+from ._runtime import (
+    Actor,
+    Arbiter,
+    async_main,
+)
 from . import _debug
 from . import _spawn
 from . import _state
 from . import log
 from ._ipc import _connect_chan
+from ._exceptions import is_multi_cancelled


 # set at startup and after forks
-_default_arbiter_host = '127.0.0.1'
-_default_arbiter_port = 1616
+_default_arbiter_host: str = '127.0.0.1'
+_default_arbiter_port: int = 1616


 logger = log.get_logger('tractor')
@ -30,37 +56,45 @@ logger = log.get_logger('tractor')
@asynccontextmanager
 async def open_root_actor(

+    *,
    # defaults are above
-    arbiter_addr: Tuple[str, int] = (
-        _default_arbiter_host,
-        _default_arbiter_port,
-    ),
+    arbiter_addr: tuple[str, int] | None = None,

-    name: Optional[str] = 'root',
+    # defaults are above
+    registry_addr: tuple[str, int] | None = None,
+
+    name: str | None = 'root',

    # either the `multiprocessing` start method:
    # https://docs.python.org/3/library/multiprocessing.html#contexts-and-start-methods
    # OR `trio` (the new default).
-    start_method: Optional[str] = None,
+    start_method: _spawn.SpawnMethodKey | None = None,

    # enables the multi-process debugger support
    debug_mode: bool = False,

    # internal logging
-    loglevel: Optional[str] = None,
+    loglevel: str | None = None,

-    enable_modules: Optional[List] = None,
-    rpc_module_paths: Optional[List] = None,
+    enable_modules: list | None = None,
+    rpc_module_paths: list | None = None,

 ) -> typing.Any:
-    """Async entry point for ``tractor``.
+    '''
+    Runtime init entry point for ``tractor``.

-    """
+    '''
    # Override the global debugger hook to make it play nice with
-    # ``trio``, see:
+    # ``trio``, see much discussion in:
    # https://github.com/python-trio/trio/issues/1155#issuecomment-742964018
+    builtin_bp_handler = sys.breakpointhook
+    orig_bp_path: str | None = os.environ.get('PYTHONBREAKPOINT', None)
    os.environ['PYTHONBREAKPOINT'] = 'tractor._debug._set_trace'

+    # attempt to retrieve ``trio``'s sigint handler and stash it
+    # on our debugger lock state.
+    _debug.Lock._trio_handler = signal.getsignal(signal.SIGINT)
+
    # mark top most level process as root actor
    _state._runtime_vars['_is_root'] = True

@ -79,6 +113,25 @@ async def open_root_actor(
    if start_method is not None:
        _spawn.try_set_start_method(start_method)

+    if arbiter_addr is not None:
+        warnings.warn(
+            '`arbiter_addr` is now deprecated and has been renamed to'
+            '`registry_addr`.\nUse that instead..',
+            DeprecationWarning,
+            stacklevel=2,
+        )
+
+    registry_addr = (host, port) = (
+        registry_addr
+        or arbiter_addr
+        or (
+            _default_arbiter_host,
+            _default_arbiter_port,
+        )
+    )
+
+    loglevel = (loglevel or log._default_loglevel).upper()
+
    if debug_mode and _spawn._spawn_method == 'trio':
        _state._runtime_vars['_debug_mode'] = True

@ -86,38 +139,41 @@ async def open_root_actor(
        # for use of ``await tractor.breakpoint()``
        enable_modules.append('tractor._debug')

-        if loglevel is None:
-            loglevel = 'pdb'
+        # if debug mode gets enabled *at least* use that level of
+        # logging for some informative console prompts.
+        if (
+            logging.getLevelName(
+                # lul, need the upper case for the -> int map?
+                # sweet "dynamic function behaviour" stdlib...
+                loglevel,
+            ) > logging.getLevelName('PDB')
+        ):
+            loglevel = 'PDB'

    elif debug_mode:
        raise RuntimeError(
            "Debug mode is only supported for the `trio` backend!"
        )

-    arbiter_addr = (host, port) = arbiter_addr or (
-        _default_arbiter_host,
-        _default_arbiter_port
-    )
-
-    loglevel = loglevel or log.get_loglevel()
-    if loglevel is not None:
-        log._default_loglevel = loglevel
-        log.get_console_log(loglevel)
-
-    # make a temporary connection to see if an arbiter exists
-    arbiter_found = False
+    log.get_console_log(loglevel)

    try:
+        # make a temporary connection to see if an arbiter exists,
+        # if one can't be made quickly we assume none exists.
+        arbiter_found = False
+
        # TODO: this connect-and-bail forces us to have to carefully
        # rewrap TCP 104-connection-reset errors as EOF so as to avoid
        # propagating cancel-causing errors to the channel-msg loop
        # machinery.  Likely it would be better to eventually have
        # a "discovery" protocol with basic handshake instead.
-        async with _connect_chan(host, port):
-            arbiter_found = True
+        with trio.move_on_after(1):
+            async with _connect_chan(host, port):
+                arbiter_found = True

    except OSError:
-        logger.warning(f"No actor could be found @ {host}:{port}")
+        # TODO: make this a "discovery" log level?
+        logger.warning(f"No actor registry found @ {host}:{port}")

    # create a local actor and start up its main routine/task
    if arbiter_found:
@ -127,7 +183,7 @@ async def open_root_actor(

        actor = Actor(
            name or 'anonymous',
-            arbiter_addr=arbiter_addr,
+            arbiter_addr=registry_addr,
            loglevel=loglevel,
            enable_modules=enable_modules,
        )
@ -143,7 +199,7 @@ async def open_root_actor(

        actor = Arbiter(
            name or 'arbiter',
-            arbiter_addr=arbiter_addr,
+            arbiter_addr=registry_addr,
            loglevel=loglevel,
            enable_modules=enable_modules,
        )
@ -159,13 +215,14 @@ async def open_root_actor(
        # start the actor runtime in a new task
        async with trio.open_nursery() as nursery:

-            # ``Actor._async_main()`` creates an internal nursery and
+            # ``_runtime.async_main()`` creates an internal nursery and
            # thus blocks here until the entire underlying actor tree has
            # terminated thereby conducting structured concurrency.

            await nursery.start(
                partial(
-                    actor._async_main,
+                    async_main,
+                    actor,
                    accept_addr=(host, port),
                    parent_addr=None
                )
@ -173,82 +230,83 @@ async def open_root_actor(
            try:
                yield actor

-            except (Exception, trio.MultiError) as err:
+            except (
+                Exception,
+                BaseExceptionGroup,
+            ) as err:

                entered = await _debug._maybe_enter_pm(err)

-                if not entered:
+                if not entered and not is_multi_cancelled(err):
                    logger.exception("Root actor crashed:")

                # always re-raise
                raise

            finally:
-                logger.info("Shutting down root actor")
+                # NOTE: not sure if we'll ever need this but it's
+                # possibly better for even more determinism?
+                # logger.cancel(
+                #     f'Waiting on {len(nurseries)} nurseries in root..')
+                # nurseries = actor._actoruid2nursery.values()
+                # async with trio.open_nursery() as tempn:
+                #     for an in nurseries:
+                #         tempn.start_soon(an.exited.wait)
+
+                logger.cancel("Shutting down root actor")
                await actor.cancel()
    finally:
        _state._current_actor = None
-        logger.info("Root actor terminated")
+
+        # restore breakpoint hook state
+        sys.breakpointhook = builtin_bp_handler
+        if orig_bp_path is not None:
+            os.environ['PYTHONBREAKPOINT'] = orig_bp_path
+        else:
+            # clear env back to having no entry
+            os.environ.pop('PYTHONBREAKPOINT')
+
+        logger.runtime("Root actor terminated")


-def run(
-
-    # target
-    async_fn: typing.Callable[..., typing.Awaitable],
-    *args,
+def run_daemon(
+    enable_modules: list[str],

    # runtime kwargs
-    name: Optional[str] = 'root',
-    arbiter_addr: Tuple[str, int] = (
+    name: str | None = 'root',
+    registry_addr: tuple[str, int] = (
        _default_arbiter_host,
        _default_arbiter_port,
    ),

-    start_method: Optional[str] = None,
+    start_method: str | None = None,
    debug_mode: bool = False,
-    **kwargs,
+    **kwargs

-) -> Any:
-    """Run a trio-actor async function in process.
+) -> None:
+    '''
+    Spawn daemon actor which will respond to RPC; the main task simply
+    starts the runtime and then sleeps forever.
+
+    This is a very minimal convenience wrapper around starting
+    a "run-until-cancelled" root actor which can be started with a set
+    of enabled modules for RPC request handling.
+
+    '''
+    kwargs['enable_modules'] = list(enable_modules)
+
+    for path in enable_modules:
+        importlib.import_module(path)

-    This is tractor's main entry and the start point for any async actor.
-    """
    async def _main():

        async with open_root_actor(
-            arbiter_addr=arbiter_addr,
+            registry_addr=registry_addr,
            name=name,
            start_method=start_method,
            debug_mode=debug_mode,
            **kwargs,
        ):
+            return await trio.sleep_forever()

-            return await async_fn(*args)
-
-    warnings.warn(
-        "`tractor.run()` is now deprecated. `tractor` now"
-        " implicitly starts the root actor on first actor nursery"
-        " use. If you want to start the root actor manually, use"
-        " `tractor.open_root_actor()`.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
    return trio.run(_main)
-
-
-def run_daemon(
-    rpc_module_paths: List[str],
-    **kwargs
-) -> None:
-    """Spawn daemon actor which will respond to RPC.
-
-    This is a convenience wrapper around
-    ``tractor.run(trio.sleep(float('inf')))`` such that the first actor spawned
-    is meant to run forever responding to RPC requests.
-    """
-    kwargs['rpc_module_paths'] = list(rpc_module_paths)
-
-    for path in rpc_module_paths:
-        importlib.import_module(path)
-
-    return run(partial(trio.sleep, float('inf')), **kwargs)
--- a/tractor/_runtime.py
+++ b/tractor/_runtime.py
--- a/tractor/_spawn.py
+++ b/tractor/_spawn.py
@ -1,105 +1,145 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Machinery for actor process spawning using multiple backends.
-"""
-import sys
-import multiprocessing as mp
-import platform
-from typing import Any, Dict, Optional

+"""
+from __future__ import annotations
+import sys
+import platform
+from typing import (
+    Any,
+    Awaitable,
+    Literal,
+    Callable,
+    TypeVar,
+    TYPE_CHECKING,
+)
+
+from exceptiongroup import BaseExceptionGroup
 import trio
 from trio_typing import TaskStatus
-from async_generator import asynccontextmanager

-try:
-    from multiprocessing import semaphore_tracker  # type: ignore
-    resource_tracker = semaphore_tracker
-    resource_tracker._resource_tracker = resource_tracker._semaphore_tracker
-except ImportError:
-    # 3.8 introduces a more general version that also tracks shared mems
-    from multiprocessing import resource_tracker  # type: ignore
-
-from multiprocessing import forkserver  # type: ignore
-from typing import Tuple
-
-from . import _forkserver_override
+from ._debug import (
+    maybe_wait_for_debugger,
+    acquire_debug_lock,
+)
 from ._state import (
    current_actor,
    is_main_process,
+    is_root_process,
+    debug_mode,
 )
-
 from .log import get_logger
 from ._portal import Portal
-from ._actor import Actor
+from ._runtime import Actor
 from ._entry import _mp_main
 from ._exceptions import ActorFailure


+if TYPE_CHECKING:
+    from ._supervise import ActorNursery
+    import multiprocessing as mp
+    ProcessType = TypeVar('ProcessType', mp.Process, trio.Process)
+
 log = get_logger('tractor')

 # placeholder for an mp start context if so using that backend
-_ctx: Optional[mp.context.BaseContext] = None
-_spawn_method: str = "spawn"
+_ctx: mp.context.BaseContext | None = None
+SpawnMethodKey = Literal[
+    'trio',  # supported on all platforms
+    'mp_spawn',
+    'mp_forkserver',  # posix only
+]
+_spawn_method: SpawnMethodKey = 'trio'


 if platform.system() == 'Windows':
-    _spawn_method = "spawn"
+
+    import multiprocessing as mp
    _ctx = mp.get_context("spawn")

    async def proc_waiter(proc: mp.Process) -> None:
        await trio.lowlevel.WaitForSingleObject(proc.sentinel)
 else:
-    # *NIX systems use ``trio`` primitives as our default
-    _spawn_method = "trio"
+    # *NIX systems use ``trio`` primitives as our default as well

    async def proc_waiter(proc: mp.Process) -> None:
        await trio.lowlevel.wait_readable(proc.sentinel)


-def try_set_start_method(name: str) -> Optional[mp.context.BaseContext]:
-    """Attempt to set the method for process starting, aka the "actor
+def try_set_start_method(
+    key: SpawnMethodKey
+
+) -> mp.context.BaseContext | None:
+    '''
+    Attempt to set the method for process starting, aka the "actor
    spawning backend".

    If the desired method is not supported this function will error.
    On Windows only the ``multiprocessing`` "spawn" method is offered
    besides the default ``trio`` which uses async wrapping around
    ``subprocess.Popen``.
-    """
+
+    '''
+    import multiprocessing as mp
    global _ctx
    global _spawn_method

-    methods = mp.get_all_start_methods()
-    if 'fork' in methods:
+    mp_methods = mp.get_all_start_methods()
+    if 'fork' in mp_methods:
        # forking is incompatible with ``trio``s global task tree
-        methods.remove('fork')
+        mp_methods.remove('fork')

-    # supported on all platforms
-    methods += ['trio']
+    match key:
+        case 'mp_forkserver':
+            from . import _forkserver_override
+            _forkserver_override.override_stdlib()
+            _ctx = mp.get_context('forkserver')

-    if name not in methods:
-        raise ValueError(
-            f"Spawn method `{name}` is invalid please choose one of {methods}"
-        )
-    elif name == 'forkserver':
-        _forkserver_override.override_stdlib()
-        _ctx = mp.get_context(name)
-    elif name == 'trio':
-        _ctx = None
-    else:
-        _ctx = mp.get_context(name)
+        case 'mp_spawn':
+            _ctx = mp.get_context('spawn')

-    _spawn_method = name
+        case 'trio':
+            _ctx = None
+
+        case _:
+            raise ValueError(
+                f'Spawn method `{key}` is invalid!\n'
+                f'Please choose one of {SpawnMethodKey}'
+            )
+
+    _spawn_method = key
    return _ctx


 async def exhaust_portal(
+
    portal: Portal,
    actor: Actor
+
 ) -> Any:
-    """Pull final result from portal (assuming it has one).
+    '''
+    Pull final result from portal (assuming it has one).

    If the main task is an async generator do our best to consume
    what's left of it.
-    """
+    '''
+    __tracebackhide__ = True
    try:
        log.debug(f"Waiting on final result from {actor.uid}")

@ -107,8 +147,11 @@ async def exhaust_portal(
        # always be established and shutdown using a context manager api
        final = await portal.result()

-    except (Exception, trio.MultiError) as err:
-        # we reraise in the parent task via a ``trio.MultiError``
+    except (
+        Exception,
+        BaseExceptionGroup,
+    ) as err:
+        # we reraise in the parent task via a ``BaseExceptionGroup``
        return err
    except trio.Cancelled as err:
        # lol, of course we need this too ;P
@ -121,65 +164,190 @@ async def exhaust_portal(


 async def cancel_on_completion(
+
    portal: Portal,
    actor: Actor,
-    errors: Dict[Tuple[str, str], Exception],
-    task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED,
+    errors: dict[tuple[str, str], Exception],
+
 ) -> None:
-    """Cancel actor gracefully once it's "main" portal's
+    '''
+    Cancel actor gracefully once its "main" portal's
    result arrives.

    Should only be called for actors spawned with `run_in_actor()`.
-    """
-    with trio.CancelScope() as cs:

-        task_status.started(cs)
+    '''
+    # if this call errors we store the exception for later
+    # in ``errors`` which will be reraised inside
+    # an exception group and we still send out a cancel request
+    result = await exhaust_portal(portal, actor)
+    if isinstance(result, Exception):
+        errors[actor.uid] = result
+        log.warning(
+            f"Cancelling {portal.channel.uid} after error {result}"
+        )

-        # if this call errors we store the exception for later
-        # in ``errors`` which will be reraised inside
-        # a MultiError and we still send out a cancel request
-        result = await exhaust_portal(portal, actor)
-        if isinstance(result, Exception):
-            errors[actor.uid] = result
-            log.warning(
-                f"Cancelling {portal.channel.uid} after error {result}"
-            )
+    else:
+        log.runtime(
+            f"Cancelling {portal.channel.uid} gracefully "
+            f"after result {result}")

-        else:
-            log.runtime(
-                f"Cancelling {portal.channel.uid} gracefully "
-                f"after result {result}")
-
-        # cancel the process now that we have a final result
-        await portal.cancel_actor()
+    # cancel the process now that we have a final result
+    await portal.cancel_actor()


 async def do_hard_kill(
    proc: trio.Process,
+    terminate_after: int = 3,
+
 ) -> None:
    # NOTE: this timeout used to do nothing since we were shielding
    # the ``.wait()`` inside ``new_proc()`` which will pretty much
    # never release until the process exits, now it acts as
    # a hard-kill time ultimatum.
-    with trio.move_on_after(3) as cs:
+    log.debug(f"Terminating {proc}")
+    with trio.move_on_after(terminate_after) as cs:

-        # NOTE: This ``__aexit__()`` shields internally.
-        async with proc:  # calls ``trio.Process.aclose()``
-            log.debug(f"Terminating {proc}")
+        # NOTE: code below was copied verbatim from the now deprecated
+        # (in 0.20.0) ``trio._subprocess.Process.aclose()``, orig doc
+        # string:
+        #
+        # Close any pipes we have to the process (both input and output)
+        # and wait for it to exit. If cancelled, kills the process and
+        # waits for it to finish exiting before propagating the
+        # cancellation.
+        with trio.CancelScope(shield=True):
+            if proc.stdin is not None:
+                await proc.stdin.aclose()
+            if proc.stdout is not None:
+                await proc.stdout.aclose()
+            if proc.stderr is not None:
+                await proc.stderr.aclose()
+        try:
+            await proc.wait()
+        finally:
+            if proc.returncode is None:
+                proc.kill()
+                with trio.CancelScope(shield=True):
+                    await proc.wait()

    if cs.cancelled_caught:
        # XXX: should pretty much never get here unless we have
        # to move the bits from ``proc.__aexit__()`` out and
        # into here.
-        log.critical(f"HARD KILLING {proc}")
+        log.critical(f"#ZOMBIE_LORD_IS_HERE: {proc}")
        proc.kill()


-@asynccontextmanager
-async def spawn_subactor(
-    subactor: 'Actor',
-    parent_addr: Tuple[str, int],
-):
+async def soft_wait(
+
+    proc: ProcessType,
+    wait_func: Callable[
+        [ProcessType],
+        Awaitable,
+    ],
+    portal: Portal,
+
+) -> None:
+    # Wait for proc termination but **don't yet** call
+    # ``trio.Process.__aexit__()`` (it tears down stdio
+    # which will kill any waiting remote pdb trace).
+    # This is a "soft" (cancellable) join/reap.
+    uid = portal.channel.uid
+    try:
+        log.cancel(f'Soft waiting on actor:\n{uid}')
+        await wait_func(proc)
+    except trio.Cancelled:
+        # if cancelled during a soft wait, cancel the child
+        # actor before entering the hard reap sequence
+        # below. This means we try to do a graceful teardown
+        # via sending a cancel message before getting out
+        # zombie killing tools.
+        async with trio.open_nursery() as n:
+            n.cancel_scope.shield = True
+
+            async def cancel_on_proc_deth():
+                '''
+                Cancel the actor cancel request if we detect that
+                the process terminated.
+
+                '''
+                await wait_func(proc)
+                n.cancel_scope.cancel()
+
+            n.start_soon(cancel_on_proc_deth)
+            await portal.cancel_actor()
+
+            if proc.poll() is None:  # type: ignore
+                log.warning(
+                    'Actor still alive after cancel request:\n'
+                    f'{uid}'
+                )
+
+                n.cancel_scope.cancel()
+        raise
+
+
+async def new_proc(
+    name: str,
+    actor_nursery: ActorNursery,
+    subactor: Actor,
+    errors: dict[tuple[str, str], Exception],
+
+    # passed through to actor main
+    bind_addr: tuple[str, int],
+    parent_addr: tuple[str, int],
+    _runtime_vars: dict[str, Any],  # serialized and sent to _child
+
+    *,
+
+    infect_asyncio: bool = False,
+    task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED
+
+) -> None:
+
+    # lookup backend spawning target
+    target = _methods[_spawn_method]
+
+    # mark the new actor with the global spawn method
+    subactor._spawn_method = _spawn_method
+
+    await target(
+        name,
+        actor_nursery,
+        subactor,
+        errors,
+        bind_addr,
+        parent_addr,
+        _runtime_vars,  # run time vars
+        infect_asyncio=infect_asyncio,
+        task_status=task_status,
+    )
+
+
+async def trio_proc(
+    name: str,
+    actor_nursery: ActorNursery,
+    subactor: Actor,
+    errors: dict[tuple[str, str], Exception],
+
+    # passed through to actor main
+    bind_addr: tuple[str, int],
+    parent_addr: tuple[str, int],
+    _runtime_vars: dict[str, Any],  # serialized and sent to _child
+    *,
+    infect_asyncio: bool = False,
+    task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED
+
+) -> None:
+    '''
+    Create a new ``Process`` using a "spawn method" as (configured using
+    ``try_set_start_method()``).
+
+    This routine should be started in an actor runtime task and the logic
+    here is to be considered the core supervision strategy.
+
+    '''
    spawn_cmd = [
        sys.executable,
        "-m",
@ -202,281 +370,310 @@ async def spawn_subactor(
            "--loglevel",
            subactor.loglevel
        ]
+    # Tell child to run in guest mode on top of ``asyncio`` loop
+    if infect_asyncio:
+        spawn_cmd.append("--asyncio")

-    proc = await trio.open_process(spawn_cmd)
+    cancelled_during_spawn: bool = False
+    proc: trio.Process | None = None
    try:
-        yield proc
-
-    finally:
-        log.runtime(f"Attempting to kill {proc}")
-
-        # XXX: do this **after** cancellation/tearfown
-        # to avoid killing the process too early
-        # since trio does this internally on ``__aexit__()``
-
-        await do_hard_kill(proc)
-
-
-async def new_proc(
-    name: str,
-    actor_nursery: 'ActorNursery',  # type: ignore  # noqa
-    subactor: Actor,
-    errors: Dict[Tuple[str, str], Exception],
-    # passed through to actor main
-    bind_addr: Tuple[str, int],
-    parent_addr: Tuple[str, int],
-    _runtime_vars: Dict[str, Any],  # serialized and sent to _child
-    *,
-    task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED
-) -> None:
-    """Create a new ``multiprocessing.Process`` using the
-    spawn method as configured using ``try_set_start_method()``.
-    """
-    cancel_scope = None
-
-    # mark the new actor with the global spawn method
-    subactor._spawn_method = _spawn_method
-
-    if _spawn_method == 'trio':
-        async with trio.open_nursery() as nursery:
-            async with spawn_subactor(
-                subactor,
-                parent_addr,
-            ) as proc:
-                log.runtime(f"Started {proc}")
-
-                # wait for actor to spawn and connect back to us
-                # channel should have handshake completed by the
-                # local actor by the time we get a ref to it
-                event, chan = await actor_nursery._actor.wait_for_peer(
-                    subactor.uid)
-                portal = Portal(chan)
-                actor_nursery._children[subactor.uid] = (
-                    subactor, proc, portal)
-
-                # send additional init params
-                await chan.send({
-                    "_parent_main_data": subactor._parent_main_data,
-                    "enable_modules": subactor.enable_modules,
-                    "_arb_addr": subactor._arb_addr,
-                    "bind_host": bind_addr[0],
-                    "bind_port": bind_addr[1],
-                    "_runtime_vars": _runtime_vars,
-                })
-
-                # track subactor in current nursery
-                curr_actor = current_actor()
-                curr_actor._actoruid2nursery[subactor.uid] = actor_nursery
-
-                # resume caller at next checkpoint now that child is up
-                task_status.started(portal)
-
-                # wait for ActorNursery.wait() to be called
-                with trio.CancelScope(shield=True):
-                    await actor_nursery._join_procs.wait()
-
-                if portal in actor_nursery._cancel_after_result_on_exit:
-                    cancel_scope = await nursery.start(
-                        cancel_on_completion,
-                        portal,
-                        subactor,
-                        errors
-                    )
-
-                # Wait for proc termination but **dont' yet** call
-                # ``trio.Process.__aexit__()`` (it tears down stdio
-                # which will kill any waiting remote pdb trace).
-
-                # TODO: No idea how we can enforce zombie
-                # reaping more stringently without the shield
-                # we used to have below...
-
-                # with trio.CancelScope(shield=True):
-                # async with proc:
-
-                # Always "hard" join sub procs since no actor zombies
-                # are allowed!
-
-                # this is a "light" (cancellable) join, the hard join is
-                # in the enclosing scope (see above).
-                await proc.wait()
-
-            log.debug(f"Joined {proc}")
-            # pop child entry to indicate we no longer managing this subactor
-            subactor, proc, portal = actor_nursery._children.pop(subactor.uid)
-
-            # cancel result waiter that may have been spawned in
-            # tandem if not done already
-            if cancel_scope:
-                log.warning(
-                    "Cancelling existing result waiter task for "
-                    f"{subactor.uid}")
-                cancel_scope.cancel()
-    else:
-        # `multiprocessing`
-        # async with trio.open_nursery() as nursery:
-        await mp_new_proc(
-            name=name,
-            actor_nursery=actor_nursery,
-            subactor=subactor,
-            errors=errors,
-            # passed through to actor main
-            bind_addr=bind_addr,
-            parent_addr=parent_addr,
-            _runtime_vars=_runtime_vars,
-            task_status=task_status,
-        )
-
-
-async def mp_new_proc(
-
-    name: str,
-    actor_nursery: 'ActorNursery',  # type: ignore  # noqa
-    subactor: Actor,
-    errors: Dict[Tuple[str, str], Exception],
-    # passed through to actor main
-    bind_addr: Tuple[str, int],
-    parent_addr: Tuple[str, int],
-    _runtime_vars: Dict[str, Any],  # serialized and sent to _child
-    *,
-    task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED
-
-) -> None:
-    async with trio.open_nursery() as nursery:
-        assert _ctx
-        start_method = _ctx.get_start_method()
-        if start_method == 'forkserver':
-            # XXX do our hackery on the stdlib to avoid multiple
-            # forkservers (one at each subproc layer).
-            fs = forkserver._forkserver
-            curr_actor = current_actor()
-            if is_main_process() and not curr_actor._forkserver_info:
-                # if we're the "main" process start the forkserver
-                # only once and pass its ipc info to downstream
-                # children
-                # forkserver.set_forkserver_preload(enable_modules)
-                forkserver.ensure_running()
-                fs_info = (
-                    fs._forkserver_address,
-                    fs._forkserver_alive_fd,
-                    getattr(fs, '_forkserver_pid', None),
-                    getattr(
-                        resource_tracker._resource_tracker, '_pid', None),
-                    resource_tracker._resource_tracker._fd,
-                )
-            else:
-                assert curr_actor._forkserver_info
-                fs_info = (
-                    fs._forkserver_address,
-                    fs._forkserver_alive_fd,
-                    fs._forkserver_pid,
-                    resource_tracker._resource_tracker._pid,
-                    resource_tracker._resource_tracker._fd,
-                 ) = curr_actor._forkserver_info
-        else:
-            fs_info = (None, None, None, None, None)
-
-        proc: mp.Process = _ctx.Process(  # type: ignore
-            target=_mp_main,
-            args=(
-                subactor,
-                bind_addr,
-                fs_info,
-                start_method,
-                parent_addr,
-            ),
-            # daemon=True,
-            name=name,
-        )
-        # `multiprocessing` only (since no async interface):
-        # register the process before start in case we get a cancel
-        # request before the actor has fully spawned - then we can wait
-        # for it to fully come up before sending a cancel request
-        actor_nursery._children[subactor.uid] = (subactor, proc, None)
-
-        proc.start()
-        if not proc.is_alive():
-            raise ActorFailure("Couldn't start sub-actor?")
-
-        log.runtime(f"Started {proc}")
-
        try:
+            # TODO: needs ``trio_typing`` patch?
+            proc = await trio.lowlevel.open_process(spawn_cmd)
+
+            log.runtime(f"Started {proc}")
+
            # wait for actor to spawn and connect back to us
            # channel should have handshake completed by the
            # local actor by the time we get a ref to it
            event, chan = await actor_nursery._actor.wait_for_peer(
                subactor.uid)
-            portal = Portal(chan)
-            actor_nursery._children[subactor.uid] = (subactor, proc, portal)

-            # unblock parent task
-            task_status.started(portal)
+        except trio.Cancelled:
+            cancelled_during_spawn = True
+            # we may cancel before the child connects back in which
+            # case avoid clobbering the pdb tty.
+            if debug_mode():
+                with trio.CancelScope(shield=True):
+                    # don't clobber an ongoing pdb
+                    if is_root_process():
+                        await maybe_wait_for_debugger()

-            # wait for ``ActorNursery`` block to signal that
-            # subprocesses can be waited upon.
-            # This is required to ensure synchronization
-            # with user code that may want to manually await results
-            # from nursery spawned sub-actors. We don't want the
-            # containing nurseries here to collect results or error
-            # while user code is still doing it's thing. Only after the
-            # nursery block closes do we allow subactor results to be
-            # awaited and reported upwards to the supervisor.
+                    elif proc is not None:
+                        async with acquire_debug_lock(subactor.uid):
+                            # soft wait on the proc to terminate
+                            with trio.move_on_after(0.5):
+                                await proc.wait()
+            raise
+
+        # a sub-proc ref **must** exist now
+        assert proc
+
+        portal = Portal(chan)
+        actor_nursery._children[subactor.uid] = (
+            subactor,
+            proc,
+            portal,
+        )
+
+        # send additional init params
+        await chan.send({
+            "_parent_main_data": subactor._parent_main_data,
+            "enable_modules": subactor.enable_modules,
+            "_arb_addr": subactor._arb_addr,
+            "bind_host": bind_addr[0],
+            "bind_port": bind_addr[1],
+            "_runtime_vars": _runtime_vars,
+        })
+
+        # track subactor in current nursery
+        curr_actor = current_actor()
+        curr_actor._actoruid2nursery[subactor.uid] = actor_nursery
+
+        # resume caller at next checkpoint now that child is up
+        task_status.started(portal)
+
+        # wait for ActorNursery.wait() to be called
+        with trio.CancelScope(shield=True):
            await actor_nursery._join_procs.wait()

-        finally:
-            # XXX: in the case we were cancelled before the sub-proc
-            # registered itself back we must be sure to try and clean
-            # any process we may have started.
-
-            reaping_cancelled: bool = False
-            cancel_scope: Optional[trio.CancelScope] = None
-            cancel_exc: Optional[trio.Cancelled] = None
-
+        async with trio.open_nursery() as nursery:
            if portal in actor_nursery._cancel_after_result_on_exit:
-                try:
-                    # async with trio.open_nursery() as n:
-                    # n.cancel_scope.shield = True
-                    cancel_scope = await nursery.start(
-                        cancel_on_completion,
-                        portal,
-                        subactor,
-                        errors
-                    )
-                except trio.Cancelled as err:
-                    cancel_exc = err
+                nursery.start_soon(
+                    cancel_on_completion,
+                    portal,
+                    subactor,
+                    errors
+                )

-                    # if the reaping task was cancelled we may have hit
-                    # a race where the subproc disconnected before we
-                    # could send it a message to cancel (classic 2 generals)
-                    # in that case, wait shortly then kill the process.
-                    reaping_cancelled = True
-
-                    if proc.is_alive():
-                        with trio.move_on_after(0.1) as cs:
-                            cs.shield = True
-                            await proc_waiter(proc)
-
-                        if cs.cancelled_caught:
-                            proc.terminate()
-
-            if not reaping_cancelled and proc.is_alive():
-                await proc_waiter(proc)
-
-            # TODO: timeout block here?
-            proc.join()
-
-            log.debug(f"Joined {proc}")
-            # pop child entry to indicate we are no longer managing subactor
-            subactor, proc, portal = actor_nursery._children.pop(subactor.uid)
+            # This is a "soft" (cancellable) join/reap which
+            # will remote cancel the actor on a ``trio.Cancelled``
+            # condition.
+            await soft_wait(
+                proc,
+                trio.Process.wait,
+                portal
+            )

            # cancel result waiter that may have been spawned in
            # tandem if not done already
-            if cancel_scope:
-                log.warning(
-                    "Cancelling existing result waiter task for "
-                    f"{subactor.uid}")
-                cancel_scope.cancel()
+            log.warning(
+                "Cancelling existing result waiter task for "
+                f"{subactor.uid}")
+            nursery.cancel_scope.cancel()

-            elif reaping_cancelled:  # let the cancellation bubble up
-                assert cancel_exc
-                raise cancel_exc
+    finally:
+        # XXX NOTE XXX: The "hard" reap since no actor zombies are
+        # allowed! Do this **after** cancellation/teardown to avoid
+        # killing the process too early.
+        if proc:
+            log.cancel(f'Hard reap sequence starting for {subactor.uid}')
+            with trio.CancelScope(shield=True):
+
+                # don't clobber an ongoing pdb
+                if cancelled_during_spawn:
+                    # Try again to avoid TTY clobbering.
+                    async with acquire_debug_lock(subactor.uid):
+                        with trio.move_on_after(0.5):
+                            await proc.wait()
+
+                if is_root_process():
+                    # TODO: solve the following issue where we need
+                    # to do a similar wait like this but in an
+                    # "intermediary" parent actor that itself isn't
+                    # in debug but has a child that is, and we need
+                    # to hold off on relaying SIGINT until that child
+                    # is complete.
+                    # https://github.com/goodboy/tractor/issues/320
+                    await maybe_wait_for_debugger(
+                        child_in_debug=_runtime_vars.get(
+                            '_debug_mode', False),
+                    )
+
+                if proc.poll() is None:
+                    log.cancel(f"Attempting to hard kill {proc}")
+                    await do_hard_kill(proc)
+
+                log.debug(f"Joined {proc}")
+        else:
+            log.warning('Nursery cancelled before sub-proc started')
+
+        if not cancelled_during_spawn:
+            # pop child entry to indicate we are no longer managing this
+            # subactor
+            actor_nursery._children.pop(subactor.uid)
+
+
+async def mp_proc(
+    name: str,
+    actor_nursery: ActorNursery,  # type: ignore  # noqa
+    subactor: Actor,
+    errors: dict[tuple[str, str], Exception],
+    # passed through to actor main
+    bind_addr: tuple[str, int],
+    parent_addr: tuple[str, int],
+    _runtime_vars: dict[str, Any],  # serialized and sent to _child
+    *,
+    infect_asyncio: bool = False,
+    task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED
+
+) -> None:
+
+    # uggh zone
+    try:
+        from multiprocessing import semaphore_tracker  # type: ignore
+        resource_tracker = semaphore_tracker
+        resource_tracker._resource_tracker = resource_tracker._semaphore_tracker  # noqa
+    except ImportError:
+        # 3.8 introduces a more general version that also tracks shared mems
+        from multiprocessing import resource_tracker  # type: ignore
+
+    assert _ctx
+    start_method = _ctx.get_start_method()
+    if start_method == 'forkserver':
+
+        from multiprocessing import forkserver  # type: ignore
+        # XXX do our hackery on the stdlib to avoid multiple
+        # forkservers (one at each subproc layer).
+        fs = forkserver._forkserver
+        curr_actor = current_actor()
+        if is_main_process() and not curr_actor._forkserver_info:
+            # if we're the "main" process start the forkserver
+            # only once and pass its ipc info to downstream
+            # children
+            # forkserver.set_forkserver_preload(enable_modules)
+            forkserver.ensure_running()
+            fs_info = (
+                fs._forkserver_address,  # type: ignore  # noqa
+                fs._forkserver_alive_fd,  # type: ignore  # noqa
+                getattr(fs, '_forkserver_pid', None),
+                getattr(
+                    resource_tracker._resource_tracker, '_pid', None),
+                resource_tracker._resource_tracker._fd,
+            )
+        else:  # request to forkserver to fork a new child
+            assert curr_actor._forkserver_info
+            fs_info = (
+                fs._forkserver_address,  # type: ignore  # noqa
+                fs._forkserver_alive_fd,  # type: ignore  # noqa
+                fs._forkserver_pid,  # type: ignore  # noqa
+                resource_tracker._resource_tracker._pid,
+                resource_tracker._resource_tracker._fd,
+             ) = curr_actor._forkserver_info
+    else:
+        # spawn method
+        fs_info = (None, None, None, None, None)
+
+    proc: mp.Process = _ctx.Process(  # type: ignore
+        target=_mp_main,
+        args=(
+            subactor,
+            bind_addr,
+            fs_info,
+            _spawn_method,
+            parent_addr,
+            infect_asyncio,
+        ),
+        # daemon=True,
+        name=name,
+    )
+
+    # `multiprocessing` only (since no async interface):
+    # register the process before start in case we get a cancel
+    # request before the actor has fully spawned - then we can wait
+    # for it to fully come up before sending a cancel request
+    actor_nursery._children[subactor.uid] = (subactor, proc, None)
+
+    proc.start()
+    if not proc.is_alive():
+        raise ActorFailure("Couldn't start sub-actor?")
+
+    log.runtime(f"Started {proc}")
+
+    try:
+        # wait for actor to spawn and connect back to us
+        # channel should have handshake completed by the
+        # local actor by the time we get a ref to it
+        event, chan = await actor_nursery._actor.wait_for_peer(
+            subactor.uid)
+
+        # XXX: monkey patch poll API to match the ``subprocess`` API..
+        # not sure why they don't expose this but kk.
+        proc.poll = lambda: proc.exitcode  # type: ignore
+
+    # except:
+        # TODO: in the case we were cancelled before the sub-proc
+        # registered itself back we must be sure to try and clean
+        # any process we may have started.
+
+        portal = Portal(chan)
+        actor_nursery._children[subactor.uid] = (subactor, proc, portal)
+
+        # unblock parent task
+        task_status.started(portal)
+
+        # wait for ``ActorNursery`` block to signal that
+        # subprocesses can be waited upon.
+        # This is required to ensure synchronization
+        # with user code that may want to manually await results
+        # from nursery spawned sub-actors. We don't want the
+        # containing nurseries here to collect results or error
+        # while user code is still doing its thing. Only after the
+        # nursery block closes do we allow subactor results to be
+        # awaited and reported upwards to the supervisor.
+        with trio.CancelScope(shield=True):
+            await actor_nursery._join_procs.wait()
+
+        async with trio.open_nursery() as nursery:
+            if portal in actor_nursery._cancel_after_result_on_exit:
+                nursery.start_soon(
+                    cancel_on_completion,
+                    portal,
+                    subactor,
+                    errors
+                )
+
+            # This is a "soft" (cancellable) join/reap which
+            # will remote cancel the actor on a ``trio.Cancelled``
+            # condition.
+            await soft_wait(
+                proc,
+                proc_waiter,
+                portal
+            )
+
+            # cancel result waiter that may have been spawned in
+            # tandem if not done already
+            log.warning(
+                "Cancelling existing result waiter task for "
+                f"{subactor.uid}")
+            nursery.cancel_scope.cancel()
+
+    finally:
+        # hard reap sequence
+        if proc.is_alive():
+            log.cancel(f"Attempting to hard kill {proc}")
+            with trio.move_on_after(0.1) as cs:
+                cs.shield = True
+                await proc_waiter(proc)
+
+            if cs.cancelled_caught:
+                proc.terminate()
+
+        proc.join()
+        log.debug(f"Joined {proc}")
+
+        # pop child entry to indicate we are no longer managing subactor
+        actor_nursery._children.pop(subactor.uid)
+
+        # TODO: prolly report to ``mypy`` how this causes all sorts of
+        # false errors..
+        # subactor, proc, portal = actor_nursery._children.pop(subactor.uid)
+
+
+# proc spawning backend target map
+_methods: dict[SpawnMethodKey, Callable] = {
+    'trio': trio_proc,
+    'mp_spawn': mp_proc,
+    'mp_forkserver': mp_proc,
+}
--- a/tractor/_state.py
+++ b/tractor/_state.py
@ -1,9 +1,27 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Per process state
+
 """
-from typing import Optional, Dict, Any
-from collections.abc import Mapping
-import multiprocessing as mp
+from typing import (
+    Optional,
+    Any,
+)

 import trio

@ -11,7 +29,7 @@ from ._exceptions import NoRuntime


 _current_actor: Optional['Actor'] = None  # type: ignore # noqa
-_runtime_vars: Dict[str, Any] = {
+_runtime_vars: dict[str, Any] = {
    '_debug_mode': False,
    '_is_root': False,
    '_root_mailbox': (None, None)
@ -27,33 +45,10 @@ def current_actor(err_on_no_runtime: bool = True) -> 'Actor':  # type: ignore #
    return _current_actor


-_conc_name_getters = {
-    'task': trio.lowlevel.current_task,
-    'actor': current_actor
-}
-
-
-class ActorContextInfo(Mapping):
-    "Dyanmic lookup for local actor and task names"
-    _context_keys = ('task', 'actor')
-
-    def __len__(self):
-        return len(self._context_keys)
-
-    def __iter__(self):
-        return iter(self._context_keys)
-
-    def __getitem__(self, key: str) -> str:
-        try:
-            return _conc_name_getters[key]().name  # type: ignore
-        except RuntimeError:
-            # no local actor/task context initialized yet
-            return f'no {key} context'
-
-
 def is_main_process() -> bool:
    """Bool determining if this actor is running in the top-most process.
    """
+    import multiprocessing as mp
    return mp.current_process().name == 'MainProcess'


--- a/tractor/_streaming.py
+++ b/tractor/_streaming.py
@ -1,13 +1,33 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Message stream types and APIs.

 """
+from __future__ import annotations
 import inspect
-from contextlib import contextmanager, asynccontextmanager
+from contextlib import asynccontextmanager
 from dataclasses import dataclass
 from typing import (
-    Any, Iterator, Optional, Callable,
-    AsyncGenerator, Dict,
+    Any,
+    Optional,
+    Callable,
+    AsyncGenerator,
+    AsyncIterator
 )

 import warnings
@ -18,42 +38,50 @@ from ._ipc import Channel
 from ._exceptions import unpack_error, ContextCancelled
 from ._state import current_actor
 from .log import get_logger
+from .trionics import broadcast_receiver, BroadcastReceiver


 log = get_logger(__name__)


-# TODO: generic typing like trio's receive channel
-# but with msgspec messages?
-# class ReceiveChannel(AsyncResource, Generic[ReceiveType]):
+# TODO: the list
+# - generic typing like trio's receive channel but with msgspec
+#   messages? class ReceiveChannel(AsyncResource, Generic[ReceiveType]):
+# - use __slots__ on ``Context``?


-class ReceiveMsgStream(trio.abc.ReceiveChannel):
-    """A wrapper around a ``trio._channel.MemoryReceiveChannel`` with
-    special behaviour for signalling stream termination across an
-    inter-actor ``Channel``. This is the type returned to a local task
-    which invoked a remote streaming function using `Portal.run()`.
+class MsgStream(trio.abc.Channel):
+    '''
+    A bidirectional message stream for receiving logically sequenced
+    values over an inter-actor IPC ``Channel``.
+
+    This is the type returned to a local task which entered either
+    ``Portal.open_stream_from()`` or ``Context.open_stream()``.

    Termination rules:

-    - if the local task signals stop iteration a cancel signal is
-      relayed to the remote task indicating to stop streaming
-    - if the remote task signals the end of a stream, raise
-      a ``StopAsyncIteration`` to terminate the local ``async for``
+    - on cancellation the stream is **not** implicitly closed and the
+      surrounding ``Context`` is expected to handle how that cancel
+      is relayed to any task on the remote side.
+    - if the remote task signals the end of a stream the
+      ``ReceiveChannel`` semantics dictate that a ``StopAsyncIteration``
+      is raised to terminate the local ``async for``.

-    """
+    '''
    def __init__(
        self,
        ctx: 'Context',  # typing: ignore # noqa
-        rx_chan: trio.abc.ReceiveChannel,
-        shield: bool = False,
+        rx_chan: trio.MemoryReceiveChannel,
+        _broadcaster: Optional[BroadcastReceiver] = None,
+
    ) -> None:
        self._ctx = ctx
        self._rx_chan = rx_chan
-        self._shielded = shield
+        self._broadcaster = _broadcaster

        # flag to denote end of stream
        self._eoc: bool = False
+        self._closed: bool = False

    # delegate directly to underlying mem channel
    def receive_nowait(self):
@ -61,17 +89,23 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
        return msg['yield']

    async def receive(self):
+        '''Async receive a single msg from the IPC transport, the next
+        in sequence for this stream.
+
+        '''
        # see ``.aclose()`` for notes on the old behaviour prior to
        # introducing this
        if self._eoc:
            raise trio.EndOfChannel

-        try:
+        if self._closed:
+            raise trio.ClosedResourceError('This stream was closed')

+        try:
            msg = await self._rx_chan.receive()
            return msg['yield']

-        except KeyError:
+        except KeyError as err:
            # internal error should never get here
            assert msg.get('cid'), ("Received internal error at portal?")

@ -80,9 +114,18 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
            # - 'error'
            # possibly just handle msg['stop'] here!

-            if msg.get('stop'):
+            if self._closed:
+                raise trio.ClosedResourceError('This stream was closed')
+
+            if msg.get('stop') or self._eoc:
                log.debug(f"{self} was stopped at remote end")

+                # XXX: important to set so that a new ``.receive()``
+                # call (likely by another task using a broadcast receiver)
+                # doesn't accidentally pull the ``return`` message
+                # value out of the underlying feed mem chan!
+                self._eoc = True
+
                # # when the send is closed we assume the stream has
                # # terminated and signal this local iterator to stop
                # await self.aclose()
@ -90,7 +133,7 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
                # XXX: this causes ``ReceiveChannel.__anext__()`` to
                # raise a ``StopAsyncIteration`` **and** in our catch
                # block below it will trigger ``.aclose()``.
-                raise trio.EndOfChannel
+                raise trio.EndOfChannel from err

            # TODO: test that shows stream raising an expected error!!!
            elif msg.get('error'):
@ -103,7 +146,6 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
        except (
            trio.ClosedResourceError,  # by self._rx_chan
            trio.EndOfChannel,  # by self._rx_chan or `stop` msg from far end
-            trio.Cancelled,  # by local cancellation
        ):
            # XXX: we close the stream on any of these error conditions:

@ -135,53 +177,24 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):

            raise  # propagate

-    @contextmanager
-    def shield(
-        self
-    ) -> Iterator['ReceiveMsgStream']:  # noqa
-        """Shield this stream's underlying channel such that a local consumer task
-        can be cancelled (and possibly restarted) using ``trio.Cancelled``.
-
-        Note that here, "shielding" here guards against relaying
-        a ``'stop'`` message to the far end of the stream thus keeping
-        the stream machinery active and ready for further use, it does
-        not have anything to do with an internal ``trio.CancelScope``.
-
-        """
-        self._shielded = True
-        yield self
-        self._shielded = False
-
    async def aclose(self):
-        """Cancel associated remote actor task and local memory channel
-        on close.
+        '''
+        Cancel associated remote actor task and local memory channel on
+        close.

-        """
+        '''
        # XXX: keep proper adherance to trio's `.aclose()` semantics:
        # https://trio.readthedocs.io/en/stable/reference-io.html#trio.abc.AsyncResource.aclose
        rx_chan = self._rx_chan

        if rx_chan._closed:
-            log.warning(f"{self} is already closed")
+            log.cancel(f"{self} is already closed")

            # this stream has already been closed so silently succeed as
            # per ``trio.AsyncResource`` semantics.
            # https://trio.readthedocs.io/en/stable/reference-io.html#trio.abc.AsyncResource.aclose
            return

-        # TODO: broadcasting to multiple consumers
-        # stats = rx_chan.statistics()
-        # if stats.open_receive_channels > 1:
-        #     # if we've been cloned don't kill the stream
-        #     log.debug(
-        #       "there are still consumers running keeping stream alive")
-        #     return
-
-        if self._shielded:
-            log.warning(f"{self} is shielded, portal channel being kept alive")
-            return
-
-        # XXX: This must be set **AFTER** the shielded test above!
        self._eoc = True

        # NOTE: this is super subtle IPC messaging stuff:
@ -199,27 +212,36 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):

        # In the bidirectional case, `Context.open_stream()` will create
        # the `Actor._cids2qs` entry from a call to
-        # `Actor.get_memchans()` and will send the stop message in
-        # ``__aexit__()`` on teardown so it **does not** need to be
-        # called here.
-        if not self._ctx._portal:
-            try:
-                # only for 2 way streams can we can send
-                # stop from the caller side
+        # `Actor.get_context()` and will call us here to send the stop
+        # msg in ``__aexit__()`` on teardown.
+        try:
+            # NOTE: if this call is cancelled we expect this end to
+            # handle as though the stop was never sent (though if it
+            # was it shouldn't matter since it's unlikely a user
+            # will try to re-use a stream after attempting to close
+            # it).
+            with trio.CancelScope(shield=True):
                await self._ctx.send_stop()

-            except (
-                trio.BrokenResourceError,
-                trio.ClosedResourceError
-            ):
-                # the underlying channel may already have been pulled
-                # in which case our stop message is meaningless since
-                # it can't traverse the transport.
-                log.debug(f'Channel for {self} was already closed')
+        except (
+            trio.BrokenResourceError,
+            trio.ClosedResourceError
+        ):
+            # the underlying channel may already have been pulled
+            # in which case our stop message is meaningless since
+            # it can't traverse the transport.
+            ctx = self._ctx
+            log.warning(
+                f'Stream was already destroyed?\n'
+                f'actor: {ctx.chan.uid}\n'
+                f'ctx id: {ctx.cid}'
+            )

-        # close the local mem chan ``self._rx_chan`` ??!?
+        self._closed = True

-        # DEFINITELY NOT if we're a bi-dir ``MsgStream``!
+        # Do we close the local mem chan ``self._rx_chan`` ??!?
+
+        # NO, DEFINITELY NOT if we're a bi-dir ``MsgStream``!
        # BECAUSE this same core-msg-loop mem recv-chan is used to deliver
        # the potential final result from the surrounding inter-actor
        # `Context` so we don't want to close it until that context has
@ -253,61 +275,123 @@ class ReceiveMsgStream(trio.abc.ReceiveChannel):
        # still need to consume msgs that are "in transit" from the far
        # end (eg. for ``Context.result()``).

+    @asynccontextmanager
+    async def subscribe(
+        self,

-class MsgStream(ReceiveMsgStream, trio.abc.Channel):
-    """
-    Bidirectional message stream for use within an inter-actor actor
-    ``Context```.
+    ) -> AsyncIterator[BroadcastReceiver]:
+        '''
+        Allocate and return a ``BroadcastReceiver`` which delegates
+        to this message stream.
+
+        This allows multiple local tasks to receive each their own copy
+        of this message stream.
+
+        This operation is idempotent and mutates this stream's
+        receive machinery to copy and window-length-store each received
+        value from the far end via the internally created broadcast
+        receiver wrapper.
+
+        '''
+        # NOTE: This operation is idempotent and non-reversible, so be
+        # sure you can deal with any (theoretical) overhead of the
+        # allocated ``BroadcastReceiver`` before calling this method for
+        # the first time.
+        if self._broadcaster is None:
+
+            bcast = self._broadcaster = broadcast_receiver(
+                self,
+                # use memory channel size by default
+                self._rx_chan._state.max_buffer_size,  # type: ignore
+                receive_afunc=self.receive,
+            )
+
+            # NOTE: we override the original stream instance's receive
+            # method to now delegate to the broadcaster's ``.receive()``
+            # such that new subscribers will be copied received values
+            # and this stream doesn't have to expect its original
+            # consumer(s) to get a new broadcast rx handle.
+            self.receive = bcast.receive  # type: ignore
+            # seems there's no graceful way to type this with ``mypy``?
+            # https://github.com/python/mypy/issues/708
+
+        async with self._broadcaster.subscribe() as bstream:
+            assert bstream.key != self._broadcaster.key
+            assert bstream._recv == self._broadcaster._recv
+
+            # NOTE: we patch on a `.send()` to the bcaster so that the
+            # caller can still conduct 2-way streaming using this
+            # ``bstream`` handle transparently as though it was the msg
+            # stream instance.
+            bstream.send = self.send  # type: ignore
+
+            yield bstream

-    """
    async def send(
        self,
        data: Any
    ) -> None:
-        '''Send a message over this stream to the far end.
+        '''
+        Send a message over this stream to the far end.

        '''
+        if self._ctx._error:
+            raise self._ctx._error  # from None
+
+        if self._closed:
+            raise trio.ClosedResourceError('This stream was already closed')
+
        await self._ctx.chan.send({'yield': data, 'cid': self._ctx.cid})

-    # TODO: but make it broadcasting to consumers
-    def clone(self):
-        """Clone this receive channel allowing for multi-task
-        consumption from the same channel.
-
-        """
-        return MsgStream(
-            self._ctx,
-            self._rx_chan.clone(),
-        )
-

@dataclass
 class Context:
-    '''An inter-actor task communication context.
+    '''
+    An inter-actor, ``trio`` task communication context.
+
+    NB: This class should never be instantiated directly; it is delivered
+    by either runtime machinery to a remotely started task or by entering
+    ``Portal.open_context()``.

    Allows maintaining task or protocol specific state between
    2 communicating actor tasks. A unique context is created on the
    callee side/end for every request to a remote actor from a portal.

    A context can be cancelled and (possibly eventually restarted) from
-    either side of the underlying IPC channel.
-
-    A context can be used to open task oriented message streams and can
-    be thought of as an IPC aware inter-actor cancel scope.
+    either side of the underlying IPC channel, can open task oriented
+    message streams, and acts as an IPC aware inter-actor-task cancel
+    scope.

    '''
    chan: Channel
    cid: str

+    # these are the "feeder" channels for delivering
+    # message values to the local task from the runtime
+    # msg processing loop.
+    _recv_chan: trio.MemoryReceiveChannel
+    _send_chan: trio.MemorySendChannel
+
+    _remote_func_type: Optional[str] = None
+
    # only set on the caller side
    _portal: Optional['Portal'] = None    # type: ignore # noqa
-    _recv_chan: Optional[trio.MemoryReceiveChannel] = None
    _result: Optional[Any] = False
+    _error: Optional[BaseException] = None
+
+    # status flags
    _cancel_called: bool = False
+    _cancel_msg: Optional[str] = None
+    _enter_debugger_on_cancel: bool = True
+    _started_called: bool = False
+    _started_received: bool = False
+    _stream_opened: bool = False

    # only set on the callee side
    _scope_nursery: Optional[trio.Nursery] = None

+    _backpressure: bool = False
+
    async def send_yield(self, data: Any) -> None:

        warnings.warn(
@ -321,34 +405,82 @@ class Context:
    async def send_stop(self) -> None:
        await self.chan.send({'stop': True, 'cid': self.cid})

-    def _error_from_remote_msg(
+    async def _maybe_raise_from_remote_msg(
        self,
-        msg: Dict[str, Any],
+        msg: dict[str, Any],

    ) -> None:
-        '''Unpack and raise a msg error into the local scope
+        '''
+        (Maybe) unpack and raise a msg error into the local scope
        nursery for this context.

        Acts as a form of "relay" for a remote error raised
        in the corresponding remote callee task.
+
        '''
-        assert self._scope_nursery
+        error = msg.get('error')
+        if error:
+            # If this is an error message from a context opened by
+            # ``Portal.open_context()`` we want to interrupt any ongoing
+            # (child) tasks within that context to be notified of the remote
+            # error relayed here.
+            #
+            # The reason we may want to raise the remote error immediately
+            # is that there is no guarantee the associated local task(s)
+            # will attempt to read from any locally opened stream any time
+            # soon.
+            #
+            # NOTE: this only applies when
+            # ``Portal.open_context()`` has been called since it is assumed
+            # (currently) that other portal APIs (``Portal.run()``,
+            # ``.run_in_actor()``) do their own error checking at the point
+            # of the call and result processing.
+            log.error(
+                f'Remote context error for {self.chan.uid}:{self.cid}:\n'
+                f'{msg["error"]["tb_str"]}'
+            )
+            error = unpack_error(msg, self.chan)
+            if (
+                isinstance(error, ContextCancelled) and
+                self._cancel_called
+            ):
+                # this is an expected cancel request response message
+                # and we don't need to raise it in scope since it will
+                # potentially override a real error
+                return

-        async def raiser():
-            raise unpack_error(msg, self.chan)
+            self._error = error

-        self._scope_nursery.start_soon(raiser)
+            # TODO: tempted to **not** do this by-reraising in a
+            # nursery and instead cancel a surrounding scope, detect
+            # the cancellation, then lookup the error that was set?
+            if self._scope_nursery:

-    async def cancel(self) -> None:
-        '''Cancel this inter-actor-task context.
+                async def raiser():
+                    raise self._error from None
+
+                # from trio.testing import wait_all_tasks_blocked
+                # await wait_all_tasks_blocked()
+                if not self._scope_nursery._closed:  # type: ignore
+                    self._scope_nursery.start_soon(raiser)
+
+    async def cancel(
+        self,
+        msg: Optional[str] = None,
+
+    ) -> None:
+        '''
+        Cancel this inter-actor-task context.

        Request that the far side cancel it's current linked context,
        Timeout quickly in an attempt to sidestep 2-generals...

        '''
        side = 'caller' if self._portal else 'callee'
+        if msg:
+            assert side == 'callee', 'Only callee side can provide cancel msg'

-        log.warning(f'Cancelling {side} side of context to {self.chan}')
+        log.cancel(f'Cancelling {side} side of context to {self.chan.uid}')

        self._cancel_called = True

@ -361,7 +493,7 @@ class Context:
            cid = self.cid
            with trio.move_on_after(0.5) as cs:
                cs.shield = True
-                log.warning(
+                log.cancel(
                    f"Cancelling stream {cid} to "
                    f"{self._portal.channel.uid}")

@ -376,11 +508,17 @@ class Context:
                # some other network error occurred.
                # if not self._portal.channel.connected():
                if not self.chan.connected():
-                    log.warning(
+                    log.cancel(
                        "May have failed to cancel remote task "
                        f"{cid} for {self._portal.channel.uid}")
+                else:
+                    log.cancel(
+                        "Timed out on cancelling remote task "
+                        f"{cid} for {self._portal.channel.uid}")
+
+        # callee side remote task
        else:
-            # callee side remote task
+            self._cancel_msg = msg

            # TODO: should we have an explicit cancel message
            # or is relaying the local `trio.Cancelled` as an
@ -397,10 +535,12 @@ class Context:
    async def open_stream(

        self,
-        shield: bool = False,
+        backpressure: Optional[bool] = True,
+        msg_buffer_size: Optional[int] = None,

    ) -> AsyncGenerator[MsgStream, None]:
-        '''Open a ``MsgStream``, a bi-directional stream connected to the
+        '''
+        Open a ``MsgStream``, a bi-directional stream connected to the
        cross-actor (far end) task for this ``Context``.

        This context manager must be entered on both the caller and
@ -423,16 +563,6 @@ class Context:
        # here we create a mem chan that corresponds to the
        # far end caller / callee.

-        # NOTE: in one way streaming this only happens on the
-        # caller side inside `Actor.send_cmd()` so if you try
-        # to send a stop from the caller to the callee in the
-        # single-direction-stream case you'll get a lookup error
-        # currently.
-        _, recv_chan = actor.get_memchans(
-            self.chan.uid,
-            self.cid
-        )
-
        # Likewise if the surrounding context has been cancelled we error here
        # since it likely means the surrounding block was exited or
        # killed
@ -443,47 +573,68 @@ class Context:
                f'Context around {actor.uid[0]}:{task} was already cancelled!'
            )

+        if not self._portal and not self._started_called:
+            raise RuntimeError(
+                'Context.started()` must be called before opening a stream'
+            )
+
+        # NOTE: in one way streaming this only happens on the
+        # caller side inside `Actor.start_remote_task()` so if you try
+        # to send a stop from the caller to the callee in the
+        # single-direction-stream case you'll get a lookup error
+        # currently.
+        ctx = actor.get_context(
+            self.chan,
+            self.cid,
+            msg_buffer_size=msg_buffer_size,
+        )
+        ctx._backpressure = backpressure
+        assert ctx is self
+
        # XXX: If the underlying channel feeder receive mem chan has
        # been closed then likely client code has already exited
        # a ``.open_stream()`` block prior or there was some other
        # unanticipated error or cancellation from ``trio``.

-        if recv_chan._closed:
+        if ctx._recv_chan._closed:
            raise trio.ClosedResourceError(
                'The underlying channel for this stream was already closed!?')

        async with MsgStream(
            ctx=self,
-            rx_chan=recv_chan,
-            shield=shield,
-        ) as rchan:
+            rx_chan=ctx._recv_chan,
+        ) as stream:

            if self._portal:
-                self._portal._streams.add(rchan)
+                self._portal._streams.add(stream)

            try:
-                # ensure we aren't cancelled before delivering
-                # the stream
+                self._stream_opened = True
+
+                # XXX: do we need this?
+                # ensure we aren't cancelled before yielding the stream
                # await trio.lowlevel.checkpoint()
-                yield rchan
+                yield stream

-            except trio.EndOfChannel:
-                # likely the far end sent us a 'stop' message to
-                # terminate the stream.
-                raise
-
-            else:
-                # XXX: Make the stream "one-shot use".  On exit, signal
+                # NOTE: Make the stream "one-shot use".  On exit, signal
                # ``trio.EndOfChannel``/``StopAsyncIteration`` to the
                # far end.
-                await self.send_stop()
+                await stream.aclose()

            finally:
                if self._portal:
-                    self._portal._streams.remove(rchan)
+                    try:
+                        self._portal._streams.remove(stream)
+                    except KeyError:
+                        log.warning(
+                            f'Stream was already destroyed?\n'
+                            f'actor: {self.chan.uid}\n'
+                            f'ctx id: {self.cid}'
+                        )

    async def result(self) -> Any:
-        '''From a caller side, wait for and return the final result from
+        '''
+        From a caller side, wait for and return the final result from
        the callee side task.

        '''
@ -503,12 +654,11 @@ class Context:
                    try:
                        self._result = msg['return']
                        break
-                    except KeyError:
+                    except KeyError as msgerr:

                        if 'yield' in msg:
-                            # far end task is still streaming to us..
-                            log.warning(f'Remote stream deliverd {msg}')
-                            # do disard
+                            # far end task is still streaming to us so discard
+                            log.warning(f'Discarding stream delivered {msg}')
                            continue

                        elif 'stop' in msg:
@ -518,17 +668,36 @@ class Context:
                        # internal error should never get here
                        assert msg.get('cid'), (
                            "Received internal error at portal?")
-                        raise unpack_error(msg, self._portal.channel)
+
+                        raise unpack_error(
+                            msg, self._portal.channel
+                        ) from msgerr

        return self._result

-    async def started(self, value: Optional[Any] = None) -> None:
+    async def started(
+        self,
+        value: Optional[Any] = None

+    ) -> None:
+        '''
+        Indicate to calling actor's task that this linked context
+        has started and send ``value`` to the other side.
+
+        On the calling side ``value`` is the second item delivered
+        in the tuple returned by ``Portal.open_context()``.
+
+        '''
        if self._portal:
            raise RuntimeError(
                f"Caller side context {self} can not call started!")

+        elif self._started_called:
+            raise RuntimeError(
+                f"called 'started' twice on context with {self.chan.uid}")
+
        await self.chan.send({'started': value, 'cid': self.cid})
+        self._started_called = True

    # TODO: do we need a restart api?
    # async def restart(self) -> None:
--- a/tractor/_supervise.py
+++ b/tractor/_supervise.py
@ -1,20 +1,40 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 ``trio`` inspired apis and helpers
+
 """
+from contextlib import asynccontextmanager as acm
 from functools import partial
 import inspect
-import multiprocessing as mp
-from typing import Tuple, List, Dict, Optional
+from typing import (
+    Optional,
+    TYPE_CHECKING,
+)
 import typing
 import warnings

+from exceptiongroup import BaseExceptionGroup
 import trio
-from async_generator import asynccontextmanager

-from . import _debug
-from ._state import current_actor, is_main_process, is_root_process
+from ._debug import maybe_wait_for_debugger
+from ._state import current_actor, is_main_process
 from .log import get_logger, get_loglevel
-from ._actor import Actor
+from ._runtime import Actor
 from ._portal import Portal
 from ._exceptions import is_multi_cancelled
 from ._root import open_root_actor
@ -22,52 +42,98 @@ from . import _state
 from . import _spawn


+if TYPE_CHECKING:
+    import multiprocessing as mp
+
 log = get_logger(__name__)

-_default_bind_addr: Tuple[str, int] = ('127.0.0.1', 0)
+_default_bind_addr: tuple[str, int] = ('127.0.0.1', 0)


 class ActorNursery:
-    """Spawn scoped subprocess actors.
-    """
+    '''
+    The fundamental actor supervision construct: spawn and manage
+    explicit lifetime and capability restricted, bootstrapped,
+    ``trio.run()`` scheduled sub-processes.
+
+    Though the concept of a "process nursery" is different in complexity
+    and slightly different in semantics than a traditional single
+    threaded task nursery, much of the interface is the same. New
+    processes each require a top level "parent" or "root" task which is
+    itself no different than any task started by a traditional
+    ``trio.Nursery``. The main difference is that each "actor" (a
+    process + ``trio.run()``) contains a full, parallel executing
+    ``trio``-task-tree. The following super powers ensue:
+
+    - starting tasks in a child actor are completely independent of
+      tasks started in the current process. They execute in *parallel*
+      relative to tasks in the current process and are scheduled by their
+      own actor's ``trio`` run loop.
+    - tasks scheduled in a remote process still maintain an SC protocol
+      across memory boundaries using a so called "structured concurrency
+      dialogue protocol" which ensures task-hierarchy-lifetimes are linked.
+    - remote tasks (in another actor) can fail and relay failure back to
+      the caller task (in some other actor) via a serialized
+      ``RemoteActorError`` which means no zombie process or RPC
+      initiated task can ever go off on its own.
+
+    '''
    def __init__(
        self,
        actor: Actor,
        ria_nursery: trio.Nursery,
        da_nursery: trio.Nursery,
-        errors: Dict[Tuple[str, str], Exception],
+        errors: dict[tuple[str, str], BaseException],
    ) -> None:
        # self.supervisor = supervisor  # TODO
        self._actor: Actor = actor
        self._ria_nursery = ria_nursery
        self._da_nursery = da_nursery
-        self._children: Dict[
-            Tuple[str, str],
-            Tuple[Actor, mp.Process, Optional[Portal]]
+        self._children: dict[
+            tuple[str, str],
+            tuple[
+                Actor,
+                trio.Process | mp.Process,
+                Optional[Portal],
+            ]
        ] = {}
        # portals spawned with ``run_in_actor()`` are
        # cancelled when their "main" result arrives
        self._cancel_after_result_on_exit: set = set()
        self.cancelled: bool = False
        self._join_procs = trio.Event()
+        self._at_least_one_child_in_debug: bool = False
        self.errors = errors
+        self.exited = trio.Event()

    async def start_actor(
        self,
        name: str,
        *,
-        bind_addr: Tuple[str, int] = _default_bind_addr,
-        rpc_module_paths: List[str] = None,
-        enable_modules: List[str] = None,
-        loglevel: str = None,  # set log level per subactor
-        nursery: trio.Nursery = None,
+        bind_addr: tuple[str, int] = _default_bind_addr,
+        rpc_module_paths: list[str] | None = None,
+        enable_modules: list[str] | None = None,
+        loglevel: str | None = None,  # set log level per subactor
+        nursery: trio.Nursery | None = None,
+        debug_mode: Optional[bool] | None = None,
+        infect_asyncio: bool = False,
    ) -> Portal:
+        '''
+        Start a (daemon) actor: a process that has no designated
+        "main task" besides the runtime.
+
+        '''
        loglevel = loglevel or self._actor.loglevel or get_loglevel()

        # configure and pass runtime state
        _rtv = _state._runtime_vars.copy()
        _rtv['_is_root'] = False

+        # allow setting debug policy per actor
+        if debug_mode is not None:
+            _rtv['_debug_mode'] = debug_mode
+            self._at_least_one_child_in_debug = True
+
        enable_modules = enable_modules or []

        if rpc_module_paths:
@ -104,19 +170,25 @@ class ActorNursery:
                bind_addr,
                parent_addr,
                _rtv,  # run time vars
+                infect_asyncio=infect_asyncio,
            )
        )

    async def run_in_actor(
        self,
+
        fn: typing.Callable,
        *,
+
        name: Optional[str] = None,
-        bind_addr: Tuple[str, int] = _default_bind_addr,
-        rpc_module_paths: Optional[List[str]] = None,
-        enable_modules: List[str] = None,
-        loglevel: str = None,  # set log level per subactor
+        bind_addr: tuple[str, int] = _default_bind_addr,
+        rpc_module_paths: list[str] | None = None,
+        enable_modules: list[str] | None = None,
+        loglevel: str | None = None,  # set log level per subactor
+        infect_asyncio: bool = False,
+
        **kwargs,  # explicit args to ``fn``
+
    ) -> Portal:
        """Spawn a new actor, run a lone task, then terminate the actor and
        return its result.
@ -140,6 +212,7 @@ class ActorNursery:
            loglevel=loglevel,
            # use the run_in_actor nursery
            nursery=self._ria_nursery,
+            infect_asyncio=infect_asyncio,
        )

        # XXX: don't allow stream funcs
@ -168,7 +241,7 @@ class ActorNursery:
        """
        self.cancelled = True

-        log.warning(f"Cancelling nursery in {self._actor.uid}")
+        log.cancel(f"Cancelling nursery in {self._actor.uid}")
        with trio.move_on_after(3) as cs:

            async with trio.open_nursery() as nursery:
@ -206,7 +279,8 @@ class ActorNursery:

                        # spawn cancel tasks for each sub-actor
                        assert portal
-                        nursery.start_soon(portal.cancel_actor)
+                        if portal.channel.connected():
+                            nursery.start_soon(portal.cancel_actor)

        # if we cancelled the cancel (we hung cancelling remote actors)
        # then hard kill all sub-processes
@ -221,13 +295,17 @@ class ActorNursery:
        self._join_procs.set()


-@asynccontextmanager
+@acm
 async def _open_and_supervise_one_cancels_all_nursery(
    actor: Actor,
+
 ) -> typing.AsyncGenerator[ActorNursery, None]:

+    # TODO: yay or nay?
+    __tracebackhide__ = True
+
    # the collection of errors retreived from spawned sub-actors
-    errors: Dict[Tuple[str, str], Exception] = {}
+    errors: dict[tuple[str, str], BaseException] = {}

    # This is the outermost level "deamon actor" nursery. It is awaited
    # **after** the below inner "run in actor nursery". This allows for
@ -260,19 +338,17 @@ async def _open_and_supervise_one_cancels_all_nursery(
                    # after we yield upwards
                    yield anursery

+                    # When we didn't error in the caller's scope,
+                    # signal all process-monitor-tasks to conduct
+                    # the "hard join phase".
                    log.runtime(
                        f"Waiting on subactors {anursery._children} "
                        "to complete"
                    )
-
-                    # Last bit before first nursery block ends in the case
-                    # where we didn't error in the caller's scope
-
-                    # signal all process monitor tasks to conduct
-                    # hard join phase.
                    anursery._join_procs.set()

-                except BaseException as err:
+                except BaseException as inner_err:
+                    errors[actor.uid] = inner_err

                    # If we error in the root but the debugger is
                    # engaged we don't want to prematurely kill (and
@ -280,85 +356,66 @@ async def _open_and_supervise_one_cancels_all_nursery(
                    # will make the pdb repl unusable.
                    # Instead try to wait for pdb to be released before
                    # tearing down.
-                    if is_root_process():
-                        log.exception(f"we're root with {err}")
-
-                        # TODO: could this make things more deterministic?
-                        # wait to see if a sub-actor task will be
-                        # scheduled and grab the tty lock on the next
-                        # tick?
-                        # await trio.testing.wait_all_tasks_blocked()
-
-                        debug_complete = _debug._no_remote_has_tty
-                        if (
-                            debug_complete and
-                            not debug_complete.is_set()
-                        ):
-                            log.warning(
-                                'Root has errored but pdb is in use by '
-                                f'child {_debug._global_actor_in_debug}\n'
-                                'Waiting on tty lock to release..')
-
-                            with trio.CancelScope(shield=True):
-                                await debug_complete.wait()
+                    await maybe_wait_for_debugger(
+                        child_in_debug=anursery._at_least_one_child_in_debug
+                    )

                    # if the caller's scope errored then we activate our
                    # one-cancels-all supervisor strategy (don't
                    # worry more are coming).
                    anursery._join_procs.set()

-                    try:
-                        # XXX: hypothetically an error could be
-                        # raised and then a cancel signal shows up
-                        # slightly after in which case the `else:`
-                        # block here might not complete?  For now,
-                        # shield both.
-                        with trio.CancelScope(shield=True):
-                            etype = type(err)
-                            if etype in (
-                                trio.Cancelled,
-                                KeyboardInterrupt
-                            ) or (
-                                is_multi_cancelled(err)
-                            ):
-                                log.warning(
-                                    f"Nursery for {current_actor().uid} "
-                                    f"was cancelled with {etype}")
-                            else:
-                                log.exception(
-                                    f"Nursery for {current_actor().uid} "
-                                    f"errored with {err}, ")
+                    # XXX: hypothetically an error could be
+                    # raised and then a cancel signal shows up
+                    # slightly after in which case the `else:`
+                    # block here might not complete?  For now,
+                    # shield both.
+                    with trio.CancelScope(shield=True):
+                        etype = type(inner_err)
+                        if etype in (
+                            trio.Cancelled,
+                            KeyboardInterrupt
+                        ) or (
+                            is_multi_cancelled(inner_err)
+                        ):
+                            log.cancel(
+                                f"Nursery for {current_actor().uid} "
+                                f"was cancelled with {etype}")
+                        else:
+                            log.exception(
+                                f"Nursery for {current_actor().uid} "
+                                f"errored with")

-                            # cancel all subactors
-                            await anursery.cancel()
+                        # cancel all subactors
+                        await anursery.cancel()

-                    except trio.MultiError as merr:
-                        # If we receive additional errors while waiting on
-                        # remaining subactors that were cancelled,
-                        # aggregate those errors with the original error
-                        # that triggered this teardown.
-                        if err not in merr.exceptions:
-                            raise trio.MultiError(merr.exceptions + [err])
-                    else:
-                        raise
+            # ria_nursery scope end

-                # ria_nursery scope end
-
-        # XXX: do we need a `trio.Cancelled` catch here as well?
-        # this is the catch around the ``.run_in_actor()`` nursery
+        # TODO: this is the handler around the ``.run_in_actor()``
+        # nursery. Ideally we can drop this entirely in the future as
+        # the whole ``.run_in_actor()`` API should be built "on top of"
+        # this lower level spawn-request-cancel "daemon actor" API where
+        # a local in-actor task nursery is used with one-to-one task
+        # + `await Portal.run()` calls and the results/errors are
+        # handled directly (inline) and errors by the local nursery.
        except (
-
            Exception,
-            trio.MultiError,
+            BaseExceptionGroup,
            trio.Cancelled

        ) as err:

+            # XXX: yet another guard before allowing the cancel
+            # sequence in case a (single) child is in debug.
+            await maybe_wait_for_debugger(
+                child_in_debug=anursery._at_least_one_child_in_debug
+            )
+
            # If actor-local error was raised while waiting on
            # ".run_in_actor()" actors then we also want to cancel all
            # remaining sub-actors (due to our lone strategy:
            # one-cancels-all).
-            log.warning(f"Nursery cancelling due to {err}")
+            log.cancel(f"Nursery cancelling due to {err}")
            if anursery._children:
                with trio.CancelScope(shield=True):
                    await anursery.cancel()
@ -375,22 +432,26 @@ async def _open_and_supervise_one_cancels_all_nursery(
                    with trio.CancelScope(shield=True):
                        await anursery.cancel()

-                # use `MultiError` as needed
+                # use `BaseExceptionGroup` as needed
                if len(errors) > 1:
-                    raise trio.MultiError(tuple(errors.values()))
+                    raise BaseExceptionGroup(
+                        'tractor.ActorNursery errored with',
+                        tuple(errors.values()),
+                    )
                else:
                    raise list(errors.values())[0]

-        # ria_nursery scope end - nursery checkpoint
-
-    # after nursery exit
+        # da_nursery scope end - nursery checkpoint
+    # final exit


-@asynccontextmanager
+@acm
 async def open_nursery(
    **kwargs,
+
 ) -> typing.AsyncGenerator[ActorNursery, None]:
-    """Create and yield a new ``ActorNursery`` to be used for spawning
+    '''
+    Create and yield a new ``ActorNursery`` to be used for spawning
    structured concurrent subactors.

    When an actor is spawned a new trio task is started which
@ -402,7 +463,8 @@ async def open_nursery(
    close it. It turns out this approach is probably more correct
    anyway since it is more clear from the following nested nurseries
    which cancellation scopes correspond to each spawned subactor set.
-    """
+
+    '''
    implicit_runtime = False

    actor = current_actor(err_on_no_runtime=False)
@ -420,18 +482,23 @@ async def open_nursery(
            async with open_root_actor(**kwargs) as actor:
                assert actor is current_actor()

-                # try:
+                try:
+                    async with _open_and_supervise_one_cancels_all_nursery(
+                        actor
+                    ) as anursery:
+                        yield anursery
+                finally:
+                    anursery.exited.set()
+
+        else:  # sub-nursery case
+
+            try:
                async with _open_and_supervise_one_cancels_all_nursery(
                    actor
                ) as anursery:
                    yield anursery
-
-        else:  # sub-nursery case
-
-            async with _open_and_supervise_one_cancels_all_nursery(
-                actor
-            ) as anursery:
-                yield anursery
+            finally:
+                anursery.exited.set()

    finally:
        log.debug("Nursery teardown complete")
--- a/tractor/experimental/__init__.py
+++ b/tractor/experimental/__init__.py
@ -0,0 +1,29 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+Experimental APIs and subsystems not yet validated to be included as
+built-ins.
+
+This is a staging area for ``tractor.builtin``.
+
+'''
+from ._pubsub import pub as msgpub
+
+
+__all__ = [
+    'msgpub',
+]
--- a/tractor/experimental/_pubsub.py
+++ b/tractor/experimental/_pubsub.py
@ -0,0 +1,332 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Single target entrypoint, remote-task, dynamic (no push if no consumer)
+pubsub API using an async generator which multiplexes to consumers by
+key.
+
+NOTE: this module is likely deprecated by the new bi-directional streaming
+support provided by ``tractor.Context.open_stream()`` and friends.
+
+"""
+from __future__ import annotations
+import inspect
+import typing
+from typing import (
+    Any,
+    Callable,
+)
+from functools import partial
+from async_generator import aclosing
+
+import trio
+import wrapt
+
+from ..log import get_logger
+from .._streaming import Context
+
+
+__all__ = ['pub']
+
+log = get_logger('messaging')
+
+
+async def fan_out_to_ctxs(
+    pub_async_gen_func: typing.Callable,  # it's an async gen ... gd mypy
+    topics2ctxs: dict[str, list],
+    packetizer: typing.Callable | None = None,
+) -> None:
+    '''
+    Iterate the publisher async generator and fan out every published
+    value to each context subscribed to its topic.
+
+    - ``pub_async_gen_func``: called once as
+      ``pub_async_gen_func(get_topics=...)``; each value it yields must
+      be a mapping of ``topic -> data``.
+    - ``topics2ctxs``: live table of ``topic -> [ctx, ...]``; it is
+      mutated in place when a subscriber's send fails.
+    - ``packetizer``: optional ``(topic, data) -> packet`` callback; when
+      omitted the packet sent is ``{topic: data}``.
+
+    Returns once the generator is exhausted or no topic remains in
+    ``topics2ctxs``.
+
+    '''
+
+    def get_topics():
+        return tuple(topics2ctxs.keys())
+
+    agen = pub_async_gen_func(get_topics=get_topics)
+
+    async with aclosing(agen) as pub_gen:
+
+        async for published in pub_gen:
+
+            ctx_payloads: list[tuple[Context, Any]] = []
+
+            for topic, data in published.items():
+                log.debug(f"publishing {topic, data}")
+
+                # build a new dict packet or invoke provided packetizer
+                if packetizer is None:
+                    packet = {topic: data}
+
+                else:
+                    packet = packetizer(topic, data)
+
+                for ctx in topics2ctxs.get(topic, list()):
+                    ctx_payloads.append((ctx, packet))
+
+            if not ctx_payloads:
+                log.debug(f"Unconsumed values:\n{published}")
+
+            # deliver to each subscriber (fan out); a no-op when there
+            # is nothing to deliver.
+            for ctx, payload in ctx_payloads:
+                try:
+                    await ctx.send_yield(payload)
+                except (
+                    # That's right, anything you can think of...
+                    trio.ClosedResourceError, ConnectionResetError,
+                    ConnectionRefusedError,
+                ):
+                    log.warning(f"{ctx.chan} went down?")
+
+                    # drop the dead context from every topic and prune
+                    # topics left without subscribers: otherwise the
+                    # emptied keys linger, ``get_topics()`` never
+                    # becomes empty and the termination check below can
+                    # not fire. Iterate a snapshot since we pop entries.
+                    for topic, ctx_list in tuple(topics2ctxs.items()):
+                        if ctx in ctx_list:
+                            ctx_list.remove(ctx)
+
+                        if not ctx_list:
+                            topics2ctxs.pop(topic, None)
+
+            if not get_topics():
+                log.warning(f"No subscribers left for {pub_gen}")
+                break
+
+
+def modify_subs(
+
+    topics2ctxs: dict[str, list[Context]],
+    topics: set[str],
+    ctx: Context,
+
+) -> None:
+    """Make ``topics`` the absolute subscription set of ``ctx``.
+
+    ``ctx`` is appended to the subscriber list of every topic in
+    ``topics`` and removed from the list of every other topic present
+    in ``topics2ctxs``; topics left without any subscriber are dropped
+    from the table (mutated in place).
+    """
+    log.info(f"{ctx.chan.uid} changed subscription to {topics}")
+
+    # register the context under each requested topic
+    for wanted in topics:
+        topics2ctxs.setdefault(wanted, list()).append(ctx)
+
+    # walk a snapshot of the table since entries may be popped below
+    # TODO: this can likely be factored out into the pub-sub api
+    for stale in topics2ctxs.copy():
+        if stale in topics:
+            continue
+
+        subscribers = topics2ctxs.get(stale)
+        if subscribers:
+            try:
+                subscribers.remove(ctx)
+            except ValueError:
+                # this context was never subscribed to that topic
+                pass
+
+        if not subscribers:
+            # pop empty lists which will trigger bg feed task termination
+            topics2ctxs.pop(stale)
+
+
+# module-wide publisher state: the ``pub()`` wrapper keeps its
+# subscription tables under the '_subs' key as
+# ``task_name -> {topic -> [ctx, ...]}``.
+_pub_state: dict[str, dict] = {}
+# ``task_name -> lock``: looked up by the ``pub()`` wrapper and held
+# while it runs the data feed for that task name.
+_pubtask2lock: dict[str, trio.StrictFIFOLock] = {}
+
+
+def pub(
+    wrapped: typing.Callable | None = None,
+    *,
+    tasks: set[str] = set(),
+):
+    """Publisher async generator decorator.
+
+    A publisher can be called multiple times from different actors but
+    will only spawn a finite set of internal tasks to stream values to
+    each caller. The ``tasks: set[str]`` argument to the decorator
+    specifies the names of the mutex set of publisher tasks.  When the
+    publisher function is called, an argument ``task_name`` must be
+    passed to specify which task (of the set named in ``tasks``) should
+    be used. This allows for using the same publisher with different
+    input (arguments) without allowing more concurrent tasks than
+    necessary.
+
+    Values yielded from the decorated async generator must be
+    ``dict[str, dict[str, Any]]`` where the first level key is the topic
+    string and determines which subscription the packet will be
+    delivered to and the value is a packet ``dict[str, Any]`` by default
+    of the form:
+
+    .. code:: python
+
+        {topic: value}
+
+    The caller can instead opt to pass a ``packetizer`` callback whose
+    return value will be delivered as the published response.
+
+    The decorated async generator function must accept an argument
+    :func:`get_topics` which dynamically returns the tuple of current
+    subscriber topics:
+
+    .. code:: python
+
+        from tractor.experimental import msgpub as pub
+
+        @pub(tasks={'source_1', 'source_2'})
+        async def pub_service(get_topics):
+            data = await web_request(endpoints=get_topics())
+            for item in data:
+                yield {item['key']: item}
+
+
+    The publisher must be called passing in the following arguments:
+    - ``topics: set[str]`` the topic sequence or "subscriptions"
+    - ``task_name: str`` the task to use (if ``tasks`` was passed)
+    - ``ctx: Context`` the tractor context (only needed if calling the
+      pub func without a nursery, otherwise this is provided implicitly)
+    - packetizer: ``Callable[[str, Any], Any]`` a callback who receives
+      the topic and value from the publisher function each ``yield`` such that
+      whatever is returned is sent as the published value to subscribers of
+      that topic.  By default this is a dict ``{topic: value}``.
+
+    As an example, to make a subscriber call the above function:
+
+    .. code:: python
+
+        from functools import partial
+        import tractor
+
+        async with tractor.open_nursery() as n:
+            portal = n.run_in_actor(
+                'publisher',  # actor name
+                partial(      # func to execute in it
+                    pub_service,
+                    topics=('clicks', 'users'),
+                    task_name='source_1',
+                )
+            )
+            async for value in await portal.result():
+                print(f"Subscriber received {value}")
+
+
+    Here, you don't need to provide the ``ctx`` argument since the
+    remote actor provides it automatically to the spawned task. If you
+    were to call ``pub_service()`` directly from a wrapping function you
+    would need to provide this explicitly.
+
+    Remember you only need this if you need *a finite set of tasks*
+    running in a single actor to stream data to an arbitrary number of
+    subscribers. If you are ok to have a new task running for every call
+    to ``pub_service()`` then you probably don't need this.
+
+    Raises ``TypeError`` at decoration time when ``wrapped`` is not an
+    async generator function or does not declare a ``get_topics``
+    parameter.
+    """
+    global _pubtask2lock
+
+    # handle the decorator not called with () case
+    if wrapped is None:
+        return partial(pub, tasks=tasks)
+
+    # one lock per declared task name, shared by every call of this
+    # decorated publisher
+    task2lock: dict[str, trio.StrictFIFOLock] = {}
+
+    for name in tasks:
+        task2lock[name] = trio.StrictFIFOLock()
+
+    @wrapt.decorator
+    async def wrapper(agen, instance, args, kwargs):
+
+        # XXX: this is used to extract arguments properly as per the
+        # `wrapt` docs
+        async def _execute(
+            ctx: Context,
+            topics: set[str],
+            *args,
+            # *,
+            task_name: str | None = None,  # default: only one task allocated
+            packetizer: Callable | None = None,
+            **kwargs,
+        ):
+            if task_name is None:
+                task_name = trio.lowlevel.current_task().name
+
+            if tasks and task_name not in tasks:
+                raise TypeError(
+                    f"{agen} must be called with a `task_name` named "
+                    f"argument with a value from {tasks}")
+
+            elif not tasks and not task2lock:
+                # add a default root-task lock if none defined
+                task2lock[task_name] = trio.StrictFIFOLock()
+
+            # publish this decorator's locks into the module-wide table
+            _pubtask2lock.update(task2lock)
+
+            topics = set(topics)
+            lock = _pubtask2lock[task_name]
+
+            all_subs = _pub_state.setdefault('_subs', {})
+            topics2ctxs = all_subs.setdefault(task_name, {})
+
+            try:
+                modify_subs(topics2ctxs, topics, ctx)
+                # block and let existing feed task deliver
+                # stream data until it is cancelled in which case
+                # the next waiting task will take over and spawn it again
+                async with lock:
+                    # no data feeder task yet; so start one
+                    respawn = True
+                    while respawn:
+                        respawn = False
+                        # NOTE: ``funcname`` is bound further down in
+                        # ``pub()``, before this wrapper can be invoked
+                        log.info(
+                            f"Spawning data feed task for {funcname}")
+                        try:
+                            # unblocks when no more symbols subscriptions exist
+                            # and the streamer task terminates
+                            await fan_out_to_ctxs(
+                                pub_async_gen_func=partial(
+                                    agen, *args, **kwargs),
+                                topics2ctxs=topics2ctxs,
+                                packetizer=packetizer,
+                            )
+                            log.info(
+                                f"Terminating stream task {task_name or ''}"
+                                f" for {agen.__name__}")
+                        except trio.BrokenResourceError:
+                            log.exception("Respawning failed data feed task")
+                            respawn = True
+            finally:
+                # remove all subs for this context
+                modify_subs(topics2ctxs, set(), ctx)
+
+                # if there are truly no more subscriptions with this broker
+                # drop from broker subs dict
+                if not any(topics2ctxs.values()):
+                    log.info(
+                        f"No more subscriptions for publisher {task_name}")
+
+        # invoke it
+        await _execute(*args, **kwargs)
+
+    funcname = wrapped.__name__
+    if not inspect.isasyncgenfunction(wrapped):
+        raise TypeError(
+            f"Publisher {funcname} must be an async generator function"
+        )
+    if 'get_topics' not in inspect.signature(wrapped).parameters:
+        raise TypeError(
+            f"Publisher async gen {funcname} must define a "
+            "`get_topics` argument"
+        )
+
+    # XXX: manually monkey the wrapped function since
+    # ``wrapt.decorator`` doesn't seem to want to play nice with its
+    # whole "adapter" thing...
+    wrapped._tractor_stream_function = True  # type: ignore
+
+    return wrapper(wrapped)
--- a/tractor/log.py
+++ b/tractor/log.py
@ -1,17 +1,35 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 """
 Log like a forester!
+
 """
+from collections.abc import Mapping
 import sys
-from functools import partial
 import logging
 import colorlog  # type: ignore
-from typing import Optional

-from ._state import ActorContextInfo
+import trio
+
+from ._state import current_actor


-_proj_name = 'tractor'
-_default_loglevel = 'ERROR'
+_proj_name: str = 'tractor'
+_default_loglevel: str = 'ERROR'

 # Super sexy formatting thanks to ``colorlog``.
 # (NOTE: we use the '{' format style)
@ -20,7 +38,8 @@ LOG_FORMAT = (
    # "{bold_white}{log_color}{asctime}{reset}"
    "{log_color}{asctime}{reset}"
    " {bold_white}{thin_white}({reset}"
-    "{thin_white}{actor}, {process}, {task}){reset}{bold_white}{thin_white})"
+    "{thin_white}{actor_name}[{actor_uid}], "
+    "{process}, {task}){reset}{bold_white}{thin_white})"
    " {reset}{log_color}[{reset}{bold_log_color}{levelname}{reset}{log_color}]"
    " {log_color}{name}"
    " {thin_white}{filename}{log_color}:{reset}{thin_white}{lineno}{log_color}"
@ -32,6 +51,7 @@ DATE_FORMAT = '%b %d %H:%M:%S'
 LEVELS = {
    'TRANSPORT': 5,
    'RUNTIME': 15,
+    'CANCEL': 16,
    'PDB': 500,
 }

@ -41,6 +61,7 @@ STD_PALETTE = {
    'PDB': 'white',
    'WARNING': 'yellow',
    'INFO': 'green',
+    'CANCEL': 'yellow',
    'RUNTIME': 'white',
    'DEBUG': 'white',
    'TRANSPORT': 'cyan',
@ -52,6 +73,8 @@ BOLD_PALETTE = {
 }


+# TODO: this isn't showing the correct '{filename}'
+# as it did before..
 class StackLevelAdapter(logging.LoggerAdapter):

    def transport(
@ -67,16 +90,89 @@ class StackLevelAdapter(logging.LoggerAdapter):
    ) -> None:
        return self.log(15, msg)

+    def cancel(
+        self,
+        msg: str,
+    ) -> None:
+        # emit ``msg`` at numeric level 16, the value registered under
+        # the 'CANCEL' key of ``LEVELS``.
+        return self.log(16, msg)
+
    def pdb(
        self,
        msg: str,
    ) -> None:
        return self.log(500, msg)

+    def log(self, level, msg, *args, **kwargs):
+        """
+        Forward a log call to this adapter's ``._log()`` when ``level``
+        is enabled; otherwise do nothing.
+
+        Positional ``args`` are passed on as the message args tuple and
+        ``kwargs`` as keyword options.
+        """
+        if not self.isEnabledFor(level):
+            return
+
+        # NOTE: the ``.process()`` hook is deliberately left disabled
+        # msg, kwargs = self.process(msg, kwargs)
+        self._log(level, msg, args, **kwargs)
+
+    # LOL, the stdlib doesn't allow passing through ``stacklevel``..
+    def _log(
+        self,
+        level,
+        msg,
+        args,
+        exc_info=None,
+        extra=None,
+        stack_info=False,
+
+        # XXX: bit we added to show fileinfo from actual caller.
+        # this level then ``.log()`` then finally the caller's level..
+        stacklevel=3,
+    ):
+        """
+        Low-level log implementation, proxied to allow nested logger adapters.
+
+        Forwards the call to the wrapped ``self.logger._log()`` and
+        returns its result, passing ``stacklevel`` through.
+
+        NOTE: the ``extra`` argument is accepted but never forwarded;
+        the adapter's own ``self.extra`` is always passed instead.
+        """
+        return self.logger._log(
+            level,
+            msg,
+            args,
+            exc_info=exc_info,
+            extra=self.extra,
+            stack_info=stack_info,
+            stacklevel=stacklevel,
+        )
+
+
+# zero-argument callables, keyed by context-field name, which are
+# evaluated on each lookup done through ``ActorContextInfo``.
+_conc_name_getters = dict(
+    task=lambda: trio.lowlevel.current_task().name,
+    actor=lambda: current_actor(),
+    actor_name=lambda: current_actor().name,
+    # only the first 6 characters of the uid's 2nd element are shown
+    actor_uid=lambda: current_actor().uid[1][:6],
+)
+
+
+class ActorContextInfo(Mapping):
+    '''
+    Read-only mapping which dynamically looks up the local actor and
+    task names each time a key is accessed.
+
+    '''
+    _context_keys = (
+        'task',
+        'actor',
+        'actor_name',
+        'actor_uid',
+    )
+
+    def __len__(self):
+        return len(self._context_keys)
+
+    def __iter__(self):
+        return iter(self._context_keys)
+
+    def __getitem__(self, key: str) -> str:
+        # an unknown ``key`` raises ``KeyError`` as for any mapping
+        getter = _conc_name_getters[key]
+        try:
+            value = getter()
+        except RuntimeError:
+            # no local actor/task context initialized yet
+            value = f'no {key} context'
+        return value
+

 def get_logger(

-    name: str = None,
+    name: str | None = None,
    _root_name: str = _proj_name,

 ) -> StackLevelAdapter:
@ -111,7 +207,7 @@ def get_logger(


 def get_console_log(
-    level: str = None,
+    level: str | None = None,
    **kwargs,
 ) -> logging.LoggerAdapter:
    '''Get the package logger and enable a handler which writes to stderr.
@ -144,5 +240,5 @@ def get_console_log(
    return log


-def get_loglevel() -> Optional[str]:
+def get_loglevel() -> str:
    return _default_loglevel
--- a/tractor/msg.py
+++ b/tractor/msg.py
@ -1,306 +1,80 @@
-"""
-Messaging pattern APIs and helpers.
-
-NOTE: this module is likely deprecated by the new bi-directional streaming
-support provided by ``tractor.Context.open_stream()`` and friends.
-
-"""
-import inspect
-import typing
-from typing import Dict, Any, Set, Callable, List, Tuple
-from functools import partial
-from async_generator import aclosing
-
-import trio
-import wrapt
-
-from .log import get_logger
-from ._streaming import Context
-
-__all__ = ['pub']
-
-log = get_logger('messaging')
-
-
-async def fan_out_to_ctxs(
-    pub_async_gen_func: typing.Callable,  # it's an async gen ... gd mypy
-    topics2ctxs: Dict[str, list],
-    packetizer: typing.Callable = None,
-) -> None:
-    """Request and fan out quotes to each subscribed actor channel.
-    """
-    def get_topics():
-        return tuple(topics2ctxs.keys())
-
-    agen = pub_async_gen_func(get_topics=get_topics)
-
-    async with aclosing(agen) as pub_gen:
-
-        async for published in pub_gen:
-
-            ctx_payloads: List[Tuple[Context, Any]] = []
-
-            for topic, data in published.items():
-                log.debug(f"publishing {topic, data}")
-
-                # build a new dict packet or invoke provided packetizer
-                if packetizer is None:
-                    packet = {topic: data}
-
-                else:
-                    packet = packetizer(topic, data)
-
-                for ctx in topics2ctxs.get(topic, list()):
-                    ctx_payloads.append((ctx, packet))
-
-            if not ctx_payloads:
-                log.debug(f"Unconsumed values:\n{published}")
-
-            # deliver to each subscriber (fan out)
-            if ctx_payloads:
-                for ctx, payload in ctx_payloads:
-                    try:
-                        await ctx.send_yield(payload)
-                    except (
-                        # That's right, anything you can think of...
-                        trio.ClosedResourceError, ConnectionResetError,
-                        ConnectionRefusedError,
-                    ):
-                        log.warning(f"{ctx.chan} went down?")
-                        for ctx_list in topics2ctxs.values():
-                            try:
-                                ctx_list.remove(ctx)
-                            except ValueError:
-                                continue
-
-            if not get_topics():
-                log.warning(f"No subscribers left for {pub_gen}")
-                break
-
-
-def modify_subs(
-
-    topics2ctxs: Dict[str, List[Context]],
-    topics: Set[str],
-    ctx: Context,
-
-) -> None:
-    """Absolute symbol subscription list for each quote stream.
-
-    Effectively a symbol subscription api.
-    """
-    log.info(f"{ctx.chan.uid} changed subscription to {topics}")
-
-    # update map from each symbol to requesting client's chan
-    for topic in topics:
-        topics2ctxs.setdefault(topic, list()).append(ctx)
-
-    # remove any existing symbol subscriptions if symbol is not
-    # found in ``symbols``
-    # TODO: this can likely be factored out into the pub-sub api
-    for topic in filter(
-        lambda topic: topic not in topics, topics2ctxs.copy()
-    ):
-        ctx_list = topics2ctxs.get(topic)
-        if ctx_list:
-            try:
-                ctx_list.remove(ctx)
-            except ValueError:
-                pass
-
-        if not ctx_list:
-            # pop empty sets which will trigger bg quoter task termination
-            topics2ctxs.pop(topic)
-
-
-_pub_state: Dict[str, dict] = {}
-_pubtask2lock: Dict[str, trio.StrictFIFOLock] = {}
-
-
-def pub(
-    wrapped: typing.Callable = None,
-    *,
-    tasks: Set[str] = set(),
-):
-    """Publisher async generator decorator.
-
-    A publisher can be called multiple times from different actors but
-    will only spawn a finite set of internal tasks to stream values to
-    each caller. The ``tasks: Set[str]`` argument to the decorator
-    specifies the names of the mutex set of publisher tasks.  When the
-    publisher function is called, an argument ``task_name`` must be
-    passed to specify which task (of the set named in ``tasks``) should
-    be used. This allows for using the same publisher with different
-    input (arguments) without allowing more concurrent tasks then
-    necessary.
-
-    Values yielded from the decorated async generator must be
-    ``Dict[str, Dict[str, Any]]`` where the fist level key is the topic
-    string and determines which subscription the packet will be
-    delivered to and the value is a packet ``Dict[str, Any]`` by default
-    of the form:
-
-    .. ::python
-
-        {topic: str: value: Any}
-
-    The caller can instead opt to pass a ``packetizer`` callback who's
-    return value will be delivered as the published response.
-
-    The decorated async generator function must accept an argument
-    :func:`get_topics` which dynamically returns the tuple of current
-    subscriber topics:
-
-    .. code:: python
-
-        from tractor.msg import pub
-
-        @pub(tasks={'source_1', 'source_2'})
-        async def pub_service(get_topics):
-            data = await web_request(endpoints=get_topics())
-            for item in data:
-                yield data['key'], data
-
-
-    The publisher must be called passing in the following arguments:
-    - ``topics: Set[str]`` the topic sequence or "subscriptions"
-    - ``task_name: str`` the task to use (if ``tasks`` was passed)
-    - ``ctx: Context`` the tractor context (only needed if calling the
-      pub func without a nursery, otherwise this is provided implicitly)
-    - packetizer: ``Callable[[str, Any], Any]`` a callback who receives
-      the topic and value from the publisher function each ``yield`` such that
-      whatever is returned is sent as the published value to subscribers of
-      that topic.  By default this is a dict ``{topic: str: value: Any}``.
-
-    As an example, to make a subscriber call the above function:
-
-    .. code:: python
-
-        from functools import partial
-        import tractor
-
-        async with tractor.open_nursery() as n:
-            portal = n.run_in_actor(
-                'publisher',  # actor name
-                partial(      # func to execute in it
-                    pub_service,
-                    topics=('clicks', 'users'),
-                    task_name='source1',
-                )
-            )
-            async for value in await portal.result():
-                print(f"Subscriber received {value}")
-
-
-    Here, you don't need to provide the ``ctx`` argument since the
-    remote actor provides it automatically to the spawned task. If you
-    were to call ``pub_service()`` directly from a wrapping function you
-    would need to provide this explicitly.
-
-    Remember you only need this if you need *a finite set of tasks*
-    running in a single actor to stream data to an arbitrary number of
-    subscribers. If you are ok to have a new task running for every call
-    to ``pub_service()`` then probably don't need this.
-    """
-    global _pubtask2lock
-
-    # handle the decorator not called with () case
-    if wrapped is None:
-        return partial(pub, tasks=tasks)
-
-    task2lock: Dict[str, trio.StrictFIFOLock] = {}
-
-    for name in tasks:
-        task2lock[name] = trio.StrictFIFOLock()
-
-    @wrapt.decorator
-    async def wrapper(agen, instance, args, kwargs):
-
-        # XXX: this is used to extract arguments properly as per the
-        # `wrapt` docs
-        async def _execute(
-            ctx: Context,
-            topics: Set[str],
-            *args,
-            # *,
-            task_name: str = None,  # default: only one task allocated
-            packetizer: Callable = None,
-            **kwargs,
-        ):
-            if task_name is None:
-                task_name = trio.lowlevel.current_task().name
-
-            if tasks and task_name not in tasks:
-                raise TypeError(
-                    f"{agen} must be called with a `task_name` named "
-                    f"argument with a value from {tasks}")
-
-            elif not tasks and not task2lock:
-                # add a default root-task lock if none defined
-                task2lock[task_name] = trio.StrictFIFOLock()
-
-            _pubtask2lock.update(task2lock)
-
-            topics = set(topics)
-            lock = _pubtask2lock[task_name]
-
-            all_subs = _pub_state.setdefault('_subs', {})
-            topics2ctxs = all_subs.setdefault(task_name, {})
-
-            try:
-                modify_subs(topics2ctxs, topics, ctx)
-                # block and let existing feed task deliver
-                # stream data until it is cancelled in which case
-                # the next waiting task will take over and spawn it again
-                async with lock:
-                    # no data feeder task yet; so start one
-                    respawn = True
-                    while respawn:
-                        respawn = False
-                        log.info(
-                            f"Spawning data feed task for {funcname}")
-                        try:
-                            # unblocks when no more symbols subscriptions exist
-                            # and the streamer task terminates
-                            await fan_out_to_ctxs(
-                                pub_async_gen_func=partial(
-                                    agen, *args, **kwargs),
-                                topics2ctxs=topics2ctxs,
-                                packetizer=packetizer,
-                            )
-                            log.info(
-                                f"Terminating stream task {task_name or ''}"
-                                f" for {agen.__name__}")
-                        except trio.BrokenResourceError:
-                            log.exception("Respawning failed data feed task")
-                            respawn = True
-            finally:
-                # remove all subs for this context
-                modify_subs(topics2ctxs, set(), ctx)
-
-                # if there are truly no more subscriptions with this broker
-                # drop from broker subs dict
-                if not any(topics2ctxs.values()):
-                    log.info(
-                        f"No more subscriptions for publisher {task_name}")
-
-        # invoke it
-        await _execute(*args, **kwargs)
-
-    funcname = wrapped.__name__
-    if not inspect.isasyncgenfunction(wrapped):
-        raise TypeError(
-            f"Publisher {funcname} must be an async generator function"
-        )
-    if 'get_topics' not in inspect.signature(wrapped).parameters:
-        raise TypeError(
-            f"Publisher async gen {funcname} must define a "
-            "`get_topics` argument"
-        )
-
-    # XXX: manually monkey the wrapped function since
-    # ``wrapt.decorator`` doesn't seem to want to play nice with its
-    # whole "adapter" thing...
-    wrapped._tractor_stream_function = True  # type: ignore
-
-    return wrapper(wrapped)
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+Built-in messaging patterns, types, APIs and helpers.
+
+'''
+
+# TODO: integration with our ``enable_modules: list[str]`` caps sys.
+
+# ``pkgutil.resolve_name()`` internally uses
+# ``importlib.import_module()`` which can be filtered by inserting
+# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before
+# entering the ``_runtime.process_messages()`` loop).
+# - https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645
+# - https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules
+#   - https://stackoverflow.com/a/63320902
+#   - https://docs.python.org/3/library/sys.html#sys.meta_path
+
+# the new "Implicit Namespace Packages" might be relevant?
+# - https://www.python.org/dev/peps/pep-0420/
+
+# add implicit serialized message type support so that paths can be
+# handed directly to IPC primitives such as streams and `Portal.run()`
+# calls:
+# - via ``msgspec``:
+#   - https://jcristharif.com/msgspec/api.html#struct
+#   - https://jcristharif.com/msgspec/extending.html
+# via ``msgpack-python``:
+# - https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type
+
+from __future__ import annotations
+from pkgutil import resolve_name
+
+
+class NamespacePath(str):
+    '''
+    A serializable description of a (function) Python object location
+    described by the target's module path and namespace key, joined as
+    ``'<module>:<name>'``, meant as a message-native "packet" to allow
+    actors to point-and-load objects by absolute reference.
+
+    '''
+    # cache for the object resolved by ``.load_ref()``; ``None`` until
+    # the first successful load.
+    _ref: object = None
+
+    def load_ref(self) -> object:
+        '''
+        Resolve (once) and return the object this path points to using
+        ``pkgutil.resolve_name()``; later calls return the cached object.
+
+        '''
+        if self._ref is None:
+            self._ref = resolve_name(self)
+        return self._ref
+
+    def to_tuple(
+        self,
+
+    ) -> tuple[str, str]:
+        '''
+        Load the reference and return its ``(__module__, __name__)``
+        pair; the name is ``''`` when the object has no ``__name__``.
+
+        '''
+        ref = self.load_ref()
+        return ref.__module__, getattr(ref, '__name__', '')
+
+    @classmethod
+    def from_ref(
+        cls,
+        ref,
+
+    ) -> NamespacePath:
+        '''
+        Build the path for ``ref`` from its ``__module__`` and name.
+
+        The qualified name is preferred so that references to class
+        attributes (eg. ``Class.method``) produce a path which
+        ``.load_ref()`` can resolve back; for module level objects it
+        equals ``__name__``. Names containing ``<locals>`` can not be
+        resolved by ``resolve_name()`` so those keep using the plain
+        ``__name__`` as before.
+
+        '''
+        name = getattr(ref, '__qualname__', None)
+        if not name or '<locals>' in name:
+            name = getattr(ref, '__name__', '')
+
+        return cls(':'.join((ref.__module__, name)))
--- a/tractor/testing/init.py
+++ b/tractor/testing/init.py
@ -1 +0,0 @@
-from ._tractor_test import tractor_test
--- a/tractor/testing/_tractor_test.py
+++ b/tractor/testing/_tractor_test.py
@ -1,89 +0,0 @@
-import inspect
-import platform
-from functools import partial, wraps
-
-import trio
-import tractor
-# from tractor import run
-
-
-__all__ = ['tractor_test']
-
-
-def tractor_test(fn):
-    """
-    Use:
-
-    @tractor_test
-    async def test_whatever():
-        await ...
-
-    If fixtures:
-
-        - ``arb_addr`` (a socket addr tuple where arbiter is listening)
-        - ``loglevel`` (logging level passed to tractor internals)
-        - ``start_method`` (subprocess spawning backend)
-
-    are defined in the `pytest` fixture space they will be automatically
-    injected to tests declaring these funcargs.
-    """
-    @wraps(fn)
-    def wrapper(
-        *args,
-        loglevel=None,
-        arb_addr=None,
-        start_method=None,
-        **kwargs
-    ):
-        # __tracebackhide__ = True
-
-        if 'arb_addr' in inspect.signature(fn).parameters:
-            # injects test suite fixture value to test as well
-            # as `run()`
-            kwargs['arb_addr'] = arb_addr
-
-        if 'loglevel' in inspect.signature(fn).parameters:
-            # allows test suites to define a 'loglevel' fixture
-            # that activates the internal logging
-            kwargs['loglevel'] = loglevel
-
-        if start_method is None:
-            if platform.system() == "Windows":
-                start_method = 'spawn'
-            else:
-                start_method = 'trio'
-
-        if 'start_method' in inspect.signature(fn).parameters:
-            # set of subprocess spawning backends
-            kwargs['start_method'] = start_method
-
-        if kwargs:
-
-            # use explicit root actor start
-
-            async def _main():
-                async with tractor.open_root_actor(
-                    # **kwargs,
-                    arbiter_addr=arb_addr,
-                    loglevel=loglevel,
-                    start_method=start_method,
-
-                    # TODO: only enable when pytest is passed --pdb
-                    # debug_mode=True,
-
-                ) as actor:
-                    await fn(*args, **kwargs)
-
-            main = _main
-
-        else:
-            # use implicit root actor start
-            main = partial(fn, *args, **kwargs)
-
-        return trio.run(main)
-            # arbiter_addr=arb_addr,
-            # loglevel=loglevel,
-            # start_method=start_method,
-        # )
-
-    return wrapper
--- a/tractor/to_asyncio.py
+++ b/tractor/to_asyncio.py
@ -0,0 +1,550 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+Infection apis for ``asyncio`` loops running ``trio`` using guest mode.
+
+'''
+import asyncio
+from asyncio.exceptions import CancelledError
+from contextlib import asynccontextmanager as acm
+from dataclasses import dataclass
+import inspect
+from typing import (
+    Any,
+    Callable,
+    AsyncIterator,
+    Awaitable,
+    Optional,
+)
+
+import trio
+from outcome import Error
+
+from .log import get_logger
+from ._state import current_actor
+from ._exceptions import AsyncioCancelled
+from .trionics._broadcast import (
+    broadcast_receiver,
+    BroadcastReceiver,
+)
+
+log = get_logger(__name__)
+
+
+__all__ = ['run_task', 'run_as_asyncio_guest']
+
+
+@dataclass
+class LinkedTaskChannel(trio.abc.Channel):
+    '''
+    A "linked task channel" which allows for two-way synchronized msg
+    passing between a ``trio``-in-guest-mode task and an ``asyncio``
+    task scheduled in the host loop.
+
+    '''
+    # queue that ``.send()`` pushes items onto for the ``asyncio`` task
+    _to_aio: asyncio.Queue
+    # memory channel end that ``.receive()`` reads from
+    _from_aio: trio.MemoryReceiveChannel
+    # memory channel send end (closed by ``open_channel_from()`` on exit)
+    _to_trio: trio.MemorySendChannel
+
+    # scope entered by ``open_channel_from()`` around the ``trio`` side
+    _trio_cs: trio.CancelScope
+    # event awaited by ``.wait_asyncio_complete()``
+    _aio_task_complete: trio.Event
+    # flipped to ``True`` when ``open_channel_from()`` exits
+    _trio_exited: bool = False
+
+    # set after ``asyncio.create_task()``
+    _aio_task: Optional[asyncio.Task] = None
+    # exception captured from the ``asyncio`` task, if any
+    _aio_err: Optional[BaseException] = None
+    # lazily allocated by the first ``.subscribe()`` call
+    _broadcaster: Optional[BroadcastReceiver] = None
+
+    async def aclose(self) -> None:
+        '''
+        Close the receive side (``._from_aio``) of this channel.
+
+        '''
+        await self._from_aio.aclose()
+
+    async def receive(self) -> Any:
+        '''
+        Receive the next value from ``._from_aio`` with errors handled
+        by ``translate_aio_errors()``.
+
+        '''
+        async with translate_aio_errors(
+            self,
+
+            # XXX: obviously this will deadlock if an on-going stream is
+            # being processed.
+            # wait_on_aio_task=False,
+        ):
+
+            # TODO: do we need this to guarantee asyncio code gets
+            # cancelled in the case where the trio side somehow creates
+            # a state where the asyncio cycle-task isn't getting the
+            # cancel request sent by (in theory) the last checkpoint
+            # cycle on the trio side?
+            # await trio.lowlevel.checkpoint()
+
+            return await self._from_aio.receive()
+
+    async def wait_asyncio_complete(self) -> None:
+        '''
+        Block until the ``._aio_task_complete`` event is set.
+
+        '''
+        await self._aio_task_complete.wait()
+
+    # def cancel_asyncio_task(self) -> None:
+    #     self._aio_task.cancel()
+
+    async def send(self, item: Any) -> None:
+        '''
+        Send a value through to the asyncio task presuming
+        it defines a ``from_trio`` argument, if it does not
+        this method will raise an error.
+
+        '''
+        # NOTE: non-blocking put; ``asyncio.Queue.put_nowait()`` raises
+        # ``asyncio.QueueFull`` when the queue has no free slot.
+        self._to_aio.put_nowait(item)
+
+    def closed(self) -> bool:
+        '''
+        Return the (private) closed flag of the ``._from_aio`` channel.
+
+        '''
+        return self._from_aio._closed  # type: ignore
+
+    # TODO: should we consider some kind of "decorator" system
+    # that checks for structural-typing compatibility and then
+    # automatically adds this ctx-mngr-as-method machinery?
+    @acm
+    async def subscribe(
+        self,
+
+    ) -> AsyncIterator[BroadcastReceiver]:
+        '''
+        Allocate and return a ``BroadcastReceiver`` which delegates
+        to this inter-task channel.
+
+        This allows multiple local tasks to receive each their own copy
+        of this message stream.
+
+        See ``tractor._streaming.MsgStream.subscribe()`` for further
+        similar details.
+        '''
+        if self._broadcaster is None:
+
+            bcast = self._broadcaster = broadcast_receiver(
+                self,
+                # use memory channel size by default
+                self._from_aio._state.max_buffer_size,  # type: ignore
+                receive_afunc=self.receive,
+            )
+
+            # from now on direct ``.receive()`` calls on this channel
+            # are routed through the broadcaster as well.
+            self.receive = bcast.receive  # type: ignore
+
+        async with self._broadcaster.subscribe() as bstream:
+            assert bstream.key != self._broadcaster.key
+            assert bstream._recv == self._broadcaster._recv
+            yield bstream
+
+
+def _run_asyncio_task(
+
+    func: Callable,
+    *,
+    qsize: int = 1,
+    provide_channels: bool = False,
+    **kwargs,
+
+) -> LinkedTaskChannel:
+    '''
+    Run an ``asyncio`` async function or generator in a task, return
+    or stream the result back to ``trio``.
+
+    ``func`` is called with ``kwargs`` (plus ``to_trio`` / ``from_trio``
+    when it declares arguments by those names) and the resulting
+    awaitable is scheduled via ``asyncio.create_task()``. ``qsize``
+    bounds both the ``asyncio.Queue`` and the ``trio`` memory channel.
+    With ``provide_channels=True`` the return value is not relayed and
+    the send channel is closed when the task finishes.
+
+    Returns the ``LinkedTaskChannel`` linking both tasks; raises
+    ``RuntimeError`` when the current actor is not in "infected
+    asyncio" mode and ``TypeError`` when ``func(**kwargs)`` is not
+    awaitable.
+
+    '''
+    __tracebackhide__ = True
+    if not current_actor().is_infected_aio():
+        raise RuntimeError("`infect_asyncio` mode is not enabled!?")
+
+    # ITC (inter task comms), these channel/queue names are mostly from
+    # ``asyncio``'s perspective.
+    aio_q = from_trio = asyncio.Queue(qsize)  # type: ignore
+    to_trio, from_aio = trio.open_memory_channel(qsize)  # type: ignore
+
+    # positional/keyword argument names declared by ``func``
+    args = tuple(inspect.getfullargspec(func).args)
+
+    # NOTE(review): the marker attribute is spelled ``..._steam_...``
+    # here; confirm it matches the name set wherever the marker is
+    # applied, otherwise this check never triggers.
+    if getattr(func, '_tractor_steam_function', None):
+        # the assumption is that the target async routine accepts the
+        # send channel then it intends to yield more than one return
+        # value otherwise it would just return ;P
+        assert qsize > 1
+
+    if provide_channels:
+        assert 'to_trio' in args
+
+    # allow target func to accept/stream results manually by name
+    if 'to_trio' in args:
+        kwargs['to_trio'] = to_trio
+
+    if 'from_trio' in args:
+        kwargs['from_trio'] = from_trio
+
+    coro = func(**kwargs)
+
+    cancel_scope = trio.CancelScope()
+    aio_task_complete = trio.Event()
+    aio_err: Optional[BaseException] = None
+
+    chan = LinkedTaskChannel(
+        aio_q,  # asyncio.Queue
+        from_aio,  # recv chan
+        to_trio,  # send chan
+
+        cancel_scope,
+        aio_task_complete,
+    )
+
+    async def wait_on_coro_final_result(
+
+        to_trio: trio.MemorySendChannel,
+        coro: Awaitable,
+        aio_task_complete: trio.Event,
+
+    ) -> None:
+        '''
+        Await ``coro`` and relay result back to ``trio``.
+
+        '''
+        nonlocal aio_err
+        nonlocal chan
+
+        # ``id(coro)`` doubles as a "no result yet" sentinel
+        orig = result = id(coro)
+        try:
+            result = await coro
+        # NOTE(review): ``aio_err`` is declared ``nonlocal`` so this
+        # ``as`` binding targets the enclosing variable, which Python
+        # unbinds again when the ``except`` clause exits.
+        except BaseException as aio_err:
+            log.exception('asyncio task errored')
+            chan._aio_err = aio_err
+            raise
+
+        else:
+            if (
+                result != orig and
+                aio_err is None and
+
+                # in the ``open_channel_from()`` case we don't
+                # relay through the "return value".
+                not provide_channels
+            ):
+                to_trio.send_nowait(result)
+
+        finally:
+            # if the task was spawned using ``open_channel_from()``
+            # then we close the channels on exit.
+            if provide_channels:
+                # only close the sender side which will relay
+                # a ``trio.EndOfChannel`` to the trio (consumer) side.
+                to_trio.close()
+
+            aio_task_complete.set()
+            # ``task`` is the closure variable assigned further below
+            log.runtime(f'`asyncio` task: {task.get_name()} is complete')
+
+    # start the asyncio task we submitted from trio
+    if not inspect.isawaitable(coro):
+        raise TypeError(f"No support for invoking {coro}")
+
+    task = asyncio.create_task(
+        wait_on_coro_final_result(
+            to_trio,
+            coro,
+            aio_task_complete
+        )
+    )
+    chan._aio_task = task
+
+    def cancel_trio(task: asyncio.Task) -> None:
+        '''
+        Cancel the calling ``trio`` task on error.
+
+        Registered as the done-callback of the ``asyncio`` task.
+
+        '''
+        nonlocal chan
+        aio_err = chan._aio_err
+        task_err: Optional[BaseException] = None
+
+        # only to avoid ``asyncio`` complaining about uncaptured
+        # task exceptions
+        try:
+            task.exception()
+        except BaseException as terr:
+            task_err = terr
+
+            if isinstance(terr, CancelledError):
+                log.cancel(f'`asyncio` task cancelled: {task.get_name()}')
+            else:
+                log.exception(f'`asyncio` task: {task.get_name()} errored')
+
+            assert type(terr) is type(aio_err), 'Asyncio task error mismatch?'
+
+        if aio_err is not None:
+            # XXX: uhh is this true?
+            # assert task_err, f'Asyncio task {task.get_name()} discrepancy!?'
+
+            # NOTE: currently mem chan closure may act as a form
+            # of error relay (at least in the ``asyncio.CancelledError``
+            # case) since we have no way to directly trigger a ``trio``
+            # task error without creating a nursery to throw one.
+            # We might want to change this in the future though.
+            from_aio.close()
+
+            if type(aio_err) is CancelledError:
+                log.cancel("infected task was cancelled")
+
+                # TODO: show that the cancellation originated
+                # from the ``trio`` side? right?
+                # if cancel_scope.cancelled:
+                #     raise aio_err from err
+
+            elif task_err is None:
+                assert aio_err
+                # NOTE(review): re-assigns the traceback the exception
+                # already carries; looks like a no-op - confirm intent.
+                aio_err.with_traceback(aio_err.__traceback__)
+                log.error('infected task errorred')
+
+            # XXX: always cancel the scope on error
+            # in case the trio task is blocking
+            # on a checkpoint.
+            cancel_scope.cancel()
+
+            # raise any ``asyncio`` side error.
+            raise aio_err
+
+    task.add_done_callback(cancel_trio)
+    return chan
+
+
+@acm
+async def translate_aio_errors(
+
+    chan: LinkedTaskChannel,
+    wait_on_aio_task: bool = False,
+
+) -> AsyncIterator[None]:
+    '''
+    Error handling context around ``asyncio`` task spawns which
+    appropriately translates errors and cancels into ``trio`` land.
+
+    ``chan`` must already have its ``._aio_task`` set. When
+    ``wait_on_aio_task`` is true the exit path waits on
+    ``chan._aio_task_complete`` before re-raising any captured
+    ``asyncio`` side error.
+
+    '''
+    trio_task = trio.lowlevel.current_task()
+
+    # only ever assigned in the ``trio.ClosedResourceError`` handler below
+    aio_err: Optional[BaseException] = None
+
+    # TODO: make this a channel method?
+    def maybe_raise_aio_err(
+        err: Optional[Exception] = None
+    ) -> None:
+        # re-read from the channel; this local shadows the outer name
+        aio_err = chan._aio_err
+        if (
+            aio_err is not None and
+            type(aio_err) != CancelledError
+        ):
+            # always raise from any captured asyncio error
+            if err:
+                raise aio_err from err
+            else:
+                raise aio_err
+
+    task = chan._aio_task
+    assert task
+    try:
+        yield
+
+    except (
+        trio.Cancelled,
+    ):
+        # relay cancel through to called ``asyncio`` task
+        assert chan._aio_task
+        chan._aio_task.cancel(
+            msg=f'the `trio` caller task was cancelled: {trio_task.name}'
+        )
+        raise
+
+    except (
+        # NOTE: see the note in the ``cancel_trio()`` asyncio task
+        # termination callback
+        trio.ClosedResourceError,
+        # trio.BrokenResourceError,
+    ):
+        aio_err = chan._aio_err
+        if (
+            task.cancelled() and
+            type(aio_err) is CancelledError
+        ):
+            # if an underlying ``asyncio.CancelledError`` triggered this
+            # channel close, raise our (non-``BaseException``) wrapper
+            # error: ``AsyncioCancelled`` from that source error.
+            raise AsyncioCancelled from aio_err
+
+        else:
+            raise
+
+    finally:
+        # NOTE: operator precedence groups the condition below as
+        # ``(not task.done() and aio_err) or chan._trio_exited``.
+        if (
+            # NOTE: always cancel the ``asyncio`` task if we've made it
+            # this far and it's not done.
+            not task.done() and aio_err
+
+            # or the trio side has exited its surrounding cancel scope
+            # indicating the lifetime of the ``asyncio``-side task
+            # should also be terminated.
+            or chan._trio_exited
+        ):
+            log.runtime(
+                f'Cancelling `asyncio`-task: {task.get_name()}'
+            )
+            # assert not aio_err, 'WTF how did asyncio do this?!'
+            task.cancel()
+
+        # Required to sync with the far end ``asyncio``-task to ensure
+        # any error is captured (via monkeypatching the
+        # ``channel._aio_err``) before calling ``maybe_raise_aio_err()``
+        # below!
+        if wait_on_aio_task:
+            await chan._aio_task_complete.wait()
+
+        # NOTE: if any ``asyncio`` error was caught, raise it inline
+        # here in the ``trio`` task
+        maybe_raise_aio_err()
+
+
+async def run_task(
+    func: Callable,
+    *,
+
+    qsize: int = 2**10,
+    **kwargs,
+
+) -> Any:
+    '''
+    Run an ``asyncio`` async function or generator in a task, return
+    or stream the result back to ``trio``.
+
+    ``kwargs`` are forwarded to ``func``; the first value received from
+    the linked channel is returned.
+
+    '''
+    # simple async func
+    # NOTE(review): the ``qsize`` parameter of this function is not
+    # forwarded; a buffer size of 1 is always used - confirm intended.
+    chan = _run_asyncio_task(
+        func,
+        qsize=1,
+        **kwargs,
+    )
+    # closes the receive channel when this block is left
+    with chan._from_aio:
+        async with translate_aio_errors(
+            chan,
+            wait_on_aio_task=True,
+        ):
+            # return single value that is the output from the
+            # ``asyncio`` function-as-task. Expect the mem chan api to
+            # do the job of handling cross-framework cancellations
+            # / errors via closure and translation in the
+            # ``translate_aio_errors()`` in the above ctx mngr.
+            return await chan.receive()
+
+
+@acm
+async def open_channel_from(
+
+    target: Callable[..., Any],
+    **kwargs,
+
+) -> AsyncIterator[Any]:
+    '''
+    Open an inter-loop linked task channel for streaming between a target
+    spawned ``asyncio`` task and ``trio``.
+
+    ``target`` is started via ``_run_asyncio_task()`` with
+    ``provide_channels=True`` (so it must declare a ``to_trio``
+    argument) and ``kwargs`` are forwarded to it. Yields a
+    ``(first, chan)`` pair where ``first`` is the first value received
+    from the task and ``chan`` is the ``LinkedTaskChannel``.
+
+    '''
+    chan = _run_asyncio_task(
+        target,
+        qsize=2**8,
+        provide_channels=True,
+        **kwargs,
+    )
+    async with chan._from_aio:
+        async with translate_aio_errors(
+            chan,
+            wait_on_aio_task=True,
+        ):
+            # sync to a "started()"-like first delivered value from the
+            # ``asyncio`` task.
+            try:
+                with chan._trio_cs:
+                    first = await chan.receive()
+
+                    # deliver stream handle upward
+                    yield first, chan
+            finally:
+                # flag read by ``translate_aio_errors()`` to decide
+                # whether the ``asyncio`` task should be cancelled.
+                chan._trio_exited = True
+                chan._to_trio.close()
+
+
+def run_as_asyncio_guest(
+
+    trio_main: Callable,
+
+) -> None:
+    '''
+    Entry for an "infected ``asyncio`` actor".
+
+    Entrypoint for a Python process which starts the ``asyncio`` event
+    loop and runs ``trio`` in guest mode resulting in a system where
+    ``trio`` tasks can control ``asyncio`` tasks whilst maintaining
+    SC semantics.
+
+    NOTE(review): annotated ``-> None`` but the value produced by
+    ``asyncio.run()`` is returned; confirm which is intended.
+
+    '''
+    # Uh, oh.
+    #
+    # :o
+
+    # It looks like your event loop has caught a case of the ``trio``s.
+
+    # :()
+
+    # Don't worry, we've heard you'll barely notice. You might
+    # hallucinate a few more propagating errors and feel like your
+    # digestion has slowed but if anything gets too bad your parents
+    # will know about it.
+
+    # :)
+
+    async def aio_main(trio_main):
+        # Start ``trio_main`` as a guest run on the running ``asyncio``
+        # loop and wait on a future that is completed with its outcome.
+
+        loop = asyncio.get_running_loop()
+        trio_done_fut = asyncio.Future()
+
+        def trio_done_callback(main_outcome):
+            # invoked with the ``outcome`` of the guest run on completion
+
+            if isinstance(main_outcome, Error):
+                error = main_outcome.error
+                trio_done_fut.set_exception(error)
+
+                # TODO: explicit asyncio tb?
+                # traceback.print_exception(error)
+
+                # XXX: do we need this?
+                # actor.cancel_soon()
+
+                main_outcome.unwrap()
+            else:
+                trio_done_fut.set_result(main_outcome)
+                log.runtime(f"trio_main finished: {main_outcome!r}")
+
+        # start the infection: run trio on the asyncio loop in "guest mode"
+        log.info(f"Infecting asyncio process with {trio_main}")
+
+        trio.lowlevel.start_guest_run(
+            trio_main,
+            run_sync_soon_threadsafe=loop.call_soon_threadsafe,
+            done_callback=trio_done_callback,
+        )
+        # ``.unwrap()`` will raise here on error
+        return (await trio_done_fut).unwrap()
+
+    # might as well if it's installed.
+    # NOTE(review): ``asyncio.run()`` below creates its own event loop;
+    # confirm the ``uvloop`` loop set here is actually the one used.
+    try:
+        import uvloop
+        loop = uvloop.new_event_loop()
+        asyncio.set_event_loop(loop)
+    except ImportError:
+        pass
+
+    return asyncio.run(aio_main(trio_main))
--- a/tractor/trionics/init.py
+++ b/tractor/trionics/init.py
@ -0,0 +1,40 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+Sugary patterns for trio + tractor designs.
+
+'''
+from ._mngrs import (
+    gather_contexts,
+    maybe_open_context,
+    maybe_open_nursery,
+)
+from ._broadcast import (
+    broadcast_receiver,
+    BroadcastReceiver,
+    Lagged,
+)
+
+
+__all__ = [
+    'gather_contexts',
+    'broadcast_receiver',
+    'BroadcastReceiver',
+    'Lagged',
+    'maybe_open_context',
+    'maybe_open_nursery',
+]
--- a/tractor/trionics/_broadcast.py
+++ b/tractor/trionics/_broadcast.py
@ -0,0 +1,461 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+``tokio`` style broadcast channel.
+https://docs.rs/tokio/1.11.0/tokio/sync/broadcast/index.html
+
+'''
+from __future__ import annotations
+from abc import abstractmethod
+from collections import deque
+from contextlib import asynccontextmanager
+from functools import partial
+from operator import ne
+from typing import Optional, Callable, Awaitable, Any, AsyncIterator, Protocol
+from typing import Generic, TypeVar
+
+import trio
+from trio._core._run import Task
+from trio.abc import ReceiveChannel
+from trio.lowlevel import current_task
+from msgspec import Struct
+from tractor.log import get_logger
+
+log = get_logger(__name__)
+
+# A regular invariant generic type
+T = TypeVar("T")
+
+# covariant because an AsyncReceiver[Derived] can be passed to someone
+# expecting an AsyncReceiver[Base]
+ReceiveType = TypeVar("ReceiveType", covariant=True)
+
+
+class AsyncReceiver(
+    Protocol,
+    Generic[ReceiveType],
+):
+    '''
+    An async receivable duck-type that quacks much like trio's
+    ``trio.abc.ReceiveChannel``.
+
+    Declares the receive, async-iteration and async-resource (close
+    / async-context-manager) methods an implementation must provide.
+
+    '''
+    # value retrieval
+    @abstractmethod
+    async def receive(self) -> ReceiveType:
+        ...
+
+    # async iteration protocol
+    @abstractmethod
+    def __aiter__(self) -> AsyncIterator[ReceiveType]:
+        ...
+
+    @abstractmethod
+    async def __anext__(self) -> ReceiveType:
+        ...
+
+    # ``trio.abc.AsyncResource`` methods
+    @abstractmethod
+    async def aclose(self):
+        ...
+
+    @abstractmethod
+    async def __aenter__(self) -> AsyncReceiver[ReceiveType]:
+        ...
+
+    @abstractmethod
+    async def __aexit__(self, *args) -> None:
+        ...
+
+
+class Lagged(trio.TooSlowError):
+    '''
+    Subscribed consumer task was too slow and was overrun
+    by the fastest consumer-producer pair.
+
+    Raised by ``BroadcastReceiver.receive_nowait()`` when the receiver
+    was created with ``raise_on_lag=True``.
+
+    '''
+
+
+class BroadcastState(Struct):
+    '''
+    State shared by every receiver belonging to one broadcast group.
+
+    '''
+    # buffer of received values and its capacity
+    queue: deque
+    maxlen: int
+
+    # per-receiver sequence index into ``queue`` keyed by each
+    # receiver's ``.key``; one mapping instance must be shared by the
+    # whole broadcaster set.
+    subs: dict[int, int]
+
+    # ``(receiver key, event)`` pair registered by the consumer that is
+    # currently waiting on the underlying receive; the event wakes up
+    # all other sleeping consumer tasks.
+    recv_ready: Optional[tuple[int, trio.Event]] = None
+
+    # flagged once any consumer received a ``trio.EndOfChannel`` so the
+    # group can be notified of the end-of-broadcast.
+    # For now, this is solely for testing/debugging purposes.
+    eoc: bool = False
+
+    # tasks cancelled while receiving, keyed by receiver key
+    cancelled: dict[int, Task] = {}
+
+    def statistics(self) -> dict[str, Any]:
+        '''
+        Report a snapshot of the receiver group's "statistics" in the
+        manner of ``trio``'s internal task-sync primitives.
+
+        '''
+        ready = self.recv_ready
+        if ready is None:
+            waiter_key, event = None, None
+        else:
+            waiter_key, event = ready
+
+        # the ``-1`` "caught up" sentinel is reported as ``0``
+        backlog: dict[int, int] = {
+            tid: (0 if seq == -1 else seq)
+            for tid, seq in self.subs.items()
+        }
+        waiting = event.statistics().tasks_waiting if event else 0
+
+        return {
+            'open_consumers': len(self.subs),
+            'queued_len_by_task': backlog,
+            'max_buffer_size': self.maxlen,
+            'tasks_waiting': waiting,
+            'tasks_cancelled': self.cancelled,
+            'next_value_receiver_id': waiter_key,
+        }
+
+
+class BroadcastReceiver(ReceiveChannel):
+    '''
+    A memory receive channel broadcaster which is non-lossy for the
+    fastest consumer.
+
+    Additional consumer tasks can receive all produced values by registering
+    with ``.subscribe()`` and receiving from the new instance it delivers.
+
+    '''
+    def __init__(
+        self,
+
+        rx_chan: AsyncReceiver,
+        state: BroadcastState,
+        receive_afunc: Optional[Callable[[], Awaitable[Any]]] = None,
+        raise_on_lag: bool = True,
+
+    ) -> None:
+
+        # register the original underlying (clone)
+        self.key = id(self)
+        self._state = state
+
+        # each consumer has an int count which indicates
+        # which index contains the next value that the task has not yet
+        # consumed and thus should read. In the "up-to-date" case the
+        # consumer task must wait for a new value from the underlying
+        # receiver and we use ``-1`` as the sentinel for this state.
+        state.subs[self.key] = -1
+
+        # underlying for this receiver
+        self._rx = rx_chan
+        # ``receive_afunc`` (when given) overrides ``rx_chan.receive``
+        self._recv = receive_afunc or rx_chan.receive
+        self._closed: bool = False
+        self._raise_on_lag = raise_on_lag
+
+    def receive_nowait(
+        self,
+        _key: int | None = None,
+        _state: BroadcastState | None = None,
+
+    ) -> Any:
+        '''
+        Sync version of `.receive()` which does all the low level work
+        of receiving from the underlying/wrapped receive channel.
+
+        Raises ``trio.WouldBlock`` when this receiver has no buffered
+        value, ``Lagged`` when it was overrun (and ``raise_on_lag`` is
+        set), ``trio.ClosedResourceError`` when it is closed and
+        ``RuntimeError`` when it is not a registered subscriber.
+
+        '''
+        key = _key or self.key
+        state = _state or self._state
+
+        # TODO: ideally we can make some way to "lock out" the
+        # underlying receive channel in some way such that if some task
+        # tries to pull from it directly (i.e. one we're unaware of)
+        # then it errors out.
+
+        # only tasks which have entered ``.subscribe()`` can
+        # receive on this broadcaster.
+        try:
+            seq = state.subs[key]
+        except KeyError:
+            if self._closed:
+                raise trio.ClosedResourceError
+
+            raise RuntimeError(
+                f'{self} is not registerd as subscriber')
+
+        # check that task does not already have a value it can receive
+        # immediately and/or that it has lagged.
+        if seq > -1:
+            # get the oldest value we haven't received immediately
+            try:
+                value = state.queue[seq]
+            except IndexError:
+
+                # adhere to ``tokio`` style "lagging":
+                # "Once RecvError::Lagged is returned, the lagging
+                # receiver's position is updated to the oldest value
+                # contained by the channel. The next call to recv will
+                # return this value."
+                # https://docs.rs/tokio/1.11.0/tokio/sync/broadcast/index.html#lagging
+
+                mxln = state.maxlen
+                # NOTE(review): with 0-based indexes ``seq == mxln``
+                # reports 0 lost values; confirm whether this count is
+                # off by one.
+                lost = seq - mxln
+
+                # decrement to the last value and expect
+                # consumer to either handle the ``Lagged`` and come back
+                # or bail out on its own (thus un-subscribing)
+                state.subs[key] = mxln - 1
+
+                # this task was overrun by the producer side
+                task: Task = current_task()
+                msg = f'Task `{task.name}` overrun and dropped `{lost}` values'
+
+                if self._raise_on_lag:
+                    raise Lagged(msg)
+                else:
+                    log.warning(msg)
+                    return self.receive_nowait(_key, _state)
+
+            # value consumed: step toward the newest entry (index ``0``)
+            state.subs[key] -= 1
+            return value
+
+        raise trio.WouldBlock
+
+    async def _receive_from_underlying(
+        self,
+        key: int,
+        state: BroadcastState,
+
+    ) -> ReceiveType:
+        '''
+        Await the next value from the wrapped receiver, push it onto
+        the shared queue, bump every other subscriber's sequence and
+        wake any consumers waiting on the ``recv_ready`` event.
+
+        '''
+
+        if self._closed:
+            raise trio.ClosedResourceError
+
+        event = trio.Event()
+        assert state.recv_ready is None
+        state.recv_ready = key, event
+
+        try:
+            # if we're cancelled here it should be
+            # fine to bail without affecting any other consumers
+            # right?
+            value = await self._recv()
+
+            # items with lower indices are "newer"
+            # NOTE: ``collections.deque`` implicitly takes care of
+            # truncating values outside our ``state.maxlen``. In the
+            # alt-backend-array-case we'll need to make sure this is
+            # implemented in similar ring-buffer-ish style.
+            state.queue.appendleft(value)
+
+            # broadcast new value to all subscribers by increasing
+            # all sequence numbers that will point in the queue to
+            # their latest available value.
+
+            # don't decrement the sequence for this task since we
+            # already retrieved the last value
+
+            # XXX: which of these impls is fastest?
+            # subs = state.subs.copy()
+            # subs.pop(key)
+
+            for sub_key in filter(
+                # lambda k: k != key, state.subs,
+                partial(ne, key), state.subs,
+            ):
+                state.subs[sub_key] += 1
+
+            # NOTE: this should ONLY be set if the above task was *NOT*
+            # cancelled on the `._recv()` call.
+            event.set()
+            return value
+
+        except trio.EndOfChannel:
+            # if any one consumer gets an EOC from the underlying
+            # receiver we need to unblock and send that signal to
+            # all other consumers.
+            self._state.eoc = True
+            if event.statistics().tasks_waiting:
+                event.set()
+            raise
+
+        except (
+            trio.Cancelled,
+        ):
+            # handle cancelled specially otherwise sibling
+            # consumers will be awoken with a sequence of -1
+            # and will potentially try to rewait the underlying
+            # receiver instead of just cancelling immediately.
+            self._state.cancelled[key] = current_task()
+            if event.statistics().tasks_waiting:
+                event.set()
+            raise
+
+        finally:
+            # Reset receiver waiter task event for next blocking condition.
+            # this MUST be reset even if the above ``.recv()`` call
+            # was cancelled to avoid the next consumer from blocking on
+            # an event that won't be set!
+            state.recv_ready = None
+
+    async def receive(self) -> ReceiveType:
+        '''
+        Return the next value for this receiver: a buffered one when
+        available, otherwise wait for one from the underlying receiver
+        (directly, or via the event of the task already waiting on it).
+
+        '''
+        key = self.key
+        state = self._state
+
+        try:
+            return self.receive_nowait(
+                _key=key,
+                _state=state,
+            )
+        except trio.WouldBlock:
+            pass
+
+        # current task already has the latest value **and** is the
+        # first task to begin waiting for a new one so we begin blocking
+        # until rescheduled with a new value from the underlying.
+        if state.recv_ready is None:
+            return await self._receive_from_underlying(key, state)
+
+        # This task is all caught up and ready to receive the latest
+        # value, so queue/schedule it to be woken on the next internal
+        # event.
+        else:
+            while state.recv_ready is not None:
+                # seq = state.subs[key]
+                # assert seq == -1  # sanity
+                _, ev = state.recv_ready
+                await ev.wait()
+                try:
+                    return self.receive_nowait(
+                        _key=key,
+                        _state=state,
+                    )
+                except trio.WouldBlock:
+                    if self._closed:
+                        raise trio.ClosedResourceError
+
+                    subs = state.subs
+                    if (
+                        len(subs) == 1
+                        and key in subs
+                        # or cancelled
+                    ):
+                        # XXX: we are the last and only user of this BR so
+                        # likely it makes sense to unwind back to the
+                        # underlying?
+                        # import tractor
+                        # await tractor.breakpoint()
+                        log.warning(
+                            f'Only one sub left for {self}?\n'
+                            'We can probably unwind from breceiver?'
+                        )
+
+                    # XXX: In the case where the first task to allocate the
+                    # ``.recv_ready`` event is cancelled we will be woken
+                    # with a non-incremented sequence number (the ``-1``
+                    # sentinel) and thus will read the oldest value if we
+                    # use that. Instead we need to detect if we have not
+                    # been incremented and then receive again.
+                    # return await self.receive()
+
+            # no task is waiting on the underlying anymore: take over
+            return await self._receive_from_underlying(key, state)
+
+    @asynccontextmanager
+    async def subscribe(
+        self,
+        raise_on_lag: bool = True,
+
+    ) -> AsyncIterator[BroadcastReceiver]:
+        '''
+        Subscribe for values from this broadcast receiver.
+
+        Returns a new ``BroadcastReceiver`` which is registered for and
+        pulls data from a clone of the original
+        ``trio.abc.ReceiveChannel`` provided at creation.
+
+        '''
+        if self._closed:
+            raise trio.ClosedResourceError
+
+        state = self._state
+        br = BroadcastReceiver(
+            rx_chan=self._rx,
+            state=state,
+            receive_afunc=self._recv,
+            raise_on_lag=raise_on_lag,
+        )
+        # assert clone in state.subs
+        assert br.key in state.subs
+
+        try:
+            yield br
+        finally:
+            # un-register the subscriber when the block is left
+            await br.aclose()
+
+    async def aclose(
+        self,
+    ) -> None:
+        '''
+        Close this receiver without affecting other consumers.
+
+        '''
+        if self._closed:
+            return
+
+        # if there are sleeping consumers wake
+        # them on closure.
+        rr = self._state.recv_ready
+        if rr:
+            _, event = rr
+            event.set()
+
+        # XXX: leaving it like this consumers can still get values
+        # up to the last received that still reside in the queue.
+        self._state.subs.pop(self.key)
+        self._closed = True
+
+
+def broadcast_receiver(
+
+    recv_chan: AsyncReceiver,
+    max_buffer_size: int,
+    receive_afunc: Optional[Callable[[], Awaitable[Any]]] = None,
+    raise_on_lag: bool = True,
+
+) -> BroadcastReceiver:
+    '''
+    Wrap ``recv_chan`` in a ``BroadcastReceiver`` backed by a freshly
+    allocated ``BroadcastState`` whose queue holds at most
+    ``max_buffer_size`` values.
+
+    ``receive_afunc`` and ``raise_on_lag`` are passed through to the
+    receiver unchanged.
+
+    '''
+    shared_state = BroadcastState(
+        queue=deque(maxlen=max_buffer_size),
+        maxlen=max_buffer_size,
+        subs={},
+    )
+    return BroadcastReceiver(
+        recv_chan,
+        state=shared_state,
+        receive_afunc=receive_afunc,
+        raise_on_lag=raise_on_lag,
+    )
--- a/tractor/trionics/_mngrs.py
+++ b/tractor/trionics/_mngrs.py
@ -0,0 +1,278 @@
+# tractor: structured concurrent "actors".
+# Copyright 2018-eternity Tyler Goodlet.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+'''
+Async context manager primitives with hard ``trio``-aware semantics
+
+'''
+from contextlib import asynccontextmanager as acm
+import inspect
+from typing import (
+    Any,
+    AsyncContextManager,
+    AsyncGenerator,
+    AsyncIterator,
+    Callable,
+    Hashable,
+    Optional,
+    Sequence,
+    TypeVar,
+)
+
+import trio
+from trio_typing import TaskStatus
+
+from .._state import current_actor
+from ..log import get_logger
+
+
+# module-level logger obtained via the package's ``get_logger()``
+log = get_logger(__name__)
+
+# A regular invariant generic type; used below to annotate the value
+# yielded by a wrapped async context manager.
+T = TypeVar("T")
+
+
+@acm
+async def maybe_open_nursery(
+    nursery: trio.Nursery | None = None,
+    shield: bool = False,
+) -> AsyncGenerator[trio.Nursery, Any]:
+    '''
+    Yield the caller's ``nursery`` when one is given, otherwise open
+    (and yield) a new one.
+
+    A newly opened nursery has its cancel scope's ``shield`` flag set
+    from the ``shield`` argument and, as with any nursery block, exit
+    blocks until its child tasks complete. A passed-in nursery is
+    yielded untouched (``shield`` is not applied to it).
+
+    '''
+    if nursery is not None:
+        yield nursery
+        return
+
+    async with trio.open_nursery() as own_nursery:
+        own_nursery.cancel_scope.shield = shield
+        yield own_nursery
+
+
+# Unique placeholder marking a manager whose ``__aenter__()`` has not
+# completed yet. It is compared by identity so that falsy yielded
+# values (eg. ``None``, ``0``, ``''``) still count as "entered".
+_UNENTERED: Any = object()
+
+
+async def _enter_and_wait(
+
+    mngr: AsyncContextManager[T],
+    unwrapped: dict[int, T],
+    all_entered: trio.Event,
+    parent_exit: trio.Event,
+
+) -> None:
+    '''
+    Open the async context manager, deliver its value to this task's
+    spawner and sleep until the parent signals exit.
+
+    The yielded value is stored in ``unwrapped`` under ``id(mngr)``.
+    Whichever task fills in the last outstanding slot sets
+    ``all_entered``. The manager is then held open until
+    ``parent_exit`` is set (or the task is cancelled).
+
+    '''
+    async with mngr as value:
+        unwrapped[id(mngr)] = value
+
+        # NOTE: an identity check against the placeholder, NOT
+        # a truthiness check; a manager may legitimately yield
+        # ``None`` or another falsy value and must not stall startup.
+        if all(
+            entry is not _UNENTERED
+            for entry in unwrapped.values()
+        ):
+            all_entered.set()
+
+        await parent_exit.wait()
+
+
+@acm
+async def gather_contexts(
+
+    mngrs: Sequence[AsyncContextManager[T]],
+
+) -> AsyncGenerator[tuple[Optional[T], ...], None]:
+    '''
+    Concurrently enter a sequence of async context managers, each in
+    a separate ``trio`` task and deliver the unwrapped values in the
+    same order once all managers have entered. On exit all contexts are
+    subsequently and concurrently exited.
+
+    This function is somewhat similar to common usage of
+    ``contextlib.AsyncExitStack.enter_async_context()`` (in a loop) in
+    combo with ``asyncio.gather()`` except the managers are concurrently
+    entered and exited, and cancellation just works.
+
+    Raises ``ValueError`` if ``mngrs`` contains no managers.
+
+    '''
+    # XXX: materialize the input *before* anything else iterates it.
+    # A lazy iterable would otherwise be exhausted while building the
+    # results table below, and the ``id()`` keys are only stable
+    # while the manager instances are kept alive by this list.
+    mngrs = list(mngrs)
+
+    if not mngrs:
+        raise ValueError(
+            'input mngrs is empty?\n'
+            'Did try to use inline generator syntax?'
+        )
+
+    # one slot per manager, in input order, pre-filled with the
+    # "not yet entered" placeholder.
+    unwrapped: dict[int, Any] = dict.fromkeys(
+        (id(mngr) for mngr in mngrs),
+        _UNENTERED,
+    )
+
+    all_entered = trio.Event()
+    parent_exit = trio.Event()
+
+    async with trio.open_nursery() as n:
+        for mngr in mngrs:
+            n.start_soon(
+                _enter_and_wait,
+                mngr,
+                unwrapped,
+                all_entered,
+                parent_exit,
+            )
+
+        # deliver control once all managers have started up
+        await all_entered.wait()
+
+        try:
+            yield tuple(unwrapped.values())
+        finally:
+            # NOTE: this is ABSOLUTELY REQUIRED: every child task is
+            # parked on ``parent_exit.wait()`` inside its manager, so
+            # without this the nursery block could not close unless
+            # it was cancelled. (upstream bug reference was left as
+            # a placeholder: <tractorbugurlhere>)
+            parent_exit.set()
+
+
+# Per actor task caching helpers.
+# Further potential examples of interest:
+# https://gist.github.com/njsmith/cf6fc0a97f53865f2c671659c88c1798#file-cache-py-L8
+
+class _Cache:
+    '''
+    Actor-process scoped (class-attribute) bookkeeping for async
+    resources that are shared between tasks and kept alive while in
+    use.
+
+    '''
+    service_n: Optional[trio.Nursery] = None
+    # locks serializing cache lookup/allocation
+    locks: dict[Hashable, trio.Lock] = {}
+    # single running count of tasks currently using a cached value
+    users: int = 0
+    # cache key -> value yielded by the managed context
+    values: dict[Any, Any] = {}
+    # cache key -> (nursery, event set when the context should exit)
+    resources: dict[
+        Hashable,
+        tuple[trio.Nursery, trio.Event]
+    ] = {}
+    # nurseries: dict[int, trio.Nursery] = {}
+    no_more_users: Optional[trio.Event] = None
+
+    @classmethod
+    async def run_ctx(
+        cls,
+        mng,
+        ctx_key: tuple,
+        task_status: TaskStatus[T] = trio.TASK_STATUS_IGNORED,
+
+    ) -> None:
+        '''
+        Enter ``mng``, publish its value under ``ctx_key`` and keep
+        the context open until the key's teardown event fires.
+
+        The value is stored in ``.values`` and also reported through
+        ``task_status.started()``. An entry for ``ctx_key`` must
+        already exist in ``.resources``. On the way out both the
+        ``.values`` and ``.resources`` entries are removed.
+
+        '''
+        async with mng as opened:
+            _, teardown = cls.resources[ctx_key]
+            cls.values[ctx_key] = opened
+            task_status.started(opened)
+            try:
+                await teardown.wait()
+            finally:
+                # drop both entries so the key can't be served from
+                # (or looked up against) a context that is exiting.
+                cls.values.pop(ctx_key)
+                cls.resources.pop(ctx_key)
+
+
+@acm
+async def maybe_open_context(
+
+    acm_func: Callable[..., AsyncContextManager[T]],
+
+    # XXX: used as cache key after conversion to tuple
+    # and all embedded values must also be hashable
+    kwargs: dict = {},
+    key: Hashable | Callable[..., Hashable] = None,
+
+) -> AsyncIterator[tuple[bool, T]]:
+    '''
+    Maybe open a context manager if there is not already a _Cached
+    version for the provided ``key`` for *this* actor. Return the
+    _Cached instance on a _Cache hit.
+
+    Yields a ``(cache_hit, value)`` pair: ``cache_hit`` is ``False``
+    when this call allocated the resource by running
+    ``acm_func(**kwargs)`` in the actor's service nursery, and
+    ``True`` when an existing value was reused.
+
+    The cache key is ``(id(acm_func), <key part>)`` where the key part
+    is ``key(**kwargs)`` if ``key`` is a plain function, else ``key``
+    itself, falling back to ``tuple(kwargs.items())`` when ``key`` is
+    falsy.
+
+    NOTE(review): ``_Cache.users`` is one counter shared by every
+    cache key, so a resource is only signalled to exit when the total
+    user count across all keys drops to zero.
+
+    '''
+    fid = id(acm_func)
+
+    if inspect.isfunction(key):
+        ctx_key = (fid, key(**kwargs))
+    else:
+        ctx_key = (fid, key or tuple(kwargs.items()))
+
+    # flipped only once this task has been added to ``_Cache.users``,
+    # so the exit path never un-counts a task that failed (or was
+    # cancelled) before it obtained the resource.
+    counted: bool = False
+
+    # Lock resource acquisition around task racing  / ``trio``'s
+    # scheduler protocol.
+    # NOTE: the lock is target context manager func specific in order
+    # to allow re-entrant use cases where one `maybe_open_context()`
+    # wrapped factor may want to call into another.
+    lock = _Cache.locks.setdefault(fid, trio.Lock())
+    await lock.acquire()
+
+    try:
+        # **critical section** that should prevent other tasks from
+        # checking the _Cache until complete otherwise the scheduler
+        # may switch and by accident we create more then one resource.
+        try:
+            # XXX: one singleton nursery per actor and we want to
+            # have it not be closed until all consumers have exited
+            # (which is currently difficult to implement any other way
+            # besides using our pre-allocated runtime instance..)
+            service_n: trio.Nursery = current_actor()._service_n
+
+            yielded: Any
+            cache_hit: bool = ctx_key in _Cache.values
+
+            if cache_hit:
+                log.info(f'Reusing _Cached resource for {ctx_key}')
+                yielded = _Cache.values[ctx_key]
+
+            else:
+                log.info(f'Allocating new {acm_func} for {ctx_key}')
+                mngr = acm_func(**kwargs)
+                resources = _Cache.resources
+                assert not resources.get(ctx_key), f'Resource exists? {ctx_key}'
+                resources[ctx_key] = (service_n, trio.Event())
+
+                try:
+                    # sync up to the mngr's yielded value
+                    yielded = await service_n.start(
+                        _Cache.run_ctx,
+                        mngr,
+                        ctx_key,
+                    )
+                except BaseException:
+                    # startup failed or was cancelled: the runner
+                    # task may never have got far enough to remove
+                    # the entry itself, so drop it here to let
+                    # a later call retry instead of tripping the
+                    # assert above.
+                    resources.pop(ctx_key, None)
+                    raise
+
+            _Cache.users += 1
+            counted = True
+
+        finally:
+            # always hand the lock back, including when allocation
+            # raised or was cancelled, otherwise every later caller
+            # for this ``acm_func`` would block on it forever.
+            lock.release()
+
+        yield cache_hit, yielded
+
+    finally:
+        if counted:
+            _Cache.users -= 1
+
+            # if no more consumers, teardown the client
+            if _Cache.users <= 0:
+                log.info(f'De-allocating resource for {ctx_key}')
+
+                # XXX: if we're cancelled the entry may already be
+                # gone since the nursery task was killed.
+                entry = _Cache.resources.get(ctx_key)
+                if entry:
+                    _, no_more_users = entry
+                    no_more_users.set()
+
+                # tolerate the lock having been discarded already
+                _Cache.locks.pop(fid, None)