From ca1b01f926ec87f0907ad79a9ab715c0adfb6038 Mon Sep 17 00:00:00 2001
From: mahmoud
Date: Fri, 10 Apr 2026 20:49:16 +0000
Subject: [PATCH] mpi integration test

---
 examples/integration/mpi4py/__init__.py       |  0
 examples/integration/mpi4py/_child.py         |  5 ++
 .../integration/mpi4py/inherit_parent_main.py | 50 +++++++++++++++++++
 3 files changed, 55 insertions(+)
 create mode 100644 examples/integration/mpi4py/__init__.py
 create mode 100644 examples/integration/mpi4py/_child.py
 create mode 100644 examples/integration/mpi4py/inherit_parent_main.py

diff --git a/examples/integration/mpi4py/__init__.py b/examples/integration/mpi4py/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/examples/integration/mpi4py/_child.py b/examples/integration/mpi4py/_child.py
new file mode 100644
index 00000000..e9d7186a
--- /dev/null
+++ b/examples/integration/mpi4py/_child.py
@@ -0,0 +1,5 @@
+import os
+
+
+async def child_fn() -> str:
+    return f"child OK pid={os.getpid()}"
diff --git a/examples/integration/mpi4py/inherit_parent_main.py b/examples/integration/mpi4py/inherit_parent_main.py
new file mode 100644
index 00000000..60e30a95
--- /dev/null
+++ b/examples/integration/mpi4py/inherit_parent_main.py
@@ -0,0 +1,50 @@
+"""
+Integration test: spawning tractor actors from an MPI process.
+
+When a parent is launched via ``mpirun``, Open MPI sets ``OMPI_*`` env
+vars that bind ``MPI_Init`` to the ``orted`` daemon. Tractor children
+inherit those env vars, so if ``inherit_parent_main=True`` (the default)
+the child re-executes ``__main__``, re-imports ``mpi4py``, and
+``MPI_Init_thread`` fails because the child was never spawned by
+``orted``::
+
+    getting local rank failed
+    --> Returned value No permission (-17) instead of ORTE_SUCCESS
+
+Passing ``inherit_parent_main=False`` and placing RPC functions in a
+separate importable module (``_child``) avoids the re-import entirely.
+
+Usage::
+
+    mpirun --allow-run-as-root -np 1 python -m \
+        examples.integration.mpi4py.inherit_parent_main
+"""
+
+from mpi4py import MPI
+
+import os
+import trio
+import tractor
+
+from ._child import child_fn
+
+
+async def main() -> None:
+    rank = MPI.COMM_WORLD.Get_rank()
+    print(f"[parent] rank={rank} pid={os.getpid()}", flush=True)
+
+    async with tractor.open_nursery(start_method='trio') as an:
+        portal = await an.start_actor(
+            'mpi-child',
+            enable_modules=[child_fn.__module__],
+            # Without this the child replays __main__, which
+            # re-imports mpi4py and crashes on MPI_Init.
+            inherit_parent_main=False,
+        )
+        result = await portal.run(child_fn)
+        print(f"[parent] got: {result}", flush=True)
+        await portal.cancel_actor()
+
+
+if __name__ == "__main__":
+    trio.run(main)