Do ``__main__`` fixups like ``multiprocessing`` does
Instead of hackery trying to map modules manually from the filesystem, let Python do all the work by simply copying what ``multiprocessing`` does to "fixup the ``__main__`` module" in spawned subprocesses. The new private module ``_mp_fixup_main.py`` is simply cherry-picked code from ``multiprocessing.spawn`` which does just that. We only need these "fixups" when using a backend other than ``multiprocessing``; for now, just when using ``trio_run_in_process``.
parent 2a4307975d
commit 6348121d23
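For orientation, here is a minimal sketch of the flow the diff below implements, assuming the recorded dict reaches the child through the existing channel plumbing (``apply_main_fixups`` is a made-up wrapper, not a function in the codebase): the parent records how its ``__main__`` was launched before spawning, and the child applies the matching fixup only when the backend is ``trio_run_in_process``::

    from tractor import _mp_fixup_main  # the new private module added here

    # parent side, before spawning: record how ``__main__`` was launched
    parent_main_data = _mp_fixup_main._mp_figure_out_main()
    # e.g. {'init_main_from_name': 'pkg.app'} for ``python -m pkg.app``
    # e.g. {'init_main_from_path': '/abs/path/script.py'} for ``python script.py``

    # child side, after the data is received (hypothetical wrapper around
    # what ``Actor._load_modules()`` does in the diff):
    def apply_main_fixups(parent_main_data: dict, spawn_method: str) -> None:
        # ``multiprocessing`` already handles this itself; only other
        # backends (for now ``trio_run_in_process``) need the help
        if spawn_method != 'trio_run_in_process':
            return
        if 'init_main_from_name' in parent_main_data:
            _mp_fixup_main._fixup_main_from_name(
                parent_main_data['init_main_from_name'])
        elif 'init_main_from_path' in parent_main_data:
            _mp_fixup_main._fixup_main_from_path(
                parent_main_data['init_main_from_path'])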
@@ -28,6 +28,7 @@ from ._exceptions import (
from ._discovery import get_arbiter
from ._portal import Portal
from . import _state
from . import _mp_fixup_main


log = get_logger('tractor')
@@ -169,6 +170,14 @@ class Actor:
    _root_nursery: trio.Nursery
    _server_nursery: trio.Nursery

    # marked by the process spawning backend at startup
    # will be None for the parent most process started manually
    # by the user (currently called the "arbiter")
    _spawn_method: Optional[str] = None

    # Information about `__main__` from parent
    _parent_main_data: Dict[str, str]

    def __init__(
        self,
        name: str,
@@ -178,17 +187,20 @@ class Actor:
        loglevel: str = None,
        arbiter_addr: Optional[Tuple[str, int]] = None,
    ) -> None:
        """This constructor is called in the parent actor **before** the spawning
        phase (aka before a new process is executed).
        """
        self.name = name
        self.uid = (name, uid or str(uuid.uuid4()))

        # retrieve and store parent `__main__` data which
        # will be passed to children
        self._parent_main_data = _mp_fixup_main._mp_figure_out_main()

        mods = {}
        for name in rpc_module_paths or ():
            mod = importlib.import_module(name)
            suffix_index = mod.__file__.find('.py')
            unique_modname = os.path.basename(mod.__file__[:suffix_index])
            mods[unique_modname] = _get_mod_abspath(mod)
            if mod.__name__ == '__main__' or mod.__name__ == '__mp_main__':
                self._main_mod = unique_modname
            mods[name] = _get_mod_abspath(mod)

        self.rpc_module_paths = mods
        self._mods: dict = {}
@@ -243,35 +255,40 @@ class Actor:
        code (if it exists).
        """
        try:
            for modname, absfilepath in self.rpc_module_paths.items():
                sys.path.append(os.path.dirname(absfilepath))
                log.debug(f"Attempting to import {modname}@{absfilepath}")
                spec = importlib.util.spec_from_file_location(
                    modname, absfilepath)
                mod = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(mod)  # type: ignore
                self._mods[modname] = mod
            if self._spawn_method == 'trio_run_in_process':
                parent_data = self._parent_main_data
                if 'init_main_from_name' in parent_data:
                    _mp_fixup_main._fixup_main_from_name(
                        parent_data['init_main_from_name'])
                elif 'init_main_from_path' in parent_data:
                    _mp_fixup_main._fixup_main_from_path(
                        parent_data['init_main_from_path'])

            # XXX append the allowed module to the python path
            # which should allow for relative (at least downward)
            # imports. Seems to be the only way that will work currently
            # to get `trio-run-in-process` to import modules we "send
            # it".

            # if self.name != 'arbiter':
            #     importlib.import_module('doggy')
            #     from celery.contrib import rdb; rdb.set_trace()
            for modpath, filepath in self.rpc_module_paths.items():
                # XXX append the allowed module to the python path which
                # should allow for relative (at least downward) imports.
                sys.path.append(os.path.dirname(filepath))
                # XXX leaving this in for now in case we decide to swap
                # it with the above path mutating solution:
                # spec = importlib.util.spec_from_file_location(
                #     modname, absfilepath)
                # mod = importlib.util.module_from_spec(spec)
                # spec.loader.exec_module(mod)  # type: ignore
                log.debug(f"Attempting to import {modpath}@{filepath}")
                mod = importlib.import_module(modpath)
                self._mods[modpath] = mod
        except ModuleNotFoundError:
            # it is expected the corresponding `ModuleNotExposed` error
            # will be raised later
            log.error(f"Failed to import {modname} in {self.name}")
            log.error(f"Failed to import {modpath} in {self.name}")
            raise

    def _get_rpc_func(self, ns, funcname):
        if ns == '__main__' or ns == '__mp_main__':
            # lookup the specific module in the child denoted
            # as `__main__`/`__mp_main__` in the parent
            ns = self._main_mod
        if ns == "__mp_main__":
            # In subprocesses, `__main__` will actually map to
            # `__mp_main__` which should be the same entry-point-module
            # as the parent.
            ns = "__main__"
        try:
            return getattr(self._mods[ns], funcname)
        except KeyError as err:
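The ``__mp_main__`` aliasing in ``_get_rpc_func()`` above is easy to miss; a toy, self-contained stand-in (``get_rpc_func`` and ``mods`` here are illustrative substitutes for the actor method and its ``self._mods`` table) shows the lookup behaviour::

    import types

    def get_rpc_func(mods: dict, ns: str, funcname: str):
        # toy version of ``Actor._get_rpc_func()``
        if ns == "__mp_main__":
            # a caller that is itself a subprocess sees its entry point as
            # ``__mp_main__``, but after the fixups the callee registers
            # that module under ``__main__``
            ns = "__main__"
        return getattr(mods[ns], funcname)

    # usage: pretend the fixed-up entry module defines ``add``
    fake_main = types.ModuleType("__main__")
    fake_main.add = lambda x, y: x + y
    mods = {"__main__": fake_main}
    assert get_rpc_func(mods, "__mp_main__", "add")(1, 2) == 3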
@@ -0,0 +1,100 @@
"""
Helpers pulled mostly verbatim from ``multiprocessing.spawn``
to aid with "fixing up" the ``__main__`` module in subprocesses.

These helpers are needed for any spawning backend that doesn't already handle
this; for example they are needed when using ``trio_run_in_process`` but
obviously not when we're already using ``multiprocessing``.
"""
import os
import sys
import platform
import types
import runpy
from typing import Dict


ORIGINAL_DIR = os.path.abspath(os.getcwd())


def _mp_figure_out_main() -> Dict[str, str]:
    """Taken from ``multiprocessing.spawn.get_preparation_data()``.

    Retrieve parent actor `__main__` module data.
    """
    d = {}
    # Figure out whether to initialise main in the subprocess as a module
    # or through direct execution (or to leave it alone entirely)
    main_module = sys.modules['__main__']
    main_mod_name = getattr(main_module.__spec__, "name", None)
    if main_mod_name is not None:
        d['init_main_from_name'] = main_mod_name
    # elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
    elif platform.system() != 'Windows':
        main_path = getattr(main_module, '__file__', None)
        if main_path is not None:
            if (
                not os.path.isabs(main_path) and (
                    ORIGINAL_DIR is not None)
            ):
                # process.ORIGINAL_DIR is not None):
                # main_path = os.path.join(process.ORIGINAL_DIR, main_path)
                main_path = os.path.join(ORIGINAL_DIR, main_path)
            d['init_main_from_path'] = os.path.normpath(main_path)

    return d


# Multiprocessing module helpers to fix up the main module in
# spawned subprocesses
def _fixup_main_from_name(mod_name: str) -> None:
    # __main__.py files for packages, directories, zip archives, etc, run
    # their "main only" code unconditionally, so we don't even try to
    # populate anything in __main__, nor do we make any changes to
    # __main__ attributes
    current_main = sys.modules['__main__']
    if mod_name == "__main__" or mod_name.endswith(".__main__"):
        return

    # If this process was forked, __main__ may already be populated
    if getattr(current_main.__spec__, "name", None) == mod_name:
        return

    # Otherwise, __main__ may contain some non-main code where we need to
    # support unpickling it properly. We rerun it as __mp_main__ and make
    # the normal __main__ an alias to that
    # old_main_modules.append(current_main)
    main_module = types.ModuleType("__mp_main__")
    main_content = runpy.run_module(mod_name,
                                    run_name="__mp_main__",
                                    alter_sys=True)
    main_module.__dict__.update(main_content)
    sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module


def _fixup_main_from_path(main_path: str) -> None:
    # If this process was forked, __main__ may already be populated
    current_main = sys.modules['__main__']

    # Unfortunately, the main ipython launch script historically had no
    # "if __name__ == '__main__'" guard, so we work around that
    # by treating it like a __main__.py file
    # See https://github.com/ipython/ipython/issues/4698
    main_name = os.path.splitext(os.path.basename(main_path))[0]
    if main_name == 'ipython':
        return

    # Otherwise, if __file__ already has the setting we expect,
    # there's nothing more to do
    if getattr(current_main, '__file__', None) == main_path:
        return

    # If the parent process has sent a path through rather than a module
    # name we assume it is an executable script that may contain
    # non-main code that needs to be executed
    # old_main_modules.append(current_main)
    main_module = types.ModuleType("__mp_main__")
    main_content = runpy.run_path(main_path,
                                  run_name="__mp_main__")
    main_module.__dict__.update(main_content)
    sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
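One practical consequence of ``_fixup_main_from_path()`` re-running the parent's script with ``runpy.run_path(..., run_name='__mp_main__')``: any "main only" code at module level of a user script must sit behind the usual guard or it will execute again in every spawned child. A hypothetical entry script (the file name and the ``tractor.run()`` call are illustrative, not prescribed by this commit) would be structured like::

    # script.py -- hypothetical user entry point
    import tractor  # assumed available; only the guard pattern matters here

    async def say_hello() -> str:
        # module-level definitions are re-created when a child re-runs this
        # file under ``__mp_main__``, so they can be resolved for RPC
        return "hello from the parent's __main__"

    if __name__ == '__main__':
        # this block runs only in the real parent process; the child's
        # re-execution sees ``__name__ == "__mp_main__"`` and skips it,
        # avoiding recursive spawning
        tractor.run(say_hello)  # assumed entry-point API, for illustration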
@@ -166,6 +166,9 @@ async def new_proc(
    """
    cancel_scope = None

    # mark the new actor with the global spawn method
    subactor._spawn_method = _spawn_method

    async with trio.open_nursery() as nursery:
        if use_trio_run_in_process or _spawn_method == 'trio_run_in_process':
            # trio_run_in_process