forked from goodboy/tractor
Do __main__ fixups like ``mulitprocessing does``
Instead of hackery trying to map modules manually from the filesystem let Python do all the work by simply copying what ``multiprocessing`` does to "fixup the __main__ module" in spawned subprocesses. The new private module ``_mp_fixup_main.py`` is simply cherry picked code from ``multiprocessing.spawn`` which does just that. We only need these "fixups" when using a backend other then ``multiprocessing``; for now just when using ``trio_run_in_process``.try_trip^2
parent
2a4307975d
commit
6348121d23
|
@ -28,6 +28,7 @@ from ._exceptions import (
|
||||||
from ._discovery import get_arbiter
|
from ._discovery import get_arbiter
|
||||||
from ._portal import Portal
|
from ._portal import Portal
|
||||||
from . import _state
|
from . import _state
|
||||||
|
from . import _mp_fixup_main
|
||||||
|
|
||||||
|
|
||||||
log = get_logger('tractor')
|
log = get_logger('tractor')
|
||||||
|
@ -169,6 +170,14 @@ class Actor:
|
||||||
_root_nursery: trio.Nursery
|
_root_nursery: trio.Nursery
|
||||||
_server_nursery: trio.Nursery
|
_server_nursery: trio.Nursery
|
||||||
|
|
||||||
|
# marked by the process spawning backend at startup
|
||||||
|
# will be None for the parent most process started manually
|
||||||
|
# by the user (currently called the "arbiter")
|
||||||
|
_spawn_method: Optional[str] = None
|
||||||
|
|
||||||
|
# Information about `__main__` from parent
|
||||||
|
_parent_main_data: Dict[str, str]
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
name: str,
|
name: str,
|
||||||
|
@ -178,17 +187,20 @@ class Actor:
|
||||||
loglevel: str = None,
|
loglevel: str = None,
|
||||||
arbiter_addr: Optional[Tuple[str, int]] = None,
|
arbiter_addr: Optional[Tuple[str, int]] = None,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""This constructor is called in the parent actor **before** the spawning
|
||||||
|
phase (aka before a new process is executed).
|
||||||
|
"""
|
||||||
self.name = name
|
self.name = name
|
||||||
self.uid = (name, uid or str(uuid.uuid4()))
|
self.uid = (name, uid or str(uuid.uuid4()))
|
||||||
|
|
||||||
|
# retreive and store parent `__main__` data which
|
||||||
|
# will be passed to children
|
||||||
|
self._parent_main_data = _mp_fixup_main._mp_figure_out_main()
|
||||||
|
|
||||||
mods = {}
|
mods = {}
|
||||||
for name in rpc_module_paths or ():
|
for name in rpc_module_paths or ():
|
||||||
mod = importlib.import_module(name)
|
mod = importlib.import_module(name)
|
||||||
suffix_index = mod.__file__.find('.py')
|
mods[name] = _get_mod_abspath(mod)
|
||||||
unique_modname = os.path.basename(mod.__file__[:suffix_index])
|
|
||||||
mods[unique_modname] = _get_mod_abspath(mod)
|
|
||||||
if mod.__name__ == '__main__' or mod.__name__ == '__mp_main__':
|
|
||||||
self._main_mod = unique_modname
|
|
||||||
|
|
||||||
self.rpc_module_paths = mods
|
self.rpc_module_paths = mods
|
||||||
self._mods: dict = {}
|
self._mods: dict = {}
|
||||||
|
@ -243,35 +255,40 @@ class Actor:
|
||||||
code (if it exists).
|
code (if it exists).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
for modname, absfilepath in self.rpc_module_paths.items():
|
if self._spawn_method == 'trio_run_in_process':
|
||||||
sys.path.append(os.path.dirname(absfilepath))
|
parent_data = self._parent_main_data
|
||||||
log.debug(f"Attempting to import {modname}@{absfilepath}")
|
if 'init_main_from_name' in parent_data:
|
||||||
spec = importlib.util.spec_from_file_location(
|
_mp_fixup_main._fixup_main_from_name(
|
||||||
modname, absfilepath)
|
parent_data['init_main_from_name'])
|
||||||
mod = importlib.util.module_from_spec(spec)
|
elif 'init_main_from_path' in parent_data:
|
||||||
spec.loader.exec_module(mod) # type: ignore
|
_mp_fixup_main._fixup_main_from_path(
|
||||||
self._mods[modname] = mod
|
parent_data['init_main_from_path'])
|
||||||
|
|
||||||
# XXX append the allowed module to the python path
|
for modpath, filepath in self.rpc_module_paths.items():
|
||||||
# which should allow for relative (at least downward)
|
# XXX append the allowed module to the python path which
|
||||||
# imports. Seems to be the only that will work currently
|
# should allow for relative (at least downward) imports.
|
||||||
# to get `trio-run-in-process` to import modules we "send
|
sys.path.append(os.path.dirname(filepath))
|
||||||
# it".
|
# XXX leaving this in for now incase we decide to swap
|
||||||
|
# it with the above path mutating solution:
|
||||||
# if self.name != 'arbiter':
|
# spec = importlib.util.spec_from_file_location(
|
||||||
# importlib.import_module('doggy')
|
# modname, absfilepath)
|
||||||
# from celery.contrib import rdb; rdb.set_trace()
|
# mod = importlib.util.module_from_spec(spec)
|
||||||
|
# spec.loader.exec_module(mod) # type: ignore
|
||||||
|
log.debug(f"Attempting to import {modpath}@{filepath}")
|
||||||
|
mod = importlib.import_module(modpath)
|
||||||
|
self._mods[modpath] = mod
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
# it is expected the corresponding `ModuleNotExposed` error
|
# it is expected the corresponding `ModuleNotExposed` error
|
||||||
# will be raised later
|
# will be raised later
|
||||||
log.error(f"Failed to import {modname} in {self.name}")
|
log.error(f"Failed to import {modpath} in {self.name}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _get_rpc_func(self, ns, funcname):
|
def _get_rpc_func(self, ns, funcname):
|
||||||
if ns == '__main__' or ns == '__mp_main__':
|
if ns == "__mp_main__":
|
||||||
# lookup the specific module in the child denoted
|
# In subprocesses, `__main__` will actually map to
|
||||||
# as `__main__`/`__mp_main__` in the parent
|
# `__mp_main__` which should be the same entry-point-module
|
||||||
ns = self._main_mod
|
# as the parent.
|
||||||
|
ns = "__main__"
|
||||||
try:
|
try:
|
||||||
return getattr(self._mods[ns], funcname)
|
return getattr(self._mods[ns], funcname)
|
||||||
except KeyError as err:
|
except KeyError as err:
|
||||||
|
|
|
@ -0,0 +1,100 @@
|
||||||
|
"""
|
||||||
|
Helpers pulled mostly verbatim from ``multiprocessing.spawn``
|
||||||
|
to aid with "fixing up" the ``__main__`` module in subprocesses.
|
||||||
|
|
||||||
|
These helpers are needed for any spawing backend that doesn't already handle this.
|
||||||
|
For example when using ``trio_run_in_process`` it is needed but obviously not when
|
||||||
|
we're already using ``multiprocessing``.
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import platform
|
||||||
|
import types
|
||||||
|
import runpy
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
|
||||||
|
ORIGINAL_DIR = os.path.abspath(os.getcwd())
|
||||||
|
|
||||||
|
|
||||||
|
def _mp_figure_out_main() -> Dict[str, str]:
|
||||||
|
"""Taken from ``multiprocessing.spawn.get_preparation_data()``.
|
||||||
|
|
||||||
|
Retrieve parent actor `__main__` module data.
|
||||||
|
"""
|
||||||
|
d = {}
|
||||||
|
# Figure out whether to initialise main in the subprocess as a module
|
||||||
|
# or through direct execution (or to leave it alone entirely)
|
||||||
|
main_module = sys.modules['__main__']
|
||||||
|
main_mod_name = getattr(main_module.__spec__, "name", None)
|
||||||
|
if main_mod_name is not None:
|
||||||
|
d['init_main_from_name'] = main_mod_name
|
||||||
|
# elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
|
||||||
|
elif platform.system() != 'Windows':
|
||||||
|
main_path = getattr(main_module, '__file__', None)
|
||||||
|
if main_path is not None:
|
||||||
|
if (
|
||||||
|
not os.path.isabs(main_path) and (
|
||||||
|
ORIGINAL_DIR is not None)
|
||||||
|
):
|
||||||
|
# process.ORIGINAL_DIR is not None):
|
||||||
|
# main_path = os.path.join(process.ORIGINAL_DIR, main_path)
|
||||||
|
main_path = os.path.join(ORIGINAL_DIR, main_path)
|
||||||
|
d['init_main_from_path'] = os.path.normpath(main_path)
|
||||||
|
|
||||||
|
return d
|
||||||
|
|
||||||
|
|
||||||
|
# Multiprocessing module helpers to fix up the main module in
|
||||||
|
# spawned subprocesses
|
||||||
|
def _fixup_main_from_name(mod_name: str) -> None:
|
||||||
|
# __main__.py files for packages, directories, zip archives, etc, run
|
||||||
|
# their "main only" code unconditionally, so we don't even try to
|
||||||
|
# populate anything in __main__, nor do we make any changes to
|
||||||
|
# __main__ attributes
|
||||||
|
current_main = sys.modules['__main__']
|
||||||
|
if mod_name == "__main__" or mod_name.endswith(".__main__"):
|
||||||
|
return
|
||||||
|
|
||||||
|
# If this process was forked, __main__ may already be populated
|
||||||
|
if getattr(current_main.__spec__, "name", None) == mod_name:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Otherwise, __main__ may contain some non-main code where we need to
|
||||||
|
# support unpickling it properly. We rerun it as __mp_main__ and make
|
||||||
|
# the normal __main__ an alias to that
|
||||||
|
# old_main_modules.append(current_main)
|
||||||
|
main_module = types.ModuleType("__mp_main__")
|
||||||
|
main_content = runpy.run_module(mod_name,
|
||||||
|
run_name="__mp_main__",
|
||||||
|
alter_sys=True)
|
||||||
|
main_module.__dict__.update(main_content)
|
||||||
|
sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
|
||||||
|
|
||||||
|
|
||||||
|
def _fixup_main_from_path(main_path: str) -> None:
|
||||||
|
# If this process was forked, __main__ may already be populated
|
||||||
|
current_main = sys.modules['__main__']
|
||||||
|
|
||||||
|
# Unfortunately, the main ipython launch script historically had no
|
||||||
|
# "if __name__ == '__main__'" guard, so we work around that
|
||||||
|
# by treating it like a __main__.py file
|
||||||
|
# See https://github.com/ipython/ipython/issues/4698
|
||||||
|
main_name = os.path.splitext(os.path.basename(main_path))[0]
|
||||||
|
if main_name == 'ipython':
|
||||||
|
return
|
||||||
|
|
||||||
|
# Otherwise, if __file__ already has the setting we expect,
|
||||||
|
# there's nothing more to do
|
||||||
|
if getattr(current_main, '__file__', None) == main_path:
|
||||||
|
return
|
||||||
|
|
||||||
|
# If the parent process has sent a path through rather than a module
|
||||||
|
# name we assume it is an executable script that may contain
|
||||||
|
# non-main code that needs to be executed
|
||||||
|
# old_main_modules.append(current_main)
|
||||||
|
main_module = types.ModuleType("__mp_main__")
|
||||||
|
main_content = runpy.run_path(main_path,
|
||||||
|
run_name="__mp_main__")
|
||||||
|
main_module.__dict__.update(main_content)
|
||||||
|
sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
|
|
@ -166,6 +166,9 @@ async def new_proc(
|
||||||
"""
|
"""
|
||||||
cancel_scope = None
|
cancel_scope = None
|
||||||
|
|
||||||
|
# mark the new actor with the global spawn method
|
||||||
|
subactor._spawn_method = _spawn_method
|
||||||
|
|
||||||
async with trio.open_nursery() as nursery:
|
async with trio.open_nursery() as nursery:
|
||||||
if use_trio_run_in_process or _spawn_method == 'trio_run_in_process':
|
if use_trio_run_in_process or _spawn_method == 'trio_run_in_process':
|
||||||
# trio_run_in_process
|
# trio_run_in_process
|
||||||
|
|
Loading…
Reference in New Issue