From 6348121d2328d176cea8f616534352a5af878c71 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 29 Jan 2020 21:06:40 -0500 Subject: [PATCH] Do __main__ fixups like ``mulitprocessing does`` Instead of hackery trying to map modules manually from the filesystem let Python do all the work by simply copying what ``multiprocessing`` does to "fixup the __main__ module" in spawned subprocesses. The new private module ``_mp_fixup_main.py`` is simply cherry picked code from ``multiprocessing.spawn`` which does just that. We only need these "fixups" when using a backend other then ``multiprocessing``; for now just when using ``trio_run_in_process``. --- tractor/_actor.py | 71 +++++++++++++++++---------- tractor/_mp_fixup_main.py | 100 ++++++++++++++++++++++++++++++++++++++ tractor/_spawn.py | 3 ++ 3 files changed, 147 insertions(+), 27 deletions(-) create mode 100644 tractor/_mp_fixup_main.py diff --git a/tractor/_actor.py b/tractor/_actor.py index 42656d2..0531a05 100644 --- a/tractor/_actor.py +++ b/tractor/_actor.py @@ -28,6 +28,7 @@ from ._exceptions import ( from ._discovery import get_arbiter from ._portal import Portal from . import _state +from . import _mp_fixup_main log = get_logger('tractor') @@ -169,6 +170,14 @@ class Actor: _root_nursery: trio.Nursery _server_nursery: trio.Nursery + # marked by the process spawning backend at startup + # will be None for the parent most process started manually + # by the user (currently called the "arbiter") + _spawn_method: Optional[str] = None + + # Information about `__main__` from parent + _parent_main_data: Dict[str, str] + def __init__( self, name: str, @@ -178,17 +187,20 @@ class Actor: loglevel: str = None, arbiter_addr: Optional[Tuple[str, int]] = None, ) -> None: + """This constructor is called in the parent actor **before** the spawning + phase (aka before a new process is executed). + """ self.name = name self.uid = (name, uid or str(uuid.uuid4())) + # retreive and store parent `__main__` data which + # will be passed to children + self._parent_main_data = _mp_fixup_main._mp_figure_out_main() + mods = {} for name in rpc_module_paths or (): mod = importlib.import_module(name) - suffix_index = mod.__file__.find('.py') - unique_modname = os.path.basename(mod.__file__[:suffix_index]) - mods[unique_modname] = _get_mod_abspath(mod) - if mod.__name__ == '__main__' or mod.__name__ == '__mp_main__': - self._main_mod = unique_modname + mods[name] = _get_mod_abspath(mod) self.rpc_module_paths = mods self._mods: dict = {} @@ -243,35 +255,40 @@ class Actor: code (if it exists). """ try: - for modname, absfilepath in self.rpc_module_paths.items(): - sys.path.append(os.path.dirname(absfilepath)) - log.debug(f"Attempting to import {modname}@{absfilepath}") - spec = importlib.util.spec_from_file_location( - modname, absfilepath) - mod = importlib.util.module_from_spec(spec) - spec.loader.exec_module(mod) # type: ignore - self._mods[modname] = mod + if self._spawn_method == 'trio_run_in_process': + parent_data = self._parent_main_data + if 'init_main_from_name' in parent_data: + _mp_fixup_main._fixup_main_from_name( + parent_data['init_main_from_name']) + elif 'init_main_from_path' in parent_data: + _mp_fixup_main._fixup_main_from_path( + parent_data['init_main_from_path']) - # XXX append the allowed module to the python path - # which should allow for relative (at least downward) - # imports. Seems to be the only that will work currently - # to get `trio-run-in-process` to import modules we "send - # it". - - # if self.name != 'arbiter': - # importlib.import_module('doggy') - # from celery.contrib import rdb; rdb.set_trace() + for modpath, filepath in self.rpc_module_paths.items(): + # XXX append the allowed module to the python path which + # should allow for relative (at least downward) imports. + sys.path.append(os.path.dirname(filepath)) + # XXX leaving this in for now incase we decide to swap + # it with the above path mutating solution: + # spec = importlib.util.spec_from_file_location( + # modname, absfilepath) + # mod = importlib.util.module_from_spec(spec) + # spec.loader.exec_module(mod) # type: ignore + log.debug(f"Attempting to import {modpath}@{filepath}") + mod = importlib.import_module(modpath) + self._mods[modpath] = mod except ModuleNotFoundError: # it is expected the corresponding `ModuleNotExposed` error # will be raised later - log.error(f"Failed to import {modname} in {self.name}") + log.error(f"Failed to import {modpath} in {self.name}") raise def _get_rpc_func(self, ns, funcname): - if ns == '__main__' or ns == '__mp_main__': - # lookup the specific module in the child denoted - # as `__main__`/`__mp_main__` in the parent - ns = self._main_mod + if ns == "__mp_main__": + # In subprocesses, `__main__` will actually map to + # `__mp_main__` which should be the same entry-point-module + # as the parent. + ns = "__main__" try: return getattr(self._mods[ns], funcname) except KeyError as err: diff --git a/tractor/_mp_fixup_main.py b/tractor/_mp_fixup_main.py new file mode 100644 index 0000000..7869561 --- /dev/null +++ b/tractor/_mp_fixup_main.py @@ -0,0 +1,100 @@ +""" +Helpers pulled mostly verbatim from ``multiprocessing.spawn`` +to aid with "fixing up" the ``__main__`` module in subprocesses. + +These helpers are needed for any spawing backend that doesn't already handle this. +For example when using ``trio_run_in_process`` it is needed but obviously not when +we're already using ``multiprocessing``. +""" +import os +import sys +import platform +import types +import runpy +from typing import Dict + + +ORIGINAL_DIR = os.path.abspath(os.getcwd()) + + +def _mp_figure_out_main() -> Dict[str, str]: + """Taken from ``multiprocessing.spawn.get_preparation_data()``. + + Retrieve parent actor `__main__` module data. + """ + d = {} + # Figure out whether to initialise main in the subprocess as a module + # or through direct execution (or to leave it alone entirely) + main_module = sys.modules['__main__'] + main_mod_name = getattr(main_module.__spec__, "name", None) + if main_mod_name is not None: + d['init_main_from_name'] = main_mod_name + # elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE): + elif platform.system() != 'Windows': + main_path = getattr(main_module, '__file__', None) + if main_path is not None: + if ( + not os.path.isabs(main_path) and ( + ORIGINAL_DIR is not None) + ): + # process.ORIGINAL_DIR is not None): + # main_path = os.path.join(process.ORIGINAL_DIR, main_path) + main_path = os.path.join(ORIGINAL_DIR, main_path) + d['init_main_from_path'] = os.path.normpath(main_path) + + return d + + +# Multiprocessing module helpers to fix up the main module in +# spawned subprocesses +def _fixup_main_from_name(mod_name: str) -> None: + # __main__.py files for packages, directories, zip archives, etc, run + # their "main only" code unconditionally, so we don't even try to + # populate anything in __main__, nor do we make any changes to + # __main__ attributes + current_main = sys.modules['__main__'] + if mod_name == "__main__" or mod_name.endswith(".__main__"): + return + + # If this process was forked, __main__ may already be populated + if getattr(current_main.__spec__, "name", None) == mod_name: + return + + # Otherwise, __main__ may contain some non-main code where we need to + # support unpickling it properly. We rerun it as __mp_main__ and make + # the normal __main__ an alias to that + # old_main_modules.append(current_main) + main_module = types.ModuleType("__mp_main__") + main_content = runpy.run_module(mod_name, + run_name="__mp_main__", + alter_sys=True) + main_module.__dict__.update(main_content) + sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module + + +def _fixup_main_from_path(main_path: str) -> None: + # If this process was forked, __main__ may already be populated + current_main = sys.modules['__main__'] + + # Unfortunately, the main ipython launch script historically had no + # "if __name__ == '__main__'" guard, so we work around that + # by treating it like a __main__.py file + # See https://github.com/ipython/ipython/issues/4698 + main_name = os.path.splitext(os.path.basename(main_path))[0] + if main_name == 'ipython': + return + + # Otherwise, if __file__ already has the setting we expect, + # there's nothing more to do + if getattr(current_main, '__file__', None) == main_path: + return + + # If the parent process has sent a path through rather than a module + # name we assume it is an executable script that may contain + # non-main code that needs to be executed + # old_main_modules.append(current_main) + main_module = types.ModuleType("__mp_main__") + main_content = runpy.run_path(main_path, + run_name="__mp_main__") + main_module.__dict__.update(main_content) + sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 3582f8b..311ad7c 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -166,6 +166,9 @@ async def new_proc( """ cancel_scope = None + # mark the new actor with the global spawn method + subactor._spawn_method = _spawn_method + async with trio.open_nursery() as nursery: if use_trio_run_in_process or _spawn_method == 'trio_run_in_process': # trio_run_in_process