| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | """ |
| | Pickle and restore the interpreter session. |
| | """ |
| |
|
| | __all__ = [ |
| | 'dump_module', 'load_module', 'load_module_asdict', |
| | 'dump_session', 'load_session' |
| | ] |
| |
|
| | import re |
| | import os |
| | import sys |
| | import warnings |
| | import pathlib |
| | import tempfile |
| |
|
| | TEMPDIR = pathlib.PurePath(tempfile.gettempdir()) |
| |
|
| | |
| | from typing import Optional, Union |
| |
|
| | from dill import _dill, Pickler, Unpickler |
| | from ._dill import ( |
| | BuiltinMethodType, FunctionType, MethodType, ModuleType, TypeType, |
| | _import_module, _is_builtin_module, _is_imported_module, _main_module, |
| | _reverse_typemap, __builtin__, UnpicklingError, |
| | ) |
| |
|
| | def _module_map(): |
| | """get map of imported modules""" |
| | from collections import defaultdict |
| | from types import SimpleNamespace |
| | modmap = SimpleNamespace( |
| | by_name=defaultdict(list), |
| | by_id=defaultdict(list), |
| | top_level={}, |
| | ) |
| | for modname, module in sys.modules.items(): |
| | if modname in ('__main__', '__mp_main__') or not isinstance(module, ModuleType): |
| | continue |
| | if '.' not in modname: |
| | modmap.top_level[id(module)] = modname |
| | for objname, modobj in module.__dict__.items(): |
| | modmap.by_name[objname].append((modobj, modname)) |
| | modmap.by_id[id(modobj)].append((modobj, objname, modname)) |
| | return modmap |
| |
|
# Used by _lookup_module(): an object that is an instance of one of these
# types is also searched for by identity (under any name) in the imported
# modules.
IMPORTED_AS_TYPES = (ModuleType, TypeType, FunctionType, MethodType, BuiltinMethodType)
if 'PyCapsuleType' in _reverse_typemap:
    IMPORTED_AS_TYPES = IMPORTED_AS_TYPES + (_reverse_typemap['PyCapsuleType'],)

# Used by _lookup_module(): an object whose ``__module__`` fully matches one
# of these patterns is treated the same way as the types above.
IMPORTED_AS_MODULES = tuple(
    re.compile(pattern)
    for pattern in (
        'ctypes', 'typing', 'subprocess', 'threading',
        r'concurrent\.futures(\.\w+)?', r'multiprocessing(\.\w+)?',
    )
)
| |
|
| | def _lookup_module(modmap, name, obj, main_module): |
| | """lookup name or id of obj if module is imported""" |
| | for modobj, modname in modmap.by_name[name]: |
| | if modobj is obj and sys.modules[modname] is not main_module: |
| | return modname, name |
| | __module__ = getattr(obj, '__module__', None) |
| | if isinstance(obj, IMPORTED_AS_TYPES) or (__module__ is not None |
| | and any(regex.fullmatch(__module__) for regex in IMPORTED_AS_MODULES)): |
| | for modobj, objname, modname in modmap.by_id[id(obj)]: |
| | if sys.modules[modname] is not main_module: |
| | return modname, objname |
| | return None, None |
| |
|
def _stash_modules(main_module):
    """Return a stand-in for main_module with imported objects stashed by reference.

    Each entry of the module's namespace is classified as:

    - a self reference (the module or its ``__dict__``), which is redirected
      to the stand-in;
    - ``None``/``True``/``False`` or a builtin module, which is kept as is;
    - an object found in another imported module by ``_lookup_module()``,
      recorded as ``(module, name)`` or ``(module, original_name, name)``;
    - a top-level imported module, recorded as ``(module_name, name)``;
    - anything else, which is kept as is.

    Returns:
        A new module holding the kept objects plus the three
        ``__dill_imported*`` lists, or ``main_module`` itself when nothing
        was stashed.
    """
    modmap = _module_map()
    stand_in = ModuleType(main_module.__name__)
    namespace = main_module.__dict__

    same_name = []          # (source module, name)
    other_name = []         # (source module, name in source, name here)
    top_level_modules = []  # (module name, name here)
    kept = {}
    for name, obj in namespace.items():
        if obj is main_module:
            kept[name] = stand_in
            continue
        if obj is namespace:
            kept[name] = stand_in.__dict__
            continue
        if obj is None or obj is False or obj is True \
                or (isinstance(obj, ModuleType) and _is_builtin_module(obj)):
            kept[name] = obj
            continue

        source_module, objname = _lookup_module(modmap, name, obj, main_module)
        if source_module is not None:
            if objname == name:
                same_name.append((source_module, name))
            else:
                other_name.append((source_module, objname, name))
        elif id(obj) in modmap.top_level:
            top_level_modules.append((modmap.top_level[id(obj)], name))
        else:
            kept[name] = obj

    # Nothing was stashed: the original module can be used unchanged.
    if len(kept) >= len(namespace):
        return main_module

    stand_in.__dict__.update(kept)
    stand_in.__dill_imported = same_name
    stand_in.__dill_imported_as = other_name
    stand_in.__dill_imported_top_level = top_level_modules
    if getattr(stand_in, '__loader__', None) is None and _is_imported_module(main_module):
        # NOTE(review): presumably a truthy ``__loader__`` makes the stand-in
        # be recognized as an imported module too -- confirm against
        # _is_imported_module().
        stand_in.__loader__ = True
    return stand_in
| |
|
| | def _restore_modules(unpickler, main_module): |
| | try: |
| | for modname, name in main_module.__dict__.pop('__dill_imported'): |
| | main_module.__dict__[name] = unpickler.find_class(modname, name) |
| | for modname, objname, name in main_module.__dict__.pop('__dill_imported_as'): |
| | main_module.__dict__[name] = unpickler.find_class(modname, objname) |
| | for modname, name in main_module.__dict__.pop('__dill_imported_top_level'): |
| | main_module.__dict__[name] = __import__(modname) |
| | except KeyError: |
| | pass |
| |
|
| | |
def dump_module(
    filename: Union[str, os.PathLike] = None,
    module: Optional[Union[ModuleType, str]] = None,
    refimported: bool = False,
    **kwds
) -> None:
    """Pickle the current state of :py:mod:`__main__` or another module to a file.

    Save the contents of :py:mod:`__main__` (e.g. from an interactive
    interpreter session), an imported module, or a module-type object (e.g.
    built with :py:class:`~types.ModuleType`), to a file. The pickled
    module can then be restored with the function :py:func:`load_module`.

    Args:
        filename: a path-like object or a writable stream. If `None`
            (the default), write to a named file in a temporary directory.
        module: a module object or the name of an importable module. If `None`
            (the default), :py:mod:`__main__` is saved.
        refimported: if `True`, all objects identified as having been imported
            into the module's namespace are saved by reference. *Note:* this is
            similar but independent from ``dill.settings['byref']``, as
            ``refimported`` refers to virtually all imported objects, while
            ``byref`` only affects select objects.
        **kwds: extra keyword arguments passed to :py:class:`Pickler()`.

    Raises:
        :py:exc:`PicklingError`: if pickling fails.
        :py:exc:`TypeError`: if ``module`` is not a module, or if a renamed
            argument is given together with its old name.

    Examples:

        - Save current interpreter session state:

          >>> import dill
          >>> squared = lambda x: x*x
          >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl

        - Save the state of an imported/importable module:

          >>> import dill
          >>> import pox
          >>> pox.plus_one = lambda x: x+1
          >>> dill.dump_module('pox_session.pkl', module=pox)

        - Save the state of a non-importable, module-type object:

          >>> import dill
          >>> from types import ModuleType
          >>> foo = ModuleType('foo')
          >>> foo.values = [1,2,3]
          >>> import math
          >>> foo.sin = math.sin
          >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True)

        - Restore the state of the saved modules:

          >>> import dill
          >>> dill.load_module()
          >>> squared(2)
          4
          >>> pox = dill.load_module('pox_session.pkl')
          >>> pox.plus_one(1)
          2
          >>> foo = dill.load_module('foo_session.pkl')
          >>> [foo.sin(x) for x in foo.values]
          [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]

        - Use `refimported` to save imported objects by reference:

          >>> import dill
          >>> from html.entities import html5
          >>> type(html5), len(html5)
          (dict, 2231)
          >>> import io
          >>> buf = io.BytesIO()
          >>> dill.dump_module(buf) # saves __main__, with html5 saved by value
          >>> len(buf.getvalue()) # pickle size in bytes
          71665
          >>> buf = io.BytesIO()
          >>> dill.dump_module(buf, refimported=True) # html5 saved by reference
          >>> len(buf.getvalue())
          438

    *Changed in version 0.3.6:* Function ``dump_session()`` was renamed to
    ``dump_module()``.  Parameters ``main`` and ``byref`` were renamed to
    ``module`` and ``refimported``, respectively.

    Note:
        Currently, ``dill.settings['byref']`` and ``dill.settings['recurse']``
        don't apply to this function.
    """
    # Accept the pre-rename keyword names, warning about each one used and
    # refusing a call that gives both the old and the new name.
    renamed = {
        'main': ('module', module),
        'byref': ('refimported', refimported),
    }
    for old_par, (par, current_value) in renamed.items():
        if old_par not in kwds:
            continue
        message = "The argument %r has been renamed %r" % (old_par, par)
        if old_par == 'byref':
            message += " to distinguish it from dill.settings['byref']"
        warnings.warn(message + ".", PendingDeprecationWarning)
        if current_value:
            raise TypeError("both %r and %r arguments were used" % (par, old_par))
    refimported = kwds.pop('byref', refimported)
    module = kwds.pop('main', module)

    from .settings import settings
    protocol = settings['protocol']

    # Resolve the module to be saved.
    if module is None:
        main = _main_module
    elif isinstance(module, str):
        main = _import_module(module)
    else:
        main = module
    if not isinstance(main, ModuleType):
        raise TypeError("%r is not a module" % main)

    # A stream given by the caller is used as is and left open; a file is
    # opened (and later closed) here otherwise.
    opened_here = not hasattr(filename, 'write')
    if opened_here:
        if filename is None:
            filename = str(TEMPDIR/'session.pkl')
        file = open(filename, 'wb')
    else:
        file = filename
    try:
        pickler = Pickler(file, protocol, **kwds)
        pickler._original_main = main
        if refimported:
            main = _stash_modules(main)
        pickler._main = main
        pickler._byref = False
        pickler._recurse = False
        pickler._session = True
        pickler._first_pass = True
        pickler._main_modified = main is not pickler._original_main
        pickler.dump(main)
    finally:
        if opened_here:
            file.close()
| |
|
| | |
def dump_session(filename=None, main=None, byref=False, **kwds):
    # Deprecated alias of dump_module() that takes the old parameter names
    # ``main`` and ``byref``.
    warnings.warn(
        "dump_session() has been renamed dump_module()",
        PendingDeprecationWarning,
    )
    dump_module(filename, module=main, refimported=byref, **kwds)

# The alias shares the documentation of the function it forwards to.
dump_session.__doc__ = dump_module.__doc__
| |
|
| | class _PeekableReader: |
| | """lightweight stream wrapper that implements peek()""" |
| | def __init__(self, stream): |
| | self.stream = stream |
| | def read(self, n): |
| | return self.stream.read(n) |
| | def readline(self): |
| | return self.stream.readline() |
| | def tell(self): |
| | return self.stream.tell() |
| | def close(self): |
| | return self.stream.close() |
| | def peek(self, n): |
| | stream = self.stream |
| | try: |
| | if hasattr(stream, 'flush'): stream.flush() |
| | position = stream.tell() |
| | stream.seek(position) |
| | chunk = stream.read(n) |
| | stream.seek(position) |
| | return chunk |
| | except (AttributeError, OSError): |
| | raise NotImplementedError("stream is not peekable: %r", stream) from None |
| |
|
| | def _make_peekable(stream): |
| | """return stream as an object with a peek() method""" |
| | import io |
| | if hasattr(stream, 'peek'): |
| | return stream |
| | if not (hasattr(stream, 'tell') and hasattr(stream, 'seek')): |
| | try: |
| | return io.BufferedReader(stream) |
| | except Exception: |
| | pass |
| | return _PeekableReader(stream) |
| |
|
| | def _identify_module(file, main=None): |
| | """identify the name of the module stored in the given file-type object""" |
| | from pickletools import genops |
| | UNICODE = {'UNICODE', 'BINUNICODE', 'SHORT_BINUNICODE'} |
| | found_import = False |
| | try: |
| | for opcode, arg, pos in genops(file.peek(256)): |
| | if not found_import: |
| | if opcode.name in ('GLOBAL', 'SHORT_BINUNICODE') and \ |
| | arg.endswith('_import_module'): |
| | found_import = True |
| | else: |
| | if opcode.name in UNICODE: |
| | return arg |
| | else: |
| | raise UnpicklingError("reached STOP without finding main module") |
| | except (NotImplementedError, ValueError) as error: |
| | |
| | if isinstance(error, NotImplementedError) and main is not None: |
| | |
| | return None |
| | raise UnpicklingError("unable to identify main module") from error |
| |
|
def load_module(
    filename: Union[str, os.PathLike] = None,
    module: Optional[Union[ModuleType, str]] = None,
    **kwds
) -> Optional[ModuleType]:
    """Update the selected module (default is :py:mod:`__main__`) with
    the state saved at ``filename``.

    Restore a module to the state saved with :py:func:`dump_module`. The
    saved module can be :py:mod:`__main__` (e.g. an interpreter session),
    an imported module, or a module-type object (e.g. created with
    :py:class:`~types.ModuleType`).

    When restoring the state of a non-importable module-type object, the
    current instance of this module may be passed as the argument ``module``.
    Otherwise, a new instance is created with :py:class:`~types.ModuleType`
    and returned.

    Args:
        filename: a path-like object or a readable stream. If `None`
            (the default), read from a named file in a temporary directory.
        module: a module object or the name of an importable module;
            the module name and kind (i.e. imported or non-imported) must
            match the name and kind of the module stored at ``filename``.
        **kwds: extra keyword arguments passed to :py:class:`Unpickler()`.

    Raises:
        :py:exc:`UnpicklingError`: if unpickling fails.
        :py:exc:`ValueError`: if the argument ``module`` and module saved
            at ``filename`` are incompatible.
        :py:exc:`TypeError`: if ``module`` is not a module, or if both
            ``module`` and the old name ``main`` are given.

    Returns:
        A module object, if the saved module is not :py:mod:`__main__` or
        a module instance wasn't provided with the argument ``module``.

    Examples:

        - Save the state of some modules:

          >>> import dill
          >>> squared = lambda x: x*x
          >>> dill.dump_module() # save state of __main__ to /tmp/session.pkl
          >>>
          >>> import pox # an imported module
          >>> pox.plus_one = lambda x: x+1
          >>> dill.dump_module('pox_session.pkl', module=pox)
          >>>
          >>> from types import ModuleType
          >>> foo = ModuleType('foo') # a module-type object
          >>> foo.values = [1,2,3]
          >>> import math
          >>> foo.sin = math.sin
          >>> dill.dump_module('foo_session.pkl', module=foo, refimported=True)

        - Restore the state of the interpreter:

          >>> import dill
          >>> dill.load_module() # updates __main__ from /tmp/session.pkl
          >>> squared(2)
          4

        - Load the saved state of an importable module:

          >>> import dill
          >>> pox = dill.load_module('pox_session.pkl')
          >>> pox.plus_one(1)
          2
          >>> import sys
          >>> pox in sys.modules.values()
          True

        - Load the saved state of a non-importable module-type object:

          >>> import dill
          >>> foo = dill.load_module('foo_session.pkl')
          >>> [foo.sin(x) for x in foo.values]
          [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]
          >>> import math
          >>> foo.sin is math.sin # foo.sin was saved by reference
          True
          >>> import sys
          >>> foo in sys.modules.values()
          False

        - Update the state of a non-importable module-type object:

          >>> import dill
          >>> from types import ModuleType
          >>> foo = ModuleType('foo')
          >>> foo.values = ['a','b']
          >>> foo.sin = lambda x: x*x
          >>> dill.load_module('foo_session.pkl', module=foo)
          >>> [foo.sin(x) for x in foo.values]
          [0.8414709848078965, 0.9092974268256817, 0.1411200080598672]

    *Changed in version 0.3.6:* Function ``load_session()`` was renamed to
    ``load_module()``. Parameter ``main`` was renamed to ``module``.

    See also:
        :py:func:`load_module_asdict` to load the contents of module saved
        with :py:func:`dump_module` into a dictionary.
    """
    if 'main' in kwds:
        warnings.warn(
            "The argument 'main' has been renamed 'module'.",
            PendingDeprecationWarning
        )
        if module is not None:
            raise TypeError("both 'module' and 'main' arguments were used")
        module = kwds.pop('main')
    main = module
    if hasattr(filename, 'read'):
        file = filename
    else:
        if filename is None:
            filename = str(TEMPDIR/'session.pkl')
        file = open(filename, 'rb')
    # Name of the temporary ``sys.modules`` entry, set only if one is added.
    runtime_main = None
    try:
        file = _make_peekable(file)
        unpickler = Unpickler(file, **kwds)
        unpickler._session = True

        # Resolve the module to be updated, from the argument or, failing
        # that, from the module name found in the pickle.
        pickle_main = _identify_module(file, main)
        if main is None and pickle_main is not None:
            main = pickle_main
        if isinstance(main, str):
            if main.startswith('__runtime__.'):
                # A module-type object was saved: create a new module with
                # the part of the name that follows the prefix.
                main = ModuleType(main.partition('.')[-1])
            else:
                main = _import_module(main)
        if main is not None:
            if not isinstance(main, ModuleType):
                raise TypeError("%r is not a module" % main)
            unpickler._main = main
        else:
            main = unpickler._main

        # Check that the module to update and the pickled module agree in
        # kind (imported or module-type object) and in name.
        is_main_imported = _is_imported_module(main)
        if pickle_main is not None:
            is_runtime_mod = pickle_main.startswith('__runtime__.')
            if is_runtime_mod:
                pickle_main = pickle_main.partition('.')[-1]
            error_msg = "can't update{} module{} %r with the saved state of{} module{} %r"
            if is_runtime_mod and is_main_imported:
                raise ValueError(
                    error_msg.format(" imported", "", "", "-type object")
                    % (main.__name__, pickle_main)
                )
            if not is_runtime_mod and not is_main_imported:
                # The first placeholder names the module being updated and
                # the second one the pickled module, as in the other messages.
                raise ValueError(
                    error_msg.format("", "-type object", " imported", "")
                    % (main.__name__, pickle_main)
                )
            if main.__name__ != pickle_main:
                raise ValueError(error_msg.format("", "", "", "") % (main.__name__, pickle_main))

        if not is_main_imported:
            # Register the module-type object under a '__runtime__.' name for
            # the duration of the load.
            # NOTE(review): presumably this is how the reference stored in the
            # pickle is resolved to this object -- confirm in the unpickler.
            runtime_main = '__runtime__.%s' % main.__name__
            sys.modules[runtime_main] = main

        loaded = unpickler.load()
    finally:
        if not hasattr(filename, 'read'):  # the file was opened here
            file.close()
        if runtime_main is not None:
            sys.modules.pop(runtime_main, None)
    assert loaded is main
    _restore_modules(unpickler, main)
    if main is _main_module or main is module:
        return None
    else:
        return main
| |
|
| | |
def load_session(filename=None, main=None, **kwds):
    # Deprecated alias of load_module() that takes the old parameter name
    # ``main``.
    warnings.warn("load_session() has been renamed load_module().", PendingDeprecationWarning)
    # Forward the result: the documentation shared with load_module()
    # describes a module being returned in some cases, and dropping it would
    # lose a newly created module object.
    return load_module(filename, module=main, **kwds)

# The alias shares the documentation of the function it forwards to.
load_session.__doc__ = load_module.__doc__
| |
|
def load_module_asdict(
    filename: Union[str, os.PathLike] = None,
    update: bool = False,
    **kwds
) -> dict:
    """
    Load the contents of a saved module into a dictionary.

    ``load_module_asdict()`` is the near-equivalent of::

        lambda filename: vars(dill.load_module(filename)).copy()

    however, does not alter the original module. Also, the path of
    the loaded module is stored in the ``__session__`` attribute.

    Args:
        filename: a path-like object or a readable stream. If `None`
            (the default), read from a named file in a temporary directory.
        update: if `True`, initialize the dictionary with the current state
            of the module prior to loading the state stored at filename.
        **kwds: extra keyword arguments passed to :py:class:`Unpickler()`

    Raises:
        :py:exc:`UnpicklingError`: if unpickling fails
        :py:exc:`TypeError`: if the keyword argument ``module`` is given

    Returns:
        A copy of the restored module's dictionary.

    Note:
        If ``update`` is True, the corresponding module may first be imported
        into the current namespace before the saved state is loaded from
        filename to the dictionary. Note that any module that is imported into
        the current namespace as a side-effect of using ``update`` will not be
        modified by loading the saved module in filename to a dictionary.

    Example:
        >>> import dill
        >>> alist = [1, 2, 3]
        >>> anum = 42
        >>> dill.dump_module()
        >>> anum = 0
        >>> new_var = 'spam'
        >>> main = dill.load_module_asdict()
        >>> main['__name__'], main['__session__']
        ('__main__', '/tmp/session.pkl')
        >>> main is globals() # loaded objects don't reference globals
        False
        >>> main['alist'] == alist
        True
        >>> main['alist'] is alist # was saved by value
        False
        >>> main['anum'] == anum # changed after the session was saved
        False
        >>> 'new_var' in main # would be True if the option 'update' was set
        False
    """
    if 'module' in kwds:
        raise TypeError("'module' is an invalid keyword argument for load_module_asdict()")

    # A stream given by the caller is used as is and left open; a file is
    # opened (and later closed) here otherwise.
    opened_here = not hasattr(filename, 'read')
    if opened_here:
        if filename is None:
            filename = str(TEMPDIR/'session.pkl')
        file = open(filename, 'rb')
    else:
        file = filename
    try:
        file = _make_peekable(file)
        pickled_name = _identify_module(file)
        previous = sys.modules.get(pickled_name)
        scratch = ModuleType(pickled_name)
        if update:
            if previous is None:
                previous = _import_module(pickled_name)
            scratch.__dict__.update(previous.__dict__)
        else:
            scratch.__builtins__ = __builtin__
        # Put the scratch module in place of the real one while loading.
        # NOTE(review): presumably load_module() then resolves the pickled
        # name to this scratch module and fills it -- confirm.
        sys.modules[pickled_name] = scratch
        load_module(file, **kwds)
    finally:
        if opened_here:
            file.close()
        # Undo the temporary registration in ``sys.modules``.
        try:
            if previous is None:
                del sys.modules[pickled_name]
            else:
                sys.modules[pickled_name] = previous
        except NameError:
            # The failure happened before ``previous`` was assigned.
            pass
    scratch.__session__ = str(filename)
    return scratch.__dict__
| |
|
| |
|
| | |
| | |
# Expose these session functions as attributes of the ``_dill`` module too.
# NOTE(review): presumably kept for code that still looks them up there --
# confirm before removing.
for name in (
    '_lookup_module',
    '_module_map',
    '_restore_modules',
    '_stash_modules',
    'dump_session',
    'load_session',
):
    setattr(_dill, name, globals()[name])
del name  # don't leave the loop variable in the module namespace
| |
|