Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_VF.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/__config__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/__future__.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_appdirs.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_classes.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_compile.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_custom_ops.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_environment.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_guards.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_jit_internal.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_linalg_utils.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_lobpcg.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_lowrank.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_namedtensor_internals.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_ops.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_python_dispatcher.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_size_docs.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_sources.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_storage_docs.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_streambase.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_tensor.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_tensor_str.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_thread_safe_fork.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_utils.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_utils_internal.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_vmap_internals.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_weights_only_unpickler.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/functional.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/hub.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/library.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/quasirandom.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/random.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/return_types.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/serialization.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/storage.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/torch_version.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/types.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/version.cpython-312.pyc +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_awaits/__init__.py +53 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__init__.py +549 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/decompositions.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/decompositions_for_jvp.py +336 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/decompositions_for_rng.py +266 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_dispatch/__init__.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_dispatch/python.py +192 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/config.py +45 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/error.py +56 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/verifier.py +531 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_inductor/__autotune_main__.py +33 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_inductor/__init__.py +447 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_VF.cpython-312.pyc
ADDED
|
Binary file (1.56 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/__config__.cpython-312.pyc
ADDED
|
Binary file (1.08 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/__future__.cpython-312.pyc
ADDED
|
Binary file (3.58 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_appdirs.cpython-312.pyc
ADDED
|
Binary file (29.6 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_classes.cpython-312.pyc
ADDED
|
Binary file (3.34 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_compile.cpython-312.pyc
ADDED
|
Binary file (2.54 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_custom_ops.cpython-312.pyc
ADDED
|
Binary file (13.9 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_environment.cpython-312.pyc
ADDED
|
Binary file (344 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_guards.cpython-312.pyc
ADDED
|
Binary file (57 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_jit_internal.cpython-312.pyc
ADDED
|
Binary file (52.6 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_linalg_utils.cpython-312.pyc
ADDED
|
Binary file (6.66 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_lobpcg.cpython-312.pyc
ADDED
|
Binary file (49.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_lowrank.cpython-312.pyc
ADDED
|
Binary file (12.8 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_namedtensor_internals.cpython-312.pyc
ADDED
|
Binary file (6.34 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_ops.cpython-312.pyc
ADDED
|
Binary file (63.1 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_python_dispatcher.cpython-312.pyc
ADDED
|
Binary file (5.34 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_size_docs.cpython-312.pyc
ADDED
|
Binary file (1.35 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_sources.cpython-312.pyc
ADDED
|
Binary file (5.69 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_storage_docs.cpython-312.pyc
ADDED
|
Binary file (1.75 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_streambase.cpython-312.pyc
ADDED
|
Binary file (968 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_tensor.cpython-312.pyc
ADDED
|
Binary file (78.1 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_tensor_str.cpython-312.pyc
ADDED
|
Binary file (32.2 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_thread_safe_fork.cpython-312.pyc
ADDED
|
Binary file (220 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_utils.cpython-312.pyc
ADDED
|
Binary file (44.5 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_utils_internal.cpython-312.pyc
ADDED
|
Binary file (14.3 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_vmap_internals.cpython-312.pyc
ADDED
|
Binary file (10.2 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/_weights_only_unpickler.cpython-312.pyc
ADDED
|
Binary file (26 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/functional.cpython-312.pyc
ADDED
|
Binary file (85.1 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/hub.cpython-312.pyc
ADDED
|
Binary file (37.9 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/library.cpython-312.pyc
ADDED
|
Binary file (75 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/quasirandom.cpython-312.pyc
ADDED
|
Binary file (10.3 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/random.cpython-312.pyc
ADDED
|
Binary file (8.97 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/return_types.cpython-312.pyc
ADDED
|
Binary file (2.04 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/serialization.cpython-312.pyc
ADDED
|
Binary file (87.8 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/storage.cpython-312.pyc
ADDED
|
Binary file (73.7 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/torch_version.cpython-312.pyc
ADDED
|
Binary file (3.27 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/types.cpython-312.pyc
ADDED
|
Binary file (4.21 kB). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/__pycache__/version.cpython-312.pyc
ADDED
|
Binary file (636 Bytes). View file
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_awaits/__init__.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import Generic, TypeVar
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
__all__ = ['Await']
|
| 8 |
+
|
| 9 |
+
W = TypeVar("W")
|
| 10 |
+
|
| 11 |
+
class _PyAwaitMeta(type(torch._C._Await), type(Generic)): # type: ignore[misc, no-redef]
|
| 12 |
+
pass
|
| 13 |
+
|
| 14 |
+
class _Await(torch._C._Await, Generic[W], metaclass=_PyAwaitMeta):
    r"""
    Wrapper around a ``torch._C.Await`` which encapsulates delayed execution
    of a callable. All manipulations happen with functions ``torch.jit._awaitable``,
    ``torch.jit._awaitable_wait``, ``torch.jit._awaitable_nowait``.

    Torch scriptable manipulations:

    ``torch.jit._awaitable(func, *args)``
        Returns:
            ``Await[W]`` object, where ``W`` is the return type of ``func``.

    ``torch.jit._awaitable_wait(Await[W])``
        Returns:
            The result of type ``W`` of the function specified at ``_awaitable``,
            called with the specified arguments. The result is owned by
            ``Await[W]`` and returned on all following ``_awaitable_wait`` calls.

    ``torch.jit._awaitable_nowait(W)``
        Returns:
            Trivial ``Await[W]`` with specified result.

    Only in eager mode:

    ``fn() -> Callable[Tuple[Any], W]``
        Returns:
            Specified at ``_awaitable`` python function ``func``.

    ``args() -> Tuple[Any]``
        Returns:
            Specified at ``_awaitable`` python args.

    ``is_nowait() -> _bool``
        Returns:
            ``True`` if this object was created via ``_awaitable_nowait`` call
            (trivial ``Await[W]``).

    In eager mode ``Await[W]`` can be used as ``W`` i.e. attributes of W can be called on ``Await[W]``,
    ``_awaitable_wait()`` call will be transparently added.
    """
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/__init__.py
ADDED
|
@@ -0,0 +1,549 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import inspect
|
| 3 |
+
from collections import defaultdict
|
| 4 |
+
from collections.abc import Callable, Sequence
|
| 5 |
+
from functools import lru_cache, partial, wraps
|
| 6 |
+
from itertools import chain
|
| 7 |
+
from typing import Optional, TYPE_CHECKING, TypeVar, Union
|
| 8 |
+
from typing_extensions import ParamSpec
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
if TYPE_CHECKING:
|
| 12 |
+
from torch.export.decomp_utils import CustomDecompTable
|
| 13 |
+
|
| 14 |
+
import torch
|
| 15 |
+
import torch.library
|
| 16 |
+
from torch._ops import HigherOrderOperator, OperatorBase, OpOverload, OpOverloadPacket
|
| 17 |
+
from torch._prims_common import CustomOutParamAnnotation
|
| 18 |
+
from torch._subclasses.functional_tensor import FunctionalTensor
|
| 19 |
+
from torch.utils import _pytree as pytree
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
__all__ = [
|
| 23 |
+
"decomposition_table",
|
| 24 |
+
"pre_autograd_decomposition_table",
|
| 25 |
+
"meta_table",
|
| 26 |
+
"register_decomposition",
|
| 27 |
+
"get_decompositions",
|
| 28 |
+
"core_aten_decompositions",
|
| 29 |
+
"_should_decompose_because_unsafe_op",
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
_T = TypeVar("_T")
|
| 33 |
+
_P = ParamSpec("_P")
|
| 34 |
+
|
| 35 |
+
# TODO: relax key type here; torch registrations should be possible to; but
|
| 36 |
+
# right now this type is accurate
|
| 37 |
+
global_decomposition_table: dict[str, dict[torch._ops.OperatorBase, Callable]] = (
|
| 38 |
+
defaultdict(dict)
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
decomposition_table = global_decomposition_table["post_autograd"]
|
| 42 |
+
pre_autograd_decomposition_table = global_decomposition_table["pre_autograd"]
|
| 43 |
+
meta_table = global_decomposition_table["meta"]
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _should_decompose_because_unsafe_op(op: torch._ops.OperatorBase) -> bool:
|
| 47 |
+
"""
|
| 48 |
+
Returns True if the op must always decompose in export/compile tracing system
|
| 49 |
+
|
| 50 |
+
In export, we always decompose certain CIA ops that are tagged with
|
| 51 |
+
maybe_aliasing_or_mutating because we statically need to know if the op is
|
| 52 |
+
mutating or not. But these CIA ops could have different behaviour in runtime.
|
| 53 |
+
|
| 54 |
+
native_batch_norm is a prim op which has a wrong schema and it needs to be replaced
|
| 55 |
+
with correct schema. But until then, we will force decompose it via this tag.
|
| 56 |
+
"""
|
| 57 |
+
if not isinstance(op, torch._ops.OpOverload):
|
| 58 |
+
return False
|
| 59 |
+
if torch.Tag.maybe_aliasing_or_mutating in op.tags:
|
| 60 |
+
return True
|
| 61 |
+
return op is torch.ops.aten.native_batch_norm.default
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def _add_op_to_registry(registry, op, fn):
|
| 65 |
+
"""
|
| 66 |
+
This is an internal API for adding an op to the decomposition table.
|
| 67 |
+
|
| 68 |
+
If op is OpOverload, it will be added to the registry directly.
|
| 69 |
+
If op is OpOverloadPacket, all the valid op_overloads in the packet will be added to the registry.
|
| 70 |
+
"""
|
| 71 |
+
overloads: list[Union[torch._ops.OperatorBase]] = []
|
| 72 |
+
if isinstance(op, HigherOrderOperator):
|
| 73 |
+
# There's no concept of overloads for HigherOrderOperator
|
| 74 |
+
registry[op] = fn
|
| 75 |
+
return
|
| 76 |
+
elif isinstance(op, OpOverload):
|
| 77 |
+
overloads.append(op)
|
| 78 |
+
else:
|
| 79 |
+
assert isinstance(op, OpOverloadPacket)
|
| 80 |
+
for ol in op.overloads():
|
| 81 |
+
overloads.append(getattr(op, ol))
|
| 82 |
+
|
| 83 |
+
for op_overload in overloads:
|
| 84 |
+
if op_overload in registry:
|
| 85 |
+
raise RuntimeError(f"duplicate registrations for {op_overload}")
|
| 86 |
+
# TorchScript dumps a bunch of extra nonsense overloads
|
| 87 |
+
# which don't have corresponding dispatcher entries, we need
|
| 88 |
+
# to filter those out, e.g aten.add.float_int
|
| 89 |
+
if torch._C._dispatch_has_kernel(op_overload.name()):
|
| 90 |
+
registry[op_overload] = fn
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _convert_out_params(f):
|
| 94 |
+
out_annotation = f.__annotations__.get("out")
|
| 95 |
+
|
| 96 |
+
# If there are no out params, do not wrap the function.
|
| 97 |
+
if not out_annotation:
|
| 98 |
+
return f
|
| 99 |
+
|
| 100 |
+
# Hack to detect when out is a Tuple. There seems to be no pretty way of doing this
|
| 101 |
+
if getattr(out_annotation, "__origin__", None) is tuple:
|
| 102 |
+
sig = inspect.signature(f)
|
| 103 |
+
out_names = sig.return_annotation._fields
|
| 104 |
+
# If out is a tuple, we need to register a function that unpacks all the out
|
| 105 |
+
# elements as this is what native_functions.yaml expects
|
| 106 |
+
|
| 107 |
+
@wraps(f)
|
| 108 |
+
def _fn(*args, **kwargs):
|
| 109 |
+
out_kwargs = tuple(kwargs.pop(o, None) for o in out_names)
|
| 110 |
+
# Either all of the out kwargs are set or none of them
|
| 111 |
+
is_none = out_kwargs[0] is None
|
| 112 |
+
assert all((o is None) == is_none for o in out_kwargs)
|
| 113 |
+
return f(*args, **kwargs, out=None if is_none else out_kwargs)
|
| 114 |
+
|
| 115 |
+
out_params = [
|
| 116 |
+
inspect.Parameter(
|
| 117 |
+
o,
|
| 118 |
+
kind=inspect.Parameter.KEYWORD_ONLY,
|
| 119 |
+
default=None,
|
| 120 |
+
annotation=t,
|
| 121 |
+
)
|
| 122 |
+
for o, t in zip(out_names, out_annotation.__args__)
|
| 123 |
+
]
|
| 124 |
+
# Drop the out parameter and concatenate the new kwargs in the signature
|
| 125 |
+
params = chain((v for k, v in sig.parameters.items() if k != "out"), out_params)
|
| 126 |
+
_fn.__signature__ = inspect.Signature( # type: ignore[attr-defined]
|
| 127 |
+
parameters=params, # type: ignore[arg-type]
|
| 128 |
+
return_annotation=sig.return_annotation,
|
| 129 |
+
)
|
| 130 |
+
# Drop the out parameter and concatenate the new kwargs in the annotations
|
| 131 |
+
_fn.__annotations__ = {k: v for k, v in f.__annotations__.items() if k != "out"}
|
| 132 |
+
for o in out_params:
|
| 133 |
+
_fn.__annotations__[o.name] = o.annotation
|
| 134 |
+
|
| 135 |
+
# Propagate that this function is wrapped by `out_wrapper`
|
| 136 |
+
_fn._torch_decompositions_out_wrapper = f._torch_decompositions_out_wrapper # type: ignore[attr-defined]
|
| 137 |
+
|
| 138 |
+
return _fn
|
| 139 |
+
|
| 140 |
+
# Alternatively, there may be a single tensor out parameter with a name
|
| 141 |
+
# other than "out". This will need special treatment and is indicated by an
|
| 142 |
+
# annotation, which we will remove here so it is not exposed after wrapping.
|
| 143 |
+
custom_out_param_name = f.__annotations__.pop(CustomOutParamAnnotation, None)
|
| 144 |
+
if custom_out_param_name:
|
| 145 |
+
|
| 146 |
+
@wraps(f)
|
| 147 |
+
def _fn(*args, **kwargs):
|
| 148 |
+
out_kwarg = kwargs.pop(custom_out_param_name, None)
|
| 149 |
+
return f(*args, **kwargs, out=out_kwarg)
|
| 150 |
+
|
| 151 |
+
out_param = inspect.Parameter(
|
| 152 |
+
custom_out_param_name,
|
| 153 |
+
kind=inspect.Parameter.KEYWORD_ONLY,
|
| 154 |
+
default=None,
|
| 155 |
+
annotation=out_annotation,
|
| 156 |
+
)
|
| 157 |
+
|
| 158 |
+
# Drop the out parameter and concatenate the new kwarg in the signature
|
| 159 |
+
sig = inspect.signature(f)
|
| 160 |
+
params = chain(
|
| 161 |
+
(v for k, v in sig.parameters.items() if k != "out"), (out_param,)
|
| 162 |
+
)
|
| 163 |
+
_fn.__signature__ = inspect.Signature( # type: ignore[attr-defined]
|
| 164 |
+
parameters=params, # type: ignore[arg-type]
|
| 165 |
+
return_annotation=sig.return_annotation,
|
| 166 |
+
)
|
| 167 |
+
|
| 168 |
+
# Drop the out parameter and concatenate the new kwargs in the annotations
|
| 169 |
+
_fn.__annotations__ = {k: v for k, v in f.__annotations__.items() if k != "out"}
|
| 170 |
+
_fn.__annotations__[out_param.name] = out_param.annotation
|
| 171 |
+
|
| 172 |
+
return _fn
|
| 173 |
+
|
| 174 |
+
return f
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def register_decomposition(
    aten_op, registry=None, *, type="post_autograd", unsafe=False
) -> Callable[[Callable[_P, _T]], Callable[_P, _T]]:
    """
    A decorator to register a function as a decomposition to the Python
    decomposition table.  Use it like this::

        @register_decomposition(torch.ops.aten.clamp_min)
        def clamp_min(self, min):
            return torch.clamp(self, min=min)

    If you are writing a new decomposition, consider contributing it
    directly to PyTorch in torch._decomp.decompositions.

    This API is experimental; we are almost certainly going to extend
    the API when we make decompositions eligible for use in transforms (e.g.,
    autograd) and not just backend tracing, where we then need to know if a
    decomposition can be used to simulate a transform.

    By default, we also will register it to the Meta key of dispatcher,
    and replace the c++ Meta implementation if there is already one.
    NOTE(review): nothing in this function touches the dispatcher; it only
    fills the Python registry -- confirm where the Meta registration happens.

    unsafe kwarg is for reuse of this function for registering non-function
    things

    Args:
        aten_op: An operator, or a pytree (e.g. list) of operators, to register
            the decorated function for.
        registry: Target mapping; when ``None`` the global table selected by
            ``type`` is used.
        type: One of ``"post_autograd"``, ``"pre_autograd"`` or ``"meta"``.
        unsafe: When true, the decorated object is registered as-is, without
            the ``out``-parameter conversion.

    Returns:
        A decorator that performs the registration and returns the original,
        unwrapped function.
    """

    assert type in {"post_autograd", "pre_autograd", "meta"}

    def decomposition_decorator(fn: Callable[_P, _T]) -> Callable[_P, _T]:
        orig_fn = fn
        if not unsafe:
            fn = _convert_out_params(fn)

        nonlocal registry
        if registry is None:
            registry = global_decomposition_table[type]

        def register(op):
            _add_op_to_registry(registry, op, fn)

        # To handle allowing multiple aten_ops at once
        pytree.tree_map_(register, aten_op)
        # The (possibly wrapped) fn lives in the registry; callers keep the original.
        return orig_fn

    return decomposition_decorator
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def get_decompositions(
    aten_ops: Sequence[Union[torch._ops.OperatorBase, OpOverloadPacket]],
    type: str = "post_autograd",
) -> dict[torch._ops.OperatorBase, Callable]:
    """
    Retrieve a dictionary of decompositions corresponding to the list of
    operator overloads and overload packets passed as input.  Overload
    packets will include all decomposed overloads in the packet.  If there is
    no decomposition for a requested operator, it is silently ignored.

    This API is experimental; we are almost certainly going to give an alternate,
    more recommended formulation, where a user provides the set of operators
    they know how to implement, and we provide decompositions for everything
    not in this set.

    Args:
        aten_ops: Operators and/or overload packets to look up.
        type: Which global table to read: ``"post_autograd"``,
            ``"pre_autograd"`` or ``"meta"``.

    Returns:
        A new dict mapping each matched operator to its registered decomposition.
    """
    assert type in {"post_autograd", "pre_autograd", "meta"}

    registry = global_decomposition_table[type]

    # Index the registered overloads by the packet they belong to, so a
    # requested packet can be expanded to its registered overloads.
    packets_to_overloads = defaultdict(list)
    for opo in registry:
        if isinstance(opo, (OpOverload, OpOverloadPacket)):
            packets_to_overloads[opo.overloadpacket].append(opo)

    decompositions: dict[torch._ops.OperatorBase, Callable] = {}
    for op in aten_ops:
        if isinstance(op, OpOverloadPacket) and op in packets_to_overloads:
            for op_overload in packets_to_overloads[op]:
                decompositions[op_overload] = registry[op_overload]
        elif isinstance(op, torch._ops.OperatorBase) and op in registry:
            decompositions[op] = registry[op]
    return decompositions
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def remove_decompositions(
|
| 258 |
+
decompositions: dict[torch._ops.OperatorBase, Callable],
|
| 259 |
+
aten_ops: Sequence[Union[OpOverload, OpOverloadPacket]],
|
| 260 |
+
) -> None:
|
| 261 |
+
"""
|
| 262 |
+
Given a dictionary of decompositions obtained from get_decompositions(), removes
|
| 263 |
+
operators associated with a list of operator overloads and overload packets passed
|
| 264 |
+
as input. If the decomposition dictionary does not contain a decomposition that is
|
| 265 |
+
specified to be removed, it is silently ignored.
|
| 266 |
+
"""
|
| 267 |
+
for op in aten_ops:
|
| 268 |
+
if isinstance(op, OpOverloadPacket):
|
| 269 |
+
for overload_name in op.overloads():
|
| 270 |
+
opo = getattr(op, overload_name)
|
| 271 |
+
decompositions.pop(opo, None)
|
| 272 |
+
elif isinstance(op, OpOverload):
|
| 273 |
+
decompositions.pop(op, None)
|
| 274 |
+
|
| 275 |
+
|
| 276 |
+
# populate the table
|
| 277 |
+
import torch._decomp.decompositions
|
| 278 |
+
import torch._refs
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def core_aten_decompositions() -> "CustomDecompTable":
    """Return the result of ``torch.export.exported_program.default_decompositions()``.

    The import is done inside the function rather than at module level
    (presumably to avoid a circular import -- confirm).
    """
    from torch.export.exported_program import default_decompositions

    return default_decompositions()
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
# See NOTE [Core ATen Ops]
|
| 288 |
+
#
|
| 289 |
+
# list was copied from torch/_inductor/decomposition.py
|
| 290 |
+
# excluding decompositions that result in prim ops
|
| 291 |
+
# Resulting opset of decomposition is core aten ops
|
| 292 |
+
def _core_aten_decompositions_post_autograd() -> dict[
    torch._ops.OperatorBase, Callable
]:
    """
    Look up, via get_decompositions(), the decompositions for the fixed list
    of aten operators below and return the resulting dictionary.

    Each operator appears once; listing an operator twice would only rewrite
    the same dictionary entry.
    """
    aten = torch.ops.aten
    return get_decompositions(
        [
            aten.addcdiv,
            aten.addcdiv_,
            aten.addcmul,
            aten.addcmul_,
            aten.addr,
            aten.affine_grid_generator,
            aten.alias_copy,
            aten.all,
            aten.aminmax,
            aten.arange.default,
            aten.arange.start,
            aten.avg_pool2d_backward,
            aten.baddbmm,
            aten.binary_cross_entropy,
            aten.binary_cross_entropy_backward,
            aten.binary_cross_entropy_with_logits,
            aten.block_diag,
            aten.bernoulli.p,
            aten.bernoulli.default,
            aten.celu,
            aten.celu_,
            aten.channel_shuffle,
            aten.clamp_max,
            aten.clamp_min,
            aten.col2im,
            aten.count_nonzero,
            aten.linalg_cross,
            aten.cudnn_batch_norm,
            aten.cudnn_batch_norm_backward,
            aten.miopen_batch_norm_backward,
            aten.deg2rad,
            aten.deg2rad_,
            aten.detach,
            aten.diag_embed,
            aten.diagonal_backward,
            aten.diagonal_copy,
            aten.dot,
            aten.vdot,
            aten.elu_,
            aten.elu_backward,
            aten._embedding_bag,
            aten.embedding_dense_backward,
            aten.empty_like,
            aten._euclidean_dist.default,
            aten.expand_as,
            aten.expand_copy,
            aten.eye,
            aten.fill,
            aten.fill_,
            aten.floor_divide,
            aten.frac,
            aten.frac_,
            aten._fused_moving_avg_obs_fq_helper,
            aten.gelu_,
            aten.gelu_backward,
            aten.glu,
            aten.glu_backward,
            aten.hardshrink,
            aten.hardsigmoid,
            aten.hardsigmoid_,
            aten.hardsigmoid_backward,
            aten.hardswish,
            aten.hardswish_,
            aten.hardswish_backward,
            aten.hardtanh_,
            aten.hardtanh_backward,
            aten.heaviside,
            aten.heaviside_,
            aten.huber_loss,
            aten.huber_loss_backward,
            aten.im2col,
            aten.index_add.out,
            aten.index_add.default,
            aten.index_add_,
            aten.index_copy.out,
            aten.index_copy.default,
            aten.index_copy_,
            aten.index_fill.int_Scalar,
            aten.index_fill.int_Tensor,
            aten.index_fill.int_Scalar_out,
            aten.index_fill.int_Tensor_out,
            aten.index_fill_,
            aten.isin,
            aten.isneginf,
            aten.isposinf,
            aten.l1_loss,
            aten._lazy_clone,
            aten._test_parallel_materialize,
            aten.leaky_relu_,
            aten.leaky_relu_backward,
            aten.lerp,
            aten.lerp_,
            aten.linspace,
            aten.logaddexp,
            aten.logaddexp2,
            aten.logit,
            aten.logit_,
            aten.logit_backward,
            aten.log_sigmoid_backward,
            aten.log_sigmoid_forward,
            aten._log_softmax_backward_data,
            aten.logspace,
            aten.logsumexp.default,
            aten.masked_fill,
            aten.masked_fill_,
            aten.max_unpool2d,
            aten.max_unpool3d,
            aten.mish,
            aten.mish_,
            aten.mish_backward,
            aten.mse_loss,
            aten.mse_loss_backward,
            aten.multi_margin_loss,
            aten.multilabel_margin_loss_forward,
            aten.mv,
            aten.mvlgamma,
            aten.mvlgamma_,
            aten.nansum,
            aten.nan_to_num,
            aten.nan_to_num_,
            aten.narrow,
            aten.native_batch_norm_backward,
            aten.native_dropout_backward,
            aten.native_group_norm_backward,
            aten.native_layer_norm_backward,
            aten._fused_rms_norm,
            aten._fused_rms_norm_backward,
            aten.new_empty,
            aten.new_full,
            aten.new_ones,
            aten.new_zeros,
            aten.nll_loss2d_forward,
            aten.nll_loss2d_backward,
            aten.nll_loss_backward,
            aten.nll_loss_forward,
            aten.norm.ScalarOpt_dtype,
            aten.norm.Scalar,
            aten.norm.ScalarOpt_dim_dtype,
            aten.norm.ScalarOpt_dim,
            aten.norm.dtype_out,
            aten.norm.out,
            aten.norm.names_dtype_out,
            aten.norm.names_out,
            aten.norm.ScalarOpt_dtype_out,
            aten.norm.Scalar_out,
            aten.ones,
            aten.ones_like,
            aten.pixel_shuffle,
            aten.pixel_unshuffle,
            aten._prelu_kernel,
            aten._prelu_kernel_backward,
            aten._reshape_alias,
            aten.rad2deg,
            aten.rad2deg_,
            aten.reflection_pad1d,
            aten.reflection_pad1d_backward,
            aten.reflection_pad2d,
            aten.reflection_pad2d_backward,
            aten.reflection_pad3d,
            aten.reflection_pad3d_backward,
            aten.replication_pad1d,
            aten.replication_pad2d,
            aten.replication_pad3d,
            aten.renorm,
            aten.renorm_,
            aten.resize_as,
            aten.roll,
            aten.rot90,
            aten.rrelu_with_noise,
            aten.rrelu_with_noise_,
            aten.rsub,
            aten._safe_softmax,
            aten._scaled_dot_product_flash_attention_for_cpu.default,
            aten.select_backward,
            aten.select_scatter,
            aten.sgn,
            aten.sgn_,
            aten.sigmoid_backward,
            aten.silu,
            aten.silu_,
            aten.silu_backward.grad_input,
            aten.silu_backward,
            aten.sinc,
            aten.sinc_,
            aten.slice_backward,
            aten.smooth_l1_loss,
            aten.smooth_l1_loss_backward,
            aten.soft_margin_loss,
            aten.soft_margin_loss_backward,
            aten._softmax_backward_data,
            aten.softplus,
            aten.softplus_backward,
            aten.softshrink,
            aten.special_entr,
            aten.special_log_ndtr,
            aten.special_xlog1py,
            aten.split.Tensor,
            aten.split_with_sizes_copy,
            aten.squeeze_copy,
            aten.squeeze.default,
            aten.squeeze.dim,
            aten.std.correction,
            aten.std.out,
            aten.std.correction_out,
            aten.std.names_out,
            aten.std.correction_names_out,
            aten.std_mean.correction,
            aten.std_mean.correction_out,
            aten.stack,
            aten.sum.default,
            aten.sum.out,
            aten.t,
            aten.t_copy,
            aten.take,
            aten.tanh_backward,
            aten.threshold,
            aten.threshold_,
            aten.threshold_backward,
            aten.trace,
            aten.transpose.int,
            aten.transpose_copy,
            aten.tril,
            aten.tril_,
            aten.triu,
            aten.triu_,
            aten.unbind,
            aten.unfold_backward,
            aten.unfold_copy,
            aten._unsafe_index,
            aten._unsafe_index_put,
            aten._unsafe_masked_index,
            aten._unsafe_masked_index_put_accumulate,
            aten.unsafe_split.Tensor,
            aten.unsafe_split_with_sizes,
            aten.unsqueeze_copy,
            aten._unsafe_view,
            aten.upsample_linear1d,
            aten.upsample_bilinear2d.out,
            aten.upsample_trilinear3d.out,
            aten.upsample_nearest2d_backward,
            aten.view_as_complex,
            aten.xlogy,
            aten.xlogy_,
            aten.zero,
            aten.zero_,
            aten.zeros,
            aten.zeros_like,
            aten._chunk_cat,
            aten._weight_norm_interface,
        ]
    )
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/decompositions.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/decompositions_for_jvp.py
ADDED
|
@@ -0,0 +1,336 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-decorators
|
| 2 |
+
# mypy: allow-untyped-defs
|
| 3 |
+
import inspect
|
| 4 |
+
from collections.abc import Callable
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
import torch._decomp
|
| 9 |
+
from torch import Tensor
|
| 10 |
+
from torch._prims_common.wrappers import _maybe_remove_out_wrapper
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
decomposition_table = torch._decomp.decomposition_table
|
| 14 |
+
decomposition_table_for_jvp: dict[torch._ops.OperatorBase, Callable] = {}
|
| 15 |
+
register_decomposition = torch._decomp.register_decomposition
|
| 16 |
+
aten = torch.ops.aten
|
| 17 |
+
|
| 18 |
+
# NOTE: [forward-mode AD decompositions mechanism]
|
| 19 |
+
#
|
| 20 |
+
# The mechanism is in VariableType,
|
| 21 |
+
# IF any inputs have forward grad
|
| 22 |
+
# AND there is no forward AD formula implemented
|
| 23 |
+
# AND the functions are actually differentiable
|
| 24 |
+
# run the decomposition
|
| 25 |
+
# See run_jit_decomposition_with_args_for_jvp
|
| 26 |
+
# We currently use python decompositions that we torchscript.
|
| 27 |
+
#
|
| 28 |
+
# Note that we would be building the backward graph at the decomposed level
|
| 29 |
+
# too, but that is OK, because we would've errored out otherwise anyway.
|
| 30 |
+
#
|
| 31 |
+
# TODO: The mechanism we are using to register decompositions doesn't
|
| 32 |
+
# seem to be exclusively used for jvp. So open question here is whether
|
| 33 |
+
# torch/csrc/jit/runtime/decomposition_registry.cpp is being used for other things.
|
| 34 |
+
# If that is the case, we may go down the decomposition path unexpectedly
|
| 35 |
+
# (and possibly produce an unintelligible error) vs erroring out earlier and
|
| 36 |
+
# printing that the forward AD formula is not implemented.
|
| 37 |
+
#
|
| 38 |
+
# The solution to this may be to have an explicitly white list control when
|
| 39 |
+
# to enable the decomposition.
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def maybe_register_decomposition(op):
    """
    Best-effort variant of ``register_decomposition(op)``.

    The returned decorator tries to register the decorated function for ``op``
    and hands back whatever the registration returns. If anything in that
    attempt raises an ``Exception``, the error is swallowed and the function is
    returned unregistered and unchanged.
    """

    def _try_register(fn):
        try:
            registered = register_decomposition(op)(fn)
        except Exception:
            # Registration is optional here: fall back to the plain function.
            return fn
        return registered

    return _try_register
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Functions where we need a special decomposition for jvp but there's another version that
|
| 53 |
+
# should be used more generally (ex. for jvp we need to recompute the mean and variance for
|
| 54 |
+
# the backwards of a normalization function. Without jvp, it should use the saved value)
|
| 55 |
+
decomposition_table_for_jvp = {}
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def register_decomposition_for_jvp(fn):
    """
    Return the decorator that ``register_decomposition`` builds when it is
    pointed at ``decomposition_table_for_jvp`` instead of its default registry.

    ``fn`` is forwarded unchanged as the first positional argument.
    NOTE(review): despite the name, callers in this file pass an aten operator
    here (e.g. ``aten.native_layer_norm_backward``), not a function - confirm.
    """
    jvp_registrar = register_decomposition(fn, registry=decomposition_table_for_jvp)
    return jvp_registrar
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _register_jit_decomposition_for_jvp(decomp, use_python=False):
    """
    Turn the Python decomposition registered for ``decomp`` into a TorchScript
    graph and register that graph with ``torch.jit._register_decomposition``.

    The decomposition is looked up in ``decomposition_table_for_jvp`` first and
    in the general ``decomposition_table`` second.

    Args:
        decomp: key to look up in the decomposition tables; it is also the key
            passed to ``torch.jit._register_decomposition``.
        use_python: when true, the decomposition is wrapped with
            ``torch.jit.ignore`` and only a generated forwarding function is
            compiled; otherwise the decomposition itself goes through
            ``torch.jit.script``.

    Raises:
        RuntimeError: if ``decomp`` is in neither table.
    """
    # The jvp-specific table takes precedence over the general one.
    if decomp in decomposition_table_for_jvp:
        decomposition_table_used = decomposition_table_for_jvp
    elif decomp in decomposition_table:
        decomposition_table_used = decomposition_table
    else:
        raise RuntimeError(f"could not find decomposition for {decomp}")
    decomp_fn = decomposition_table_used[decomp]

    # `out_wrapper` extends a decompositions signature with
    # an `out` parameter. However jit will use the unwrapped function's
    # signature instead so we need to unwrap here to prevent an error
    decomp_fn = _maybe_remove_out_wrapper(decomp_fn)

    if use_python:
        decomp_fn = torch.jit.ignore(decomp_fn)
        sig = inspect.signature(decomp_fn)

        # Create a string wrapping the function from the signature
        # example output:
        # def wrapped_decomp(x: torch.Tensor, y: int, z: int):
        #   return decomp_fn(x, y, z)
        # Thanks copilot!
        def get_function_def(sig):
            # Full parameter declarations (as rendered by inspect) for the
            # `def` line, bare parameter names for the forwarding call.
            param_def = [f"{param_str}" for param_str in sig.parameters.values()]
            param_use = [f"{param_str}" for param_str in sig.parameters]

            return f"def wrapped_decomp({', '.join(param_def)}):\n return decomp_fn({', '.join(param_use)})\n"

        f_str = get_function_def(sig)
        # NOTE(review): the name `decomp_fn` inside the generated source is
        # presumably resolved by TorchScript from this function's local scope;
        # confirm before renaming the local or moving this call elsewhere.
        graph = torch.jit.CompilationUnit(f_str).wrapped_decomp.graph
    else:
        graph = torch.jit.script(decomp_fn).graph
    torch.jit._register_decomposition(decomp, graph)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# The only decompositions here are temporary or hacks for the purposes of jvp
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# TODO: do these also belong here?
|
| 102 |
+
@maybe_register_decomposition(aten.trace.default)
def trace(self: Tensor) -> Tensor:
    """Return the sum of what ``torch.diag`` extracts from ``self``."""
    diagonal = torch.diag(self)
    return torch.sum(diagonal)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
@maybe_register_decomposition(aten.log_sigmoid_forward.default)
def log_sigmoid_forward(self: Tensor) -> tuple[Tensor, Tensor]:
    """
    Compute ``minimum(0, self) - log1p(exp(-abs(self)))`` together with a
    buffer tensor.

    The buffer is an empty, zero-element tensor when ``self`` is a CUDA or XPU
    tensor, and ``exp(-abs(self))`` otherwise.
    """
    exp_neg_abs = torch.exp(-torch.abs(self))
    clamped = torch.minimum(self.new_zeros(()), self)
    if self.is_cuda or self.is_xpu:
        buffer = self.new_zeros((0,))
    else:
        buffer = exp_neg_abs
    result = clamped - torch.log1p(exp_neg_abs)
    return result, buffer
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def recompute_mean_var(
    input: Tensor, rstd: Tensor, inner_dim_indices: list[int], keepdim: bool
):
    """
    Rebuild the mean and the reciprocal standard deviation from ``input`` so
    that both track gradients through ``input``.

    This is the one place where the jvp norm decompositions differ from the
    core versions. The epsilon is not passed in; it is recovered from the given
    ``rstd`` as ``(1 / rstd) ** 2 - var`` and detached before being added back.

    Returns:
        ``(mean, rstd)`` reduced over ``inner_dim_indices`` with the requested
        ``keepdim``.
    """
    # Biased variance and mean taken directly from the input.
    batch_var = torch.var(
        input, dim=inner_dim_indices, unbiased=False, keepdim=keepdim
    )
    batch_mean = torch.mean(input, dim=inner_dim_indices, keepdim=keepdim)

    # this makes me so sad inside: eps has to be reverse-engineered from rstd
    implied_eps = (torch.pow(1 / rstd, 2) - batch_var).detach()

    fresh_rstd = 1 / torch.sqrt(batch_var + implied_eps)
    return batch_mean, fresh_rstd
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
@register_decomposition_for_jvp(aten.native_layer_norm_backward)
def native_layer_norm_backward(
    grad_out: Tensor,
    input: Tensor,
    normalized_shape: list[int],
    mean: Tensor,
    rstd: Tensor,
    weight: Optional[Tensor],
    bias: Optional[Tensor],
    output_mask: list[bool],
) -> tuple[Optional[Tensor], Optional[Tensor], Optional[Tensor]]:
    """
    jvp-specific decomposition of the layer norm backward pass.

    The mean and rstd used in the formulas are recomputed from ``input`` via
    recompute_mean_var(); the ``mean`` argument itself is not read. Gradients
    that are masked off by ``output_mask`` (or whose parameter is ``None``) are
    returned as zero tensors rather than ``None``.

    Returns:
        ``(d_input, d_weight, d_bias)``.
    """
    shape = input.shape
    ndim = input.dim()

    # Dimensions [0, axis) are the outer ones, [axis, ndim) are normalized.
    axis = ndim - len(normalized_shape)
    inner_dim_indices = list(range(axis, ndim))
    outer_dim_indices = list(range(axis))

    # N: number of normalized elements, M: number of outer elements.
    N = 1
    for size in shape[axis:]:
        N *= size
    M = 1
    for size in shape[:axis]:
        M *= size
    if M <= 0 or N <= 0:
        # Nothing to differentiate for empty inputs: all-zero gradients.
        return (
            input.new_zeros(shape),
            input.new_zeros(shape[axis:]),
            input.new_zeros(shape[axis:]),
        )

    mean_, rstd_ = recompute_mean_var(input, rstd, inner_dim_indices, keepdim=True)
    x_hat = (input - mean_) * rstd_

    if weight is not None:
        grad_x_hat = grad_out * weight
    else:
        grad_x_hat = grad_out

    sum_grad = torch.sum(grad_x_hat, inner_dim_indices, True)
    sum_grad_x_hat = torch.sum(torch.mul(grad_x_hat, x_hat), inner_dim_indices, True)
    inner = grad_x_hat * N - sum_grad - torch.mul(x_hat, sum_grad_x_hat)

    if output_mask[0]:
        d_input: Optional[Tensor] = (rstd_ / N) * inner
    else:
        d_input = torch.zeros_like(input)  # should be None but doesn't work with vjp

    has_outer_dims = len(outer_dim_indices) > 0

    if output_mask[1] and weight is not None:
        if has_outer_dims:
            d_weight: Optional[Tensor] = torch.sum(
                grad_out * x_hat, outer_dim_indices, False
            )
        else:
            d_weight = grad_out * x_hat
    elif weight is not None:
        d_weight = torch.zeros_like(weight)  # should be None but doesn't work with vjp
    else:
        d_weight = torch.zeros(())  # should be None but doesn't work with vjp

    if output_mask[2] and bias is not None:
        if has_outer_dims:
            d_bias: Optional[Tensor] = torch.sum(grad_out, outer_dim_indices, False)
        else:
            d_bias = grad_out.clone()
    elif bias is not None:
        d_bias = torch.zeros_like(bias)  # should be None but doesn't work with vjp
    else:
        d_bias = torch.zeros(())  # should be None but doesn't work with vjp

    return (d_input, d_weight, d_bias)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def prod(x: list[int]):
    """Multiply together every element of ``x``; an empty ``x`` gives 1."""
    product = 1
    for factor in x:
        product = product * factor
    return product
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
@register_decomposition_for_jvp(aten.native_batch_norm_backward)
def native_batch_norm_backward(
    grad_out: Tensor,
    input: Tensor,
    weight: Optional[Tensor],
    running_mean: Optional[Tensor],
    running_var: Optional[Tensor],
    save_mean: Optional[Tensor],
    save_invstd: Optional[Tensor],
    train: bool,
    eps: float,
    output_mask: list[bool],
) -> tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
    """
    jvp-specific decomposition of the batch norm backward pass.

    Dimension 1 of ``input`` is treated as the feature axis. When ``train`` is
    true the saved statistics must be present, but the mean and invstd actually
    used are recomputed from ``input`` via recompute_mean_var(). Otherwise
    ``running_mean`` and ``rsqrt(running_var + eps)`` are used. Gradients that
    are masked off by ``output_mask`` are returned as zero tensors rather than
    ``None``.

    Returns:
        ``(grad_input, grad_weight, grad_bias)``.
    """
    shape = input.shape
    rank = input.dim()
    assert rank >= 2, "rank of the input must be at least 2"

    axis = 1
    num_features = prod(shape) / shape[axis]  # type: ignore[arg-type]
    norm = 1.0 / num_features

    mean = save_mean
    invstd = save_invstd
    if train:
        assert save_mean is not None and save_invstd is not None, (
            "when train=True, save_mean and save_invstd are required"
        )

        reduction_dims = [0] + list(range(2, input.dim()))
        assert invstd is not None  # for typing
        mean, invstd = recompute_mean_var(input, invstd, reduction_dims, keepdim=False)
    else:
        assert running_mean is not None and running_var is not None
        mean = running_mean
        invstd = torch.rsqrt(running_var + eps)

    assert invstd is not None and mean is not None

    # Shape that broadcasts a per-feature vector against ``input``.
    broadcast_mask = [1] * rank
    broadcast_mask[axis] = shape[axis]

    # Every axis except the feature axis.
    reduction_axes: list[int] = []
    for dim in range(rank):
        if dim != axis:
            reduction_axes.append(dim)

    mean = torch.reshape(mean, broadcast_mask)
    centered = input - mean
    grad_output_sum = torch.sum(grad_out, reduction_axes)
    dot_p = torch.sum(grad_out * centered, reduction_axes)

    grad_mean = torch.reshape(grad_output_sum * norm, broadcast_mask)
    proj_scale = torch.reshape(torch.mul(dot_p * norm, invstd * invstd), broadcast_mask)

    if weight is None:
        grad_scale = torch.reshape(invstd, broadcast_mask) * 1.0
    else:
        grad_scale = torch.reshape(invstd * weight, broadcast_mask)

    if train:
        proj = centered * proj_scale
        grad_input = ((grad_out - proj) - grad_mean) * grad_scale
    else:
        grad_input = grad_out * grad_scale

    if output_mask[1]:
        grad_weight = dot_p * invstd
    elif weight is not None:
        # should be None but doesn't work with vjp
        grad_weight = torch.zeros_like(weight)
    else:
        grad_weight = torch.zeros(())  # should be None but doesn't work with vjp

    if output_mask[2]:
        grad_bias = grad_output_sum
    else:
        # should be None but doesn't work with vjp
        grad_bias = torch.zeros_like(grad_output_sum)

    return (grad_input, grad_weight, grad_bias)
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
@register_decomposition_for_jvp(aten.batch_norm_backward)
def batch_norm_backward(
    grad_out: Tensor,
    input: Tensor,
    weight: Tensor,
    running_mean: Optional[Tensor],
    running_var: Optional[Tensor],
    save_mean: Optional[Tensor],
    save_var: Optional[Tensor],
    update: bool,
    eps: float,
    output_mask: list[bool],
    reserve: Tensor,
) -> tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
    """
    Forward to native_batch_norm_backward() with the same arguments in order.

    ``save_var`` lands in the ``save_invstd`` slot and ``update`` in the
    ``train`` slot of the callee; ``reserve`` is accepted but not used.
    """
    grads = native_batch_norm_backward(
        grad_out, input, weight,
        running_mean, running_var,
        save_mean, save_var,  # callee names the second one save_invstd
        update,  # callee names this one train
        eps, output_mask,
    )
    return grads
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.trace.default, use_python=True)
|
| 327 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.nll_loss_backward.default)
|
| 328 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.nll_loss2d_backward.default)
|
| 329 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten._log_softmax_backward_data.default)
|
| 330 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten._softmax_backward_data.default)
|
| 331 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.log_sigmoid_forward.default)
|
| 332 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.native_layer_norm_backward.default)
|
| 333 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.native_batch_norm_backward.default)
|
| 334 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.cudnn_batch_norm_backward.default)
|
| 335 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.batch_norm_backward.default)
|
| 336 |
+
_register_jit_decomposition_for_jvp(torch.ops.aten.miopen_batch_norm_backward.default)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_decomp/decompositions_for_rng.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-decorators
|
| 2 |
+
# mypy: allow-untyped-defs
|
| 3 |
+
import functools
|
| 4 |
+
from collections import defaultdict
|
| 5 |
+
from collections.abc import Callable
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
import torch._decomp as decomp
|
| 9 |
+
from torch._decomp import get_decompositions
|
| 10 |
+
from torch._ops import OpOverload
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
aten = torch.ops.aten
|
| 14 |
+
|
| 15 |
+
rng_decompositions: dict[str, dict[OpOverload, Callable]] = defaultdict(dict)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def register_rng_decomposition(aten_op):
    """
    Return the decorator that ``decomp.register_decomposition`` builds for
    ``aten_op`` with ``rng_decompositions`` as its registry.
    """
    registrar = decomp.register_decomposition(aten_op, rng_decompositions)
    return registrar
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def throw_on_non_cuda(device):
    """
    Unconditionally raise a ``RuntimeError`` explaining that RNG operators on
    ``device.type`` cannot be functionalized.

    The check for whether the device is CUDA is left to the caller.
    """
    message = (
        f"You are trying to functionalize a {device.type} RNG operator but {device.type} does not "
        f"use Philox/counter-based RNG. Therefore, functionalizing a {device.type} RNG operator is "
        "not supported. We are discussing the possibility of a Philox-based RNG implementation for CPU."
    )
    raise RuntimeError(message)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# TODO - We have to register many more distributions here, and also higher level
|
| 31 |
+
# ops like dropout which have fused implementation and can hide the rand inside.
|
| 32 |
+
@register_rng_decomposition(aten.rand)
def rand(shape, dtype=None, layout=torch.strided, device=None, pin_memory=False):
    """
    RNG decomposition for ``aten.rand`` built on ``rngprims.philox_rand``.

    The current (seed, offset) pair is read from PhiloxStateTracker, handed to
    ``philox_rand``, and the tracker is then advanced by the offset jump the
    primitive reports. A falsy ``dtype`` becomes ``torch.float32``. ``layout``
    and ``pin_memory`` are accepted but not used.

    Raises:
        RuntimeError: if ``device`` is given and its type is not ``"cuda"``.
    """
    if device and device.type != "cuda":
        throw_on_non_cuda(device)

    resolved_dtype = dtype or torch.float32
    seed, offset = PhiloxStateTracker.get_state_as_tuple()
    sample, consumed = torch.ops.rngprims.philox_rand(
        shape, seed, offset, None, device, resolved_dtype
    )
    PhiloxStateTracker.advance_offset(consumed)
    return sample
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
@register_rng_decomposition(aten.rand_like)
def rand_like(
    x: torch.Tensor,
    dtype=None,
    layout=None,
    device=None,
    pin_memory=False,
    memory_format=torch.preserve_format,
):
    """
    RNG decomposition for ``aten.rand_like`` built on ``rngprims.philox_rand``.

    The output takes its shape from ``x``; device and dtype fall back to those
    of ``x`` when the corresponding argument is falsy. The (seed, offset) pair
    comes from PhiloxStateTracker, which is advanced afterwards by the offset
    jump the primitive reports. ``layout``, ``pin_memory`` and
    ``memory_format`` are accepted but not used.

    Raises:
        RuntimeError: if the resolved device type is not ``"cuda"``.
    """
    target_device = device or x.device
    if target_device.type != "cuda":
        throw_on_non_cuda(target_device)

    target_dtype = dtype or x.dtype
    seed, offset = PhiloxStateTracker.get_state_as_tuple()
    sample, consumed = torch.ops.rngprims.philox_rand(
        x.shape, seed, offset, None, target_device, target_dtype
    )
    PhiloxStateTracker.advance_offset(consumed)
    return sample
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
class PhiloxState:
    """
    One Philox RNG state, stored as (seed, offset) with the offset split into
    ``base_offset + relative_offset``. The seed and base_offset basically point
    to the rng state just before tracing starts, while relative_offset
    accumulates what has been consumed at trace time.
    """

    def __init__(self) -> None:
        self.reset()

    def reset(self):
        # Zero-element tensors mark "not recorded yet"; validate_state()
        # rejects them.
        self.offset_advanced_alteast_once = False
        self.relative_offset = 0
        self.base_offset = torch.tensor(())
        self.seed = torch.tensor(())

    def validate_state(self):
        # Both the seed and the base offset must hold at least one element.
        assert not (self.seed.numel() == 0 or self.base_offset.numel() == 0)

    def advance_offset(self, consumed_offset):
        self.offset_advanced_alteast_once = True
        # Rebind to a fresh sum instead of using `+=`, so a value previously
        # handed out is never modified in place.
        advanced = self.relative_offset + consumed_offset
        self.relative_offset = advanced

    def set_state(self, seed, base_offset, relative_offset=0):
        self.relative_offset = relative_offset
        self.base_offset = base_offset
        self.seed = seed

    def get_state_as_tuple(self):
        self.validate_state()
        current_offset = self.base_offset + self.relative_offset
        return (self.seed, current_offset)

    def get_state_as_tensor(self):
        # Only needed because we override get_rng_state.
        self.validate_state()
        current_offset = self.base_offset + self.relative_offset
        return torch.stack([self.seed, current_offset])

    def set_state_from_tensor(self, state):
        # Only needed because we override set_rng_state.
        seed, base_offset = torch.unbind(state)
        self.seed = seed
        self.base_offset = base_offset
        self.relative_offset = 0
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class PhiloxStateTracker:
    """
    Singleton class to track the philox rng state during AOT Autograd tracing.
    For each aot tracing instance, AOT Autograd resets this tracker and keeps
    track of both forward and backward offsets. At runtime, we only care about
    the total consumed forward and backward offsets. For dynamic shapes, these
    offsets are a function of input shapes. Therefore, the AOT generated graphs
    have additional outputs that compute total consumed forward and backward
    offsets.

    All state lives on the class itself; entering or leaving the tracker as a
    context manager resets it.
    """

    # State that rng ops currently read from and advance; aliases either
    # fwd_state or bwd_state once one of the mark_beginning_* methods ran.
    running_state: PhiloxState
    fwd_state: PhiloxState
    bwd_state: PhiloxState

    def __enter__(self):
        PhiloxStateTracker.reset()
        return self

    def __exit__(self, exc_type, exc_cal, exc_tb):
        PhiloxStateTracker.reset()

    @classmethod
    def reset(cls):
        """Replace the running, forward and backward states with fresh ones."""
        cls.running_state = PhiloxState()
        cls.fwd_state = PhiloxState()
        cls.bwd_state = PhiloxState()

    @classmethod
    def mark_beginning_of_forward(cls):
        # Tells the tracker to use fwd_state as the running state
        cls.running_state = cls.fwd_state

    @classmethod
    def mark_beginning_of_backward(cls):
        # Tells the tracker to use bwd_state as the running state
        cls.running_state = cls.bwd_state

    @classmethod
    def record_state(cls, seed, offset, mode):
        """Store ``seed``/``offset`` on the forward or backward state.

        ``mode`` must be ``"forward"`` or ``"backward"``. Recording the
        forward state also makes it the running state; recording the backward
        state does not switch the running state.
        """
        # Records the seed and offset tensors. These tensors are used to invoke
        # the philox_rand functional primitives.
        if mode == "forward":
            cls.fwd_state.set_state(seed, offset)
            cls.mark_beginning_of_forward()
        else:
            assert mode == "backward"
            cls.bwd_state.set_state(seed, offset)

    @classmethod
    def get_state_as_tensor(cls):
        # The only reason this exists is because we override get_rng_state and
        # set_rng_state during tracing. get_rng_state expects a tensor output,
        # so returning a (seed, offset) tuple would upset other parts of the
        # program like ctx.saved_tensors.

        # A bad consequence is that if user saves and restores rng state, we
        # have little bit of ugliness in the generated code, where we first
        # concat the (seed, offset) to create a tensor for get_rng_state, and
        # then split it back to get (seed, offset) tuple in set_rng_state.

        # TODO: Investigate if there is a better way to wrap the tuple in a
        # false Tensor object, and then desugar it later on.
        return cls.running_state.get_state_as_tensor()

    @classmethod
    def get_state_as_tuple(cls):
        return cls.running_state.get_state_as_tuple()

    @classmethod
    def set_state_from_tensor(cls, x):
        # This is only needed because we override set_rng_state. Look at the
        # comment in get_state_as_tensor method.
        cls.running_state.set_state_from_tensor(x)

    @classmethod
    def advance_offset(cls, consumed_offset):
        cls.running_state.advance_offset(consumed_offset)

    @classmethod
    def get_current_relative_offset(cls):
        return cls.running_state.relative_offset

    @staticmethod
    def multiple_of_4(offset):
        # torch cuda rng state offset must be a multiple of 4. For inductor, as
        # we sum up all the numel, the result might not be a multiple of 4. This
        # method rounds the offset up to the next multiple of 4.
        return (offset + 3) // 4 * 4

    @classmethod
    def get_updated_fwd_offset(cls):
        """Return the forward offset after tracing, rounded up to a multiple of 4."""
        # Short circuit if no rand ops were observed
        if not cls.fwd_state.offset_advanced_alteast_once:
            return cls.fwd_state.base_offset
        return cls.multiple_of_4(
            cls.fwd_state.base_offset + cls.fwd_state.relative_offset
        )

    @classmethod
    def get_updated_bwd_offset(cls):
        """Return the backward offset after tracing, rounded up to a multiple of 4."""
        # Short circuit if no rand ops were observed
        if not cls.bwd_state.offset_advanced_alteast_once:
            return cls.bwd_state.base_offset
        return cls.multiple_of_4(
            cls.bwd_state.base_offset + cls.bwd_state.relative_offset
        )
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
# Adding more decompositions which eventually use rand_like inside decomps.
# Adding these in rng_decompositions ensures the functionalization of rand_like
# ops used in these decomps. The list is copied from inductor codebase, which
# uses it for similar purpose.
#
# Caution - These decomps do not have same accuracy as that of eager. However,
# we can't just disable them with a config flag like fallback_random, because
# for functionalization of rng ops, we have to decompose these ops.
extra_random_decomps = get_decompositions(
    [
        aten.cauchy,
        aten.cauchy_,
        aten.exponential,
        aten.exponential_,
        aten.geometric,
        aten.geometric_,
        aten.native_dropout,
        aten.normal,
        aten.normal_,
        aten.normal_functional,
        aten.log_normal,
        aten.log_normal_,
        aten.rrelu_with_noise,
        aten.rrelu_with_noise_,
        aten.uniform_,
    ]
)
# Decorator that registers a decomposition directly into extra_random_decomps.
register_extra_random_decomp = functools.partial(
    decomp.register_decomposition, registry=extra_random_decomps
)
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
@register_extra_random_decomp([aten.bernoulli_])
def bernoulli_(self, p=0.5):
    """Decompose in-place bernoulli into ``rand_like(self) < p`` copied into ``self``.

    Returns ``NotImplemented`` for CPU tensors, leaving them to another
    implementation.
    """
    if self.device == torch.device("cpu"):
        return NotImplemented
    return self.copy_(torch.rand_like(self, dtype=torch.float32) < p)
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
@register_extra_random_decomp([aten.bernoulli.p])
def bernoulli_p(self, p=0.5, *, generator=None):
    """Decompose ``bernoulli.p`` into ``rand_like(self) < p``.

    Returns ``NotImplemented`` for CPU tensors. An explicit ``generator`` is
    not supported by this decomposition and trips the assertion.
    """
    if self.device == torch.device("cpu"):
        return NotImplemented
    assert generator is None
    return torch.rand_like(self, dtype=torch.float32) < p
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
# Merge the extra random decompositions into rng_decompositions.
rng_decompositions.update(extra_random_decomps) # type: ignore[arg-type]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_dispatch/__init__.py
ADDED
|
File without changes
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_dispatch/python.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import itertools
|
| 3 |
+
import unittest.mock
|
| 4 |
+
from collections.abc import Callable, Iterator
|
| 5 |
+
from contextlib import contextmanager
|
| 6 |
+
from typing import TypeVar, Union
|
| 7 |
+
from typing_extensions import ParamSpec
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
import torch._C
|
| 11 |
+
import torch._ops
|
| 12 |
+
import torch.utils._python_dispatch
|
| 13 |
+
import torch.utils._pytree as pytree
|
| 14 |
+
from torch._C import DispatchKey
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
__all__ = ["enable_python_dispatcher", "no_python_dispatcher", "enable_pre_dispatch"]
|
| 18 |
+
|
| 19 |
+
no_python_dispatcher = torch._C._DisablePythonDispatcher
|
| 20 |
+
enable_python_dispatcher = torch._C._EnablePythonDispatcher
|
| 21 |
+
enable_pre_dispatch = torch._C._EnablePreDispatch
|
| 22 |
+
|
| 23 |
+
CROSSREF_FUNCTIONALIZE = False
|
| 24 |
+
|
| 25 |
+
_P = ParamSpec("_P")
|
| 26 |
+
_T = TypeVar("_T")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def all_py_loaded_overloads() -> Iterator[torch._ops.OpOverload]:
    """
    Warning: the set of overloads this will report is very subtle.  It is precisely
    the set of torch.ops functions that have actually been accessed from Python
    (e.g., we actually called torch.ops.aten.blah at some point).  This is DIFFERENT
    from the set of registered operators, which will in general be a larger set,
    as this would include all operators which we ran C++ static initializers or
    Python operator registration on.  This does not eagerly populate the list on
    torch.ops.aten; this list is lazy!

    In other words, this is good for traversing over everything that has an
    OpOverload object allocated in Python.  We use it for cache invalidation, but
    don't rely on this list being complete.

    Note that even if we did report all C++ registered overloads, this isn't guaranteed
    to be complete either, as a subsequent lazy load of a library which triggers more
    registrations could add more things to the set.
    """
    # Walk namespace -> op packet -> overload, resolving each name by getattr.
    for ns in torch.ops:
        packets = getattr(torch.ops, ns)
        for op_name in packets:
            packet = getattr(packets, op_name)
            for overload in packet:
                yield getattr(packet, overload)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
@contextmanager
def suspend_functionalization():
    """Temporarily turn functionalization off inside the ``with`` body.

    If the Functionalize dispatch key is currently included, it is disabled
    on entry and re-enabled on exit (even on error) with the previously
    recorded ``reapply_views`` setting. Otherwise this is a no-op.
    """
    f_tls = torch._C._dispatch_tls_is_dispatch_key_included(
        torch._C.DispatchKey.Functionalize
    )
    f_rv = torch._C._functionalization_reapply_views_tls()
    if f_tls:
        torch._disable_functionalization()
    try:
        yield
    finally:
        if f_tls:
            torch._enable_functionalization(reapply_views=f_rv)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def check_tensor_metadata_matches(nv, rv, desc):
    """Assert that tensors ``nv`` and ``rv`` agree on size, dtype and strides.

    Args:
        nv, rv: the two tensors to compare.
        desc: zero-argument callable returning a description used as the
            prefix of the assertion message; only called on failure.

    Raises:
        AssertionError: on the first mismatch found.
    """
    assert callable(desc)
    assert nv.size() == rv.size(), f"{desc()}: sizes {nv.size()} != {rv.size()}"
    assert nv.dtype == rv.dtype, f"{desc()}: dtype {nv.dtype} != {rv.dtype}"
    same_strides, idx = torch._prims_common.check_significant_strides(
        nv, rv, only_cuda=False
    )
    assert same_strides, (
        f"{desc()}: strides {nv.stride()} != {rv.stride()} (mismatch at index {idx})"
    )
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def check_metadata_matches(n, r, desc):
    """Assert that two pytrees hold tensors with matching metadata.

    Both ``n`` and ``r`` are flattened; leaves are compared pairwise with
    ``check_tensor_metadata_matches`` wherever the leaf from ``r`` is a
    tensor. Non-tensor leaves of ``r`` are skipped.

    Args:
        n, r: the two pytrees to compare.
        desc: zero-argument callable describing the call, used in messages.

    Raises:
        AssertionError: if the leaf counts differ or any tensor pair differs.
    """
    assert callable(desc)
    n_vals, _n_spec = pytree.tree_flatten(n)
    r_vals, _r_spec = pytree.tree_flatten(r)
    # TODO: test the specs match; empirically sometimes we have a tuple
    # on one side and a list on the other
    assert len(n_vals) == len(r_vals), f"{len(n_vals)} != {len(r_vals)}"
    for i, (nv, rv) in enumerate(zip(n_vals, r_vals)):
        if not isinstance(rv, torch.Tensor):
            continue
        # The lambda is only invoked synchronously inside the check, so
        # capturing the loop variable ``i`` is safe.
        check_tensor_metadata_matches(nv, rv, lambda: f"{desc()} output {i}")
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
class Lit:
    """Wrapper whose ``repr`` is the wrapped string verbatim (no quotes)."""

    def __init__(self, s):
        self.s = s

    def __repr__(self):
        return self.s
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def _fmt(a: object) -> object:
    """Return a printable stand-in for ``a``.

    Tensors become a ``Lit`` whose repr is a ``torch.empty_strided(...)``
    expression carrying the tensor's size, stride and dtype; every other
    value is returned unchanged.
    """
    if isinstance(a, torch.Tensor):
        return Lit(
            f"torch.empty_strided({tuple(a.size())}, {a.stride()}, dtype={a.dtype})"
        )
    else:
        return a
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def make_crossref_functionalize(
    op: torch._ops.OpOverload[_P, _T], final_key: DispatchKey
) -> Union[Callable[_P, _T], DispatchKey]:
    """Build a handler that cross-checks ``op`` against a fake-tensor run.

    The returned handler runs ``op`` on fake copies of its arguments, runs
    the real computation through ``op._op_dk(final_key, ...)``, asserts that
    the output metadata of both runs matches, and returns the real result.

    For ``aten.lift_fresh.default`` no handler is built and ``final_key`` is
    returned instead.
    """
    from torch._subclasses.fake_tensor import FakeTensorMode

    # This case is pretty weird, suppress it for now
    if op is torch.ops.aten.lift_fresh.default:
        return final_key

    def handler(*args: _P.args, **kwargs: _P.kwargs) -> _T:
        fake_mode = FakeTensorMode()

        def fakeify_defun(t):
            # Unwrap functional tensors, then convert to a fake tensor.
            if isinstance(t, torch.Tensor):
                if torch._is_functional_tensor(t):
                    r = torch._from_functional_tensor(t)
                    # NB: This assumes that the inner tensor sizes/strides match
                    # the outer tensor sizes/strides.  This doesn't necessarily have to
                    # be the case, see discussion at
                    # https://github.com/pytorch/pytorch/pull/87610/files/401ddeda1d769bedc88a12de332c7357b60e51a4#r1007264456
                    assert t.size() == r.size()
                    assert t.stride() == r.stride()
                else:
                    r = t
                # TODO: suppress guards
                return fake_mode.from_tensor(r)
            return t

        def maybe_detach(t):
            if isinstance(t, torch.Tensor):
                return t.detach()
            else:
                return t

        # TODO: This probably does the wrong thing if you're running other
        # substantive modes with the normal op outside here
        with (
            torch.utils._python_dispatch._disable_current_modes(),
            suspend_functionalization(),
        ):
            f_args, f_kwargs = pytree.tree_map(fakeify_defun, (args, kwargs))
            # Detached copies taken before the op runs; used only to describe
            # the call in assertion messages.
            orig_f_args, orig_f_kwargs = pytree.tree_map(
                maybe_detach, (f_args, f_kwargs)
            )
            with fake_mode:
                f_r = op(*f_args, **f_kwargs)  # pyrefly: ignore [invalid-param-spec]
        r = op._op_dk(final_key, *args, **kwargs)

        def desc():
            fmt_args = ", ".join(
                itertools.chain(
                    (repr(pytree.tree_map(_fmt, a)) for a in orig_f_args),
                    (
                        f"{k}={pytree.tree_map(_fmt, v)}"
                        for k, v in orig_f_kwargs.items()
                    ),
                )
            )
            return f"{op}({fmt_args})"

        check_metadata_matches(f_r, r, desc)
        return r

    return handler
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# NB: enabling this is slow, don't do it in a hot loop.  This is purely
# for debugging purposes.
@contextmanager
def enable_crossref_functionalize():
    """Enable functionalization cross-referencing inside the ``with`` body.

    Drops the cached Functionalize dispatch entry of every Python-loaded
    overload on entry and again on exit, and in between turns on the Python
    dispatcher and patches ``CROSSREF_FUNCTIONALIZE`` to True.
    """
    for op in all_py_loaded_overloads():
        op._uncache_dispatch(torch._C.DispatchKey.Functionalize)
    try:
        with (
            enable_python_dispatcher(),
            unittest.mock.patch("torch._dispatch.python.CROSSREF_FUNCTIONALIZE", True),
        ):
            yield
    finally:
        # Uncache again on the way out, including when the body raised.
        for op in all_py_loaded_overloads():
            op._uncache_dispatch(torch._C.DispatchKey.Functionalize)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/config.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration module for torch.export.export.
|
| 3 |
+
|
| 4 |
+
This module contains various configuration flags and settings that control torch.export's
|
| 5 |
+
behavior, including:
|
| 6 |
+
- Runtime behavior flags
|
| 7 |
+
- Debugging and development options
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import sys
|
| 11 |
+
from typing import Any, TYPE_CHECKING
|
| 12 |
+
|
| 13 |
+
from torch._environment import is_fbcode
|
| 14 |
+
from torch.utils._config_module import install_config_module
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# this flag controls whether we use new functional tracer. It
|
| 18 |
+
# should be True in the long term.
|
| 19 |
+
use_new_tracer_experimental = True
|
| 20 |
+
|
| 21 |
+
# this flag is used to control whether we want to instrument
|
| 22 |
+
# fake tensor creation to track potential leaks. It is off
|
| 23 |
+
# by default, but user can turn it on to debug leaks.
|
| 24 |
+
detect_non_strict_fake_tensor_leaks = False
|
| 25 |
+
|
| 26 |
+
# error on potentially pre-dispatch/non-strict tracing limitation
|
| 27 |
+
# this type of error usually happens when we encounter an op
|
| 28 |
+
# that we don't know how to proxy, resulting in untracked fake tensors
|
| 29 |
+
error_on_lifted_constant_tensors = True
|
| 30 |
+
|
| 31 |
+
# enable auto_functionalized_v2 in export
|
| 32 |
+
# We turn this off in fbcode due to downstream users not
|
| 33 |
+
# being ready to handle auto_functionalized_v2.
|
| 34 |
+
enable_auto_functionalized_v2_for_export = not is_fbcode()
|
| 35 |
+
|
| 36 |
+
use_legacy_dynamo_graph_capture = True
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
if TYPE_CHECKING:
    from torch.utils._config_typing import *  # noqa: F401, F403

    # Stub for type checkers only; never defined here at runtime.
    def _make_closure_patcher(**changes: Any) -> Any: ...


# Hand this module object to torch's config-module installer.
install_config_module(sys.modules[__name__])
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/error.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class ExportErrorType(Enum):
    """Categories attached to an ``ExportError``."""

    # User providing invalid inputs to either tracer, or other public facing APIs
    INVALID_INPUT_TYPE = 1

    # User returning values from their models that we don't support.
    INVALID_OUTPUT_TYPE = 2

    # Generated IR does not conform to Export IR Specification.
    VIOLATION_OF_SPEC = 3

    # User's code contains types and functionalities we don't support.
    NOT_SUPPORTED = 4

    # User's code didn't provide necessary details for us to successfully trace and export.
    # For example, we use a lot of decorators and ask users to annotate their model.
    MISSING_PROPERTY = 5

    # User is using an API without proper initialization step.
    UNINITIALIZED = 6
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def internal_assert(pred: bool, assert_msg: str) -> None:
    """
    This is exir's custom assert method. It internally just throws InternalError.
    Note that the sole purpose is to throw our own error while maintaining similar syntax
    as python assert.

    Raises:
        InternalError: with ``assert_msg`` when ``pred`` is falsy.
    """

    if not pred:
        raise InternalError(assert_msg)


class InternalError(Exception):
    """
    Raised when an internal invariant is violated in EXIR stack.
    Should hint users to report a bug to dev and expose the original
    error message.
    """

    def __init__(self, message: str) -> None:
        super().__init__(message)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class ExportError(Exception):
    """
    This type of exception is raised for errors that are directly caused by the user
    code. In general, user errors happen during model authoring, tracing, using our public
    facing APIs, and writing graph passes.

    The exception message is ``message`` prefixed with ``[<error_code>]: ``.
    """

    def __init__(self, error_code: ExportErrorType, message: str) -> None:
        prefix = f"[{error_code}]: "
        super().__init__(prefix + message)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_export/verifier.py
ADDED
|
@@ -0,0 +1,531 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
import inspect
|
| 3 |
+
import math
|
| 4 |
+
import operator
|
| 5 |
+
from collections.abc import Iterable
|
| 6 |
+
from typing import Any, final, TYPE_CHECKING
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
from torch._library.opaque_object import is_opaque_type
|
| 10 |
+
from torch._ops import HigherOrderOperator, OpOverload
|
| 11 |
+
from torch._subclasses.fake_tensor import FakeTensor
|
| 12 |
+
from torch.export.graph_signature import (
|
| 13 |
+
CustomObjArgument,
|
| 14 |
+
InputKind,
|
| 15 |
+
SymBoolArgument,
|
| 16 |
+
SymFloatArgument,
|
| 17 |
+
SymIntArgument,
|
| 18 |
+
TensorArgument,
|
| 19 |
+
TokenArgument,
|
| 20 |
+
)
|
| 21 |
+
from torch.fx import GraphModule
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
if TYPE_CHECKING:
|
| 25 |
+
from torch.export.exported_program import ExportedProgram
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class SpecViolationError(Exception):
    """Raised by the checks in this module when a node or operator is rejected."""

    pass
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def is_functional(op: OpOverload) -> bool:
    """Return True when the schema of ``op`` is not marked mutable."""
    return not op._schema.is_mutable
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _check_has_fake_tensor(node: torch.fx.Node) -> None:
    """Alias kept for existing callers; delegates to ``_check_val``."""
    # TODO(angelayi): remove this in favor of _check_val
    return _check_val(node)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _check_val(node: torch.fx.Node) -> None:
    """Validate the ``val`` entry of ``node.meta``.

    A missing ``val`` is accepted only for ``call_function`` nodes whose
    target is an ``OpOverload`` with no returns.

    Raises:
        SpecViolationError: if ``val`` is missing (outside that exception) or
            holds a value of an unsupported type.
    """
    from torch.fx.experimental.symbolic_shapes import SymBool, SymFloat, SymInt

    def _check_correct_val(val):
        # Accept None, primitives, torch metadata objects, tensors, symbolic
        # scalars, custom-object arguments, opaque types, and iterables made
        # up entirely of accepted values.
        if val is None:
            return True
        elif isinstance(val, (int, bool, str, float)):
            return True
        elif isinstance(
            val, (torch.memory_format, torch.dtype, torch.device, torch.layout)
        ):
            return True
        elif isinstance(
            val, (FakeTensor, torch.Tensor)
        ):  # TODO(zhxchen17) Remove Tensor.
            return True
        elif isinstance(val, (SymInt, SymFloat, SymBool)):
            return True
        elif isinstance(val, CustomObjArgument):
            return True
        elif isinstance(val, Iterable):
            return all(_check_correct_val(x) for x in val)
        elif is_opaque_type(type(val)):
            return True
        return False

    def _no_returns(op):
        if not isinstance(op, OpOverload):
            return False
        return len(op._schema.returns) == 0

    if "val" not in node.meta:
        if node.op == "call_function" and _no_returns(node.target):
            return
        raise SpecViolationError(f"Node.meta {node.name} is missing val field.")

    val = node.meta["val"]
    if not _check_correct_val(val):
        raise SpecViolationError(f"Node.meta {node.name} has invalid val field {val}")
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _check_torch_fn(node: torch.fx.Node) -> None:
    """Validate the ``torch_fn`` entry of ``node.meta``.

    The entry must be a tuple whose first two items are strings.

    Raises:
        SpecViolationError: if ``torch_fn`` is missing or malformed.
    """
    torch_fn = node.meta.get("torch_fn")
    if torch_fn is None:
        raise SpecViolationError(
            f"Unable to find torch_fn metadata for node {node.name}"
        )
    # The negation must cover the whole conjunction; binding ``not`` to the
    # tuple check alone lets tuples with non-string members through. The
    # length check keeps the indexing below from raising on short tuples.
    if not (
        isinstance(torch_fn, tuple)
        and len(torch_fn) >= 2
        and isinstance(torch_fn[0], str)
        and isinstance(torch_fn[1], str)
    ):
        raise SpecViolationError(
            f"Node.meta {node.name} has invalid torch_fn field {torch_fn}"
        )
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class _VerifierMeta(type):
    """Metaclass that registers every verifier class under its ``dialect``.

    The root class (no bases) must define ``check`` and
    ``_check_graph_module`` and use the ``"ATEN"`` dialect. Subclasses must
    declare a different dialect and may not override either method.
    """

    # Maps dialect name -> verifier class; a later class with the same dialect
    # replaces the earlier entry.
    _registry: dict[str, type["Verifier"]] = {}

    def __new__(metacls, name, bases, attrs):
        if bases:
            if "check" in attrs or "_check_graph_module" in attrs:
                raise SyntaxError("Overriding method check is not allowed.")
            assert "dialect" in attrs and attrs["dialect"] != "ATEN"
        else:
            assert "check" in attrs
            assert "_check_graph_module" in attrs
            assert attrs["dialect"] == "ATEN"

        assert isinstance(attrs["dialect"], str)
        ret = type.__new__(metacls, name, bases, attrs)
        metacls._registry[attrs["dialect"]] = ret  # type: ignore[assignment]
        return ret
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def getattr_recursive(obj: Any, target: str) -> Any:
    """Resolve a dotted attribute path such as ``"a.b.c"`` starting at ``obj``.

    Args:
        obj: object to start the lookup from.
        target: attribute names joined by ``"."``.

    Returns:
        The attribute found at the end of the path.

    Raises:
        RuntimeError: if any attribute along the path is missing; the message
            names the path up to and including the missing attribute.
    """
    target_atoms = target.split(".")
    attr_itr = obj
    for i, atom in enumerate(target_atoms):
        if not hasattr(attr_itr, atom):
            # Include the missing atom itself (``i + 1``); otherwise the
            # message points at the parent, or is empty for the first atom.
            raise RuntimeError(
                f"Node referenced nonexistent target {'.'.join(target_atoms[: i + 1])}"
            )
        attr_itr = getattr(attr_itr, atom)
    return attr_itr
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
class Verifier(metaclass=_VerifierMeta):
|
| 130 |
+
dialect = "ATEN"
|
| 131 |
+
|
| 132 |
+
def allowed_builtin_ops(self) -> list:
|
| 133 |
+
return [
|
| 134 |
+
operator.getitem,
|
| 135 |
+
operator.add,
|
| 136 |
+
operator.mul,
|
| 137 |
+
operator.sub,
|
| 138 |
+
operator.truediv,
|
| 139 |
+
operator.ge,
|
| 140 |
+
operator.le,
|
| 141 |
+
operator.gt,
|
| 142 |
+
operator.lt,
|
| 143 |
+
operator.eq,
|
| 144 |
+
operator.ne,
|
| 145 |
+
operator.floordiv,
|
| 146 |
+
operator.mod,
|
| 147 |
+
operator.and_,
|
| 148 |
+
operator.or_,
|
| 149 |
+
operator.not_,
|
| 150 |
+
operator.pow,
|
| 151 |
+
operator.neg,
|
| 152 |
+
operator.abs,
|
| 153 |
+
operator.lshift,
|
| 154 |
+
operator.rshift,
|
| 155 |
+
math.ceil,
|
| 156 |
+
math.floor,
|
| 157 |
+
math.trunc,
|
| 158 |
+
round,
|
| 159 |
+
]
|
| 160 |
+
|
| 161 |
+
def allowed_op_types(self) -> tuple[type[Any], ...]:
|
| 162 |
+
return (OpOverload, HigherOrderOperator)
|
| 163 |
+
|
| 164 |
+
def allowed_getattr_types(self) -> tuple[type[Any], ...]:
|
| 165 |
+
return (torch.fx.GraphModule, torch.utils._pytree.TreeSpec)
|
| 166 |
+
|
| 167 |
+
def allowed_getattr_types_for_subgm(self) -> tuple[type[Any], ...]:
|
| 168 |
+
# subgm in HOP's argument could has have getattr(weight) nodes, thus stateful
|
| 169 |
+
return (
|
| 170 |
+
torch.fx.GraphModule,
|
| 171 |
+
torch.nn.parameter.Parameter,
|
| 172 |
+
torch.Tensor, # for buffer and constant tensor
|
| 173 |
+
torch.utils._pytree.TreeSpec,
|
| 174 |
+
)
|
| 175 |
+
|
| 176 |
+
def check_valid_op(self, op):
|
| 177 |
+
pass
|
| 178 |
+
|
| 179 |
+
def check_additional(self, gm: GraphModule) -> None:
|
| 180 |
+
"""
|
| 181 |
+
Additional checks that are specific to some dialects.
|
| 182 |
+
"""
|
| 183 |
+
|
| 184 |
+
@final
|
| 185 |
+
def check(self, ep: "ExportedProgram") -> None:
|
| 186 |
+
self._check_graph_module(ep.graph_module)
|
| 187 |
+
_verify_exported_program_module_call_graph(ep)
|
| 188 |
+
_verify_exported_program_signature(ep)
|
| 189 |
+
|
| 190 |
+
@final
|
| 191 |
+
def _check_graph_module(self, gm: torch.fx.GraphModule) -> None:
|
| 192 |
+
def _allowed_getattr_types(is_toplevel_gm) -> tuple[type[Any], ...]:
|
| 193 |
+
if is_toplevel_gm:
|
| 194 |
+
ret = self.allowed_getattr_types()
|
| 195 |
+
else:
|
| 196 |
+
ret = self.allowed_getattr_types_for_subgm()
|
| 197 |
+
assert not any(t is object for t in ret)
|
| 198 |
+
return ret
|
| 199 |
+
|
| 200 |
+
def _check_valid_op(op) -> None:
|
| 201 |
+
def _allowed_builtin_ops() -> list:
|
| 202 |
+
ret = self.allowed_builtin_ops()
|
| 203 |
+
assert all(inspect.isbuiltin(op) for op in ret)
|
| 204 |
+
return ret
|
| 205 |
+
|
| 206 |
+
def _allowed_op_types() -> tuple[type[Any], ...]:
|
| 207 |
+
ret = self.allowed_op_types()
|
| 208 |
+
assert not any(t is object for t in ret)
|
| 209 |
+
return ret
|
| 210 |
+
|
| 211 |
+
# TODO Remove this allowlist.
|
| 212 |
+
_allowed_torch_functions = (
|
| 213 |
+
torch.autograd.grad_mode.set_grad_enabled,
|
| 214 |
+
torch.sym_int,
|
| 215 |
+
torch.sym_float,
|
| 216 |
+
torch.sym_ite,
|
| 217 |
+
torch.sym_max,
|
| 218 |
+
torch.sym_min,
|
| 219 |
+
torch.sym_not,
|
| 220 |
+
torch.sym_sqrt,
|
| 221 |
+
torch.sym_sum,
|
| 222 |
+
torch.export.custom_ops._call_custom_autograd_function_in_pre_dispatch,
|
| 223 |
+
# TODO (tmanlaibaatar)
|
| 224 |
+
# Predispatch export is able to contain autograd ops.
|
| 225 |
+
# These will be modeled as HOO later
|
| 226 |
+
torch._C._set_grad_enabled,
|
| 227 |
+
torch.amp.autocast_mode._enter_autocast,
|
| 228 |
+
torch.amp.autocast_mode._exit_autocast,
|
| 229 |
+
torch.fx.experimental.symbolic_shapes.cast_symbool_to_symint_guardless,
|
| 230 |
+
torch._functorch.predispatch._add_batch_dim,
|
| 231 |
+
torch._functorch.predispatch._remove_batch_dim,
|
| 232 |
+
torch._functorch.predispatch._vmap_increment_nesting,
|
| 233 |
+
torch._functorch.predispatch._vmap_decrement_nesting,
|
| 234 |
+
torch._functorch.predispatch.lazy_load_decompositions,
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
if not isinstance(op, _allowed_op_types()):
|
| 238 |
+
if (
|
| 239 |
+
op not in _allowed_builtin_ops()
|
| 240 |
+
and op not in _allowed_torch_functions
|
| 241 |
+
):
|
| 242 |
+
raise SpecViolationError(
|
| 243 |
+
f"Operator '{op}' is not an allowed operator type: {_allowed_op_types()}\n"
|
| 244 |
+
f"Valid builtin ops: {_allowed_builtin_ops()}"
|
| 245 |
+
f"Valid torch functions: {_allowed_torch_functions}"
|
| 246 |
+
)
|
| 247 |
+
|
| 248 |
+
if isinstance(op, OpOverload):
|
| 249 |
+
# All ops functional
|
| 250 |
+
# TODO (tmanlaibaatar) more proper way is needed here
|
| 251 |
+
if self.dialect != "TRAINING" and not is_functional(op):
|
| 252 |
+
raise SpecViolationError(f"operator '{op}' is not functional")
|
| 253 |
+
self.check_valid_op(op)
|
| 254 |
+
|
| 255 |
+
for mod in gm.modules():
|
| 256 |
+
is_toplevel_gm = mod is gm
|
| 257 |
+
|
| 258 |
+
if not isinstance(mod, torch.fx.GraphModule):
|
| 259 |
+
continue
|
| 260 |
+
|
| 261 |
+
mod.graph.lint()
|
| 262 |
+
for node in mod.graph.nodes:
|
| 263 |
+
# TODO(T140410192): should have fake tensor for all dialects
|
| 264 |
+
if node.op in {"call_module", "call_method"}:
|
| 265 |
+
raise SpecViolationError(
|
| 266 |
+
f"call_module is not valid: got a class '{node.target}' ",
|
| 267 |
+
)
|
| 268 |
+
|
| 269 |
+
elif node.op == "call_function":
|
| 270 |
+
_check_val(node)
|
| 271 |
+
|
| 272 |
+
_check_valid_op(node.target)
|
| 273 |
+
|
| 274 |
+
elif node.op == "get_attr":
|
| 275 |
+
if not isinstance(node.target, str):
|
| 276 |
+
raise SpecViolationError(
|
| 277 |
+
f"Expected get_attr target to be string, but got {type(node.target)}"
|
| 278 |
+
)
|
| 279 |
+
|
| 280 |
+
attr = getattr_recursive(mod, node.target)
|
| 281 |
+
if isinstance(attr, torch.nn.Module):
|
| 282 |
+
|
| 283 |
+
def _is_type(name, ty):
|
| 284 |
+
return isinstance(getattr(attr, name, None), ty)
|
| 285 |
+
|
| 286 |
+
if type(attr).__name__ == "LoweredBackendModule":
|
| 287 |
+
if (
|
| 288 |
+
_is_type("backend_id", str)
|
| 289 |
+
and hasattr(attr, "original_module")
|
| 290 |
+
and hasattr(attr, "module_name")
|
| 291 |
+
and getattr(attr, "backend_id", None) == "aoti"
|
| 292 |
+
):
|
| 293 |
+
continue
|
| 294 |
+
if (
|
| 295 |
+
_is_type("backend_id", str)
|
| 296 |
+
and _is_type("processed_bytes", bytes)
|
| 297 |
+
and _is_type("compile_specs", list)
|
| 298 |
+
and hasattr(attr, "original_module")
|
| 299 |
+
):
|
| 300 |
+
continue
|
| 301 |
+
else:
|
| 302 |
+
backend_id = getattr(attr, "backend_id", None)
|
| 303 |
+
processed_bytes = getattr(attr, "processed_bytes", None)
|
| 304 |
+
compile_specs = getattr(attr, "compile_specs", None)
|
| 305 |
+
raise SpecViolationError(
|
| 306 |
+
f"Invalid get_attr type {type(attr)}. \n"
|
| 307 |
+
f"LoweredBackendModule fields: "
|
| 308 |
+
f"backend_id(str) : {type(backend_id)}, "
|
| 309 |
+
f"processed_bytes(bytes) : {type(processed_bytes)}, "
|
| 310 |
+
f"compile_specs(list) : {type(compile_specs)}"
|
| 311 |
+
)
|
| 312 |
+
elif type(attr).__name__ == "AOTInductorEPModule":
|
| 313 |
+
continue
|
| 314 |
+
|
| 315 |
+
elif type(attr).__name__ == "AOTInductorRunnerWrapper":
|
| 316 |
+
continue
|
| 317 |
+
|
| 318 |
+
if not isinstance(attr, _allowed_getattr_types(is_toplevel_gm)):
|
| 319 |
+
raise SpecViolationError(
|
| 320 |
+
f"Invalid get_attr type {type(attr)} on target {node.target}. \n"
|
| 321 |
+
f"Valid get_attr types: {_allowed_getattr_types(is_toplevel_gm)}"
|
| 322 |
+
)
|
| 323 |
+
|
| 324 |
+
elif node.op == "placeholder":
|
| 325 |
+
_check_val(node)
|
| 326 |
+
# TODO(zhxchen17)
|
| 327 |
+
# elif node.op == "output":
|
| 328 |
+
# _check_flattened_outputs()
|
| 329 |
+
|
| 330 |
+
self.check_additional(gm)
|
| 331 |
+
|
| 332 |
+
|
| 333 |
+
class TrainingIRVerifier(Verifier):
    # Verifier for the "TRAINING" dialect. Verifier._check_graph_module skips
    # the "operator is not functional" check when the dialect is "TRAINING".
    dialect = "TRAINING"
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def _verify_exported_program_module_call_graph(exported_program) -> None:
|
| 338 |
+
module_call_graph = exported_program.module_call_graph
|
| 339 |
+
nodes = {node.name for node in exported_program.graph.nodes}
|
| 340 |
+
for entry in module_call_graph:
|
| 341 |
+
if entry.signature is not None:
|
| 342 |
+
for arg in entry.signature.inputs:
|
| 343 |
+
if arg.name and arg.name not in nodes:
|
| 344 |
+
raise SpecViolationError(
|
| 345 |
+
f"Input {arg.name} does not exist in the graph."
|
| 346 |
+
)
|
| 347 |
+
for arg in entry.signature.outputs:
|
| 348 |
+
if arg.name and arg.name not in nodes:
|
| 349 |
+
raise SpecViolationError(
|
| 350 |
+
f"Output {arg.name} does not exist in the graph."
|
| 351 |
+
)
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
def _verify_exported_program_signature(exported_program) -> None:
|
| 355 |
+
# Check ExportedProgram signature matches
|
| 356 |
+
gs = exported_program.graph_signature
|
| 357 |
+
|
| 358 |
+
# Check every node in the signature exists in the graph
|
| 359 |
+
input_node_names = [
|
| 360 |
+
node.name for node in exported_program.graph.nodes if node.op == "placeholder"
|
| 361 |
+
]
|
| 362 |
+
|
| 363 |
+
if len(input_node_names) != len(gs.input_specs):
|
| 364 |
+
raise SpecViolationError(
|
| 365 |
+
f"Number of graph inputs ({len(input_node_names)}) "
|
| 366 |
+
f"does not match number of inputs in the graph signature ({len(gs.input_specs)})"
|
| 367 |
+
)
|
| 368 |
+
|
| 369 |
+
for input_spec, node in zip(gs.input_specs, input_node_names):
|
| 370 |
+
if isinstance(
|
| 371 |
+
input_spec.arg,
|
| 372 |
+
(TensorArgument, SymIntArgument, SymFloatArgument, SymBoolArgument),
|
| 373 |
+
):
|
| 374 |
+
if input_spec.arg.name != node:
|
| 375 |
+
raise SpecViolationError(
|
| 376 |
+
f"Input spec name {input_spec.arg.name} does not match node name {node}"
|
| 377 |
+
)
|
| 378 |
+
|
| 379 |
+
if input_spec.kind == InputKind.USER_INPUT:
|
| 380 |
+
continue
|
| 381 |
+
|
| 382 |
+
elif input_spec.kind == InputKind.PARAMETER:
|
| 383 |
+
if not isinstance(input_spec.arg, TensorArgument):
|
| 384 |
+
raise SpecViolationError(
|
| 385 |
+
f"Parameter {input_spec.name} is not a tensor argument. Found {input_spec.arg} instead."
|
| 386 |
+
)
|
| 387 |
+
if input_spec.target is None:
|
| 388 |
+
raise SpecViolationError(
|
| 389 |
+
f"InputSpec for {input_spec.name} has no target."
|
| 390 |
+
)
|
| 391 |
+
|
| 392 |
+
param = input_spec.target
|
| 393 |
+
if param not in exported_program.state_dict:
|
| 394 |
+
raise SpecViolationError(f"Parameter {param} is not in the state dict.")
|
| 395 |
+
|
| 396 |
+
if not isinstance(exported_program.state_dict[param], torch.nn.Parameter):
|
| 397 |
+
raise SpecViolationError(
|
| 398 |
+
f"State dict entry for parameter {param} is not an instance of torch.nn.Parameter."
|
| 399 |
+
)
|
| 400 |
+
|
| 401 |
+
elif input_spec.kind == InputKind.BUFFER:
|
| 402 |
+
if not isinstance(input_spec.arg, TensorArgument):
|
| 403 |
+
raise SpecViolationError(
|
| 404 |
+
f"Buffer {input_spec.name} is not a tensor argument. Found {input_spec.arg} instead."
|
| 405 |
+
)
|
| 406 |
+
if input_spec.target is None:
|
| 407 |
+
raise SpecViolationError(
|
| 408 |
+
f"InputSpec for {input_spec.name} has no target."
|
| 409 |
+
)
|
| 410 |
+
|
| 411 |
+
buffer = input_spec.target
|
| 412 |
+
if input_spec.persistent is None:
|
| 413 |
+
raise SpecViolationError(
|
| 414 |
+
f"Buffer {buffer} is missing a persistence flag"
|
| 415 |
+
)
|
| 416 |
+
|
| 417 |
+
if (
|
| 418 |
+
input_spec.persistent is True
|
| 419 |
+
and buffer not in exported_program.state_dict
|
| 420 |
+
):
|
| 421 |
+
raise SpecViolationError(f"Buffer {buffer} is not in the state dict.")
|
| 422 |
+
|
| 423 |
+
if input_spec.persistent is False and buffer in exported_program.state_dict:
|
| 424 |
+
raise SpecViolationError(
|
| 425 |
+
f"Non-persistent buffer {buffer} is in the state dict, it should not be."
|
| 426 |
+
)
|
| 427 |
+
elif input_spec.kind == InputKind.CONSTANT_TENSOR:
|
| 428 |
+
if not isinstance(input_spec.arg, TensorArgument):
|
| 429 |
+
raise SpecViolationError(
|
| 430 |
+
f"Constant tensor {input_spec.name} is not a tensor argument. Found {input_spec.arg} instead."
|
| 431 |
+
)
|
| 432 |
+
if input_spec.target is None:
|
| 433 |
+
raise SpecViolationError(
|
| 434 |
+
f"InputSpec for {input_spec.name} has no target."
|
| 435 |
+
)
|
| 436 |
+
|
| 437 |
+
tensor_const = input_spec.target
|
| 438 |
+
if tensor_const not in exported_program.constants:
|
| 439 |
+
raise SpecViolationError(
|
| 440 |
+
f"Constant tensor {tensor_const} is not in the constants dictionary."
|
| 441 |
+
)
|
| 442 |
+
elif input_spec.kind == InputKind.CUSTOM_OBJ:
|
| 443 |
+
if not isinstance(input_spec.arg, CustomObjArgument):
|
| 444 |
+
raise SpecViolationError(
|
| 445 |
+
f"Custom object {input_spec.name} is not a custom object argument. Found {input_spec.arg} instead."
|
| 446 |
+
)
|
| 447 |
+
if input_spec.target is None:
|
| 448 |
+
raise SpecViolationError(
|
| 449 |
+
f"InputSpec for {input_spec.name} has no target."
|
| 450 |
+
)
|
| 451 |
+
|
| 452 |
+
custom_obj = input_spec.target
|
| 453 |
+
if custom_obj not in exported_program.constants:
|
| 454 |
+
raise SpecViolationError(
|
| 455 |
+
f"Custom object {custom_obj} is not in the constants dictionary."
|
| 456 |
+
)
|
| 457 |
+
elif input_spec.kind == InputKind.TOKEN:
|
| 458 |
+
if not isinstance(input_spec.arg, TokenArgument):
|
| 459 |
+
raise SpecViolationError(
|
| 460 |
+
f"Constant tensor {input_spec.name} is not a tensor argument. Found {input_spec.arg} instead."
|
| 461 |
+
)
|
| 462 |
+
else:
|
| 463 |
+
raise SpecViolationError(f"Unknown InputKind {input_spec.kind}.")
|
| 464 |
+
|
| 465 |
+
# Check outputs
|
| 466 |
+
output_node = list(exported_program.graph.nodes)[-1]
|
| 467 |
+
assert output_node.op == "output"
|
| 468 |
+
output_nodes = [
|
| 469 |
+
arg.name if isinstance(arg, torch.fx.Node) else arg
|
| 470 |
+
for arg in output_node.args[0]
|
| 471 |
+
]
|
| 472 |
+
|
| 473 |
+
if len(output_nodes) != len(gs.output_specs):
|
| 474 |
+
raise SpecViolationError(
|
| 475 |
+
f"Number of output nodes {len(output_nodes)} is different "
|
| 476 |
+
"Than the number of outputs specified by the graph signature: \n"
|
| 477 |
+
f"Number of mutated buffers: {len(gs.buffers_to_mutate)}. \n"
|
| 478 |
+
f"Number of user outputs: {len(gs.user_outputs)}. \n"
|
| 479 |
+
)
|
| 480 |
+
|
| 481 |
+
num_tokens = len(gs.output_tokens)
|
| 482 |
+
end = (
|
| 483 |
+
len(gs.buffers_to_mutate)
|
| 484 |
+
+ len(gs.parameters_to_mutate)
|
| 485 |
+
+ len(gs.user_inputs_to_mutate)
|
| 486 |
+
+ num_tokens
|
| 487 |
+
)
|
| 488 |
+
mutate_nodes: list[str] = output_nodes[num_tokens:end]
|
| 489 |
+
user_output_nodes = output_nodes[end : end + len(gs.user_outputs)]
|
| 490 |
+
|
| 491 |
+
for mutation_node in mutate_nodes:
|
| 492 |
+
if mutation_node in gs.buffers_to_mutate:
|
| 493 |
+
if gs.buffers_to_mutate[mutation_node] not in gs.buffers:
|
| 494 |
+
raise SpecViolationError(
|
| 495 |
+
f"Buffer output {mutation_node} does not point to a buffer that exists. \n"
|
| 496 |
+
f"Dict of buffers that are mutated, in order: {gs.buffers_to_mutate} \n"
|
| 497 |
+
f"Buffer nodes available: {gs.buffers} \n"
|
| 498 |
+
)
|
| 499 |
+
elif mutation_node in gs.parameters_to_mutate:
|
| 500 |
+
if gs.parameters_to_mutate[mutation_node] not in gs.parameters:
|
| 501 |
+
raise SpecViolationError(
|
| 502 |
+
f"Parameter output {mutation_node} does not point to a parameter that exists. \n"
|
| 503 |
+
f"Dict of parameters that are mutated, in order: {gs.parameters_to_mutate} \n"
|
| 504 |
+
f"Parameter nodes available: {gs.parameters} \n"
|
| 505 |
+
)
|
| 506 |
+
elif mutation_node in gs.user_inputs_to_mutate:
|
| 507 |
+
if gs.user_inputs_to_mutate[mutation_node] not in gs.user_inputs:
|
| 508 |
+
raise SpecViolationError(
|
| 509 |
+
f"User input output {mutation_node} does not point to a user input that exists. \n"
|
| 510 |
+
f"Dict of user inputs that are mutated, in order: {gs.user_inputs_to_mutate} \n"
|
| 511 |
+
f"User input nodes available: {gs.user_inputs} \n"
|
| 512 |
+
)
|
| 513 |
+
else:
|
| 514 |
+
raise SpecViolationError(
|
| 515 |
+
f"Mutation node {mutation_node} is neither a buffer nor a user input. "
|
| 516 |
+
f"Buffers to mutate: {gs.buffers_to_mutate}, User inputs to mutate: {gs.user_inputs_to_mutate}"
|
| 517 |
+
)
|
| 518 |
+
|
| 519 |
+
for user_output_node, user_output_name in zip(user_output_nodes, gs.user_outputs):
|
| 520 |
+
if user_output_node != user_output_name:
|
| 521 |
+
raise SpecViolationError(
|
| 522 |
+
f"User output {user_output_node} is not in the correct "
|
| 523 |
+
"order or is not found in the "
|
| 524 |
+
f"exported program's user_output list: {gs.user_outputs}. "
|
| 525 |
+
)
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
def load_verifier(dialect: str) -> type[Verifier]:
    """Look up the verifier class registered for ``dialect``.

    ``"ATEN"`` and the empty string fall back to the base ``Verifier`` when
    nothing is registered under that key. Any other unregistered dialect
    raises ``KeyError``.
    """
    registry = _VerifierMeta._registry
    if dialect in ("ATEN", ""):
        return registry.get(dialect, Verifier)
    return registry[dialect]
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_inductor/__autotune_main__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
from torch._inductor.autotune_process import TuningProcess
|
| 6 |
+
from torch._inductor.compile_worker.utils import _async_compile_initializer
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
log = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def main() -> None:
    """Entry point of the autotune subprocess.

    Parses ``--parent``, ``--read-fd`` and ``--write-fd`` from the command
    line, wraps the two file descriptors in binary file objects and hands them
    to ``TuningProcess.process_main``. Any exception raised while running is
    logged rather than propagated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--parent", type=int)
    parser.add_argument("--read-fd", type=int)
    parser.add_argument("--write-fd", type=int)
    args = parser.parse_args()

    # Context managers guarantee both pipes are closed: the read pipe is
    # released even if opening the write pipe fails, and a failure while
    # closing one pipe no longer skips closing the other. The write pipe is
    # closed first, then the read pipe.
    with os.fdopen(args.read_fd, "rb") as read_pipe:
        with os.fdopen(args.write_fd, "wb") as write_pipe:
            try:
                # Ensures the subprocess exits if the parent crashes:
                _async_compile_initializer(args.parent)
                TuningProcess.process_main(read_pipe, write_pipe)
            except Exception:
                log.exception("Uncaught exception in autotune subprocess")
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/_inductor/__init__.py
ADDED
|
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: allow-untyped-defs
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import io
|
| 5 |
+
import logging
|
| 6 |
+
import os
|
| 7 |
+
from typing import Any, IO, Literal, Optional, TYPE_CHECKING, Union
|
| 8 |
+
|
| 9 |
+
import torch.fx
|
| 10 |
+
|
| 11 |
+
from .standalone_compile import CompiledArtifact # noqa: TC001
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
if TYPE_CHECKING:
|
| 15 |
+
from torch._inductor.utils import InputType
|
| 16 |
+
from torch.export import ExportedProgram
|
| 17 |
+
from torch.export.pt2_archive._package import AOTICompiledModel
|
| 18 |
+
from torch.export.pt2_archive._package_weights import Weights
|
| 19 |
+
from torch.types import FileLike
|
| 20 |
+
|
| 21 |
+
__all__ = [
|
| 22 |
+
"compile",
|
| 23 |
+
"list_mode_options",
|
| 24 |
+
"list_options",
|
| 25 |
+
"cudagraph_mark_step_begin",
|
| 26 |
+
"standalone_compile",
|
| 27 |
+
]
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
log = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def compile(
    gm: torch.fx.GraphModule,
    example_inputs: list[InputType],
    options: Optional[dict[str, Any]] = None,
):
    """
    Compile an FX graph with TorchInductor.

    This is the entry point for graphs that were captured without
    TorchDynamo.

    Args:
        gm: The FX graph to compile.
        example_inputs: List of tensor inputs.
        options: Optional dict of config options, forwarded as config
            patches. See `torch._inductor.config`.

    Returns:
        Callable with same behavior as gm but faster.
    """
    from .compile_fx import compile_fx as _compile_fx

    compiled = _compile_fx(gm, example_inputs, config_patches=options)
    return compiled
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def aoti_compile_and_package(
    exported_program: ExportedProgram,
    _deprecated_unused_args=None,
    _deprecated_unused_kwargs=None,
    *,
    package_path: Optional[FileLike] = None,
    inductor_configs: Optional[dict[str, Any]] = None,
) -> str:
    """
    Compiles the exported program with AOTInductor, and packages it into a .pt2
    artifact specified by the input package_path. To load the package, you can
    call ``torch._inductor.aoti_load_package(package_path)``.

    An example usage is as follows:

    .. code-block:: python

        ep = torch.export.export(M(), ...)
        aoti_file = torch._inductor.aoti_compile_and_package(
            ep, package_path="my_package.pt2"
        )
        compiled_model = torch._inductor.aoti_load_package("my_package.pt2")

    To compile and save multiple models into a single ``.pt2`` artifact, you can do
    the following:

    .. code-block:: python

        ep1 = torch.export.export(M1(), ...)
        aoti_file1 = torch._inductor.aot_compile(
            ep1, ..., options={"aot_inductor.package": True}
        )
        ep2 = torch.export.export(M2(), ...)
        aoti_file2 = torch._inductor.aot_compile(
            ep2, ..., options={"aot_inductor.package": True}
        )

        from torch._inductor.package import package_aoti, load_package

        package_aoti("my_package.pt2", {"model1": aoti_file1, "model2": aoti_file2})

        compiled_model1 = load_package("my_package.pt2", "model1")
        compiled_model2 = load_package("my_package.pt2", "model2")

    Args:
        exported_program: An exported program created through a call from torch.export
        _deprecated_unused_args: Ignored; passing a value only logs a warning.
        _deprecated_unused_kwargs: Ignored; passing a value only logs a warning.
        package_path: Optional specified path to the generated .pt2 artifact.
            May be a path ending in ``.pt2`` or a writable, seekable buffer.
        inductor_configs: Optional dictionary of configs to control inductor.
            The dictionary passed in is not modified.

    Returns:
        Path to the generated artifact

    Raises:
        ValueError: If ``exported_program`` is not an ``ExportedProgram``.
        RuntimeError: If ``exported_program.example_inputs`` is None, or if
            ``aot_inductor.output_path`` is set in ``inductor_configs``.
        AssertionError: If ``package_path`` is neither None, a ``.pt2`` path,
            nor a writable seekable buffer.
    """
    from torch.export import ExportedProgram

    from .debug import aot_inductor_minifier_wrapper

    if not isinstance(exported_program, ExportedProgram):
        raise ValueError("Only ExportedProgram is supported")

    if exported_program.example_inputs is None:
        raise RuntimeError(
            "exported_program.example_inputs is required to be set in order "
            "for AOTInductor compilation."
        )

    if _deprecated_unused_args is not None or _deprecated_unused_kwargs is not None:
        log.warning(
            "You no longer need to specify args/kwargs to aoti_compile_and_package "
            "as we can get this information from exported_program.example_inputs."
        )

    assert (
        package_path is None
        or (
            isinstance(package_path, (io.IOBase, IO))
            and package_path.writable()
            and package_path.seekable()
        )
        or (
            isinstance(package_path, (str, os.PathLike))
            and os.fspath(package_path).endswith(".pt2")
        )
    ), (
        f"Expect package path to be a file ending in .pt2, is None, or is a buffer. Instead got {package_path}"
    )

    # Work on a private copy so that forcing the packaging flag below does not
    # leak into the dictionary owned by the caller.
    inductor_configs = dict(inductor_configs) if inductor_configs else {}
    inductor_configs["aot_inductor.package"] = True

    if inductor_configs.get("aot_inductor.output_path"):
        raise RuntimeError(
            "Please pass in a package path to aot_inductor_compile() instead "
            "of setting the aot_inductor.output_path config."
        )

    # a wrapper around aoti_compile_and_package_inner.
    return aot_inductor_minifier_wrapper(
        _aoti_compile_and_package_inner,
        exported_program,
        # pyrefly: ignore [bad-argument-type]
        package_path=package_path,
        inductor_configs=inductor_configs,
    )
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def _aoti_compile_and_package_inner(
    gm: torch.nn.Module,
    # flat_example_inputs: List[Any],
    args: tuple[Any],
    kwargs: Optional[dict[str, Any]] = None,
    *,
    load_and_run: bool = False,
    check_accuracy: Optional[str] = None,
    package_path: Optional[Union[str, io.BytesIO]] = None,
    inductor_configs: Optional[dict[str, Any]] = None,
):
    """
    Compile ``gm`` with AOTInductor, package the result and return the package path.

    See the docstring of aoti_compile_and_package for the public contract.

    When ``package_path`` is None it is derived from the first generated
    ``.so`` file (or, failing that, the first ``.cpp`` file) by swapping the
    extension for ``.pt2``.

    If `load_and_run` is True, the packaged model is loaded and executed once
    with ``args``/``kwargs``; the minifier uses this to check that the
    compiled model runs.

    If `check_accuracy` is set, the packaged model is loaded and compared
    against ``gm``; kwargs must be None or empty in that case.
    "strict_accuracy" means "we will minify any time we see anything that
    diverges", whereas "accuracy" is more conservative, and will only minify
    if there is a meaningful fp64 divergence.
    """
    if check_accuracy:
        assert kwargs is None or len(kwargs) == 0, (
            "when checking for accuracy, the inputs must have been flattened and kwargs is None"
        )

    from .package import package_aoti

    assert isinstance(gm, torch.fx.GraphModule)

    kwargs = kwargs or {}

    aoti_files = aot_compile(gm, args, kwargs, options=inductor_configs)
    assert isinstance(aoti_files, list)

    if package_path is None:

        def _stems_with_suffix(suffix):
            # Extension-less paths of the generated string entries ending in ``suffix``.
            stems = []
            for produced in aoti_files:
                if not isinstance(produced, str):
                    continue
                stem, ext = os.path.splitext(produced)
                if ext == suffix:
                    stems.append(stem)
            return stems

        # Prefer the shared library; fall back to the C++ source.
        candidates = _stems_with_suffix(".so") or _stems_with_suffix(".cpp")
        package_path = candidates[0] + ".pt2"

    res = package_aoti(package_path, aoti_files)
    assert res == package_path

    if not (load_and_run or check_accuracy):
        return package_path

    compiled_model = aoti_load_package(package_path)
    if not check_accuracy:
        compiled_model(*args, **kwargs)
        return package_path

    from torch._dynamo.debug_utils import AccuracyError, same_two_models

    # This might look inverted but it's not. strict_accuracy means "we will
    # minify any time we see anything that diverges", whereas accuracy is more
    # conservative, and will only minify if there is a meaningful fp64
    # divergence
    not_strict_accuracy = check_accuracy == "accuracy"
    models_agree = same_two_models(
        gm,
        compiled_model,  # type: ignore[arg-type]
        args,
        only_fwd=True,
        require_fp64=not_strict_accuracy,
        ignore_non_fp=not_strict_accuracy,
    )
    if not models_agree:
        raise AccuracyError("Bad accuracy detected")

    return package_path
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def aoti_load_package(
    path: FileLike, run_single_threaded: bool = False, device_index: int = -1
) -> AOTICompiledModel:
    """
    Load a model from a PT2 package.

    If multiple models were packaged into the PT2, this will load the default
    model. To load a specific model, you can directly call the load API

    .. code-block:: python

        from torch._inductor.package import load_package

        compiled_model1 = load_package("my_package.pt2", "model1")
        compiled_model2 = load_package("my_package.pt2", "model2")

    Args:
        path: Path to the .pt2 package
        run_single_threaded (bool): Whether the model should be run without
            thread synchronization logic. This is useful to avoid conflicts with
            CUDAGraphs.
        device_index (int): The index of the device to which the PT2 package is
            to be loaded. By default, `device_index=-1` is used, which corresponds
            to the device `cuda` when using CUDA. Passing `device_index=1` would
            load the package to `cuda:1`, for example.
    """
    from torch._inductor.package import load_package

    loader_kwargs = {
        "run_single_threaded": run_single_threaded,
        "device_index": device_index,
    }
    return load_package(path, **loader_kwargs)
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def aot_compile(
    gm: torch.fx.GraphModule,
    args: tuple[Any, ...],
    kwargs: Optional[dict[str, Any]] = None,
    *,
    options: Optional[dict[str, Any]] = None,
) -> Union[str, list[Union[str, Weights]], torch.fx.GraphModule]:
    """
    Ahead-of-time compile a given FX graph with TorchInductor into a shared library.

    If ``gm`` has a ``_guards_fn`` attribute, the node calling it is erased
    from the graph, the attribute is deleted and ``gm`` is recompiled before
    compilation, so ``gm`` is modified in place in that case.

    Args:
        gm: The FX graph to compile.
        args: Example arguments
        kwargs: Example keyword arguments
        options: Optional dict of config options. See `torch._inductor.config`.

    Returns:
        Path to the generated shared library, or a list of files generated by
        AOTI if aot_inductor.package=True.
        TODO: make it return a list by default
    """
    from .compile_fx import _aoti_flatten_inputs, compile_fx_aot

    if hasattr(gm, "_guards_fn"):
        # Do not compile the guards function, since it may contain checks
        # that are not currently supported by AOTI. In particular, non-Tensor
        # arguments are converted to None and will fail specialization checks.
        guard_calls = gm.graph.find_nodes(op="call_module", target="_guards_fn")
        guard_call = next(iter(guard_calls))
        gm.graph.erase_node(guard_call)
        delattr(gm, "_guards_fn")
        gm.recompile()

    flattened = _aoti_flatten_inputs(gm, args, kwargs, options=options)
    flat_example_inputs, options = flattened

    from torch._export.utils import _compiling_state_context

    with _compiling_state_context():
        compiled = compile_fx_aot(
            gm,
            flat_example_inputs,  # type: ignore[arg-type]
            config_patches=options,
        )
        return compiled
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
# Config overrides for the "lite" mode; list_mode_options() exposes this dict
# under the "lite" key.
lite_mode_options = {
    # Fallback by default unless users explicitly annotated with
    # regional inductor compile.
    "fallback_by_default": True,
    "selective_decompose": True,
    # Disable reorder optimizations
    "reorder_for_peak_memory": False,
    "reorder_for_compute_comm_overlap": False,
    "triton.reorder_for_reducing_graph_partitions": False,
    # Disable pre-, joint-, post-grad passes
    "use_pre_grad_passes": False,
    "use_joint_graph_passes": False,
    "use_post_grad_passes": False,
    # Disable dead code elimination (dce) and buffer reuse
    "use_dce": False,
    "allow_buffer_reuse": False,
}
|
| 335 |
+
|
| 336 |
+
|
| 337 |
+
def list_mode_options(
|
| 338 |
+
mode: Optional[str] = None, dynamic: Optional[bool] = None
|
| 339 |
+
) -> dict[str, Any]:
|
| 340 |
+
r"""Returns a dictionary describing the optimizations that each of the available
|
| 341 |
+
modes passed to `torch.compile()` performs.
|
| 342 |
+
|
| 343 |
+
Args:
|
| 344 |
+
mode (str, optional): The mode to return the optimizations for.
|
| 345 |
+
If None, returns optimizations for all modes
|
| 346 |
+
dynamic (bool, optional): Whether dynamic shape is enabled.
|
| 347 |
+
|
| 348 |
+
Example::
|
| 349 |
+
>>> torch._inductor.list_mode_options()
|
| 350 |
+
"""
|
| 351 |
+
|
| 352 |
+
mode_options: dict[str, dict[str, bool]] = {
|
| 353 |
+
"default": {},
|
| 354 |
+
# lite backend for opt-in optimizations
|
| 355 |
+
"lite": lite_mode_options,
|
| 356 |
+
# enable cudagraphs
|
| 357 |
+
"reduce-overhead": {
|
| 358 |
+
"triton.cudagraphs": True,
|
| 359 |
+
},
|
| 360 |
+
# enable max-autotune
|
| 361 |
+
"max-autotune-no-cudagraphs": {
|
| 362 |
+
"max_autotune": True,
|
| 363 |
+
"coordinate_descent_tuning": True,
|
| 364 |
+
},
|
| 365 |
+
# enable max-autotune
|
| 366 |
+
# enable cudagraphs
|
| 367 |
+
"max-autotune": {
|
| 368 |
+
"max_autotune": True,
|
| 369 |
+
"triton.cudagraphs": True,
|
| 370 |
+
"coordinate_descent_tuning": True,
|
| 371 |
+
},
|
| 372 |
+
}
|
| 373 |
+
try:
|
| 374 |
+
return mode_options[mode] if mode else mode_options
|
| 375 |
+
except KeyError as e:
|
| 376 |
+
raise RuntimeError(
|
| 377 |
+
f"Unrecognized mode={mode}, should be one of: {', '.join(mode_options.keys())}"
|
| 378 |
+
) from e
|
| 379 |
+
|
| 380 |
+
|
| 381 |
+
def list_options() -> list[str]:
|
| 382 |
+
r"""Returns a dictionary describing the optimizations and debug configurations
|
| 383 |
+
that are available to `torch.compile()`.
|
| 384 |
+
|
| 385 |
+
The options are documented in `torch._inductor.config`.
|
| 386 |
+
|
| 387 |
+
Example::
|
| 388 |
+
|
| 389 |
+
>>> torch._inductor.list_options()
|
| 390 |
+
"""
|
| 391 |
+
|
| 392 |
+
from torch._inductor import config
|
| 393 |
+
|
| 394 |
+
current_config: dict[str, Any] = config.get_config_copy()
|
| 395 |
+
|
| 396 |
+
return list(current_config.keys())
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def cudagraph_mark_step_begin():
    """Indicates that a new iteration of inference or training is about to begin.

    Delegates to ``mark_step_begin`` from the sibling ``cudagraph_trees``
    module, which is imported at call time.
    """
    from .cudagraph_trees import mark_step_begin as _begin_new_step

    _begin_new_step()
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
def standalone_compile(
    gm: torch.fx.GraphModule,
    example_inputs: list[InputType],
    *,
    dynamic_shapes: Literal[
        "from_example_inputs", "from_tracing_context", "from_graph"
    ] = "from_graph",
    options: Optional[dict[str, Any]] = None,
    aot: bool = False,  # AOT mode, which uses BundledAOTAutogradCache
) -> CompiledArtifact:
    """
    Precompilation API for inductor.

    .. code-block:: python

        compiled_artifact = torch._inductor.standalone_compile(gm, args)
        compiled_artifact.save(path=path, format="binary")

        # Later on a new process
        loaded = torch._inductor.CompiledArtifact.load(path=path, format="binary")
        compiled_out = loaded(*args)

    Args:
        gm: Graph Module
        example_inputs: Inputs for the graph module
        dynamic_shapes: Selects where dynamic shape information comes from.
            "from_graph" (default): use the dynamic shapes in the passed-in
            graph module.
            "from_tracing_context": use the dynamic shape info in the ambient
            tracing context.
            "from_example_inputs": specialize the graph on the example_inputs.
        options: Inductor compilation options. A missing or empty value is
            forwarded as an empty dict.
        aot: Enables AOT mode, which uses BundledAOTAutogradCache.

    Returns:
        CompiledArtifact that can be saved to disk or invoked directly.
    """
    # Imported inside the function and aliased, because the implementation
    # shares its name with this public wrapper.
    from .standalone_compile import standalone_compile as _compile_impl

    return _compile_impl(
        gm,
        example_inputs,
        dynamic_shapes=dynamic_shapes,
        options=options or {},
        aot=aot,
    )
|