koichi12 commited on
Commit
8caf96a
·
verified ·
1 Parent(s): 6229f35

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Visitor.cpython-311-x86_64-linux-gnu.so +3 -0
  3. tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/lib/libcheckpoint.so +3 -0
  4. tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-311.pyc +3 -0
  5. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/impl.py +976 -0
  6. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/fft.py +590 -0
  7. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/__pycache__/__init__.cpython-311.pyc +0 -0
  8. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  9. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/__init__.py +1 -0
  10. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  11. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/__init__.py +18 -0
  12. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/__init__.cpython-311.pyc +0 -0
  13. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/ns/fx/utils.py +533 -0
  14. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/__init__.cpython-311.pyc +0 -0
  15. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/_correct_bias.cpython-311.pyc +0 -0
  16. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/_equalize.cpython-311.pyc +0 -0
  17. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/fuser_method_mappings.cpython-311.pyc +0 -0
  18. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/quant_type.cpython-311.pyc +0 -0
  19. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/quantize_pt2e.cpython-311.pyc +0 -0
  20. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/stubs.cpython-311.pyc +0 -0
  21. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/native.cpython-311.pyc +0 -0
  22. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/_qnnpack_pt2e.py +160 -0
  23. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/fbgemm.py +116 -0
  24. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/native.py +204 -0
  25. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fuser_method_mappings.py +259 -0
  26. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__init__.py +3 -0
  27. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/_decomposed.cpython-311.pyc +0 -0
  28. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/_equalize.cpython-311.pyc +0 -0
  29. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/custom_config.cpython-311.pyc +0 -0
  30. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/fuse.cpython-311.pyc +0 -0
  31. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/graph_module.cpython-311.pyc +0 -0
  32. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/lower_to_qnnpack.cpython-311.pyc +0 -0
  33. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/match_utils.cpython-311.pyc +0 -0
  34. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/pattern_utils.cpython-311.pyc +0 -0
  35. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/prepare.cpython-311.pyc +0 -0
  36. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/qconfig_mapping_utils.cpython-311.pyc +0 -0
  37. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/quantize_handler.cpython-311.pyc +0 -0
  38. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_equalize.py +820 -0
  39. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__init__.py +0 -0
  40. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/__init__.cpython-311.pyc +0 -0
  41. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/detector.cpython-311.pyc +0 -0
  42. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/model_report.cpython-311.pyc +0 -0
  43. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/model_report_observer.cpython-311.pyc +0 -0
  44. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/model_report_visualizer.cpython-311.pyc +0 -0
  45. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/detector.py +1539 -0
  46. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/model_report_visualizer.py +666 -0
  47. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/quantize_handler.py +197 -0
  48. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/utils.py +885 -0
  49. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__init__.py +0 -0
  50. tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/export_utils.cpython-311.pyc +0 -0
.gitattributes CHANGED
@@ -69,3 +69,6 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/distl
69
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/console.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
70
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Debugger/__pycache__/libpython.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
71
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
 
 
 
 
69
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/rich/__pycache__/console.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
70
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Debugger/__pycache__/libpython.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
71
  tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
72
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/lib/libcheckpoint.so filter=lfs diff=lfs merge=lfs -text
73
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Visitor.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
74
+ tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Visitor.cpython-311-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8597f1985804f6c0c55b84d29a8744f0e2bc6600aaa695402499fbbbcba1decc
3
+ size 374848
tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cuda_cupti/lib/libcheckpoint.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38073c63ab8f022926f58f7cb39c565005f382bdfacd85822e7502a5256b6671
3
+ size 1509528
tuning-competition-baseline/.venv/lib/python3.11/site-packages/pip/_vendor/pkg_resources/__pycache__/__init__.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e41fea31e2f114e2b8bb3065092e62588a33b909a8fa70bc578e734128e529
3
+ size 176864
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/impl.py ADDED
@@ -0,0 +1,976 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import dataclasses
import functools
import inspect
import sys
import typing
import weakref

from torchgen.model import FunctionSchema, OperatorName, SchemaKind, BaseType, ListType, BaseTy

import torch
import torch._C as _C
import torch.library as library
from torch._library.abstract_impl import AbstractImplCtx
from torch.library import get_ctx

from .autograd import autograd_kernel_indirection, construct_autograd_kernel

# NOTE: this string is placed after the imports, so it is a plain expression
# statement rather than the module docstring.
"""
For a detailed guide on custom ops, please see
https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk

This file includes pieces of the implementation of our custom operator API.
"""

__all__ = ["custom_op", "CustomOp", "get_ctx", "AbstractImplCtx"]

# Maps a device-type string (as accepted by CustomOp.impl) to the
# corresponding dispatch-key name used for kernel registration.
SUPPORTED_DEVICE_TYPE_TO_KEY = {
    "cpu": "CPU",
    "cuda": "CUDA",
}

# We will not let users register CustomOps with anything that could look like
# PyTorch internals to avoid confusion.
RESERVED_NS = {
    "prim",
    "prims",
    "aten",
    "at",
    "torch",
    "pytorch",
}
43
+
44
+
45
def custom_op(
    qualname: str, manual_schema: typing.Optional[str] = None
) -> typing.Callable:
    r"""Creates a new CustomOp object.

    WARNING: if you're a user, please do not use this directly
    (instead use the torch._custom_ops APIs).
    Also please see the following for a detailed guide on custom ops.
    https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk

    In PyTorch, defining an op (short for "operator") is a two step-process:
    - we need to define (create) the op
    - we need to implement behavior for how the operator interacts with
      various PyTorch subsystems, like CPU/CUDA Tensors, Autograd, etc.

    This entrypoint defines the CustomOp object (the first step);
    you must then perform the second step by calling various methods on
    the CustomOp object.

    This API is used as a decorator (see examples).

    Arguments:
        qualname (str): Should be a string that looks like
            "namespace::operator_name". Operators in PyTorch need a namespace to
            avoid name collisions; a given operator may only be created once.
            If you are writing a Python library, we recommend the namespace to
            be the name of your top-level module. The operator_name must be
            the same as the name of the function you pass to custom_op
            (see examples).
        manual_schema (Optional[str]): Each PyTorch operator needs a schema that
            tells PyTorch the types of the inputs/outputs. If None (default),
            we will infer the schema from the type annotations on the function
            (see examples). Otherwise, if you don't want to use type annotations,
            you may provide us the schema string.

    Example::
        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
        >>> import numpy as np
        >>> from torch import Tensor
        >>>
        >>> # Step 1: define the CustomOp.
        >>> # We need to provide the decorator a "prototype function"
        >>> # (a function with Python ellipses as the body).
        >>> @custom_op("my_library::numpy_sin")
        >>> def numpy_sin(x: Tensor) -> Tensor:
        >>>     ...
        >>>
        >>> # numpy_sin is now an instance of class CustomOp
        >>> print(type(numpy_sin))
        >>>
        >>> # Step 2: Register an implementation for various PyTorch subsystems
        >>>
        >>> # Register an implementation for CPU tensors
        >>> @numpy_sin.impl('cpu')
        >>> def numpy_sin_impl_cpu(x):
        >>>     return torch.from_numpy(np.sin(x.numpy()))
        >>>
        >>> # Register an implementation for CUDA tensors
        >>> @numpy_sin.impl('cuda')
        >>> def numpy_sin_impl_cuda(x):
        >>>     return torch.from_numpy(np.sin(x.cpu().numpy())).to(x.device)
        >>>
        >>> x = torch.randn(3)
        >>> numpy_sin(x)  # calls numpy_sin_impl_cpu
        >>>
        >>> x_cuda = x.cuda()
        >>> numpy_sin(x_cuda)  # calls numpy_sin_impl_cuda

    """

    def inner(func):
        # Only plain Python functions are accepted: we need a real function
        # object to read its name, annotations, and module below.
        if not inspect.isfunction(func):
            raise ValueError(
                f"custom_op(...)(func): Expected `func` to be a Python "
                f"function, got: {type(func)}"
            )

        ns, name = parse_qualname(qualname)
        validate_namespace(ns)
        # The prototype function's name must match the operator name so the
        # resulting object is unambiguous to readers.
        if func.__name__ != name:
            raise ValueError(
                f"custom_op(qualname='{qualname}', ...)(func): expected `func` "
                f"to have name '{name}' but got '{func.__name__}'. "
                f"Please either change the name of `func` or the qualname that "
                f"is passed to `custom_op`"
            )

        # Either infer the schema from type annotations or take the one the
        # user wrote by hand (in which case we cross-check it against `func`).
        schema = infer_schema(func) if manual_schema is None else manual_schema
        schema_str = f"{name}{schema}"
        function_schema = FunctionSchema.parse(schema_str)
        validate_schema(function_schema)
        if manual_schema is not None:
            validate_function_matches_schema(function_schema, func)

        # Register the operator with the C++ dispatcher and wrap the handle.
        lib = library.Library(ns, "FRAGMENT")
        lib.define(schema_str)
        ophandle = find_ophandle_or_throw(ns, function_schema.name)
        result = CustomOp(lib, ns, function_schema, name, ophandle, _private_access=True)

        # Make the CustomOp introspect like the prototype function it wraps.
        result.__name__ = func.__name__
        result.__module__ = func.__module__
        result.__doc__ = func.__doc__

        # NB: Use weakref.proxy so these dispatcher-held callbacks do not keep
        # the CustomOp alive (the global registry is the owning reference).
        library.impl(lib, result._opname, "Autograd")(
            autograd_kernel_indirection(weakref.proxy(result))
        )

        torch._C._dispatch_set_report_error_callback(
            ophandle, functools.partial(report_error_callback, weakref.proxy(result))
        )

        return result

    return inner
159
+
160
+
161
# Global dictionary holding references to all CustomOp objects
# Yes, it keeps all CustomOps alive (see NOTE [CustomOp lifetime])
# Used to query the CustomOp associated with a specific C++ dispatcher operator.
# An example usage is FakeTensor: FakeTensor checks if a specific operator
# has an implementation registered via the CustomOp API.
# Indexed by qualname (e.g. aten::foo)
global_registry: typing.Dict[str, "CustomOp"] = {}
168
+
169
+
170
+ class CustomOp:
171
+ r"""Class for custom operators in PyTorch.
172
+
173
+ Use the CustomOp API to create user-defined custom operators that behave
174
+ just like regular PyTorch operators (e.g. torch.sin, torch.mm) when it
175
+ comes to various PyTorch subsystems (like torch.compile).
176
+
177
+ To construct a `CustomOp`, use `custom_op`.
178
+ """
179
+
180
+ def __init__(self, lib, cpp_ns, schema, operator_name, ophandle, *, _private_access=False):
181
+ super().__init__()
182
+ if not _private_access:
183
+ raise RuntimeError(
184
+ "The CustomOp constructor is private and we do not guarantee "
185
+ "BC for it. Please use custom_op(...) to create a CustomOp object"
186
+ )
187
+ name = f"{cpp_ns}::{operator_name}"
188
+ self._schema = schema
189
+ self._cpp_ns = cpp_ns
190
+ self._lib: library.Library = lib
191
+ self._ophandle: _C._DispatchOperatorHandle = ophandle
192
+ # Has the name of the op, e.g. "foo". We cache here for convenience.
193
+ self._opname: str = operator_name
194
+ # this is _opname but with namespace. e.g. "custom::foo"
195
+ self._qualname: str = name
196
+ self.__name__ = None # mypy requires this
197
+ # NB: Some of these impls are registered as kernels to DispatchKeys.
198
+ # Modifying the _impls dict directly won't do anything in that case.
199
+ self._impls: typing.Dict[str, typing.Optional[FuncAndLocation]] = {}
200
+ # See NOTE [CustomOp autograd kernel indirection]
201
+ self._registered_autograd_kernel_indirection = False
202
+
203
+ global_registry[self._qualname] = self
204
+
205
+ def _register_autograd_kernel_indirection(self):
206
+ assert not self._registered_autograd_kernel_indirection
207
+ self._lib.impl(self._opname, autograd_kernel_indirection(weakref.proxy(self)), "Autograd")
208
+ self._registered_autograd_kernel_indirection = True
209
+
210
+ # Records the impl and the source location in self._impls
211
+ # Note that this doesn't cause torch.library to use the impl, that
212
+ # needs to be done in a separate self._lib.impl call.
213
+ def _register_impl(self, kind, func, stacklevel=2):
214
+ if self._has_impl(kind):
215
+ func_and_location = self._impls[kind]
216
+ assert func_and_location is not None # Pacify mypy
217
+ location = func_and_location.location
218
+ raise RuntimeError(
219
+ f"Attempting to register a {kind} impl for operator {self._qualname} "
220
+ f"that already has a {kind} impl registered from Python at "
221
+ f"{location}. This is not supported."
222
+ )
223
+ frame = inspect.getframeinfo(sys._getframe(stacklevel))
224
+ location = f"{frame.filename}:{frame.lineno}"
225
+ self._impls[kind] = FuncAndLocation(func, location)
226
+
227
+ def _get_impl(self, kind):
228
+ return self._impls[kind]
229
+
230
+ def _has_impl(self, kind):
231
+ return kind in self._impls
232
+
233
+ def _destroy(self):
234
+ # NOTE: [CustomOp lifetime]
235
+ # A CustomOp, once created, lives forever. The mechanism is that the
236
+ # global registry holds a reference to it. However, to make testing
237
+ # easier, we want to be able to destroy CustomOp objects.
238
+ # CustomOp._destroy does the job, though it leaves the CustomOp
239
+ # in a garbage state.
240
+ del self._lib
241
+
242
+ opnamespace = getattr(torch.ops, self._cpp_ns)
243
+ if hasattr(opnamespace, self._opname):
244
+ delattr(opnamespace, self._opname)
245
+
246
+ del global_registry[self._qualname]
247
+
248
+ def __repr__(self):
249
+ return f'<CustomOp(op="{self._qualname}")>'
250
+
251
+ def __call__(self, *args, **kwargs):
252
+ # Bypass torch.ops.* and directly do OperatorHandle::callBoxed.
253
+ # Using torch.ops.* is a bit of a pain (it can be slow and it has lifetime
254
+ # issues from caching operators that make testing CustomOp difficult).
255
+ result = _C._dispatch_call_boxed(self._ophandle, *args, **kwargs)
256
+ return result
257
+
258
+ def impl(
259
+ self, device_types: typing.Union[str, typing.Iterable[str]], _stacklevel=2,
260
+ ) -> typing.Callable:
261
+ r"""Register an implementation for a device type for this CustomOp object.
262
+
263
+ WARNING: if you're a user, please do not use this directly
264
+ (instead use the torch._custom_ops APIs).
265
+ Also please see the following for a detailed guide on custom ops.
266
+ https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk
267
+
268
+ If the CustomOp is passed multiple Tensor inputs with different device
269
+ types, it will dispatch to the registered implementation for the highest
270
+ priority device type among those present.
271
+ The supported device types, in order of priority, are {'cuda', 'cpu'}.
272
+
273
+ This API is used as a decorator (see examples).
274
+
275
+ Arguments:
276
+ device_types (str or Iterable[str]): the device type(s) to register the function for.
277
+
278
+ Examples::
279
+ >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA)
280
+ >>> import numpy as np
281
+ >>> from torch import Tensor
282
+ >>>
283
+ >>> @custom_op("my_library::numpy_cos")
284
+ >>> def numpy_cos(x: Tensor) -> Tensor:
285
+ >>> ...
286
+ >>>
287
+ >>> # Register an implementation for CPU Tensors
288
+ >>> @numpy_cos.impl('cpu')
289
+ >>> def numpy_cos_impl_cpu(x):
290
+ >>> return torch.from_numpy(np.cos(x.numpy()))
291
+ >>>
292
+ >>> # Register an implementation for CUDA Tensors
293
+ >>> @numpy_cos.impl('cuda')
294
+ >>> def numpy_cos_impl_cuda(x):
295
+ >>> return torch.from_numpy(np.cos(x.cpu().numpy())).to(x.device)
296
+ >>>
297
+ >>> x = torch.randn(3)
298
+ >>> numpy_cos(x) # calls numpy_cos_impl_cpu
299
+ >>>
300
+ >>> x_cuda = x.cuda()
301
+ >>> numpy_cos(x) # calls numpy_cos_impl_cuda
302
+
303
+ """
304
+ if isinstance(device_types, str):
305
+ device_types = [device_types]
306
+ for device_type in device_types:
307
+ validate_device_type(device_type)
308
+
309
+ def inner(f):
310
+ for device_type in set(device_types):
311
+ self._check_doesnt_have_library_impl(device_type)
312
+ self._register_impl(device_type, f, stacklevel=_stacklevel)
313
+ dispatch_key = SUPPORTED_DEVICE_TYPE_TO_KEY[device_type]
314
+ library.impl(self._lib, self._opname, dispatch_key)(f)
315
+ return f
316
+
317
+ return inner
318
+
319
+ def _check_doesnt_have_library_impl(self, device_type):
320
+ if self._has_impl(device_type):
321
+ return
322
+ key = SUPPORTED_DEVICE_TYPE_TO_KEY[device_type]
323
+ if _C._dispatch_has_computed_kernel_for_dispatch_key(self._qualname, key):
324
+ raise RuntimeError(
325
+ f"impl(..., device_types={device_type}): the operator {self._qualname} "
326
+ f"already has an implementation for this device type via a "
327
+ f"pre-existing torch.library or TORCH_LIBRARY registration.")
328
+
329
+ def impl_factory(self) -> typing.Callable:
330
+ r"""Register an implementation for a factory function."""
331
+
332
+ def inner(f):
333
+ self._register_impl("factory", f)
334
+ library.impl(self._lib, self._opname, "BackendSelect")(f)
335
+ return f
336
+
337
+ return inner
338
+
339
+ def impl_abstract(self, _stacklevel=2) -> typing.Callable:
340
+ r"""Register an abstract implementation for this operator.
341
+
342
+ WARNING: please do not use this directly (and instead use the torch._custom_ops
343
+ APIs). Also please see the following for a detailed guide on custom ops.
344
+ https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk
345
+
346
+ An "abstract implementation" specifies the behavior of this operator on
347
+ Tensors that carry no data. Given some input Tensors with certain properties
348
+ (sizes/strides/storage_offset/device), it specifies what the properties of
349
+ the output Tensors are.
350
+
351
+ The abstract implementation has the same signature as the operator.
352
+ It is run for both FakeTensors and meta tensors. To write an abstract
353
+ implementation, assume that all Tensor inputs to the operator are
354
+ regular CPU/CUDA/Meta tensors, but they do not have storage, and
355
+ you are trying to return regular CPU/CUDA/Meta tensor(s) as output.
356
+ The abstract implementation must consist of only PyTorch operations
357
+ (and may not directly access the storage or data of any input or
358
+ intermediate Tensors).
359
+
360
+ This API is used as a decorator (see examples).
361
+
362
+ Examples::
363
+ >>> import numpy as np
364
+ >>> from torch import Tensor
365
+ >>>
366
+ >>> # Example 1: an operator without data-dependent output shape
367
+ >>> @custom_op('my_library::custom_linear')
368
+ >>> def custom_linear(x: Tensor, weight: Tensor, bias: Tensor) -> Tensor:
369
+ >>> ...
370
+ >>>
371
+ >>> @custom_linear.impl_abstract()
372
+ >>> def custom_linear_abstract(x, weight):
373
+ >>> assert x.dim() == 2
374
+ >>> assert weight.dim() == 2
375
+ >>> assert bias.dim() == 1
376
+ >>> assert x.shape[1] == weight.shape[1]
377
+ >>> assert weight.shape[0] == bias.shape[0]
378
+ >>> assert x.device == weight.device
379
+ >>>
380
+ >>> return (x @ weight.t()) + bias
381
+ >>>
382
+ >>> # Example 2: an operator with data-dependent output shape
383
+ >>> @custom_op('my_library::custom_nonzero')
384
+ >>> def custom_nonzero(x: Tensor) -> Tensor:
385
+ >>> ...
386
+ >>>
387
+ >>> @custom_nonzero.impl_abstract()
388
+ >>> def custom_nonzero_abstract(x):
389
+ >>> # Number of nonzero-elements is data-dependent.
390
+ >>> # Since we cannot peek at the data in an abstract impl,
391
+ >>> # we use the ctx object to construct a new symint that
392
+ >>> # represents the data-dependent size.
393
+ >>> ctx = torch._custom_op.get_ctx()
394
+ >>> nnz = ctx.create_unbacked_symint()
395
+ >>> shape = [x.dim(), nnz]
396
+ >>> result = x.new_empty(shape, dtype=torch.long)
397
+ >>> return result
398
+ >>>
399
+ >>> @custom_nonzero.impl(['cpu', 'cuda'])
400
+ >>> def custom_nonzero_impl(x):
401
+ >>> x_np = to_numpy(x)
402
+ >>> res = np.stack(np.nonzero(x_np), axis=1)
403
+ >>> # unbacked symbolic ints in PyTorch must be >= 2, so we
404
+ >>> # constrain the range to at least 2
405
+ >>> if res.shape[0] <= 1:
406
+ >>> raise RuntimeError("not supported")
407
+ >>> return torch.tensor(res, device=x.device)
408
+
409
+ """
410
+
411
+ def inner(f):
412
+ self._check_doesnt_have_library_meta_impl()
413
+ self._register_impl("abstract", f, stacklevel=_stacklevel)
414
+ location = self._get_impl("abstract").location
415
+
416
+ qualname = self._qualname
417
+
418
+ # Handle DispatchKey.Meta registration
419
+ @functools.wraps(f)
420
+ def f_with_ctx(*args, **kwargs):
421
+ def error_on_ctx():
422
+ raise RuntimeError(
423
+ f"Attempted to call get_ctx() for the meta implementation "
424
+ f"for {qualname}."
425
+ f"You have presumably called get_ctx() because the operator "
426
+ f"has a data-dependent output shape; if so, there is no "
427
+ f"such meta implementation and this error is the correct "
428
+ f"behavior. Otherwise, please remove the call to get_ctx() "
429
+ f"in the implementation registered with impl_abstract "
430
+ f"at {location}"
431
+ )
432
+
433
+ with torch._library.abstract_impl.set_ctx_getter(error_on_ctx):
434
+ return f(*args, **kwargs)
435
+
436
+ self._lib.impl(self._opname, f_with_ctx, "Meta")
437
+ return f
438
+
439
+ return inner
440
+
441
+ def _check_can_register_backward(self):
442
+ def error(detail):
443
+ raise RuntimeError(
444
+ f"Cannot use torch._custom_ops APIs to register backward "
445
+ f"formula for {detail}. Got operator "
446
+ f"{self._qualname} with schema: {schema}"
447
+ )
448
+
449
+ schema = self._schema
450
+ if schema.kind() != SchemaKind.functional:
451
+ error("non-functional operator")
452
+
453
+ rets = schema.returns
454
+ if not schema.returns:
455
+ error("operator with no returns")
456
+
457
+ assert len(rets) > 0
458
+ is_non_mutating_view = any(
459
+ r.annotation is not None and not r.annotation.is_write for r in rets
460
+ )
461
+ if is_non_mutating_view:
462
+ error("operator that returns views")
463
+
464
+ # We make assumptions about the schema's return types.
465
+ allowed_return_types = {
466
+ BaseType(BaseTy.int): "int",
467
+ BaseType(BaseTy.SymInt): "SymInt",
468
+ BaseType(BaseTy.bool): "bool",
469
+ BaseType(BaseTy.float): "float",
470
+ BaseType(BaseTy.Tensor): "Tensor",
471
+ ListType(BaseType(BaseTy.Tensor), None): "List[Tensor]",
472
+ }
473
+ for ret in schema.returns:
474
+ if ret.type in allowed_return_types:
475
+ continue
476
+ error(f"operator with return not in {list(allowed_return_types.values())} (got {ret.type})")
477
+
478
+ def _check_doesnt_have_library_autograd_impl(self):
479
+ if self._registered_autograd_kernel_indirection:
480
+ return
481
+
482
+ if _C._dispatch_has_kernel_for_dispatch_key(self._qualname, "CompositeImplicitAutograd"):
483
+ raise RuntimeError(
484
+ f"impl_backward/impl_save_for_backward: the operator {self._qualname} "
485
+ f"already has an implementation for this device type via a "
486
+ f"pre-existing registration to DispatchKey::CompositeImplicitAutograd."
487
+ f"CompositeImplicitAutograd operators do not need an autograd formula; "
488
+ f"instead, the operator will decompose into its constituents and those "
489
+ f"can have autograd formulas defined on them.")
490
+
491
+ # We can improve this by adding "all Autograd<BACKEND> keys", but
492
+ # realistically people will just be using this API for CPU/CUDA for now.
493
+ for key in ["Autograd", "AutogradCPU", "AutogradCUDA"]:
494
+ if _C._dispatch_has_kernel_for_dispatch_key(self._qualname, key):
495
+ raise RuntimeError(
496
+ f"impl_backward/impl_save_for_backward: "
497
+ f"the operator {self._qualname} already has an Autograd kernel "
498
+ f"registered to DispatchKey::{key} vi a pre-existing "
499
+ f"torch.library or TORCH_LIBRARY registration. Please either "
500
+ f"remove those registrations or don't use the torch._custom_ops APIs")
501
+
502
+ def _check_doesnt_have_library_meta_impl(self):
503
+ if self._has_impl("abstract"):
504
+ return
505
+
506
+ # If the user's operator is CompositeExplicitAutograd,
507
+ # allow them to impl_abstract. This is being pragmatic
508
+ # (existing custom ops may have CompositeExplicitAutograd
509
+ # registration that don't work with Meta kernels, so this
510
+ # gives them an escape hatch).
511
+ if (
512
+ _C._dispatch_has_kernel_for_dispatch_key(self._qualname, "CompositeExplicitAutograd")
513
+ and not _C._dispatch_has_kernel_for_dispatch_key(self._qualname, "Meta")
514
+ ):
515
+ return
516
+
517
+ # Otherwise, if the user's already has a Meta kernel or their
518
+ # op is CompositeImplicitAutograd or some other alias dispatch key,
519
+ # raise.
520
+
521
+ # Special case for CompositeImplicitAutograd
522
+ if _C._dispatch_has_kernel_for_dispatch_key(self._qualname, "CompositeImplicitAutograd"):
523
+ raise RuntimeError(
524
+ f"impl_abstract(...): the operator {self._qualname} "
525
+ f"already has an implementation for this device type via a "
526
+ f"pre-existing registration to DispatchKey::CompositeImplicitAutograd."
527
+ f"CompositeImplicitAutograd operators do not need an abstract impl; "
528
+ f"instead, the operator will decompose into its constituents and those "
529
+ f"can have abstract impls defined on them.")
530
+
531
+ if _C._dispatch_has_kernel_for_dispatch_key(self._qualname, "Meta"):
532
+ raise RuntimeError(
533
+ f"impl_abstract(...): the operator {self._qualname} "
534
+ f"already has an DispatchKey::Meta implementation via a "
535
+ f"pre-existing torch.library or TORCH_LIBRARY registration. "
536
+ f"Please either remove that registration or don't call impl_abstract.")
537
+
538
    # NOTE ["backward", "save_for_backward", and "autograd"]
    # As a part of the explicit autograd API, a user must provide us
    # a "save_for_backward" function and a "backward" function.
    # When both of these have been provided, then we automatically
    # construct the "autograd" kernel.
    def _register_autograd_kernel(self):
        """Build and register the combined "autograd" kernel.

        Precondition: both the "backward" and "save_for_backward" impls have
        already been registered (asserted below).
        """
        assert self._has_impl("backward")
        assert self._has_impl("save_for_backward")
        # Combine the user's save_for_backward/backward functions into one
        # autograd kernel for this operator.
        kernel = construct_autograd_kernel(
            self._schema,
            self._output_differentiability,
            self,
            get_op(self._qualname),
            self._get_impl("save_for_backward").func,
            self._get_impl("backward").func)
        self._register_impl("autograd", kernel)
554
+
555
    def impl_save_for_backward(self, _stacklevel=2):
        r"""Register a function that tells us what to save for backward.

        Please see impl_backward for more details.
        """
        def inner(f):
            # Refuse to register if a conflicting autograd registration
            # already exists outside this API.
            self._check_can_register_backward()
            self._check_doesnt_have_library_autograd_impl()
            if not self._registered_autograd_kernel_indirection:
                self._register_autograd_kernel_indirection()
            self._register_impl("save_for_backward", f, stacklevel=_stacklevel)
            # Once both halves are present, build the actual autograd kernel.
            if self._has_impl("backward"):
                self._register_autograd_kernel()
        return inner
569
+
570
    def impl_backward(self, output_differentiability=None, _stacklevel=2):
        r"""Registers a backward formula.

        WARNING: if you're a user, please do not use this directly
        (instead use the torch._custom_ops APIs).
        Also please see the following for a detailed guide on custom ops.
        https://docs.google.com/document/d/1aGWtgxV3HppuxQAdddyPrs74_aEntpkYt9MalnCKnhk

        In order for the CustomOp to work with autograd, you need to register
        a backward formula. There are two pieces to this:
        1. You must give us a function to specify what to save for backward.
           Call this the "save for backward" function.
        2. You must give us a function that computes gradients. Call this the
           "backward" function.

        Use `impl_save_for_backward` to define a "save for backward" function
        that specifies what gets saved for backward. The function should accept
        two arguments ``(inputs, output)`` and return the quantities to be saved
        for backward.

        During runtime, when you call the CustomOp, PyTorch will invoke the
        "save for backward" function with the inputs and output of the CustomOp.

        Use `impl_backward` to define the "backward" function. The backward
        function must accept ``(ctx, saved, *grads)``:
        - ``ctx`` is a context object where we may provide information
        - ``saved`` is exactly what gets returned from the "save for backward"
          function
        - ``grads`` is one or more gradients. The number of gradients matches
          the number of outputs of the CustomOp.

        The backward function must return a dict that maps the name of
        an input to the CustomOp to its corresponding gradient. All inputs that
        were declared to be Tensors in the CustomOp definition must be accounted
        for in the dict. The gradient may be a Tensor or None.

        """
        if output_differentiability is not None:
            # Local helper so all three validation failures below raise the
            # same error message.
            def yell():
                raise RuntimeError(
                    f"impl_backward(output_differentiability): expected "
                    f"output_differentiability to be a list of bools with "
                    f"length equal to the number of outputs of this CustomOp "
                    f"got: {output_differentiability}")

            if not isinstance(output_differentiability, list):
                yell()
            for diff in output_differentiability:
                if not isinstance(diff, bool):
                    yell()
            if len(self._schema.returns) != len(output_differentiability):
                yell()

        def inner(f):
            # Refuse to register if a conflicting autograd registration
            # already exists outside this API.
            self._check_can_register_backward()
            self._check_doesnt_have_library_autograd_impl()
            if not self._registered_autograd_kernel_indirection:
                self._register_autograd_kernel_indirection()
            self._register_impl("backward", f, stacklevel=_stacklevel)
            self._output_differentiability = output_differentiability
            # Once both halves are present, build the actual autograd kernel.
            if self._has_impl("save_for_backward"):
                self._register_autograd_kernel()
        return inner
633
+
634
+
635
@dataclasses.dataclass
class FuncAndLocation:
    """A registered callable paired with a description of where it was registered."""
    # The user-supplied implementation function.
    func: typing.Callable
    # Human-readable registration site, used for error reporting.
    location: str
639
+
640
+
641
def find_ophandle_or_throw(cpp_ns: str, operator_name: OperatorName):
    """Look up the dispatcher handle for ``cpp_ns::operator_name``.

    Raises (via the C++ binding) if no such schema has been registered.
    """
    # The dispatcher expects "" (not None) for the default overload.
    overload_name = (
        "" if operator_name.overload_name is None else operator_name.overload_name
    )
    return _C._dispatch_find_schema_or_throw(
        f"{cpp_ns}::{str(operator_name.name)}", overload_name
    )
648
+
649
+
650
def validate_namespace(ns: str) -> None:
    """Validate a user-provided operator namespace.

    A namespace must not contain '.' and must not collide with one of the
    namespaces reserved by PyTorch (RESERVED_NS).

    Raises:
        ValueError: if the namespace is malformed or reserved.
    """
    error_message = None
    if "." in ns:
        error_message = (
            f'custom_op(..., ns="{ns}"): expected ns to not contain any . (and be a '
            f"valid variable name)"
        )
    elif ns in RESERVED_NS:
        error_message = (
            f"custom_op(..., ns='{ns}'): '{ns}' is a reserved namespace, "
            f"please choose something else. "
        )
    if error_message is not None:
        raise ValueError(error_message)
661
+
662
def validate_schema(schema: FunctionSchema) -> None:
    """Reject schemas that the custom-op API cannot support.

    Only functional operators (no mutation, no views, at least one return)
    and operators without a `self` argument are allowed.

    Raises:
        ValueError: if the schema is non-functional or has a `self` argument.
    """
    if not torch._library.utils.is_functional_schema(schema):
        raise ValueError(
            f"custom_op only supports functional operators "
            f"(ops that do not mutate any inputs, do not return "
            f"views of the inputs, and has at least one return). "
            f"Got the following non-functional schema: {schema}"
        )

    # For simplicity: don't allow self arguments
    if schema.arguments.self_arg is not None:
        raise ValueError(
            f"custom_op does not support arguments named 'self'. Please "
            f"rename your argument. Got: {schema}"
        )
677
+
678
+
679
def parse_qualname(qualname: str) -> typing.Tuple[str, str]:
    """Split a qualified operator name "ns::opname" into (ns, opname).

    Raises:
        ValueError: if the '::' separator is missing, or if the operator name
            contains a '.' (overloads are not supported by this API).
    """
    pieces = qualname.split("::", 1)
    if len(pieces) != 2:
        raise ValueError(f"Expected there to be a namespace in {qualname}, i.e. The "
                         f"operator name should look something like ns::foo")
    namespace, opname = pieces
    if '.' in opname:
        raise ValueError(f"The torch.custom_ops APIs do not handle overloads, "
                         f"i.e. operator names with '.' in them. "
                         f"Please name your operator something like ns::foo. "
                         f"Got: {qualname}")
    return namespace, opname
690
+
691
+
692
def validate_device_type(device_type: str) -> None:
    """Raise ValueError unless ``device_type`` is a supported device key.

    Supported device types are the keys of SUPPORTED_DEVICE_TYPE_TO_KEY.
    """
    if device_type not in SUPPORTED_DEVICE_TYPE_TO_KEY:
        raise ValueError(
            f"CustomOp.impl(device_types=[{device_type}, ...]): we only support device_type "
            f"in {SUPPORTED_DEVICE_TYPE_TO_KEY.keys()}."
        )
698
+
699
+
700
def supported_param(param: inspect.Parameter) -> bool:
    """Return True if ``param`` has a parameter kind custom ops can handle.

    Only plain positional-or-keyword and keyword-only parameters are
    supported (no positional-only parameters, *args, or **kwargs).
    """
    allowed_kinds = {
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    }
    return param.kind in allowed_kinds
705
+
706
+
707
def validate_function_matches_schema(
    schema: FunctionSchema, func: typing.Callable
) -> None:
    """Check that ``func``'s signature matches a manually-provided schema.

    Enforced below:
    - only positional-or-keyword and keyword-only parameters,
    - no type annotations on parameters or the return value,
    - parameter names and order match the schema,
    - no default arguments on either side.

    Raises:
        ValueError: on any mismatch.
    """
    sig = inspect.signature(func)

    if not all(supported_param(p) for _, p in sig.parameters.items()):
        raise ValueError(
            f"custom_op(..., manual_schema)(func): positional-only args, "
            f"varargs, and kwargs are not supported. Please rewrite `func` "
            f"to not have them. Got `func` with signature: {sig}"
        )

    # Annotations are banned because the schema is the single source of truth.
    if (
        any(
            p.annotation is not inspect.Parameter.empty
            for _, p in sig.parameters.items()
        )
        or sig.return_annotation is not inspect.Signature.empty
    ):
        raise ValueError(
            f"custom_op(..., manual_schema)(func): When passing in a manual "
            f"schema, we expect `func` to have no type annotations to avoid "
            f"ambiguity. Got `func` with signature: {sig}"
        )

    # Split the signature the same way the schema splits its arguments.
    positional = [
        (name, param)
        for name, param in sig.parameters.items()
        if param.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD
    ]
    kwargonly = [
        (name, param)
        for name, param in sig.parameters.items()
        if param.kind == inspect.Parameter.KEYWORD_ONLY
    ]

    def error():
        raise ValueError(
            f"custom_op(..., manual_schema)(func): When passing in a manual "
            f"schema, we expect `func`'s signature to match `manual_schema` "
            f"(aside from type annotations). "
            f"func's signature: {sig}, manual_schema: {schema}"
        )

    def error_default_args():
        raise ValueError(
            f"custom_op(..., manual_schema)(func): "
            f"neither func nor manual_schema should have default "
            f"arguments. Got "
            f"func's signature: {sig}, manual_schema: {schema}"
        )

    def compare(sig_args, schema_args):
        # Positionally compare names and reject defaults on either side.
        if len(sig_args) != len(schema_args):
            error()
        for (name, param), arg in zip(sig_args, schema_args):
            if name != arg.name:
                error()
            if param.default is not inspect.Parameter.empty or arg.default is not None:
                error_default_args()

    compare(positional, schema.arguments.flat_positional)
    compare(kwargonly, schema.arguments.flat_kwarg_only)
770
+
771
+
772
def infer_schema(prototype_function: typing.Callable) -> str:
    """Infer an operator schema string from a fully-annotated Python function.

    Every parameter must carry a supported type annotation (see
    SUPPORTED_PARAM_TYPES) and the return annotation must be one of
    SUPPORTED_RETURN_TYPES.

    Raises:
        ValueError: if any parameter or the return type is unsupported.
    """
    sig = inspect.signature(prototype_function)

    def error_fn(what):
        raise ValueError(
            f"custom_op(...)(func): {what} " f"Got func with signature {sig})"
        )

    # Each parameter contributes "<schema type> <name>".
    params = [
        parse_param(name, param, error_fn) for name, param in sig.parameters.items()
    ]
    ret = parse_return(sig.return_annotation, error_fn)
    return f"({', '.join(params)}) -> {ret}"
785
+
786
+
787
def parse_param(name, param, error_fn):
    """Render one annotated parameter as a "<schema type> <name>" fragment.

    ``error_fn`` is called (and raises) for unsupported parameter kinds,
    missing annotations, unsupported types, or default values.
    """
    if not supported_param(param):
        error_fn("We do not support positional-only args, varargs, or varkwargs.")

    if param.annotation is inspect.Parameter.empty:
        error_fn(f"Parameter {name} must have a type annotation.")

    if param.annotation not in SUPPORTED_PARAM_TYPES.keys():
        error_fn(
            f"Parameter {name} has unsupported type {param.annotation}. "
            f"The valid types are: {SUPPORTED_PARAM_TYPES.keys()}."
        )

    # Default values are rejected rather than translated into the schema.
    if param.default is not inspect.Parameter.empty:
        error_fn(
            f"Parameter {name} has a default value; this is not supported. "
            f"If you want to use default values then create a function with "
            f"default values that calls the CustomOp"
        )

    return f"{SUPPORTED_PARAM_TYPES[param.annotation]} {name}"
808
+
809
+
810
def derived_types(
    base_type, cpp_type, list_base, optional_base_list, optional_list_base
):
    """Return (python type, schema type string) pairs derived from a base type.

    Always includes the base type itself and its Optional variant; the three
    boolean flags control whether the Sequence ("T[]"),
    Sequence-of-Optional ("T?[]"), and Optional-Sequence ("T[]?") variants
    are also emitted.
    """
    pairs = [
        (base_type, cpp_type),
        (typing.Optional[base_type], f"{cpp_type}?"),
    ]
    conditional_variants = [
        (list_base, (typing.Sequence[base_type], f"{cpp_type}[]")),  # type: ignore[valid-type]
        (optional_base_list, (typing.Sequence[typing.Optional[base_type]], f"{cpp_type}?[]")),  # type: ignore[valid-type]
        (optional_list_base, (typing.Optional[typing.Sequence[base_type]], f"{cpp_type}[]?")),  # type: ignore[valid-type]
    ]
    pairs.extend(entry for enabled, entry in conditional_variants if enabled)
    return pairs
824
+
825
+
826
def get_supported_param_types():
    """Build the mapping from Python annotation to schema type string.

    Each table row expands (via derived_types) into the base type, its
    Optional variant, and whichever list variants the flags enable.
    """
    data = [
        # (python type, schema type, type[] variant, type?[] variant, type[]? variant
        (torch.Tensor, "Tensor", True, True, False),
        (int, "SymInt", True, False, True),
        (float, "float", True, False, True),
        (bool, "bool", True, False, True),
        (str, "str", False, False, False),
        (torch.types.Number, "Scalar", True, False, False),
        (torch.dtype, "ScalarType", False, False, False),
        (torch.device, "Device", False, False, False),
    ]
    result = []
    for line in data:
        result.extend(derived_types(*line))
    return dict(result)
842
+
843
+
844
# Mapping from supported Python return annotation -> schema return type string.
SUPPORTED_RETURN_TYPES = {
    torch.Tensor: "Tensor",
    typing.List[torch.Tensor]: "Tensor[]",
    int: "SymInt",
    float: "float",
    bool: "bool",
    torch.types.Number: "Scalar",
}
852
+
853
+
854
def parse_return(annotation, error_fn):
    """Render a return annotation as a schema return-type string.

    A Tuple annotation becomes "(T1, T2, ...)"; any other annotation must be
    a key of SUPPORTED_RETURN_TYPES. ``error_fn`` raises on unsupported types.
    """
    origin = typing.get_origin(annotation)
    if origin is not tuple:
        if annotation not in SUPPORTED_RETURN_TYPES.keys():
            error_fn(
                f"Return has unsupported type {annotation}. "
                f"The valid types are: {SUPPORTED_RETURN_TYPES}."
            )
        return SUPPORTED_RETURN_TYPES[annotation]

    # Tuple return: every element must itself be a supported return type.
    args = typing.get_args(annotation)
    for arg in args:
        if arg not in SUPPORTED_RETURN_TYPES:
            error_fn(
                f"Return has unsupported type {annotation}. "
                f"The valid types are: {SUPPORTED_RETURN_TYPES}."
            )

    return "(" + ", ".join([SUPPORTED_RETURN_TYPES[arg] for arg in args]) + ")"
873
+
874
+
875
# Annotation -> schema type string table, built once from the rows above.
SUPPORTED_PARAM_TYPES = get_supported_param_types()
876
+
877
+
878
def report_error_callback(custom_op: typing.Any, key: str) -> None:
    """Raise a descriptive NotImplementedError for a dispatch-key miss.

    Installed as the dispatcher's error callback for a custom op; ``key`` is
    the dispatch key that had no kernel registered.
    """
    if key == "Undefined":
        raise NotImplementedError(
            f"{custom_op}: There were no Tensor inputs to this operator "
            f"(e.g. you passed an empty list of Tensors). If your operator is a "
            f"factory function (that is, it takes no Tensors and constructs "
            f"a new one), then please use CustomOp.impl_factory to register "
            f"an implementation for it"
        )
    elif key == "Meta":
        raise NotImplementedError(
            f"{custom_op}: when running with device='Meta' tensors: there is no "
            f"abstract impl registered for this CustomOp. Please register one via "
            f"CustomOp.impl_abstract to get this CustomOp to work with Meta tensors"
        )
    elif key in ("CPU", "CUDA"):
        device = key.lower()
        raise NotImplementedError(
            f"{custom_op}: when running with device='{device}' tensors: there is no "
            f"{device} impl registered for this CustomOp. Please register one via "
            f"CustomOp.impl(device_type='{device}')"
        )
    # Fallback for every other dispatch key.
    raise NotImplementedError(
        f"{custom_op}: No implementation for dispatch key {key}. It is likely "
        f"that we have not added this functionality yet, please either open an "
        f"issue or if you're feeling adventurous, use the low-level "
        f"torch.library API"
    )
906
+
907
+
908
def custom_op_from_existing(op):
    """Wrap an already-registered operator overload in a CustomOp object."""
    ns = op.namespace
    # FRAGMENT: attach to the existing library namespace instead of defining one.
    lib = torch.library.Library(ns, "FRAGMENT")
    name = op.name().split("::")[-1]
    schema_str = str(op._schema)
    # CustomOp expects the schema string without the namespace
    schema_str = schema_str.split("::")[-1]
    schema = FunctionSchema.parse(schema_str)
    return CustomOp(lib, ns, schema, name, op, _private_access=True)
917
+
918
+
919
def get_op(qualname):
    """Return the default overload of the operator named ``qualname``.

    Raises:
        ValueError: if the namespace, operator, or default overload does not
            exist under torch.ops.
    """
    def error_not_found():
        raise ValueError(
            f"Could not find the operator {qualname}. Please make sure you have "
            f"already registered the operator and (if registered from C++) "
            f"loaded it via torch.ops.load_library.")

    ns, name = parse_qualname(qualname)
    # Walk torch.ops.<ns>.<name>.default, failing loudly at each missing step.
    if not hasattr(torch.ops, ns):
        error_not_found()
    opnamespace = getattr(torch.ops, ns)
    if not hasattr(opnamespace, name):
        error_not_found()
    packet = getattr(opnamespace, name)
    if not hasattr(packet, 'default'):
        error_not_found()
    return packet.default
936
+
937
+
938
def _find_custom_op(qualname, also_check_torch_library=False):
    """Find the CustomOp object for ``qualname``.

    Looks in the custom-op global registry first; if ``also_check_torch_library``
    is True, falls back to wrapping an existing torch.library registration.

    Raises:
        RuntimeError: if not in the registry and the fallback is disabled.
    """
    if qualname in global_registry:
        return global_registry[qualname]
    if not also_check_torch_library:
        raise RuntimeError(
            f"Could not find custom op \"{qualname}\". Did you register it via "
            f"the torch._custom_ops API?")
    overload = get_op(qualname)
    result = custom_op_from_existing(overload)
    return result
948
+
949
+
950
+ def get_abstract_impl(qualname):
951
+ if qualname not in torch._custom_op.impl.global_registry:
952
+ return None
953
+ custom_op = torch._custom_op.impl.global_registry[qualname]
954
+ if custom_op is None:
955
+ return None
956
+ if not custom_op._has_impl("abstract"):
957
+ return None
958
+ return custom_op._get_impl("abstract").func
959
+
960
+
961
def _custom_op_with_schema(qualname, schema, needs_fixed_stride_order=True):
    """Define a new operator from an explicit schema string and return it.

    Registers the schema under ``qualname``'s namespace, sets up the autograd
    kernel indirection and the dispatcher error callback, then returns the
    freshly created default overload.
    """
    ns, name = qualname.split("::")
    schema_str = f"{name}{schema}"
    function_schema = FunctionSchema.parse(schema_str)
    validate_schema(function_schema)
    # Tag so that inductor/export preserve the input stride order when requested.
    tags = [torch._C.Tag.needs_fixed_stride_order] if needs_fixed_stride_order else []
    lib = library.Library(ns, "FRAGMENT")
    lib.define(schema_str, tags=tags)
    ophandle = find_ophandle_or_throw(ns, function_schema.name)
    result = CustomOp(lib, ns, function_schema, name, ophandle, _private_access=True)
    result._register_autograd_kernel_indirection()

    # weakref.proxy avoids the callback keeping the CustomOp alive.
    torch._C._dispatch_set_report_error_callback(
        ophandle, functools.partial(report_error_callback, weakref.proxy(result))
    )
    return get_op(qualname)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/fft.py ADDED
@@ -0,0 +1,590 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ from typing import Iterable, List, Literal, NamedTuple, Optional, Sequence, Tuple, Union
4
+
5
+ import torch
6
+ import torch._prims as prims
7
+ import torch._prims_common as utils
8
+ from torch._decomp import register_decomposition
9
+ from torch._prims_common import DimsType, ShapeType, TensorLikeType
10
+ from torch._prims_common.wrappers import _maybe_convert_to_dtype, out_wrapper
11
+
12
+ __all__ = [
13
+ # Transforms
14
+ "fft",
15
+ "fft2",
16
+ "fftn",
17
+ "hfft",
18
+ "hfft2",
19
+ "hfftn",
20
+ "rfft",
21
+ "rfft2",
22
+ "rfftn",
23
+ "ifft",
24
+ "ifft2",
25
+ "ifftn",
26
+ "ihfft",
27
+ "ihfft2",
28
+ "ihfftn",
29
+ "irfft",
30
+ "irfft2",
31
+ "irfftn",
32
+ # Helpers
33
+ "fftshift",
34
+ "ifftshift",
35
+ ]
36
+
37
# Accepted values for the `norm` argument of the fft functions below.
NormType = Union[None, Literal["forward", "backward", "ortho"]]
_NORM_VALUES = {None, "forward", "backward", "ortho"}
# ATen operator namespace, used by the @register_decomposition decorators.
aten = torch._ops.ops.aten
40
+
41
+
42
def _apply_norm(
    x: TensorLikeType, norm: NormType, signal_numel: int, forward: bool
) -> TensorLikeType:
    """Apply normalization to the un-normalized FFT result"""
    torch._check(norm in _NORM_VALUES, lambda: f"Invalid normalization mode: {norm}")

    # "ortho" scales by 1/sqrt(N) in both directions.
    if norm == "ortho":
        return x * (1 / math.sqrt(signal_numel))

    # Otherwise scale by 1/N on the inverse transform under the default
    # ("backward") norm, or on the forward transform when norm == "forward".
    if forward:
        needs_scaling = norm == "forward"
    else:
        needs_scaling = norm is None or norm == "backward"
    return x * (1 / signal_numel) if needs_scaling else x
55
+
56
+
57
+ def _promote_type_fft(
58
+ dtype: torch.dtype, require_complex: bool, device: torch.device
59
+ ) -> torch.dtype:
60
+ """Helper to promote a dtype to one supported by the FFT primitives"""
61
+ if dtype.is_complex:
62
+ return dtype
63
+
64
+ # Promote integral to default float type
65
+ if not dtype.is_floating_point:
66
+ dtype = torch.get_default_dtype()
67
+
68
+ allowed_types = [torch.float32, torch.float64]
69
+ maybe_support_half = device.type in ["cuda", "meta"]
70
+
71
+ if maybe_support_half:
72
+ allowed_types.append(torch.float16)
73
+ torch._check(dtype in allowed_types, lambda: f"Unsupported dtype {dtype}")
74
+
75
+ if require_complex:
76
+ dtype = utils.corresponding_complex_dtype(dtype)
77
+
78
+ return dtype
79
+
80
+
81
def _maybe_promote_tensor_fft(
    t: TensorLikeType, require_complex: bool = False
) -> TensorLikeType:
    """Helper to promote a tensor to a dtype supported by the FFT primitives"""
    cur_type = t.dtype
    new_type = _promote_type_fft(cur_type, require_complex, t.device)
    # No-op when the dtype is already acceptable.
    return _maybe_convert_to_dtype(t, new_type)  # type: ignore[return-value]
88
+
89
+
90
+ def _resize_fft_input(
91
+ x: TensorLikeType, dims: Tuple[int, ...], sizes: Tuple[int, ...]
92
+ ) -> TensorLikeType:
93
+ """
94
+ Fixes the shape of x such that x.size(dims[i]) == sizes[i],
95
+ either by zero-padding, or by slicing x starting from 0.
96
+ """
97
+ assert len(dims) == len(sizes)
98
+ must_copy = False
99
+ x_sizes = x.shape
100
+ pad_amount = [0] * len(x_sizes) * 2
101
+ for i in range(len(dims)):
102
+ if sizes[i] == -1:
103
+ continue
104
+
105
+ if x_sizes[dims[i]] < sizes[i]:
106
+ must_copy = True
107
+ pad_idx = len(pad_amount) - 2 * dims[i] - 1
108
+ pad_amount[pad_idx] = sizes[i] - x_sizes[dims[i]]
109
+
110
+ if x_sizes[dims[i]] > sizes[i]:
111
+ x = x.narrow(dims[i], 0, sizes[i])
112
+
113
+ return torch.constant_pad_nd(x, pad_amount) if must_copy else x
114
+
115
+
116
def _fft_c2r(
    func_name: str,
    input: TensorLikeType,
    n: Optional[int],
    dim: int,
    norm: NormType,
    forward: bool,
) -> TensorLikeType:
    """Common code for performing any complex to real FFT (irfft or hfft)"""
    input = _maybe_promote_tensor_fft(input, require_complex=True)
    dims = (utils.canonicalize_dim(input.ndim, dim, wrap_scalar=False),)
    # Default output length for a one-sided input of size m is 2*(m - 1).
    last_dim_size = n if n is not None else 2 * (input.shape[dim] - 1)
    torch._check(
        last_dim_size >= 1,
        lambda: f"Invalid number of data points ({last_dim_size}) specified",
    )

    if n is not None:
        # The complex input holds only n // 2 + 1 one-sided coefficients.
        input = _resize_fft_input(input, dims=dims, sizes=(last_dim_size // 2 + 1,))

    if forward:
        # The forward c2r transform (hfft) conjugates the input first.
        input = torch.conj(input)

    output = prims.fft_c2r(input, dim=dims, last_dim_size=last_dim_size)
    return _apply_norm(output, norm=norm, signal_numel=last_dim_size, forward=forward)
141
+
142
+
143
def _fft_r2c(
    func_name: str,
    input: TensorLikeType,
    n: Optional[int],
    dim: int,
    norm: NormType,
    forward: bool,
    onesided: bool,
) -> TensorLikeType:
    """Common code for performing any real to complex FFT (rfft or ihfft)"""
    torch._check(
        not input.dtype.is_complex,
        lambda: f"{func_name} expects a floating point input tensor, but got {input.dtype}",
    )
    input = _maybe_promote_tensor_fft(input)
    dims = (utils.canonicalize_dim(input.ndim, dim, wrap_scalar=False),)
    dim_size = n if n is not None else input.shape[dim]
    torch._check(
        dim_size >= 1, lambda: f"Invalid number of data points ({dim_size}) specified"
    )

    if n is not None:
        input = _resize_fft_input(input, dims, (n,))

    ret = prims.fft_r2c(input, dim=dims, onesided=onesided)
    ret = _apply_norm(ret, norm, dim_size, forward)
    # The inverse r2c transform (ihfft) is the conjugate of the forward one.
    return ret if forward else torch.conj(ret)
170
+
171
+
172
def _fft_c2c(
    func_name: str,
    input: TensorLikeType,
    n: Optional[int],
    dim: int,
    norm: NormType,
    forward: bool,
) -> TensorLikeType:
    """Common code for performing any complex to complex FFT (fft or ifft)"""
    torch._check(
        input.dtype.is_complex,
        lambda: f"{func_name} expects a complex input tensor, but got {input.dtype}",
    )
    dims = (utils.canonicalize_dim(input.ndim, dim, wrap_scalar=False),)
    dim_size = n if n is not None else input.shape[dim]
    torch._check(
        dim_size >= 1, lambda: f"Invalid number of data points ({dim_size}) specified"
    )

    if n is not None:
        # Pad or trim the transformed dimension to exactly n points.
        input = _resize_fft_input(input, dims, (n,))

    ret = prims.fft_c2c(input, dim=dims, forward=forward)
    return _apply_norm(ret, norm, dim_size, forward)
196
+
197
+
198
@register_decomposition(aten.fft_fft)
@out_wrapper()
def fft(
    input: TensorLikeType,
    n: Optional[int] = None,
    dim: int = -1,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.fft (1-D discrete Fourier transform)."""
    # Complex input: plain c2c transform; real input: full (two-sided) r2c.
    if input.dtype.is_complex:
        return _fft_c2c("fft", input, n, dim, norm, forward=True)
    else:
        return _fft_r2c("fft", input, n, dim, norm, forward=True, onesided=False)
210
+
211
+
212
@register_decomposition(aten.fft_ifft)
@out_wrapper()
def ifft(
    input: TensorLikeType,
    n: Optional[int] = None,
    dim: int = -1,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.ifft (1-D inverse discrete Fourier transform)."""
    # Complex input: plain c2c transform; real input: full (two-sided) r2c.
    if input.dtype.is_complex:
        return _fft_c2c("ifft", input, n, dim, norm, forward=False)
    else:
        return _fft_r2c("ifft", input, n, dim, norm, forward=False, onesided=False)
224
+
225
+
226
@register_decomposition(aten.fft_rfft)
@out_wrapper()
def rfft(
    input: TensorLikeType,
    n: Optional[int] = None,
    dim: int = -1,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.rfft (real input, one-sided output)."""
    return _fft_r2c("rfft", input, n, dim, norm, forward=True, onesided=True)
235
+
236
+
237
@register_decomposition(aten.fft_irfft)
@out_wrapper()
def irfft(
    input: TensorLikeType,
    n: Optional[int] = None,
    dim: int = -1,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.irfft (inverse of rfft, real output)."""
    return _fft_c2r("irfft", input, n, dim, norm, forward=False)
246
+
247
+
248
@register_decomposition(aten.fft_hfft)
@out_wrapper()
def hfft(
    input: TensorLikeType,
    n: Optional[int] = None,
    dim: int = -1,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.hfft (forward complex-to-real transform)."""
    return _fft_c2r("hfft", input, n, dim, norm, forward=True)
257
+
258
+
259
@register_decomposition(aten.fft_ihfft)
@out_wrapper()
def ihfft(
    input: TensorLikeType,
    n: Optional[int] = None,
    dim: int = -1,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.ihfft (inverse of hfft, one-sided output)."""
    return _fft_r2c("ihfft", input, n, dim, norm, forward=False, onesided=True)
268
+
269
+
270
class _ShapeAndDims(NamedTuple):
    """Canonicalized (shape, dims) pair for an n-dimensional FFT."""
    shape: Tuple[int, ...]
    dims: Tuple[int, ...]
273
+
274
+
275
def _canonicalize_fft_shape_and_dim_args(
    input: TensorLikeType, shape: Optional[ShapeType], dim: Optional[DimsType]
) -> _ShapeAndDims:
    """Convert the shape and dim arguments into a canonical form where neither are optional"""
    input_dim = input.ndim
    input_sizes = input.shape

    if dim is not None:
        # Accept a bare int as a 1-element dim list.
        if not isinstance(dim, Sequence):
            dim = (dim,)
        ret_dims = utils.canonicalize_dims(input_dim, dim, wrap_scalar=False)

        # Check dims are unique
        torch._check(
            len(set(ret_dims)) == len(ret_dims), lambda: "FFT dims must be unique"
        )

    if shape is not None:
        # Accept a bare int as a 1-element shape list.
        if not isinstance(shape, Sequence):
            shape = (shape,)

        # Has shape, might have dim
        torch._check(
            dim is None or len(dim) == len(shape),
            lambda: "When given, dim and shape arguments must have the same length",
        )
        transform_ndim = len(shape)

        torch._check(
            transform_ndim <= input_dim,
            lambda: f"Got shape with {transform_ndim} values but input tensor "
            f"only has {input_dim} dimensions.",
        )

        # If shape is given, dims defaults to the last len(shape) dimensions
        if dim is None:
            ret_dims = tuple(range(input_dim - transform_ndim, input_dim))

        # Translate any -1 values in shape to the default length
        ret_shape = tuple(
            s if s != -1 else input_sizes[d] for (s, d) in zip(shape, ret_dims)  # type: ignore[possibly-undefined]
        )
    elif dim is None:
        # No shape, no dim: transform every dimension.
        ret_dims = tuple(range(input_dim))
        ret_shape = tuple(input_sizes)
    else:
        # No shape, has dim: take the current size of each requested dim.
        ret_shape = tuple(input_sizes[d] for d in ret_dims)  # type: ignore[possibly-undefined]

    for n in ret_shape:
        torch._check(n > 0, lambda: f"Invalid number of data points ({n}) specified")

    return _ShapeAndDims(shape=ret_shape, dims=ret_dims)  # type: ignore[possibly-undefined]
329
+
330
+
331
+ def _prod(xs: Iterable[int]) -> int:
332
+ """Compute product of a list"""
333
+ prod = 1
334
+ for x in xs:
335
+ prod *= x
336
+ return prod
337
+
338
+
339
def _fftn_c2c(
    function_name: str,
    input: TensorLikeType,
    shape: Tuple[int, ...],
    dim: Tuple[int, ...],
    norm: NormType,
    forward: bool,
) -> TensorLikeType:
    """Common code for n-dimensional complex to complex FFTs (fftn or ifftn)"""
    torch._check(
        input.dtype.is_complex,
        lambda: f"{function_name} expects a complex input tensor, "
        f"but got {input.dtype}",
    )
    x = _resize_fft_input(input, dim, shape)
    output = prims.fft_c2c(x, dim=dim, forward=forward)
    # Normalization uses the total number of transformed elements.
    return _apply_norm(output, norm=norm, signal_numel=_prod(shape), forward=forward)
356
+
357
+
358
@register_decomposition(aten.fft_fftn)
@out_wrapper()
def fftn(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = None,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.fftn (n-dimensional discrete Fourier transform)."""
    (shape, dim) = _canonicalize_fft_shape_and_dim_args(input, s, dim)
    # Real input is promoted to complex before the c2c transform.
    x = _maybe_promote_tensor_fft(input, require_complex=True)
    return _fftn_c2c("fftn", x, shape, dim, norm, forward=True)
369
+
370
+
371
@register_decomposition(aten.fft_ifftn)
@out_wrapper()
def ifftn(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = None,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.ifftn (n-dimensional inverse Fourier transform)."""
    (shape, dim) = _canonicalize_fft_shape_and_dim_args(input, s, dim)
    # Real input is promoted to complex before the c2c transform.
    x = _maybe_promote_tensor_fft(input, require_complex=True)
    return _fftn_c2c("ifftn", x, shape, dim, norm, forward=False)
382
+
383
+
384
@register_decomposition(aten.fft_rfftn)
@out_wrapper()
def rfftn(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = None,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.rfftn (n-dimensional FFT of a real input)."""
    torch._check(
        not input.dtype.is_complex,
        lambda: f"rfftn expects a real-valued input tensor, but got {input.dtype}",
    )
    shape, dim = _canonicalize_fft_shape_and_dim_args(input, s, dim)
    input = _maybe_promote_tensor_fft(input, require_complex=False)
    input = _resize_fft_input(input, dim, shape)
    # One-sided r2c over all requested dims at once.
    out = prims.fft_r2c(input, dim=dim, onesided=True)
    return _apply_norm(out, norm=norm, signal_numel=_prod(shape), forward=True)
401
+
402
+
403
@register_decomposition(aten.fft_ihfftn)
@out_wrapper()
def ihfftn(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = None,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of torch.fft.ihfftn (n-dimensional inverse of hfftn)."""
    torch._check(
        not input.dtype.is_complex,
        lambda: f"ihfftn expects a real-valued input tensor, but got {input.dtype}",
    )
    shape, dim = _canonicalize_fft_shape_and_dim_args(input, s, dim)
    torch._check(len(shape) > 0, lambda: "ihfftn must transform at least one axis")
    input = _maybe_promote_tensor_fft(input, require_complex=False)
    input = _resize_fft_input(input, dim, shape)

    # One-sided r2c over the last transformed dimension only ...
    tmp = prims.fft_r2c(input, dim=dim[-1:], onesided=True)

    if len(dim) == 1:
        tmp = _apply_norm(tmp, norm=norm, signal_numel=shape[0], forward=False)
        return prims.conj(tmp)

    # ... then conjugate and run an inverse c2c transform over the rest.
    tmp = prims.conj_physical(tmp)
    tmp = prims.fft_c2c(tmp, dim=dim[:-1], forward=False)
    return _apply_norm(tmp, norm=norm, signal_numel=_prod(shape), forward=False)
429
+
430
+
431
+ class _CanonicalizeC2rReturn(NamedTuple):
432
+ shape: Tuple[int, ...]
433
+ dim: Tuple[int, ...]
434
+ last_dim_size: int
435
+
436
+
437
+ def _canonicalize_fft_c2r_shape_and_dim_args(
438
+ fname: str,
439
+ input: TensorLikeType,
440
+ s: Optional[ShapeType],
441
+ dim: Optional[DimsType],
442
+ ) -> _CanonicalizeC2rReturn:
443
+ """Canonicalize shape and dim arguments for n-dimensional c2r transforms,
444
+ as well as calculating the last_dim_size which is shape[dim[-1]] for the output"""
445
+ (shape, dim) = _canonicalize_fft_shape_and_dim_args(input, s, dim)
446
+ torch._check(len(shape) > 0, lambda: f"{fname} must transform at least one axis")
447
+
448
+ if s is None or s[-1] == -1:
449
+ last_dim_size = 2 * (input.shape[dim[-1]] - 1)
450
+ else:
451
+ last_dim_size = shape[-1]
452
+
453
+ torch._check(
454
+ last_dim_size >= 1,
455
+ lambda: f"Invalid number of data points ({last_dim_size}) specified",
456
+ )
457
+
458
+ shape_list = list(shape)
459
+ shape_list[-1] = last_dim_size // 2 + 1
460
+ return _CanonicalizeC2rReturn(
461
+ shape=tuple(shape_list), dim=dim, last_dim_size=last_dim_size
462
+ )
463
+
464
+
465
@register_decomposition(aten.fft_irfftn)
@out_wrapper()
def irfftn(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = None,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of :func:`torch.fft.irfftn` (complex-to-real N-dim inverse FFT)."""
    shape, dims, last_dim_size = _canonicalize_fft_c2r_shape_and_dim_args(
        "irfftn", input, s, dim
    )
    x = _maybe_promote_tensor_fft(input, require_complex=True)
    x = _resize_fft_input(x, dims, shape)
    out = prims.fft_c2r(x, dim=dims, last_dim_size=last_dim_size)
    # Normalization uses the element count of the *output* signal.
    return _apply_norm(out, norm, _prod(out.shape[d] for d in dims), forward=False)
480
+
481
+
482
@register_decomposition(aten.fft_hfftn)
@out_wrapper()
def hfftn(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = None,
    norm: NormType = None,
) -> TensorLikeType:
    """Reference implementation of :func:`torch.fft.hfftn` (Hermitian N-dim FFT)."""
    shape, dims, last_dim_size = _canonicalize_fft_c2r_shape_and_dim_args(
        "hfftn", input, s, dim
    )
    x = _maybe_promote_tensor_fft(input, require_complex=True)
    x = _resize_fft_input(x, dims, shape)

    # Forward c2c over all but the last dimension, then a conjugated c2r
    # transform over the last dimension.
    if len(dims) > 1:
        tmp = prims.fft_c2c(x, dim=dims[:-1], forward=True)
    else:
        tmp = x
    tmp = _apply_norm(tmp, norm, _prod(shape[:-1]), forward=True)
    tmp = prims.conj_physical(tmp)
    out = prims.fft_c2r(tmp, dim=dims[-1:], last_dim_size=last_dim_size)
    return _apply_norm(out, norm, last_dim_size, forward=True)
501
+
502
+
503
@register_decomposition(aten.fft_fft2)
@out_wrapper()
def fft2(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = (-2, -1),
    norm: NormType = None,
) -> TensorLikeType:
    """2-D forward FFT: thin wrapper delegating to the N-dim variant."""
    return torch.fft.fftn(input, s=s, dim=dim, norm=norm)


@register_decomposition(aten.fft_ifft2)
@out_wrapper()
def ifft2(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = (-2, -1),
    norm: NormType = None,
) -> TensorLikeType:
    """2-D inverse FFT: thin wrapper delegating to the N-dim variant."""
    return torch.fft.ifftn(input, s=s, dim=dim, norm=norm)


@register_decomposition(aten.fft_rfft2)
@out_wrapper()
def rfft2(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = (-2, -1),
    norm: NormType = None,
) -> TensorLikeType:
    """2-D real-to-complex FFT: thin wrapper delegating to the N-dim variant."""
    return torch.fft.rfftn(input, s=s, dim=dim, norm=norm)


@register_decomposition(aten.fft_irfft2)
@out_wrapper()
def irfft2(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = (-2, -1),
    norm: NormType = None,
) -> TensorLikeType:
    """2-D complex-to-real inverse FFT: thin wrapper delegating to the N-dim variant."""
    return torch.fft.irfftn(input, s=s, dim=dim, norm=norm)


@register_decomposition(aten.fft_hfft2)
@out_wrapper()
def hfft2(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = (-2, -1),
    norm: NormType = None,
) -> TensorLikeType:
    """2-D Hermitian FFT: thin wrapper delegating to the N-dim variant."""
    return torch.fft.hfftn(input, s=s, dim=dim, norm=norm)


@register_decomposition(aten.fft_ihfft2)
@out_wrapper()
def ihfft2(
    input: TensorLikeType,
    s: Optional[ShapeType] = None,
    dim: Optional[DimsType] = (-2, -1),
    norm: NormType = None,
) -> TensorLikeType:
    """2-D inverse Hermitian FFT: thin wrapper delegating to the N-dim variant."""
    return torch.fft.ihfftn(input, s=s, dim=dim, norm=norm)
567
+
568
+
569
+ def _default_alldims(dim: Optional[DimsType], x: TensorLikeType) -> List[int]:
570
+ """Convert Optional[DimsType] to a simple list, defaulting to all dimensions"""
571
+ if dim is None:
572
+ return list(range(x.ndim))
573
+ elif not isinstance(dim, Sequence):
574
+ return [dim]
575
+ else:
576
+ return list(dim)
577
+
578
+
579
@register_decomposition(aten.fft_fftshift)
def fftshift(input: TensorLikeType, dim: Optional[DimsType] = None) -> TensorLikeType:
    """Roll each transformed dimension so the zero-frequency term is centered."""
    roll_dims = _default_alldims(dim, input)
    amounts = [input.shape[d] // 2 for d in roll_dims]
    return torch.roll(input, amounts, roll_dims)
584
+
585
+
586
@register_decomposition(aten.fft_ifftshift)
def ifftshift(input: TensorLikeType, dim: Optional[DimsType] = None) -> TensorLikeType:
    """Inverse of :func:`fftshift`: undo the centering roll (shifts by ceil(n/2))."""
    roll_dims = _default_alldims(dim, input)
    amounts = [(input.shape[d] + 1) // 2 for d in roll_dims]
    return torch.roll(input, amounts, roll_dims)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (706 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/intrinsic/quantized/dynamic/modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (367 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/qat/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .modules import * # noqa: F403
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (6.64 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from .modules import *  # noqa: F403

# Names re-exported as the public API of this reference-modules package.
__all__ = [
    "Linear",
    "Conv1d",
    "Conv2d",
    "Conv3d",
    "ConvTranspose1d",
    "ConvTranspose2d",
    "ConvTranspose3d",
    "RNNCell",
    "LSTMCell",
    "GRUCell",
    "LSTM",
    "GRU",
    "Embedding",
    "EmbeddingBag",
]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (814 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/ns/fx/utils.py ADDED
@@ -0,0 +1,533 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import enum
2
+ import operator
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.ao.nn.intrinsic.quantized as nniq
7
+ import torch.ao.nn.quantized as nnq
8
+
9
+ toq = torch.ops.quantized
10
+ from typing import Tuple, Callable, Dict, Set, List, Optional, Union
11
+
12
+ from torch.fx import GraphModule
13
+ from torch.fx.graph import Node
14
+ from torch.ao.quantization import (
15
+ ObserverBase,
16
+ FakeQuantizeBase,
17
+ )
18
+ from torch.ao.quantization.utils import getattr_from_fqn
19
+ from torch.ao.quantization.observer import _is_activation_post_process
20
+
21
+ from .ns_types import NSNodeTargetType, NSResultsType
22
+
23
# TODO(future PR): consider deleting this enum and using the torch types
# directly. This might be tricky because it is not a one to one mapping.
class NodeInputOrOutputType(enum.Enum):
    """Coarse dtype classification for a node's first input and its output."""

    FP32 = enum.auto()  # torch.float
    INT8 = enum.auto()  # torch.qint8 or torch.quint8
    FP16 = enum.auto()  # torch.float16
    UNKNOWN = enum.auto()  # we cannot determine input/output dtype
    # TODO(future PR): while these functions can support multiple dtypes,
    # for the purposes of numerical debugging we want to get the actual
    # dtype used in the model. We will likely need some kind of dtype
    # propagation to estimate this.
    FP32_OR_INT8 = enum.auto()  # either torch.float or torch.quint8 or torch.qint8
    # TODO(future PRs): dynamic quant, fake quant, etc
36
+
37
+
38
def get_node_first_input_and_output_type(
    node: Node,
    gm: GraphModule,
    logger_cls: Callable,
    node_type_to_io_type_map: Dict[str, Set[NSNodeTargetType]],
) -> Tuple[NodeInputOrOutputType, NodeInputOrOutputType]:
    """Infer the coarse dtypes of `node`'s first input and of its output.

    Nodes whose io type depends on their producer (observers, loggers,
    pass-through ops) are resolved by recursing into the first input.
    """

    # TODO(future PR): clean this up
    FUNS_IO_TYPE_FP32 = node_type_to_io_type_map["funs_io_type_fp32"]
    FUNS_IO_TYPE_FP16 = node_type_to_io_type_map["funs_io_type_fp16"]
    FUNS_IO_TYPE_INT8 = node_type_to_io_type_map["funs_io_type_int8"]
    FUNS_IO_TYPE_FP32_OR_INT8 = node_type_to_io_type_map["funs_io_type_fp32_or_int8"]
    MODS_IO_TYPE_FP32 = node_type_to_io_type_map["mods_io_type_fp32"]
    MODS_IO_TYPE_INT8 = node_type_to_io_type_map["mods_io_type_int8"]
    MODS_IO_TYPE_FP32_OR_INT8 = node_type_to_io_type_map["mods_io_type_fp32_or_int8"]
    METHS_IO_TYPE_FP32_OR_INT8 = node_type_to_io_type_map["meths_io_type_fp32_or_int8"]

    _unknown = (NodeInputOrOutputType.UNKNOWN, NodeInputOrOutputType.UNKNOWN)

    def _prev_output_type(arg_idx: int) -> NodeInputOrOutputType:
        # Resolve the output type of the node feeding input `arg_idx`.
        prev = get_normalized_nth_input(node, gm, arg_idx)
        assert isinstance(prev, Node)
        (
            _prev_node_input_type,
            prev_node_output_type,
        ) = get_node_first_input_and_output_type(
            prev, gm, logger_cls, node_type_to_io_type_map
        )
        return prev_node_output_type

    if node.op == "call_function":
        if node.target in FUNS_IO_TYPE_FP32:
            return (NodeInputOrOutputType.FP32, NodeInputOrOutputType.FP32)
        if node.target in FUNS_IO_TYPE_FP16:
            return (NodeInputOrOutputType.FP16, NodeInputOrOutputType.FP16)
        if node.target in FUNS_IO_TYPE_INT8:
            return (NodeInputOrOutputType.INT8, NodeInputOrOutputType.INT8)
        if node.target in FUNS_IO_TYPE_FP32_OR_INT8:
            prev_out = _prev_output_type(0)
            return (prev_out, prev_out)
        return _unknown

    if node.op == "call_module":
        assert isinstance(node.target, str)
        mod = getattr_from_fqn(gm, node.target)
        is_known_fp32_or_int8_input_module = any(
            isinstance(mod, target_type) for target_type in MODS_IO_TYPE_FP32_OR_INT8  # type: ignore[arg-type]
        )
        if (
            isinstance(mod, (logger_cls, ObserverBase, FakeQuantizeBase))  # type: ignore[arg-type]
            or is_known_fp32_or_int8_input_module
        ):
            # A logger or observer's input and output type is the output
            # type of the preceding node.
            prev_out = _prev_output_type(0)
            return (prev_out, prev_out)
        if any(isinstance(mod, target_type) for target_type in MODS_IO_TYPE_FP32):  # type: ignore[arg-type]
            return (NodeInputOrOutputType.FP32, NodeInputOrOutputType.FP32)
        if any(isinstance(mod, target_type) for target_type in MODS_IO_TYPE_INT8):  # type: ignore[arg-type]
            return (NodeInputOrOutputType.INT8, NodeInputOrOutputType.INT8)
        return _unknown

    if node.op == "call_method":
        if node.target == "dequantize":
            # Dequantize is a special node because it allows multiple input types.
            # So, we look up the output type of the previous node and return that
            # as the input type of this node instance.
            return (_prev_output_type(0), NodeInputOrOutputType.FP32)

        if node.target == "to":
            # to is a special node because it allows multiple input types.
            # So, we look up the output type of the previous node and return that
            # as the input type of this node instance. We also look up the target
            # of to and return the correct output type.
            prev_out = _prev_output_type(0)

            cur_node_dtype_target = get_normalized_nth_input(node, gm, 1)
            assert (
                cur_node_dtype_target is torch.float16
            ), f"{cur_node_dtype_target} handling needs to be added"

            return (prev_out, NodeInputOrOutputType.FP16)

        if node.target in METHS_IO_TYPE_FP32_OR_INT8:
            prev_out = _prev_output_type(0)
            return (prev_out, prev_out)

        return _unknown

    return _unknown
160
+
161
+
162
def get_node_input_qparams(
    node: Node,
    gm: GraphModule,
    node_type_to_io_type_map: Dict[str, Set[NSNodeTargetType]],
) -> Optional[Tuple[Union[torch.Tensor, float], Union[torch.Tensor, int]]]:
    """
    Returns the qparams (scale, zero_point) of the first input to `node`,
    if they can be inferred from the graph.
    """
    prev_node = get_normalized_nth_input(node, gm, 0)

    if not isinstance(prev_node, Node):
        return None

    MODS_IO_TYPE_FP32_OR_INT8 = node_type_to_io_type_map["mods_io_type_fp32_or_int8"]

    def _read_scale_zp_args(src_node, root, scale_arg_idx, zp_arg_idx):
        # Scale / zero_point are stored as graph attributes; resolve both.
        scale_node = get_normalized_nth_input(src_node, root, scale_arg_idx)
        zp_node = get_normalized_nth_input(src_node, root, zp_arg_idx)
        assert isinstance(scale_node, Node) and isinstance(scale_node.target, str)
        assert isinstance(zp_node, Node) and isinstance(zp_node.target, str)
        return (
            getattr_from_fqn(root, scale_node.target),
            getattr_from_fqn(root, zp_node.target),
        )

    if prev_node.op == "call_function":
        # quantize - read the args directly
        if prev_node.target == torch.quantize_per_tensor:
            return _read_scale_zp_args(prev_node, gm, 1, 2)
        if prev_node.target in (toq.add, toq.add_relu, toq.mul, toq.mul_relu):
            return _read_scale_zp_args(prev_node, gm, 2, 3)

        # TODO(future PR): handle more functionals
        # TODO(future PR): handle functional ops which inherit qparams from input
        return None

    if prev_node.op == "call_module":
        # get type of the module
        assert isinstance(prev_node.target, str)
        module_obj = getattr_from_fqn(gm, prev_node.target)
        if isinstance(
            module_obj,
            (
                nnq.Linear,
                nnq.Conv1d,
                nnq.Conv2d,
                nniq.ConvReLU2d,
                nnq.Conv3d,
                nnq.BatchNorm2d,
                nnq.BatchNorm3d,
                nnq.ConvTranspose1d,
                nnq.ConvTranspose2d,
                nnq.ELU,
                nnq.GroupNorm,
                nnq.InstanceNorm1d,
                nnq.InstanceNorm2d,
                nnq.InstanceNorm3d,
                nnq.LayerNorm,
                nnq.Hardswish,
                nnq.LeakyReLU,
                nnq.ReLU6,
                nniq.BNReLU2d,
                nniq.BNReLU3d,
                nniq.ConvReLU1d,
                nniq.ConvReLU2d,
                nniq.ConvReLU3d,
                nniq.LinearReLU,
            ),
        ):
            # Quantized modules carry their output qparams directly.
            return (module_obj.scale, module_obj.zero_point)  # type: ignore[return-value]

        # Modules which pass qparams through unchanged: keep walking up.
        is_known_fp32_or_int8_input_module = any(
            isinstance(module_obj, target_type) for target_type in MODS_IO_TYPE_FP32_OR_INT8  # type: ignore[arg-type]
        )
        if is_known_fp32_or_int8_input_module:
            return get_node_input_qparams(prev_node, gm, node_type_to_io_type_map)

    return None
242
+
243
+
244
def return_first_non_observer_node(
    node: Node,
    gm: GraphModule,
) -> Node:
    """
    If node is not an observer, returns it. If node is an observer,
    navigates up the graph and returns the first parent which is not an
    observer. For example,

    graph: (node_non_obs), node = node_non_obs : returns node_non_obs
    graph: (node_non_obs -> obs0), node = obs0 : returns node_non_obs
    graph: (node_non_obs -> obs0 -> fq0), node = fq0 : returns node_non_obs
    """
    if node.op != "call_module":
        return node
    if not _is_activation_post_process(getattr_from_fqn(gm, node.target)):  # type: ignore[arg-type]
        return node
    # Hop over the first observer / fake-quant.
    assert len(node.args) == 1
    assert isinstance(node.args[0], Node)
    node = node.args[0]
    # The producer may itself be an observer (e.g. obs -> fq chains); at
    # most one more hop is performed, mirroring the original unrolled logic.
    assert isinstance(node.target, str)
    if _is_activation_post_process(getattr_from_fqn(gm, node.target)):
        assert len(node.args) == 1
        assert isinstance(node.args[0], Node)
        node = node.args[0]
    return node
271
+
272
+
273
def get_number_of_non_param_args(
    node: Node,
    gm: GraphModule,
) -> int:
    """
    Assumes that all non-param args occur first. Returns the number of
    non-param args expected for a node. For example, for

      F.linear(x, weight, bias)

    Returns 1, because x is a non-param arg and weight and bias are params.
    For

      lstm_mod(x, hid)

    Returns 2, because both x and hid are non-param args.
    """
    if node.op == "call_module":
        # LSTM modules consume (input, hidden): two non-param args.
        if isinstance(getattr_from_fqn(gm, node.target), nn.LSTM):  # type: ignore[arg-type]
            return 2

    # Everything else is assumed to take a single non-param arg.
    return 1
297
+
298
+
299
def get_arg_indices_of_inputs_to_log(node: Node) -> List[int]:
    """
    Returns the indices of args of the node which we should attach
    loggers to, if input logging is enabled.

    For example,
    * for (x + y), returns [0, 1]
    * for (1 + y), returns [1]
    * for (x + 1), returns [0]
    * for (linear(x, w, b)) returns [0]
    * by default, returns [0]
    """
    if len(node.args) == 0:
        return []
    if node.op == "call_function" and (
        # TODO(future PR): use relationship map instead of hardcoding
        node.target in (torch.add, torch.ops.quantized.add, operator.add)
        or node.target in (torch.mul, torch.ops.quantized.mul, operator.mul)
    ):
        # Binary ops: log each of the first two args that is a graph node
        # (scalar constants are skipped). Use isinstance rather than an
        # exact type comparison so Node subclasses are handled too.
        return [i for i in range(2) if isinstance(node.args[i], Node)]
    return [0]
324
+
325
+
326
def get_target_type_str(node: Node, gm: GraphModule) -> str:
    """
    Returns a string representation of the type of the function or module
    pointed to by this node, or '' for other node types.
    """
    if node.op in ("call_function", "call_method"):
        return torch.typename(node.target)
    if node.op == "call_module":
        assert isinstance(node.target, str)
        # Resolve the module instance so we can name its concrete class.
        return torch.typename(getattr_from_fqn(gm, node.target))
    # placeholder / get_attr / output nodes have no meaningful target type here.
    return ""
339
+
340
+
341
def rekey_logger_info_on_node_name_of_model(
    results: NSResultsType,
    model_name: str,
) -> NSResultsType:
    """
    Rekeys the layer name of a results dictionary to use node names
    from `model_name`.

    For example, transforms

      {'base_op_1_0': {'node_output': {'model_a':
        [{'ref_node_name': 'linear1', ...}]}}}

    into

      {'linear1': {'node_output': {'model_a':
        [{'ref_node_name': 'linear1', ...}]}}}

    Note: we cannot use these node names directly because they are not
    guaranteed to be consistent across models. This is why we extract
    the results first and rekey afterwards.
    """
    rekeyed: NSResultsType = {}
    for old_layer_name, result_type_to_results in results.items():
        node_name = None
        # Scan all result types for an entry belonging to `model_name` and
        # take its recorded ref_node_name as the new key.
        for model_name_to_results in result_type_to_results.values():
            for cur_model_name, results_list in model_name_to_results.items():
                if cur_model_name != model_name:
                    continue
                assert len(results_list)
                node_name = results_list[0]["ref_node_name"]
        # Fall back to the original key when `model_name` has no entry.
        new_key = old_layer_name if node_name is None else node_name
        rekeyed[new_key] = result_type_to_results
    return rekeyed
378
+
379
+
380
def maybe_add_missing_fqns(results: NSResultsType) -> None:
    """
    If `fqn` entries are filled in for one of the models in `results`, copies
    them over to any models which do not have them filled out.

    A common use case benefitting from this is comparing a model prepared by
    quantization to a quantized model. In this case, the model prepared by
    quantization would have `fqn` entries, and the quantized model would not.
    """

    # Inspect only the very first model entry of the first result type
    # (mirrors the original break-out-of-all-loops behavior).
    model_name_with_fqns = None
    first_rtr = next(iter(results.values()), None)
    if first_rtr is not None:
        first_mntr = next(iter(first_rtr.values()), None)
        if first_mntr is not None:
            first_entry = next(iter(first_mntr.items()), None)
            if first_entry is not None:
                candidate_name, candidate_results = first_entry
                if len(candidate_results) > 0 and candidate_results[0]["fqn"] is not None:
                    model_name_with_fqns = candidate_name

    if model_name_with_fqns:
        # Copy fqns index-by-index from the reference model to every other model.
        for result_type_to_results in results.values():
            for model_name_to_results in result_type_to_results.values():
                ref_model_results = model_name_to_results[model_name_with_fqns]
                for model_name, model_results in model_name_to_results.items():
                    if model_name == model_name_with_fqns:
                        continue
                    for i in range(len(model_results)):
                        model_results[i]["fqn"] = ref_model_results[i]["fqn"]
412
+
413
+
414
def maybe_dequantize_first_two_tensor_args_and_handle_tuples(f):
    """Decorator: dequantize the first two tensor args of `f`, map `f`
    elementwise over paired tuples/lists, and return None when either
    argument is not a float tensor."""

    def inner(*args, **kwargs):
        a0, a1, *a_other = args

        both_tuples = isinstance(a0, tuple) and isinstance(a1, tuple)
        both_lists = isinstance(a0, list) and isinstance(a1, list)
        if both_tuples or both_lists:
            # Apply pairwise over the two containers and collect results.
            return [inner(el0, el1, *a_other, **kwargs) for el0, el1 in zip(a0, a1)]

        if isinstance(a0, torch.Tensor) and isinstance(a1, torch.Tensor):
            if a0.is_quantized:
                a0 = a0.dequantize()
            if a1.is_quantized:
                a1 = a1.dequantize()

            # for the purposes of this util, only handle floats
            if a0.dtype != torch.float or a1.dtype != torch.float:
                return None

            return f(a0, a1, *a_other, **kwargs)

    return inner


@maybe_dequantize_first_two_tensor_args_and_handle_tuples
def compute_sqnr(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """
    Computes the SQNR between `x` and `y`.

    Args:
        x: Tensor or tuple of tensors
        y: Tensor or tuple of tensors

    Return:
        float or tuple of floats
    """
    signal_power = torch.norm(x)
    noise_power = torch.norm(x - y)
    # SQNR in decibels; +inf when x == y (noise power is zero).
    return 20 * torch.log10(signal_power / noise_power)


@maybe_dequantize_first_two_tensor_args_and_handle_tuples
def compute_normalized_l2_error(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """
    Computes the normalized L2 error between `x` and `y`.

    Args:
        x: Tensor or tuple of tensors
        y: Tensor or tuple of tensors

    Return:
        float or tuple of floats
    """
    squared_error = ((x - y) ** 2).sum()
    reference_energy = (x ** 2).sum()
    return torch.sqrt(squared_error / reference_energy)


@maybe_dequantize_first_two_tensor_args_and_handle_tuples
def compute_cosine_similarity(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """
    Computes the cosine similarity between `x` and `y`.

    Args:
        x: Tensor or tuple of tensors
        y: Tensor or tuple of tensors

    Return:
        float or tuple of floats
    """
    # For convolutions, the shape of the quantized weight has one additional
    # dimension compared to the shape of the fp32 weight. Match the shapes
    # to enable cosine similarity comparison.
    flat_x = x.reshape(1, -1)
    flat_y = y.reshape(1, -1)
    return torch.nn.functional.cosine_similarity(flat_x, flat_y)
493
+
494
def op_type_supports_shadowing(node: Node) -> bool:
    """Return False for multi-tensor-input ops that shadowing cannot handle."""
    multi_tensor_input_ops = (
        torch.add,
        torch.mul,
        operator.add,
        operator.mul,
        torch.cat,
        torch.stack,
    )
    if node.op == 'call_function' and node.target in multi_tensor_input_ops:
        # shadowing for ops with multiple tensor inputs is not implemented yet
        return False
    return True
500
+
501
def get_normalized_nth_input(node: Node, gm: GraphModule, idx: int) -> Node:
    """
    Given a node, gets the n'th input to that node, normalizing
    args and kwargs to the best of its ability.
    """
    try:
        norm_args_and_kwargs = node.normalized_arguments(
            gm, normalize_to_only_use_kwargs=True)
        if norm_args_and_kwargs is not None:
            norm_args, norm_kwargs = norm_args_and_kwargs
            assert len(norm_args) + len(norm_kwargs) > idx
            if idx < len(norm_args):
                return norm_args[idx]
            else:
                # note: in Python 3.7+ dicts are ordered
                return list(norm_kwargs.values())[idx]
        else:
            assert len(node.args) + len(node.kwargs) > idx
            if idx < len(node.args):
                return node.args[idx]  # type: ignore[return-value]
            else:
                # kwargs follow the positional args, so index into the kwargs
                # list relative to the end of node.args.
                # Bug fix: this was `idx + len(node.args)`, which over-indexed
                # the kwargs list and raised IndexError for any kwarg input.
                kwargs_idx = idx - len(node.args)
                return list(node.kwargs.values())[kwargs_idx]  # type: ignore[return-value]
    except RuntimeError:
        # this RuntimeError happens when node argument normalization
        # requires typehints to proceed, such as for torch.add where
        # either the first, second or both arguments could be tensors
        assert len(node.args) + len(node.kwargs) > idx
        if idx < len(node.args):
            return node.args[idx]  # type: ignore[return-value]
        else:
            # same offset fix as above for the fallback path
            kwargs_idx = idx - len(node.args)
            return list(node.kwargs.values())[kwargs_idx]  # type: ignore[return-value]
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (6.95 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/_correct_bias.cpython-311.pyc ADDED
Binary file (7.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/_equalize.cpython-311.pyc ADDED
Binary file (9.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/fuser_method_mappings.cpython-311.pyc ADDED
Binary file (13.4 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/quant_type.cpython-311.pyc ADDED
Binary file (1.43 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/quantize_pt2e.cpython-311.pyc ADDED
Binary file (10.8 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/stubs.cpython-311.pyc ADDED
Binary file (4.07 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/native.cpython-311.pyc ADDED
Binary file (7.07 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/_qnnpack_pt2e.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import operator
from typing import List

import torch
from torch.ao.quantization.backend_config import (
    BackendConfig,
    DTypeConfig,
    ObservationType,
    BackendPatternConfig,
)

# Shared dtype configuration for weighted quantized ops:
# quint8 activations, qint8 weights, float bias.
weighted_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)


def get_linear_configs():
    """Build the backend pattern configs for decomposed linear ops."""
    observation_type = ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT
    dtype_configs = [weighted_op_quint8_dtype_config]

    # TODO: need to fix the way we insert observers for this pattern
    # should be solved in the new fusion API
    # reason that this doesn't work: the pattern is a bit complicated and we don't
    # have a way to specify which input of the pattern we would like to observe
    # pattern:
    # bias input weight
    # \ | /
    #  \ | t
    #   \ | /
    #    addmm
    # we want to observe "weight" as weight, but there is not way to convey this
    # information with current pattern language
    #
    # right now:
    # original:
    #   weight - t \
    #   input - addmm
    # observed (no hack):
    #   weight - t - observer \
    #   input - observer - addmm
    # target:
    #   weight - observer - t \
    #   input - observer - addmm

    # def root_node_getter(node_pattern):
    #     addmm, bias, act, weight = node_pattern
    #     return addmm

    # linear_configs.append(
    #     BackendPatternConfig((torch.ops.aten.addmm.default, MatchAllNode, MatchAllNode, torch.ops.aten.t.default))
    #     .set_observation_type(observation_type)  # noqa: E131
    #     .set_dtype_configs(dtype_configs)
    #     ._set_root_node_getter(root_node_getter))

    linear_configs = [
        # linear with bias decomposes to addmm
        BackendPatternConfig(torch.ops.aten.addmm.default)
        .set_observation_type(observation_type)  # noqa: E131
        .set_dtype_configs(dtype_configs)
        ._set_input_type_to_index({"weight": 2, "bias": 0}),
        # linear is decomposed to `t - mm` if bias is not present
        BackendPatternConfig(torch.ops.aten.mm.default)
        .set_observation_type(observation_type)  # noqa: E131
        .set_dtype_configs(dtype_configs)
        ._set_input_type_to_index({"weight": 1}),
    ]
    return linear_configs
71
+
72
def get_conv_configs():
    """Return BackendPatternConfigs for aten convolution and conv-relu."""
    observation_type = ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT
    dtype_configs = [weighted_op_quint8_dtype_config]
    patterns = [
        torch.ops.aten.convolution.default,
        (torch.ops.aten.convolution.default, torch.ops.aten.relu.default),
        # TODO: remove when functionalization is supported in PT2 mode
        (torch.ops.aten.convolution.default, torch.ops.aten.relu_.default),
    ]
    conv_configs = []
    for pattern in patterns:
        conv_configs.append(
            BackendPatternConfig(pattern)
            .set_observation_type(observation_type)
            .set_dtype_configs(dtype_configs)
            ._set_input_type_to_index({"weight": 1, "bias": 2})
        )
    return conv_configs
96
+
97
def get_pooling_configs():
    """Return the config for the max_pool2d_with_indices + getitem pattern."""
    observation_type = ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT
    dtype_configs = [weighted_op_quint8_dtype_config]

    def root_node_getter(node_pattern):
        # pattern is (getitem, maxpool, index); the maxpool node is the root
        _, maxpool, _ = node_pattern
        return maxpool

    pool_config = (
        BackendPatternConfig()
        ._set_pattern_complex_format(
            (operator.getitem, torch.ops.aten.max_pool2d_with_indices.default, 0)
        )
        .set_observation_type(observation_type)
        .set_dtype_configs(dtype_configs)
        ._set_root_node_getter(root_node_getter)
    )
    return [pool_config]
115
+
116
def get_relu_configs():
    """Return the BackendPatternConfig for a standalone aten relu."""
    observation_type = ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT
    dtype_configs = [weighted_op_quint8_dtype_config]
    relu_config = (
        BackendPatternConfig(torch.ops.aten.relu.default)
        .set_observation_type(observation_type)
        .set_dtype_configs(dtype_configs)
    )
    return [relu_config]
125
+
126
def get_binary_op_configs():
    """Return BackendPatternConfigs for quantizable binary ops (add variants)."""
    dtype_configs = [weighted_op_quint8_dtype_config]
    num_tensor_args_to_observation_type_mapping = {
        # TODO: this is not used right now since we have extra check in prepare
        # will need to change this to NO_OBSERVER later after we implemented
        # Tensor dtype inference properly
        0: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
        1: ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT,
        2: ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT,
    }
    binary_op_configs: List[BackendPatternConfig] = []
    for bop in [torch.ops.aten.add.Tensor, torch.ops.aten.add_.Tensor]:
        patterns = [
            (bop, torch.ops.aten.relu.default),
            bop,
            # TODO: remove when functionalization is supported in pt2_mode
            (bop, torch.ops.aten.relu_.default),
        ]
        for pattern in patterns:
            binary_op_configs.append(
                BackendPatternConfig(pattern)
                .set_dtype_configs(dtype_configs)
                ._set_num_tensor_args_to_observation_type(
                    num_tensor_args_to_observation_type_mapping
                )
            )
    return binary_op_configs
151
+
152
def get_qnnpack_pt2e_backend_config():
    """Assemble the BackendConfig used for the QNNPACK PT2 export flow."""
    config = BackendConfig("qnnpack_pytorch_2.0_export")
    # set_backend_pattern_configs mutates and returns `config`, so these can
    # be plain statements instead of one long fluent chain.
    config.set_backend_pattern_configs(get_linear_configs())
    config.set_backend_pattern_configs(get_binary_op_configs())
    config.set_backend_pattern_configs(get_conv_configs())
    config.set_backend_pattern_configs(get_pooling_configs())
    config.set_backend_pattern_configs(get_relu_configs())
    return config
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/fbgemm.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from ._common_operator_config_utils import (
3
+ _get_binary_op_configs,
4
+ _get_bn_configs,
5
+ _get_cat_config,
6
+ _get_conv_configs,
7
+ _get_default_op_configs,
8
+ _get_embedding_op_configs,
9
+ _get_fixed_qparams_op_configs,
10
+ _get_linear_configs,
11
+ _get_rnn_op_configs,
12
+ _get_share_qparams_op_configs,
13
+ _get_tensor_info_op_configs,
14
+ )
15
+ from .backend_config import BackendConfig, DTypeConfig
16
+
17
+ __all__ = [
18
+ "get_fbgemm_backend_config",
19
+ ]
20
+
21
# ===================
# |  DTYPE CONFIGS  |
# ===================

# TODO: For now, these DTypeConfigs are identical to the ones defined in native.py
# In the future, once we support specifying quant_min/quant_max and scale_min/scale_max,
# these will diverge. In particular, for FBGEMM, we will restrict the activation quantized
# values to within [0, 127].

# Weighted ops (linear, conv): quint8 activations, qint8 weights, float bias.
fbgemm_weighted_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)

# Weight-less ops: quint8 activations only.
fbgemm_default_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
)

# Everything in fp16 (activations, weights, bias).
fbgemm_default_op_fp16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float16,
    weight_dtype=torch.float16,
    bias_dtype=torch.float16,
)

# Dynamic quantization: quantized input/weight, float output.
fbgemm_default_dynamic_int8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.float,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
    is_dynamic=True,
)

fbgemm_default_dynamic_float16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float,
    weight_dtype=torch.float16,
    bias_dtype=torch.float,
    is_dynamic=True,
)

# Weight-only quantization (e.g. embeddings): float activations.
fbgemm_weight_only_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint8,
)

fbgemm_weight_only_quint4x2_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint4x2,
)
76
+
77
+
78
+ # =====================
79
+ # | BACKEND CONFIGS |
80
+ # =====================
81
+
82
def get_fbgemm_backend_config() -> BackendConfig:
    """
    Return the `BackendConfig` for PyTorch's native FBGEMM backend.
    """
    # Group the dtype configs per operator family first, then register every
    # family on a single BackendConfig.
    conv_dtype_configs = [fbgemm_weighted_op_quint8_dtype_config]
    linear_dtype_configs = [
        fbgemm_weighted_op_quint8_dtype_config,
        fbgemm_default_dynamic_int8_dtype_config,
        fbgemm_default_dynamic_float16_dtype_config,
    ]
    binary_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    default_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    fixed_qparams_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    share_qparams_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    tensor_info_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    rnn_op_dtype_configs = [
        fbgemm_default_dynamic_int8_dtype_config,
        fbgemm_default_dynamic_float16_dtype_config,
    ]
    embedding_op_dtype_configs = [
        fbgemm_weight_only_quint8_dtype_config,
        fbgemm_weight_only_quint4x2_dtype_config,
    ]
    return (
        BackendConfig("fbgemm")
        .set_backend_pattern_configs(_get_conv_configs(conv_dtype_configs))
        .set_backend_pattern_configs(_get_linear_configs(linear_dtype_configs))
        .set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtype_configs))
        .set_backend_pattern_config(_get_cat_config(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_default_op_configs(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_op_dtype_configs))
        .set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_op_dtype_configs))
        .set_backend_pattern_configs(_get_tensor_info_op_configs(tensor_info_op_dtype_configs))
        .set_backend_pattern_configs(_get_bn_configs(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_rnn_op_configs(rnn_op_dtype_configs))
        .set_backend_pattern_configs(_get_embedding_op_configs(embedding_op_dtype_configs))
    )
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/native.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from ._common_operator_config_utils import (
3
+ _get_binary_op_configs,
4
+ _get_bn_configs,
5
+ _get_cat_config,
6
+ _get_conv_configs,
7
+ _get_default_op_configs,
8
+ _get_embedding_op_configs,
9
+ _get_fixed_qparams_op_configs,
10
+ _get_linear_configs,
11
+ _get_ln_configs,
12
+ _get_rnn_op_configs,
13
+ _get_share_qparams_op_configs,
14
+ _get_tensor_info_op_configs,
15
+ )
16
+ from .backend_config import BackendConfig, DTypeConfig
17
+
18
+ __all__ = [
19
+ "get_test_only_legacy_native_backend_config",
20
+ "default_op_quint8_dtype_config",
21
+ "default_op_fp16_dtype_config",
22
+ "default_dynamic_int8_dtype_config",
23
+ "default_dynamic_float16_dtype_config",
24
+ "input_output_only_quint8_dtype_config",
25
+ "weight_only_quint8_dtype_config",
26
+ "weight_only_quint4x2_dtype_config",
27
+ "get_native_backend_config",
28
+ "get_native_backend_config_dict",
29
+ "get_test_only_legacy_native_backend_config_dict",
30
+ ]
31
+
32
+ # ===================
33
+ # | DTYPE CONFIGS |
34
+ # ===================
35
+
36
# weighted op int8 dtype config
# this is config for ops that has quantized weights, like linear, conv
weighted_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)

# Weight-less ops: quint8 activations only.
default_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
)

# Everything in fp16 (activations, weights, bias).
default_op_fp16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float16,
    weight_dtype=torch.float16,
    bias_dtype=torch.float16,
)

# Dynamic quantization: quantized input/weight, float output.
# currently the dtype check is not yet enabled, so we provided the
# dtype_configs but it is not really used yet; we will enable it a bit later
# after we moved everything to backend_config_dict
default_dynamic_int8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.float,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
    is_dynamic=True,
)

default_dynamic_float16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float,
    weight_dtype=torch.float16,
    bias_dtype=torch.float,
    is_dynamic=True,
)

# Needed for LayerNorm and f.layer_norm, since currently the kernel only supports float weights
input_output_only_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.float,
    bias_dtype=torch.float,
)

# Weight-only quantization (e.g. embeddings): float activations.
weight_only_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint8,
)

weight_only_quint4x2_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint4x2,
)
98
+
99
+
100
+ # =====================
101
+ # | BACKEND CONFIGS |
102
+ # =====================
103
+
104
def get_test_only_legacy_native_backend_config() -> BackendConfig:
    """
    Return the `BackendConfig` for PyTorch Native backend (fbgemm/qnnpack) with various additional fp16 ops.
    """
    # Per-family dtype configs; the fp16 variants are what distinguishes this
    # "legacy test only" config from get_native_backend_config().
    conv_dtype_configs = [weighted_op_quint8_dtype_config]
    linear_dtype_configs = [
        weighted_op_quint8_dtype_config,
        default_dynamic_int8_dtype_config,
        default_dynamic_float16_dtype_config,
        default_op_fp16_dtype_config,
    ]
    binary_op_dtype_configs = [
        default_op_quint8_dtype_config,
        default_op_fp16_dtype_config,
    ]
    default_op_dtype_configs = [default_op_quint8_dtype_config]
    fixed_qparams_op_dtype_configs = [
        default_op_quint8_dtype_config,
        default_op_fp16_dtype_config,
    ]
    share_qparams_op_dtype_configs = [
        default_op_quint8_dtype_config,
        default_op_fp16_dtype_config,
    ]
    tensor_info_op_dtype_configs = [
        default_op_quint8_dtype_config,
    ]
    rnn_op_dtype_configs = [
        default_dynamic_int8_dtype_config,
        default_dynamic_float16_dtype_config,
    ]
    embedding_op_dtype_configs = [
        weight_only_quint8_dtype_config,
        weight_only_quint4x2_dtype_config,
    ]
    layer_norm_op_dtype_configs = [input_output_only_quint8_dtype_config]
    return (
        BackendConfig("_native_and_fp16")
        .set_backend_pattern_configs(_get_conv_configs(conv_dtype_configs))
        .set_backend_pattern_configs(_get_linear_configs(linear_dtype_configs))
        .set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtype_configs))
        .set_backend_pattern_config(_get_cat_config(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_default_op_configs(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_op_dtype_configs))
        .set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_op_dtype_configs))
        .set_backend_pattern_configs(_get_tensor_info_op_configs(tensor_info_op_dtype_configs))
        .set_backend_pattern_configs(_get_bn_configs(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_ln_configs(layer_norm_op_dtype_configs))
        .set_backend_pattern_configs(_get_rnn_op_configs(rnn_op_dtype_configs))
        .set_backend_pattern_configs(_get_embedding_op_configs(embedding_op_dtype_configs))
    )
153
+
154
def get_native_backend_config() -> BackendConfig:
    """
    Return the `BackendConfig` for PyTorch Native backend (fbgemm/qnnpack).
    """
    # TODO: express this BackendConfig as a union of the FBGEMM and QNNPACK BackendConfigs
    conv_dtype_configs = [weighted_op_quint8_dtype_config]
    linear_dtype_configs = [
        weighted_op_quint8_dtype_config,
        default_dynamic_int8_dtype_config,
        default_dynamic_float16_dtype_config,
    ]
    binary_op_dtype_configs = [default_op_quint8_dtype_config]
    default_op_dtype_configs = [default_op_quint8_dtype_config]
    fixed_qparams_op_dtype_configs = [default_op_quint8_dtype_config]
    share_qparams_op_dtype_configs = [default_op_quint8_dtype_config]
    tensor_info_op_dtype_configs = [default_op_quint8_dtype_config]
    rnn_op_dtype_configs = [
        default_dynamic_int8_dtype_config,
        default_dynamic_float16_dtype_config,
    ]
    embedding_op_dtype_configs = [
        weight_only_quint8_dtype_config,
        weight_only_quint4x2_dtype_config,
    ]
    layer_norm_op_dtype_configs = [input_output_only_quint8_dtype_config]
    return (
        BackendConfig("native")
        .set_backend_pattern_configs(_get_conv_configs(conv_dtype_configs))
        .set_backend_pattern_configs(_get_linear_configs(linear_dtype_configs))
        .set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtype_configs))
        .set_backend_pattern_config(_get_cat_config(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_default_op_configs(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_op_dtype_configs))
        .set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_op_dtype_configs))
        .set_backend_pattern_configs(_get_tensor_info_op_configs(tensor_info_op_dtype_configs))
        .set_backend_pattern_configs(_get_bn_configs(default_op_dtype_configs))
        .set_backend_pattern_configs(_get_ln_configs(layer_norm_op_dtype_configs))
        .set_backend_pattern_configs(_get_rnn_op_configs(rnn_op_dtype_configs))
        .set_backend_pattern_configs(_get_embedding_op_configs(embedding_op_dtype_configs))
    )
192
+
193
def get_native_backend_config_dict():
    """
    Return the `BackendConfig` for PyTorch Native backend (fbgemm/qnnpack) in dictionary form.
    """
    config = get_native_backend_config()
    return config.to_dict()
198
+
199
def get_test_only_legacy_native_backend_config_dict():
    """
    Return the `BackendConfig` for PyTorch Native backend (fbgemm/qnnpack) with various additional
    fp16 ops in dictionary form.
    """
    config = get_test_only_legacy_native_backend_config()
    return config.to_dict()
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fuser_method_mappings.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ import torch.ao.nn.intrinsic as nni
3
+
4
+ from typing import Any, Union, Callable, List, Tuple, Dict, Optional, Type
5
+ from torch.ao.quantization.utils import Pattern, get_combined_dict, MatchAllNode
6
+ import itertools
7
+
8
+ __all__ = [
9
+ "fuse_conv_bn",
10
+ "fuse_conv_bn_relu",
11
+ "fuse_linear_bn",
12
+ "fuse_convtranspose_bn",
13
+ "get_fuser_method",
14
+ "get_fuser_method_new",
15
+ ]
16
+
17
def fuse_conv_bn(is_qat, conv, bn):
    r"""Return the fused conv and bn modules.
    Given the conv and bn modules, fuses them and returns the fused module

    Args:
        is_qat: a flag for whether we are using quantization aware training fusion
                or post training quantization fusion
        conv: Module instance of type conv1d/conv2d/conv3d
        bn: Spatial BN instance that needs to be fused with the conv

    Examples::

        >>> m1 = nn.Conv2d(10, 20, 3)
        >>> b1 = nn.BatchNorm2d(20)
        >>> # xdoctest: +SKIP
        >>> m2 = fuse_conv_bn(False, m1, b1)
    """
    assert conv.training == bn.training, \
        "Conv and BN both must be in the same mode (train or eval)."

    fused_module_class_map = {
        nn.Conv1d: nni.ConvBn1d,
        nn.Conv2d: nni.ConvBn2d,
        nn.Conv3d: nni.ConvBn3d,
    }

    if is_qat:
        # QAT keeps the BN inside the fused module, so its statistics and
        # affine parameters must be usable as-is.
        assert bn.num_features == conv.out_channels, 'Output channel of Conv2d must match num_features of BatchNorm2d'
        assert bn.affine, 'Only support fusing BatchNorm2d with affine set to True'
        assert bn.track_running_stats, 'Only support fusing BatchNorm2d with tracking_running_stats set to True'
        # Bug fix: dropped the redundant parentheses around `type(conv)`.
        fused_module_class = fused_module_class_map.get(type(conv), None)
        if fused_module_class is not None:
            return fused_module_class(conv, bn)
        else:
            raise NotImplementedError(f"Cannot fuse train modules: {(conv, bn)}")
    else:
        # PTQ (eval) fusion folds the BN parameters directly into the conv weights.
        return nn.utils.fuse_conv_bn_eval(conv, bn)
54
+
55
def fuse_conv_bn_relu(is_qat, conv, bn, relu):
    r"""Return the fused conv and bn modules.

    Given the conv and bn modules, fuses them and returns the fused module

    Args:
        is_qat: a flag for whether we are using quantization aware training fusion
                or post training quantization fusion
        conv: Module instance of type conv1d/conv2d/conv3d
        bn: Spatial BN instance that needs to be fused with the conv
        relu: ReLU instance that follows the bn

    Examples::

        >>> m1 = nn.Conv2d(10, 20, 3)
        >>> b1 = nn.BatchNorm2d(20)
        >>> r1 = nn.ReLU(inplace=False)
        >>> # xdoctest: +SKIP
        >>> m2 = fuse_conv_bn_relu(False, m1, b1, r1)
    """
    assert conv.training == bn.training == relu.training, \
        "Conv and BN both must be in the same mode (train or eval)."
    fused_module : Optional[Type[nn.Sequential]] = None
    if is_qat:
        # QAT keeps the BN inside the fused module, so it must be usable as-is.
        map_to_fused_module_train = {
            nn.Conv1d: nni.ConvBnReLU1d,
            nn.Conv2d: nni.ConvBnReLU2d,
            nn.Conv3d: nni.ConvBnReLU3d,
        }
        assert bn.num_features == conv.out_channels, 'Output channel of Conv must match num_features of BatchNorm'
        assert bn.affine, 'Only support fusing BatchNorm with affine set to True'
        assert bn.track_running_stats, 'Only support fusing BatchNorm with tracking_running_stats set to True'
        fused_module = map_to_fused_module_train.get(type(conv), None)
        if fused_module is not None:
            return fused_module(conv, bn, relu)
        else:
            raise NotImplementedError(f"Cannot fuse train modules: {(conv, bn, relu)}")
    else:
        # PTQ folds the BN into the conv weights first, then pairs the result
        # with the ReLU in the fused eval module.
        map_to_fused_module_eval = {
            nn.Conv1d: nni.ConvReLU1d,
            nn.Conv2d: nni.ConvReLU2d,
            nn.Conv3d: nni.ConvReLU3d,
        }
        fused_module = map_to_fused_module_eval.get(type(conv), None)
        if fused_module is not None:
            fused_conv = nn.utils.fusion.fuse_conv_bn_eval(conv, bn)
            return fused_module(fused_conv, relu)
        else:
            raise NotImplementedError(f"Cannot fuse eval modules: {(conv, bn, relu)}")
103
+
104
def fuse_linear_bn(is_qat, linear, bn):
    r"""Return the fused linear and bn modules.
    Given the linear and bn modules, fuses them and returns the fused module

    Args:
        is_qat: a flag for whether we are using quantization aware training fusion
                or post training quantization fusion
        linear: Module instance of type Linear
        bn: BatchNorm1d instance that needs to be fused with the linear layer

    Examples::

        >>> m1 = nn.Linear(20, 10)
        >>> b1 = nn.BatchNorm1d(10)
        >>> # xdoctest: +SKIP
        >>> m2 = fuse_linear_bn(False, m1, b1)
    """
    assert linear.training == bn.training, \
        "Linear and BN both must be in the same mode (train or eval)."

    if is_qat:
        # QAT keeps the BN inside the fused module, so its statistics and
        # affine parameters must be usable as-is.
        assert bn.num_features == linear.out_features, \
            "Output features of Linear must match num_features of BatchNorm1d"
        assert bn.affine, "Only support fusing BatchNorm1d with affine set to True"
        assert bn.track_running_stats, \
            "Only support fusing BatchNorm1d with tracking_running_stats set to True"
        return nni.LinearBn1d(linear, bn)
    else:
        # PTQ folds the BN parameters into the linear weights.
        return nn.utils.fusion.fuse_linear_bn_eval(linear, bn)
133
+
134
def fuse_convtranspose_bn(is_qat, convt, bn):
    r"""Return the fused ConvTranspose and bn modules.
    Given ConvTranspose and bn modules, fuses them and returns the fused module

    Args:
        is_qat: a flag for whether we are using quantization aware training fusion
                or post training quantization fusion (QAT is not supported yet)
        convt: Module instance of type ConvTransposeNd
        bn: BatchNormNd instance that needs to be fused with the linear layer.
            batch norm N should match the ConvTranspose N

    Examples::

        >>> m1 = nn.ConvTranspose2d(10, 20, 3)
        >>> b1 = nn.BatchNorm2d(20)
        >>> # xdoctest: +SKIP
        >>> m2 = fuse_convtranspose_bn(False, m1, b1)
    """
    assert convt.training == bn.training, \
        "ConvTranspose and BN both must be in the same mode (train or eval)."

    if is_qat:
        raise Exception("Fusing ConvTranspose+BatchNorm not yet supported in QAT.")
    else:
        # transpose=True makes the helper fold the BN over the
        # (in_channels, out_channels, ...) weight layout of ConvTransposeNd.
        return nn.utils.fusion.fuse_conv_bn_eval(convt, bn, transpose=True)
157
+
158
+ def _sequential_wrapper2(sequential):
159
+ """Return a sequential wrapped that for is_qat and two modules.
160
+ Given a sequential class for two modules, return a function that takes
161
+ is_qat, and then two modules as argument, that ignores the is_qat flag
162
+ and always returns the sequential that combines the two input modules
163
+ """
164
+ def fuser_method(is_qat, m1, m2):
165
+ return sequential(m1, m2)
166
+ return fuser_method
167
+
168
# Default mapping from a tuple of module types to the fuser method that
# combines them into a single fused module.
_DEFAULT_OP_LIST_TO_FUSER_METHOD: Dict[Tuple, Union[nn.Sequential, Callable]] = {
    # conv + bn (+ relu): fold the batch norm into the conv
    (nn.Conv1d, nn.BatchNorm1d): fuse_conv_bn,
    (nn.Conv1d, nn.BatchNorm1d, nn.ReLU): fuse_conv_bn_relu,
    (nn.Conv2d, nn.BatchNorm2d): fuse_conv_bn,
    (nn.Conv2d, nn.BatchNorm2d, nn.ReLU): fuse_conv_bn_relu,
    (nn.Conv3d, nn.BatchNorm3d): fuse_conv_bn,
    (nn.Conv3d, nn.BatchNorm3d, nn.ReLU): fuse_conv_bn_relu,
    # op + relu pairs: simply wrap the two modules in the fused sequential type
    (nn.Conv1d, nn.ReLU): _sequential_wrapper2(nni.ConvReLU1d),
    (nn.Conv2d, nn.ReLU): _sequential_wrapper2(nni.ConvReLU2d),
    (nn.Conv3d, nn.ReLU): _sequential_wrapper2(nni.ConvReLU3d),
    (nn.Linear, nn.BatchNorm1d): fuse_linear_bn,
    (nn.Linear, nn.ReLU): _sequential_wrapper2(nni.LinearReLU),
    (nn.BatchNorm2d, nn.ReLU): _sequential_wrapper2(nni.BNReLU2d),
    (nn.BatchNorm3d, nn.ReLU): _sequential_wrapper2(nni.BNReLU3d),
    # conv transpose + bn: folds the bn (eval-mode fusion only)
    (nn.ConvTranspose1d, nn.BatchNorm1d): fuse_convtranspose_bn,
    (nn.ConvTranspose2d, nn.BatchNorm2d): fuse_convtranspose_bn,
    (nn.ConvTranspose3d, nn.BatchNorm3d): fuse_convtranspose_bn,
}
186
+
187
def get_fuser_method(op_list, additional_fuser_method_mapping=None):
    """Look up the fuser method registered for the given tuple of module types.

    The default mapping is combined with ``additional_fuser_method_mapping``
    (entries in the latter win); an AssertionError is raised when no fuser
    method is registered for ``op_list``.
    """
    extra_mapping = additional_fuser_method_mapping or {}
    combined = get_combined_dict(_DEFAULT_OP_LIST_TO_FUSER_METHOD, extra_mapping)
    fuser_method = combined.get(op_list, None)
    assert fuser_method is not None, f"did not find fuser method for: {op_list} "
    return fuser_method
200
+
201
+ def _reverse2(f):
202
+ def reversed(is_qat, x, y):
203
+ return f(is_qat, y, x)
204
+ return reversed
205
+
206
+ def _reverse3(f):
207
+ def reversed(is_qat, x, w):
208
+ y, z = w
209
+ return f(is_qat, z, y, x)
210
+ return reversed
211
+
212
+ def _get_valid_patterns(op_pattern):
213
+ """Return a list of valid patterns generated from the op_pattern.
214
+
215
+ Returns a list of valid patterns generated from the op_pattern,
216
+ since MatchAllNode can match all types of nodes,
217
+ e.g. pattern (torch.nn.Conv2d, torch.add) should also be able to match keys like
218
+ (MatchAllNode, torch.add) and (torch.nn.Conv2d, MatchAllNode)
219
+
220
+ Example Input:
221
+ (torch.add, (torch.nn.ReLU, torch.nn.Conv2d))
222
+
223
+ Example Output:
224
+ [(torch.add, (torch.nn.ReLU, torch.nn.Conv2d)),
225
+ (torch.add, (torch.nn.ReLU, MatchAllNode)),
226
+ (torch.add, (MatchAllNode, torch.nn.Conv2d)),
227
+ (torch.add, (MatchAllNode, MatchAllNode)),
228
+ (MatchAllNode, (torch.nn.ReLU, torch.nn.Conv2d)),
229
+ (MatchAllNode, (torch.nn.ReLU, MatchAllNode)),
230
+ (MatchAllNode, (MatchAllNode, torch.nn.Conv2d)),
231
+ (MatchAllNode, (MatchAllNode, MatchAllNode)),
232
+ ]
233
+ """
234
+ result: List[Any]
235
+ if isinstance(op_pattern, (tuple, list)):
236
+ sub_combs = []
237
+ for sub_pattern in op_pattern:
238
+ sub_combs.append(_get_valid_patterns(sub_pattern))
239
+ result = list(itertools.product(*sub_combs))
240
+ else:
241
+ result = [op_pattern, MatchAllNode]
242
+ return result
243
+
244
def get_fuser_method_new(
        op_pattern: Pattern,
        fuser_method_mapping: Dict[Pattern, Union[nn.Sequential, Callable]]):
    """Get fuser method.

    This will be made default after we deprecate the get_fuser_method
    Would like to implement this first and have a separate PR for deprecation

    Bug fix: the loop previously rebound the ``op_pattern`` parameter, so on a
    lookup failure the assertion message reported the last candidate (all
    MatchAllNode) instead of the pattern the caller actually asked about.
    """
    op_patterns = _get_valid_patterns(op_pattern)
    fuser_method = None
    for candidate in op_patterns:
        fuser_method = fuser_method_mapping.get(candidate, None)
        if fuser_method is not None:
            break
    assert fuser_method is not None, f"did not find fuser method for: {op_pattern} "
    return fuser_method
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .prepare import prepare
2
+ from .convert import convert
3
+ from .fuse import fuse
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/_decomposed.cpython-311.pyc ADDED
Binary file (46.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/_equalize.cpython-311.pyc ADDED
Binary file (40.1 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/custom_config.cpython-311.pyc ADDED
Binary file (24.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/fuse.cpython-311.pyc ADDED
Binary file (7.17 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/graph_module.cpython-311.pyc ADDED
Binary file (10.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/lower_to_qnnpack.cpython-311.pyc ADDED
Binary file (1.02 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/match_utils.cpython-311.pyc ADDED
Binary file (8.89 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/pattern_utils.cpython-311.pyc ADDED
Binary file (4.54 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/prepare.cpython-311.pyc ADDED
Binary file (65.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/qconfig_mapping_utils.cpython-311.pyc ADDED
Binary file (14.8 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/__pycache__/quantize_handler.cpython-311.pyc ADDED
Binary file (9.98 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_equalize.py ADDED
@@ -0,0 +1,820 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+
3
+ from collections import namedtuple
4
+ from typing import Any, Dict, List, Optional, Tuple
5
+
6
+ import torch
7
+ import torch.nn as nn
8
+ import torch.nn.functional as F
9
+ import torch.ao.nn.intrinsic as nni
10
+ from torch.fx import GraphModule
11
+ from torch.fx.graph import Node
12
+ from torch.ao.quantization.fx.graph_module import _get_observed_graph_module_attr
13
+
14
+ from ..observer import _with_args, ObserverBase, PerChannelMinMaxObserver
15
+ from ..utils import _parent_name, check_min_max_valid
16
+
17
+ from .utils import (
18
+ get_new_attr_name_with_prefix,
19
+ maybe_get_next_module,
20
+ node_arg_is_weight,
21
+ )
22
+
23
# User-extensible registry of custom module classes that support input-weight
# equalization; consulted by custom_module_supports_equalization().
CUSTOM_MODULE_SUPP_LIST: List[Any] = []
24
+
25
def reshape_scale(scale: torch.Tensor, axis: int, input: torch.Tensor) -> torch.Tensor:
    """Return ``scale`` reshaped so it broadcasts against ``input`` along ``axis``.

    The result has the same rank as ``input``, with size 1 in every dimension
    except ``axis``, where it matches ``input``'s size.
    """
    broadcast_shape = [
        input.size(axis) if dim == axis else 1 for dim in range(input.ndim)
    ]
    return scale.view(broadcast_shape)
31
+
32
# Maps each per-tensor qscheme to its per-channel counterpart; used when
# building the per-column observers that back the equalization observers.
# NOTE: the "qsheme" spelling (sic) is kept as-is because this name is
# referenced throughout the module.
qsheme_mapping_per_tensor_to_per_channel = {
    torch.per_tensor_affine: torch.per_channel_affine,
    torch.per_tensor_symmetric: torch.per_channel_symmetric,
}
36
+
37
+
38
class _InputEqualizationObserver(nn.Module):
    r"""Observer for tracking the running min/max values of input columns, and
    computing the quantization parameters for the overall min/max input values.

    Args:
        dtype: Quantized data type
        qscheme: Quantization scheme (must be per-tensor; it is mapped to the
            corresponding per-channel scheme internally)
        quant_min: Minimum quantization value. If unspecified, it will
            follow the 8-bit setup.
        quant_max: Maximum quantization value. If unspecified, it will
            follow the 8-bit setup.

    The running minimum/maximum :math:`x_\text{min/max}` are computed in the
    same way as :class:`~torch.ao.quantization.observer.PerChannelMinMaxObserver`,
    with the difference that the running min/max values are stored per column.
    This observer is intended to be used along with a WeightEqualizationObserver
    to calculate the equalization scale.
    """

    def __init__(self, dtype=torch.quint8, qscheme=torch.per_tensor_affine,
                 quant_min=None, quant_max=None, factory_kwargs=None) -> None:
        super().__init__()

        if qscheme not in {torch.per_tensor_affine, torch.per_tensor_symmetric}:
            raise TypeError("Input qscheme must be per-tensor")

        self.dtype = dtype
        self.qscheme = qscheme

        # Track per-column statistics (ch_axis=1) using the per-channel
        # counterpart of the requested per-tensor qscheme.
        per_channel_qscheme = qsheme_mapping_per_tensor_to_per_channel[qscheme]
        self.input_obs = PerChannelMinMaxObserver(ch_axis=1, dtype=dtype,
                                                  qscheme=per_channel_qscheme,
                                                  quant_min=quant_min,
                                                  quant_max=quant_max,
                                                  factory_kwargs=factory_kwargs)

        # A scalar 1 is the sentinel for "no equalization scale computed yet".
        self.equalization_scale = torch.tensor(1)
        self.equalization_shape: List[int] = []

    def forward(self, x_orig):
        # Only 2D (Linear) through 5D (Conv3d) activations are supported.
        if not (x_orig.ndim >= 2 and x_orig.ndim <= 5):
            raise ValueError("InputEqualizationObserver only supports Linear and Conv layers")

        # Calculate the shape needed to reshape the equalization scale later (needed for Conv layers)
        self.equalization_shape = [1] * x_orig.ndim
        self.equalization_shape[1] = x_orig.size(1)

        return self.input_obs(x_orig)

    def get_input_minmax(self):
        # Per-column running (min, max) recorded by the inner observer.
        return (self.input_obs.min_val, self.input_obs.max_val)

    def set_equalization_scale(self, equalization_scale):
        # Reshape the equalization scale along axis=1 so that it can be
        # multiplied with the input along axis=1.
        # A scalar 1 means "no scale computed"; keep the sentinel unchanged.
        if equalization_scale.nelement() == 1 and equalization_scale == torch.tensor(1):
            return
        self.equalization_scale = torch.reshape(equalization_scale, self.equalization_shape)

    def calculate_scaled_minmax(self):
        r""" Returns the scaled min/max inputs, or (None, None) if no
        equalization scale has been computed yet.
        """
        if self.equalization_scale.nelement() == 1 and self.equalization_scale == torch.tensor(1):
            warnings.warn(
                "Must call calculate_equalization_scale before calling calculate_scaled_minmax. " +
                "Will not scale the next quantization observer."
            )
            return None, None

        # Calculate qparams for the scaled min/max inputs
        # Scale the input by the equalization scale located at the same column
        # index
        (min_inputs, max_inputs) = self.get_input_minmax()
        equalization_scale_reshaped = reshape_scale(self.equalization_scale, 0, min_inputs)
        min_input_scaled = torch.min(torch.mul(min_inputs, equalization_scale_reshaped))
        max_input_scaled = torch.max(torch.mul(max_inputs, equalization_scale_reshaped))

        return min_input_scaled, max_input_scaled

    with_args = classmethod(_with_args)
118
+
119
+
120
+ class _WeightEqualizationObserver(nn.Module):
121
+ r"""Observer for tracking the running min/max values of weight columns and
122
+ rows, and computing the quantization parameters for the weight rows.
123
+
124
+ Args:
125
+ dtype: Quantized data type
126
+ qscheme: Quantization scheme
127
+ quant_min: Minimum quantization value. If unspecified, it will
128
+ follow the 8-bit setup.
129
+ quant_max: Maximum quantization value. If unspecified, it will
130
+ follow the 8-bit setup.
131
+
132
+ This observer is made up of 1 PerChannelMinMaxObserver `weight_col_obs` used
133
+ to record the running minimum and maximum of columns of incoming weight
134
+ tensors. This observer is intended to be used along with an
135
+ InputEqualizationObserver to calculate the equalization scale.
136
+
137
+ The running minimum/maximum :math:`w_\text{min/max}` are computed in the
138
+ same way as :class:`~torch.ao.quantization.observer.PerChannelMinMaxObserver`.
139
+ """
140
+
141
+ def __init__(self, dtype=torch.qint8, qscheme=torch.per_tensor_affine, quant_min=None,
142
+ quant_max=None, factory_kwargs=None) -> None:
143
+ super().__init__()
144
+
145
+ self.dtype = dtype
146
+ self.qscheme = qscheme
147
+ self.ch_axis = 1
148
+
149
+ per_channel_qscheme = qscheme
150
+ if qscheme in {torch.per_tensor_affine, torch.per_tensor_symmetric}:
151
+ per_channel_qscheme = qsheme_mapping_per_tensor_to_per_channel[qscheme]
152
+ self.weight_col_obs = PerChannelMinMaxObserver(ch_axis=1, dtype=dtype,
153
+ qscheme=per_channel_qscheme,
154
+ quant_min=quant_min,
155
+ quant_max=quant_max,
156
+ factory_kwargs=factory_kwargs)
157
+
158
+ self.equalization_scale = torch.tensor(1)
159
+
160
+ def forward(self, w_orig):
161
+ if not (w_orig.ndim >= 2 and w_orig.ndim <= 5):
162
+ raise ValueError("InputEqualizationObserver only supports Linear and Conv layers")
163
+
164
+ return self.weight_col_obs(w_orig)
165
+
166
+ def get_weight_col_minmax(self):
167
+ return (self.weight_col_obs.min_val, self.weight_col_obs.max_val)
168
+
169
+ def set_equalization_scale(self, equalization_scale):
170
+ self.equalization_scale = equalization_scale
171
+
172
+ with_args = classmethod(_with_args)
173
+
174
+
175
def calculate_equalization_scale(input_obs: _InputEqualizationObserver,
                                 weight_obs: _WeightEqualizationObserver) -> torch.Tensor:
    r""" Calculates the equalization scale and sets the equalization_scale value
    in the observers.

    The scale for each column is sqrt(weight_range / input_range), computed from
    the per-column min/max values recorded by the two observers.

    Args:
        input_obs: Observer that tracks the ranges for the input columns
        weight_obs: Observer that tracks the ranges for the weight columns
    """

    min_inputs, max_inputs = input_obs.get_input_minmax()
    min_weights, max_weights = weight_obs.get_weight_col_minmax()

    # Both observers must have seen data before a scale can be computed.
    inputs_valid = check_min_max_valid(min_inputs, max_inputs)
    weights_valid = check_min_max_valid(min_weights, max_weights)
    if not (inputs_valid and weights_valid):
        warnings.warn(
            "Must run observer before calling calculate_equalization_scale. " +
            "Returning default equalization scale torch.tensor(1)."
        )
        return torch.tensor(1)

    if min_inputs.shape != min_weights.shape:
        raise ValueError(
            "Input and Weight must have the same column dimension. " +
            f"Found {min_inputs.shape} and {min_weights.shape} shapes instead."
        )

    weight_ranges = max_weights - min_weights
    input_ranges = max_inputs - min_inputs
    equalization_scale = torch.sqrt(weight_ranges / input_ranges)
    # A 0/inf/nan entry would zero out or blow up an entire column, so fall
    # back to the identity scale of 1 for those entries.
    equalization_scale = torch.where(
        equalization_scale == 0.,
        torch.ones_like(equalization_scale),
        equalization_scale,
    )
    return torch.nan_to_num(equalization_scale, nan=1, posinf=1, neginf=1)
206
+
207
+
208
class EqualizationQConfig(namedtuple('EqualizationQConfig', ['input_activation', 'weight'])):
    """
    Describes how to quantize a layer or a part of the network specifically for
    input-weight equalization by providing settings (observer classes) for
    inputs, outputs, and weights.

    Note that EqualizationQConfig needs to contain observer **classes** (like
    MinMaxObserver) or a callable that returns instances on invocation, not the
    concrete observer instances themselves.
    Quantization function will instantiate observers multiple times for each of
    the layers.

    Observer classes have usually reasonable default arguments, but they can be
    overwritten with `with_args` method (that behaves like functools.partial):

    my_qconfig = EqualizationQConfig(input_activation=_InputEqualizationObserver.with_args(dtype=torch.qint8),
                                    weight=_WeightEqualizationObserver.with_args(dtype=torch.qint8))
    """
    def __new__(cls, input_activation=torch.nn.Identity, weight=torch.nn.Identity):
        # Reject already-constructed observers: the quantization machinery must
        # be able to instantiate a fresh observer per layer.
        if any(isinstance(obs, nn.Module) for obs in (input_activation, weight)):
            raise ValueError("EqualizationQConfig received observer instance, please pass observer class instead. " +
                             "Use MyObserver.with_args(x=1) to override arguments to constructor if needed")
        return super().__new__(cls, input_activation, weight)
232
+
233
+
234
# Default observer partials and qconfig for input-weight equalization:
# symmetric quint8 activations and symmetric per-channel qint8 weights.
input_equalization_observer = _InputEqualizationObserver.with_args(
    dtype=torch.quint8, qscheme=torch.per_tensor_symmetric)
weight_equalization_observer = _WeightEqualizationObserver.with_args(
    dtype=torch.qint8, qscheme=torch.per_channel_symmetric)
default_equalization_qconfig = EqualizationQConfig(input_activation=input_equalization_observer,
                                                   weight=weight_equalization_observer)
240
+
241
+
242
def fused_module_supports_equalization(module) -> bool:
    """Return True if ``module`` is a fused Linear/Conv+ReLU type that can be equalized."""
    supported_fused_types = (nni.LinearReLU, nni.ConvReLU1d, nni.ConvReLU2d, nni.ConvReLU3d)
    return type(module) in supported_fused_types
245
+
246
def nn_module_supports_equalization(module) -> bool:
    """Return True if ``module`` is a plain nn.Linear/nn.ConvNd type that can be equalized."""
    supported_nn_types = (nn.Linear, nn.Conv1d, nn.Conv2d, nn.Conv3d)
    return type(module) in supported_nn_types
249
+
250
def custom_module_supports_equalization(module) -> bool:
    """Return True if ``module``'s type was registered in CUSTOM_MODULE_SUPP_LIST."""
    registered_types = CUSTOM_MODULE_SUPP_LIST
    return type(module) in registered_types
253
+
254
+
255
def node_supports_equalization(node: Node, modules) -> bool:
    """ Checks if the current node supports equalization.
    Currently we only support nn.Linear/F.linear and nn.ConvNd/F.convNd layers.
    """
    if node.op == 'call_module':
        # Look the module up once and test it against every supported family.
        module = modules[str(node.target)]
        return (nn_module_supports_equalization(module)
                or fused_module_supports_equalization(module)
                or custom_module_supports_equalization(module))
    if node.op == 'call_function':
        return node.target in [F.linear, F.conv1d, F.conv2d, F.conv3d]
    return False
266
+
267
def is_equalization_observer(observer: nn.Module) -> bool:
    """Return True if ``observer`` is one of the two equalization observer types."""
    eq_observer_types = (_InputEqualizationObserver, _WeightEqualizationObserver)
    return isinstance(observer, eq_observer_types)
269
+
270
+
271
+ ###############################################################################
272
+ # Functions for equalization during convert #
273
+ ###############################################################################
274
+
275
def get_op_node_and_weight_eq_obs(
    input_eq_obs_node: Node,
    model: GraphModule,
    modules: Dict[str, nn.Module]
) -> Tuple[Optional[Node], Optional[_WeightEqualizationObserver]]:
    """ Gets the following weight equalization observer. There should always
    exist a weight equalization observer after an input equalization observer.

    Returns the operation node that follows the input equalization observer node
    and the weight equalization observer, or (None, None) if no functional
    weight observer can be located.
    """

    # Find the op node that comes directly after the input equalization observer
    op_node = None
    for user in input_eq_obs_node.users.keys():
        if node_supports_equalization(user, modules):
            op_node = user
            break

    assert op_node is not None
    if op_node.op == 'call_module':
        # If the op_node is a nn.Linear layer, then it must have a
        # WeightEqualizationObserver configuration
        maybe_equalization_node_name_to_config = _get_observed_graph_module_attr(model, "equalization_node_name_to_qconfig")
        assert maybe_equalization_node_name_to_config is not None
        equalization_node_name_to_qconfig: Dict[str, Any] = maybe_equalization_node_name_to_config  # type: ignore[assignment]
        assert equalization_node_name_to_qconfig.get(op_node.name, None) is not None
        # Instantiate a fresh weight observer from the op's equalization
        # qconfig; it is calibrated later by the caller
        # (update_obs_for_equalization).
        weight_eq_obs = equalization_node_name_to_qconfig.get(op_node.name, None).weight()

        assert isinstance(weight_eq_obs, _WeightEqualizationObserver)
        return op_node, weight_eq_obs

    elif op_node.op == 'call_function':
        # Functional ops already carry their weight equalization observer as a
        # module in the graph; locate it through the op's weight argument.
        weight_node = maybe_get_weight_eq_obs_node(op_node, modules)
        if weight_node is not None:
            weight_eq_obs = modules[str(weight_node.target)]
            assert isinstance(weight_eq_obs, _WeightEqualizationObserver)
            return op_node, weight_eq_obs

    return None, None
315
+
316
def maybe_get_weight_eq_obs_node(op_node: Node, modules: Dict[str, nn.Module]) -> Optional[Node]:
    """ Return the weight equalization observer node feeding ``op_node``, if any.
    """
    assert op_node.op == 'call_function'
    # The weight argument of a functional op is produced by its weight
    # equalization observer; find the first such argument.
    weight_arg = next(
        (arg for arg in op_node.args if node_arg_is_weight(op_node, arg)), None)
    if weight_arg is None:
        return None
    assert (isinstance(weight_arg, Node) and weight_arg.op == 'call_module' and
            isinstance(modules[str(weight_arg.target)], _WeightEqualizationObserver))
    return weight_arg
326
+
327
def maybe_get_next_input_eq_obs(node: Node, modules: Dict[str, nn.Module]) -> Optional[_InputEqualizationObserver]:
    """ Gets the following input equalization observer if it exists.

    For example, in the case of connecting linear layers:
        x -> inp_obs1 -> eq_obs1 -> linear1 -> out_obs1 -> eq_obs2 -> linear2 -> out_obs2
    If the node being passed in is the linear1 node, then we want to return eq_obs2,
    the following equalization observer for linear2.

    However, if there are no connecting layers:
        x -> inp_obs1 -> eq_obs1 -> linear1 -> out_obs1 -> add
    Then we want to return None.

    In the case of an unfused linear-relu layer with a connecting linear layer:
        linear1 -> relu -> out_obs1 -> eq_obs2 -> linear2 -> out_obs2
    Since it is unfused, we want to skip over the relu layer and return eq_obs2,
    the following equalization observer for linear2.
    """

    assert node_supports_equalization(node, modules)

    # Locate the following nn.ReLU or F.relu node if it exists
    maybe_relu_node = maybe_get_next_module(node, modules, nn.ReLU)
    if maybe_relu_node is None:
        maybe_relu_node = maybe_get_next_module(node, modules, target_functional_type=F.relu)

    # Locate the following output observer if it exists.
    # We will skip the relu node if it exists.
    maybe_obs_node = (
        maybe_get_next_module(node, modules, ObserverBase)
        if maybe_relu_node is None
        else maybe_get_next_module(maybe_relu_node, modules, ObserverBase)
    )
    if maybe_obs_node is None:
        return None

    maybe_eq_obs_node = maybe_get_next_module(maybe_obs_node, modules, _InputEqualizationObserver)
    if maybe_eq_obs_node is None:
        return None

    # NOTE(review): this indexes `modules` by str(node) (the node's *name*),
    # not str(node.target); it presumably relies on observer node names
    # matching their module paths — confirm against how observers are named
    # during prepare.
    maybe_eq_obs = modules[str(maybe_eq_obs_node)]
    assert isinstance(maybe_eq_obs, _InputEqualizationObserver)
    return maybe_eq_obs
369
+
370
def maybe_get_next_equalization_scale(node: Node, modules: Dict[str, nn.Module]) -> Optional[torch.Tensor]:
    """ Return the next InputEqualizationObserver's equalization scale, if set.

    This is used in the case where there are two connecting linear layers:
        linear1 -> LinearOutObs -> InputEqObs -> linear2
    Given linear1, locate the InputEqObs and return its scale; return None when
    no such observer exists or its scale is still the default of 1.
    """
    next_inp_eq_obs = maybe_get_next_input_eq_obs(node, modules)
    if next_inp_eq_obs is None:
        return None
    scale = next_inp_eq_obs.equalization_scale
    # A scalar value of 1 is the "never computed" sentinel; treat it as absent.
    if scale.nelement() == 1 and scale == torch.tensor(1):
        return None
    return scale
385
+
386
def scale_input_observer(node: Node, modules: Dict[str, nn.Module]) -> None:
    """ Copy the scaled min/max values from the input equalization observer at
    ``node`` into the preceding input quantization observer, so that the
    quantization parameters computed later account for the equalization scale.
    """
    eq_obs = modules[str(node.target)]
    assert isinstance(eq_obs, _InputEqualizationObserver)

    quant_obs_node = node.args[0]
    assert isinstance(quant_obs_node, Node)

    quant_obs = modules[str(quant_obs_node.target)]
    if not isinstance(quant_obs, ObserverBase):
        return

    scaled_min, scaled_max = eq_obs.calculate_scaled_minmax()
    # (None, None) means no equalization scale was computed; leave the
    # quantization observer untouched in that case.
    if scaled_min is None and scaled_max is None:
        return
    quant_obs.min_val = scaled_min
    quant_obs.max_val = scaled_max
406
+
407
def scale_weight_node(
    node: Node,
    modules: Dict[str, nn.Module],
    equalization_scale: torch.Tensor,
    next_equalization_scale: Optional[torch.Tensor],
) -> None:
    """ Scale the weights for input-weight equalization by multiplying the
    weight by 1/equalization_scale and next_equalization_scale

    Args:
        node: Current node whose weights we want to scale
        modules: Mapping from module path to module for the observed model
        equalization_scale: Current node's calculated equalization scale
            (may be None, in which case this function is a no-op)
        next_equalization_scale: Next node's calculated equalization scale if
            the following node needs to be equalized, 1 otherwise
    """
    if equalization_scale is None:
        return

    # For fused modules (e.g. LinearReLU) the weight lives on the first
    # submodule.
    if fused_module_supports_equalization(modules[str(node.target)]):
        op_module = modules[str(node.target)][0]  # type: ignore[index]
    else:
        op_module = modules[str(node.target)]
    assert nn_module_supports_equalization(op_module) or custom_module_supports_equalization(op_module)

    # Scale the weights for input-weight equalization
    # If the following layer needs to be equalized then we will multiply its scale
    weight = op_module.weight
    assert isinstance(weight, torch.Tensor)

    # Scale the weights by the reciprocal of the equalization scale
    # Reshape the equalization scale so that we can multiply it to the weight along axis=1
    equalization_scale_reshaped = reshape_scale(equalization_scale, 1, weight)
    scaled_weight = torch.mul(weight, torch.reciprocal(equalization_scale_reshaped))

    if next_equalization_scale is None:
        op_module.weight = nn.Parameter(scaled_weight)
        return

    # Multiply the weights row wise by the next equalization scale
    # Reshape the equalization scale so that we can multiply it to the weight along axis=0
    next_equalization_scale_reshaped = reshape_scale(next_equalization_scale, 0, weight)
    scaled_weight = torch.mul(scaled_weight, next_equalization_scale_reshaped)

    op_module.weight = nn.Parameter(scaled_weight)

    # Multiply the bias element wise by the next equalization scale
    bias = op_module.bias
    if bias is None:
        return
    assert isinstance(bias, torch.Tensor)

    # Reshape the equalization scale so that we can multiply it element-wise to the bias
    next_equalization_scale_reshaped = reshape_scale(next_equalization_scale, 0, bias)
    scaled_bias = torch.mul(bias, next_equalization_scale_reshaped)
    op_module.bias = nn.Parameter(scaled_bias)
462
+
463
def scale_weight_functional(
    op_node: Node,
    model: GraphModule,
    modules: Dict[str, nn.Module],
    equalization_scale: torch.Tensor,
    next_equalization_scale: Optional[torch.Tensor],
) -> None:
    """ Scales the weight value for functional layers.

    ``equalization_scale`` may be None, in which case this function is a no-op.
    """
    if equalization_scale is None:
        return

    # From the given op_node, the path looks like:
    #   get_attr(weight) -> weight_quant_obs -> weight_eq_obs -> op_node
    # So we want to trace back from the op_node to get the equalization observer
    # node, then the quantization observer node, and then finally the weight
    # node which contains the weight values.

    # Get the equalization observer node
    weight_eq_obs_node = maybe_get_weight_eq_obs_node(op_node, modules)
    if weight_eq_obs_node is None:
        return

    # Get the quantization observer node
    weight_quant_obs_node = weight_eq_obs_node.args[0]
    if weight_quant_obs_node is None:
        return
    assert (isinstance(weight_quant_obs_node, Node) and
            isinstance(modules[str(weight_quant_obs_node.target)], ObserverBase))

    # Get the get_attr(weight) node
    weight_node = weight_quant_obs_node.args[0]
    if weight_node is None:
        return
    assert isinstance(weight_node, Node) and weight_node.op == 'get_attr'

    weight_parent_name, weight_name = _parent_name(weight_node.target)
    weight = getattr(modules[weight_parent_name], weight_name)

    # Scale the weights for input-weight equalization
    # If the following layer needs to be equalized then we will multiply its scale
    # Reshape the equalization scale so that we can multiply it to the weight along axis=1
    equalization_scale_reshaped = reshape_scale(equalization_scale, 1, weight)
    scaled_weight = torch.mul(weight, torch.reciprocal(equalization_scale_reshaped))

    if next_equalization_scale is None:
        setattr(modules[weight_parent_name], weight_name, scaled_weight)
        return

    # Multiply the weights row wise by the next equalization scale
    # Reshape the equalization scale so that we can multiply it to the weight along axis=0
    next_equalization_scale_reshaped = reshape_scale(next_equalization_scale, 0, scaled_weight)
    scaled_weight = torch.mul(scaled_weight, next_equalization_scale_reshaped)

    setattr(modules[weight_parent_name], weight_name, scaled_weight)
    # Sanity check: the graph's weight buffer must reflect the rescaled values.
    assert torch.allclose(model.get_buffer(str(weight_node.target)), scaled_weight)

    # Multiply the bias element wise by the next equalization scale
    bias_node = None
    for node in op_node.args:
        # Find the get_attr node containing the bias values
        if isinstance(node, Node) and node.op == 'get_attr' and 'bias' in node.name:
            bias_node = node
            break
    if bias_node is None:
        return

    bias_parent_name, bias_name = _parent_name(bias_node.target)
    bias = getattr(modules[bias_parent_name], bias_name)

    # Reshape the equalization scale so that we can multiply it element-wise to the bias
    next_equalization_scale_reshaped = reshape_scale(next_equalization_scale, 0, bias)
    scaled_bias = torch.mul(bias, next_equalization_scale_reshaped)
    setattr(modules[bias_parent_name], bias_name, scaled_bias)
537
+
538
def clear_weight_quant_obs_node(op_node: Node, modules: Dict[str, nn.Module]) -> None:
    """ Reset the min/max statistics of the weight quantization observer that
    feeds ``op_node``, so they can be re-collected on the rescaled weights.
    """
    weight_eq_obs_node = maybe_get_weight_eq_obs_node(op_node, modules)
    if weight_eq_obs_node is None:
        return

    quant_obs_node = weight_eq_obs_node.args[0]
    if quant_obs_node is None:
        return
    assert isinstance(quant_obs_node, Node)

    quant_obs = modules[str(quant_obs_node.target)]
    assert isinstance(quant_obs, ObserverBase)
    quant_obs.reset_min_max_vals()  # type: ignore[operator]
554
+
555
def remove_node(model: GraphModule, node: Node, prev_node: Node):
    """ Detach ``node`` from the graph by rewiring every one of its users to
    consume ``prev_node`` instead, then erase ``node``.
    """
    # Snapshot the users first: replace_input_with mutates node.users while we
    # iterate over it.
    for consumer in list(node.users.keys()):
        consumer.replace_input_with(node, prev_node)

    # With no remaining users the node can be safely erased.
    model.graph.erase_node(node)
567
+
568
def update_obs_for_equalization(model: GraphModule, modules: Dict[str, nn.Module]) -> Dict[str, _WeightEqualizationObserver]:
    """ Update all of the observer's equalization scale. For each
    InputEqualizationObserver, we will find the location of the next
    WeightEqualizationObserver, create it, and calculate the equalization scale
    based on the two observers.

    We will then return a dictionary mapping operation node names to
    the corresponding WeightEqualizationObservers for that operation.
    """
    weight_eq_obs_dict = {}
    for node in model.graph.nodes:
        if node.op == 'call_module' and isinstance(modules[node.target], _InputEqualizationObserver):
            input_eq_obs = modules[node.target]
            assert isinstance(input_eq_obs, _InputEqualizationObserver)
            op_node, weight_eq_obs = get_op_node_and_weight_eq_obs(node, model, modules)

            if op_node is None or weight_eq_obs is None:
                continue

            if op_node.op == 'call_module':
                # Calibrate the weight equalization observer since it has just
                # been created (functional weight observers already live in the
                # graph and were calibrated during calibration runs).
                if fused_module_supports_equalization(modules[str(op_node.target)]):
                    # Fused modules keep the weighted layer as submodule [0].
                    module = modules[str(op_node.target)][0]  # type: ignore[index]
                    assert nn_module_supports_equalization(module)
                    weight_eq_obs(module.weight)
                else:
                    weight_eq_obs(modules[str(op_node.target)].weight)

            # Calculate and set the equalization scale values
            equalization_scale = calculate_equalization_scale(input_eq_obs, weight_eq_obs)
            input_eq_obs.set_equalization_scale(equalization_scale)
            weight_eq_obs.set_equalization_scale(equalization_scale)

            weight_eq_obs_dict[op_node.name] = weight_eq_obs

    return weight_eq_obs_dict
605
+
606
+ def convert_eq_obs(
607
+ model: GraphModule,
608
+ modules: Dict[str, nn.Module],
609
+ weight_eq_obs_dict: Dict[str, _WeightEqualizationObserver],
610
+ ) -> None:
611
+ """ Converts the equalization operations and updates the other nodes in the
612
+ following way:
613
+ - Removes the input equalization observers and inserts a mul operator
614
+ along with an equalization scale node wherever applicable (we do not
615
+ want to insert a mul operator between connecting linear layers).
616
+ - Updates the input quantization observers with the scaled input min/max
617
+ values.
618
+ - Scales the weights by the current and next equalization scales.
619
+ - Removes the weight equalization observer node if it exists.
620
+
621
+ Before (after prepare):
622
+ weight values
623
+ |
624
+ WeightQuantObs
625
+ |
626
+ WeightEqObs
627
+ |
628
+ x -> InpQuantObs -> InpEqObs -> linear -> OutQuantObs
629
+
630
+ After this function:
631
+ scaled weight values
632
+ |
633
+ equalization scale WeightQuantObs
634
+ | |
635
+ x -> mul -> InpQuantObs (scaled min/max) -> linear -> OutQuantObs
636
+
637
+ After convert:
638
+ equalization scale scaled weight values
639
+ | |
640
+ x -> mul -> quantize_per_tensor -> quantized::linear
641
+
642
+ Note that although the equalization observer appeared after the quantization
643
+ observer after prepare_fx, the mul node appears before the quantization node
644
+ after convert_fx. This is because placing the equalization observer after
645
+ the quantization observer in prepare_fx would allow us to keep the invariant
646
+ that the graph before the current node inserts its observers is not
647
+ modified.
648
+
649
+ Having the equalization observer before the quantization observer would also
650
+ cause some inconsistences between the ordering of the quantization and
651
+ equalization observers.
652
+ For example, a single linear layer would look like:
653
+ x -> InpEqObs1 -> InpQuantObs1 -> linear1 -> OutQuantObs1
654
+ But between two connected linear layers, it would look like:
655
+ linear1 -> OutQuantObs1 -> InpEqObs2 -> linear2 -> OutQuantObs2
656
+ """
657
+ for node in model.graph.nodes:
658
+ if node.op == 'call_module' and isinstance(modules[node.target], _InputEqualizationObserver):
659
+ inp_quant_obs_node = node.args[0]
660
+ prev_node = inp_quant_obs_node.args[0]
661
+
662
+ # If the previous node is a layer that needs to be equalized, then
663
+ # we will remove the current node because we do not need to add any
664
+ # equalization nodes between two layers that need to be equalized
665
+
666
+ # Before: linear1/relu (prev_node) -> output_quant_obs1 (inp_quant_obs_node) -> input_eq_obs2 (node) -> linear2
667
+ # After: linear1/relu (prev_node) -> output_quant_obs1 (inp_quant_obs_node) -> linear2
668
+ if node_supports_equalization(prev_node, modules) or "relu" in prev_node.name:
669
+ remove_node(model, node, inp_quant_obs_node)
670
+ continue
671
+
672
+ # Update the following input quantization observer's min/max values
673
+ scale_input_observer(node, modules)
674
+
675
+ # Remove the InputEqualization node and add a mul operator before
676
+ # the quantization observer node that appears before the equalization node
677
+ # Before: x -> input_quant_obs -> input_eq_obs -> linear
678
+ # After: x -> mul -> input_quant_obs -> linear
679
+
680
+ # Create a node containing the equalization scale
681
+ with model.graph.inserting_before(inp_quant_obs_node):
682
+ get_new_eq_scale_name = get_new_attr_name_with_prefix(prev_node.name + '_equalization_scale')
683
+ name = get_new_eq_scale_name(modules)
684
+ setattr(model, name, modules[node.target].equalization_scale)
685
+ eq_scale_node = model.graph.create_node('get_attr', name)
686
+
687
+ # Create a node multiplying the input with the equalization scale
688
+ with model.graph.inserting_after(eq_scale_node):
689
+ inputs = (prev_node, eq_scale_node)
690
+ mul_node = model.graph.create_node("call_function", torch.mul, inputs)
691
+
692
+ # Set the mul nod to be the input_quant_obs_node's input instead of
693
+ # the previous node
694
+ inp_quant_obs_node.replace_input_with(prev_node, mul_node)
695
+ remove_node(model, node, inp_quant_obs_node)
696
+
697
+ elif weight_eq_obs_dict.get(node.name, None) is not None:
698
+ weight_eq_obs = weight_eq_obs_dict.get(node.name)
699
+ assert isinstance(weight_eq_obs, _WeightEqualizationObserver)
700
+ equalization_scale = weight_eq_obs.equalization_scale
701
+
702
+ if equalization_scale.nelement() == 1 and equalization_scale == torch.tensor(1):
703
+ equalization_scale = None # type: ignore[assignment]
704
+ maybe_next_equalization_scale = maybe_get_next_equalization_scale(node, modules)
705
+
706
+ # Scale the weight nodes
707
+ if node.op == 'call_module':
708
+ scale_weight_node(node, modules, equalization_scale, maybe_next_equalization_scale)
709
+ elif node.op == 'call_function':
710
+ scale_weight_functional(node, model, modules, equalization_scale, maybe_next_equalization_scale)
711
+
712
+ weight_eq_obs_node = maybe_get_weight_eq_obs_node(node, modules)
713
+ if weight_eq_obs_node is None:
714
+ return
715
+ assert isinstance(modules[str(weight_eq_obs_node.target)], _WeightEqualizationObserver)
716
+
717
+ # Clear the quantization observer's min/max values so that they
718
+ # can get updated later based on the new scale values
719
+ clear_weight_quant_obs_node(node, modules)
720
+
721
+ # Erase the weight equalization observer node
722
+ prev_node = weight_eq_obs_node.args[0]
723
+ remove_node(model, weight_eq_obs_node, prev_node)
724
+ else:
725
+ raise ValueError("Expected operation node to be 'call_module' or 'call_function" +
726
+ f"Instead got node {node.name} as '{node.op}'.")
727
+
728
def _convert_equalization_ref(model: GraphModule):
    """Apply the graph changes needed for equalization without quantizing
    any nodes (reference implementation).
    """
    named_mods = dict(model.named_modules(remove_duplicate=False))

    # First compute the equalization scales and rescale the observers and
    # weights, then rewrite the graph to fold/remove the equalization observers.
    weight_observers = update_obs_for_equalization(model, named_mods)
    convert_eq_obs(model, named_mods, weight_observers)

    return GraphModule(model, model.graph)
740
+
741
+
742
+ ###############################################################################
743
+ # Functions for running the equalized model on the Numeric Suite #
744
+ ###############################################################################
745
+
746
def get_layer_sqnr_dict(model_a: nn.Module, model_b: nn.Module, x: torch.Tensor) -> Dict[str, float]:
    """Run the Numeric Suite on ``model_a`` and ``model_b`` and return a
    dictionary mapping layer names to the SQNR between the two models.

    Note: to support equalized models this deliberately refuses to match any
    ``torch.mul`` operators.  Equalized models contain extra mul operators that
    scale the input by the equalization scale, and this edge case has not been
    resolved yet within the Numeric Suite code.

    Args:
        model_a: A float model
        model_b: A quantized model
        x: Inputs to use during calibration
    """
    import torch.ao.ns._numeric_suite_fx as ns
    from torch.ao.ns.fx.mappings import get_unmatchable_types_map

    # Keep torch.mul nodes out of the layer matching process (see note above).
    unmatchable_types_map = get_unmatchable_types_map()
    unmatchable_types_map["funs_unmatchable"].add(torch.mul)

    model_a_ns, model_b_ns = ns.add_loggers(
        'fp32', model_a,
        'int8', model_b,
        ns.OutputLogger,
        unmatchable_types_map=unmatchable_types_map
    )

    # Run calibration data through both logged models so the loggers record
    # each layer's activations.
    model_a_ns(x)
    model_b_ns(x)

    # Pull the recorded activations out and annotate them with SQNR values.
    comparison = ns.extract_logger_info(
        model_a_ns,
        model_b_ns,
        ns.OutputLogger,
        'int8')
    ns.extend_logger_results_with_comparison(
        comparison,
        'fp32', 'int8',
        torch.ao.ns.fx.utils.compute_sqnr, 'sqnr'
    )

    # Map each layer's fully qualified name to its first recorded SQNR value.
    return {
        results['node_output']['int8'][0]['fqn']:
            results['node_output']['int8'][0]['sqnr'][0]
        for results in comparison.values()
    }
795
+
796
def get_equalization_qconfig_dict(
    layer_sqnr_dict: Dict[str, float],
    num_layers_to_equalize: int
) -> Any:
    """Given the layer-to-SQNR dictionary, find the layers with the highest
    quantization errors and return an ``equalization_qconfig_dict`` that
    specifies equalizing only those top layers.

    Args:
        layer_sqnr_dict: Dictionary mapping layer names to SQNR values (found
            when comparing an equalized model against a float model)
        num_layers_to_equalize: Number of layers with the highest quantization
            errors to equalize
    """
    # The lowest SQNR values correspond to the highest quantization errors, so
    # an ascending sort puts the layers that benefit most from equalization first.
    worst_layers = sorted(layer_sqnr_dict, key=layer_sqnr_dict.__getitem__)
    worst_layers = worst_layers[:num_layers_to_equalize]

    # Only the selected layers receive the default equalization qconfig; all
    # other modules are left untouched.
    module_to_qconfig_list = [
        (layer_name, default_equalization_qconfig) for layer_name in worst_layers
    ]
    return {"module_name": module_to_qconfig_list}
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (239 Bytes). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/detector.cpython-311.pyc ADDED
Binary file (66.2 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/model_report.cpython-311.pyc ADDED
Binary file (26.7 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/model_report_observer.cpython-311.pyc ADDED
Binary file (13.6 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/__pycache__/model_report_visualizer.cpython-311.pyc ADDED
Binary file (29.8 kB). View file
 
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/detector.py ADDED
@@ -0,0 +1,1539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, Set, Tuple, Callable, List
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.ao.nn.qat as nnqat
6
+ from abc import ABC, abstractmethod
7
+ from torch.ao.quantization.fake_quantize import FakeQuantize
8
+ from torch.ao.quantization.fx.graph_module import GraphModule
9
+ from torch.ao.quantization.fx._model_report.model_report_observer import ModelReportObserver
10
+ from torch.ao.quantization.qconfig import (
11
+ QConfig,
12
+ default_qconfig,
13
+ _assert_valid_qconfig,
14
+ )
15
+ from torch.ao.quantization.observer import (
16
+ ObserverBase,
17
+ default_dynamic_quant_observer,
18
+ default_per_channel_weight_observer,
19
+ default_observer,
20
+ default_weight_observer,
21
+ )
22
+ from torch.ao.quantization.fx._equalize import (
23
+ default_equalization_qconfig,
24
+ EqualizationQConfig,
25
+ )
26
+ from torch.ao.quantization.observer import _is_activation_post_process
27
+
28
# Names for observer insert keys.
# These keys index the per-observer info dicts returned by
# DetectorBase.determine_observer_insert_points() (see DynamicStaticDetector's
# implementation for the exact shape of each entry).
DETECTOR_TARGET_NODE_KEY = "target_node"  # fx node the observer is attached to
DETECTOR_OBS_TO_INSERT_KEY = "observer_to_insert"  # observer instance to insert
DETECTOR_IS_POST_OBS_KEY = "is_post_observer"  # True -> after target node, False -> before
DETECTOR_OBS_ARGS_KEY = "observer_args"  # arguments meant to be passed into the observer
33
+
34
+ # Mapping related code
35
class DetectorQConfigInfo:
    r"""
    Holds the QConfig information a detector has gathered for a single module.

    The set of fields can grow as the qconfig mapping feature set grows, but
    currently includes:
    - whether the activation observer should be dynamic
    - whether the weight observer should be per channel

    Args:
        module_fqn (str): The fully qualified name (fqn) of the module that this
            information contains info relevant to qconfig for
    """

    def __init__(self, module_fqn: str):
        super().__init__()
        self.module_fqn = module_fqn

        # Detector recommendations; detectors flip these from their defaults
        # when they have something to recommend for this module.
        self.is_activation_dynamic = False
        self.is_weight_per_channel = False

        # equalization related options
        self.is_equalization_recommended = False

    def generate_quantization_qconfig(self, module: torch.nn.Module) -> QConfig:
        r"""
        Args:
            module (torch.nn.Module) The module we are generating
            the qconfig for

        Returns the generated quantization QConfig according to what a valid configuration is
        """
        # Candidate (dynamic activation, per-channel weight) combinations, in
        # priority order: both recommendations together, then dynamic only,
        # then per-channel only.
        candidates = (
            (self.is_activation_dynamic, self.is_weight_per_channel),
            (self.is_activation_dynamic, False),
            (False, self.is_weight_per_channel),
        )

        # Fall back to the default qconfig if no candidate turns out valid.
        chosen_qconfig = default_qconfig
        for use_dynamic, use_per_channel in candidates:
            activation = default_dynamic_quant_observer if use_dynamic else default_observer
            weight = default_per_channel_weight_observer if use_per_channel else default_weight_observer
            candidate_qconfig = QConfig(activation, weight)
            try:
                _assert_valid_qconfig(candidate_qconfig, module)
            except AssertionError:
                # Not a valid configuration for this module; move on to the
                # next candidate in priority order.
                continue
            chosen_qconfig = candidate_qconfig
            break

        # return the QConfig chosen
        return chosen_qconfig

    def generate_equalization_qconfig(self) -> EqualizationQConfig:
        r"""
        Return the equalization configuration for a module.

        For now this always returns the default, but as more equalization
        options become possible this method can grow more nuanced granularity.

        Returns the generated equalization QConfig according to what a valid configuration is
        """
        # Only modules for which equalization is a valid option ever reach
        # this point, so the default config is always safe to return.
        return default_equalization_qconfig
111
+
112
+ # Adding base class for detectors
113
class DetectorBase(ABC):
    r""" Base Detector Module
    Any detector class should derive from this class.

    Concrete detectors should follow the same general API, which includes:
    - A method to calculate and return observer insertion points
        - Should return both the fqns and the Observer class to insert
    - A method to return a report based on the detector
        - Should return a str-based report and dict info in Tuple[str, Dict] format
    """

    def __init__(self):
        super().__init__()
        # placeholder for detector-specific qconfig information
        self.detector_config_info = None

    @abstractmethod
    def determine_observer_insert_points(self, model) -> Dict:
        r"""
        Args
            model (nn.Module or subclass): model to find observer insertion points

        Returns a Dict mapping from unique observer fqns (where we want to insert them) to a Dict.
        This dict maps string keys to detector specific information
        """
        pass

    @abstractmethod
    def get_detector_name(self) -> str:
        r""" Returns the name of the current detector """
        pass

    @abstractmethod
    def get_qconfig_info(self, model) -> Dict[str, DetectorQConfigInfo]:
        r""" Returns the DetectorQConfigInfo for each module_fqn relevant
        Args
            model (nn.Module or subclass): model to find observer insertion points

        Returns a Dict mapping from unique observer fqns (where we want to insert them) to:
            A DetectorQConfigInfo with the information to generate a QConfig for a specific module
        """
        pass

    def _get_targeting_node(self, prepared_fx_model: GraphModule, target_fqn: str) -> torch.fx.node.Node:
        r"""
        Find the graph node whose target matches ``target_fqn``.

        If the fqn is not found directly, the module was most likely fused:
        e.g. a module fqn like ``x.linear.0`` may only appear in the graph as a
        node targeting ``x.linear``.  We therefore repeatedly strip the last
        fqn component (``foo.bar.baz`` -> ``foo.bar``) and search again, until
        a match is found or there is nothing left to strip.

        Args:
            prepared_fx_model (GraphModule): The prepared Fx GraphModule
            target_fqn (str): The fqn of the layer we are trying to target

        Returns the node object we are trying to add observers around
        """
        fqn_to_try = target_fqn
        while True:
            # scan the graph for a node whose target is the current fqn
            for node in prepared_fx_model.graph.nodes:
                if node.target == fqn_to_try:
                    return node

            # no match at this level: drop the last fqn component and retry,
            # or give up once we are already at the base name
            last_sep = fqn_to_try.rfind(".")
            if last_sep == -1:
                raise ValueError("passed in target_fqn not found in graph's targets.")
            fqn_to_try = fqn_to_try[:last_sep]

    @abstractmethod
    def generate_detector_report(self, model) -> Tuple[str, Dict[str, Any]]:
        r"""
        Args
            model (nn.Module or subclass): model to find observer insertion points

        Returns a Tuple of two elements:
            Str: string report of the suggested improvements
            Dict: contains useful data collected by the observer pertinent to this report
        """
        pass
202
+
203
class PerChannelDetector(DetectorBase):
    r""" This class is used to detect if any Linear or Conv layers in a model utilize per_channel quantization.
    Only Linear and Conv layers can use per_channel as of now so only these two are currently checked.

    per_channel quantization can lead to major benefits in the form of accuracy.
    Therefore, if the backend used by the user supports it, it is recommended to use

    Args:
        backend (str, optional): the backend the user wishes to use in production
            Default value is current torch.backends.quantized.engine
    """

    # Keys for return dictionary
    BACKEND_KEY = "backend"
    PER_CHAN_SUPPORTED_KEY = "per_channel_quantization_supported"
    PER_CHAN_USED_KEY = "per_channel_quantization_used"

    # Default map of supported per-channel-quantizable modules per backend.
    # Every currently supported backend allows per-channel quantization for the
    # same Linear/Conv modules (and their QAT counterparts); the set literal is
    # re-evaluated per backend so each entry gets its own set object.
    DEFAULT_BACKEND_PER_CHANNEL_SUPPORTED_MODULES: Dict[str, Set[Any]] = {
        backend_name: {
            nn.Linear, nn.Conv1d, nn.Conv2d, nn.Conv3d,
            nnqat.Linear, nnqat.Conv1d, nnqat.Conv2d, nnqat.Conv3d,
        }
        for backend_name in ("fbgemm", "qnnpack", "onednn", "x86")
    }

    def __init__(self, backend: str = torch.backends.quantized.engine):
        super().__init__()

        # store the backend information
        self.backend_chosen = backend
        self.supported_modules = set()
        if self.backend_chosen not in self.DEFAULT_BACKEND_PER_CHANNEL_SUPPORTED_MODULES:
            raise ValueError(f"Not configured to work with {self.backend_chosen}. Try a different default backend")
        self.supported_modules = self.DEFAULT_BACKEND_PER_CHANNEL_SUPPORTED_MODULES[self.backend_chosen]

    def get_detector_name(self) -> str:
        r""" returns the string name of this detector"""
        return "per_channel_detector"

    def get_qconfig_info(self, model) -> Dict[str, DetectorQConfigInfo]:
        r""" Returns the DetectorQConfigInfo for each module_fqn relevant
        Args
            model (nn.Module or subclass): model to find observer insertion points

        Returns a Dict mapping from unique observer fqns (where we want to insert them) to:
            A DetectorQConfigInfo with the information to generate a QConfig for a specific module
        """
        # gather the raw per-channel support/usage information first
        per_channel_info = self._detect_per_channel_helper(model)

        # translate each entry into a DetectorQConfigInfo recommendation
        module_fqn_to_detector_qconfig_info = {}
        for module_fqn, module_info in per_channel_info.items():
            detector_qconfig_info = DetectorQConfigInfo(module_fqn)
            # recommend per-channel weights whenever the backend supports it
            detector_qconfig_info.is_weight_per_channel = module_info[self.PER_CHAN_SUPPORTED_KEY]
            module_fqn_to_detector_qconfig_info[module_fqn] = detector_qconfig_info

        return module_fqn_to_detector_qconfig_info

    def determine_observer_insert_points(self, model: nn.Module) -> Dict:
        r"""
        There is no observers inserted for the PerChannelDetector.

        Returns an empty dictionary since no observers are added or needed
        """
        return {}

    def _detect_per_channel_helper(self, model: nn.Module):
        r"""
        Determine, per module, whether per_channel quantization is supported
        and whether it is currently being used.

        Args:
            model: The current module that is being checked to see if it is per_channel quantizable

        Returns dictionary mapping fqns to if per_channel quantization is possible
        """
        per_channel_info: Dict = {}

        for fqn, module in model.named_modules():
            # skip modules that the chosen backend cannot per-channel quantize
            if not any(isinstance(module, mod_type) for mod_type in self.supported_modules):
                continue

            # assert statement for MyPy
            q_config_file = module.qconfig
            assert isinstance(q_config_file, QConfig)

            # this object should either be fake quant or observer
            q_or_s_obj = module.qconfig.weight.p.func()
            assert isinstance(q_or_s_obj, (FakeQuantize, ObserverBase))

            per_channel_used = False  # flipped if found in qconfig
            if hasattr(q_or_s_obj, "ch_axis"):  # then per_channel quantization may be in use
                if isinstance(q_or_s_obj, FakeQuantize):
                    # all fake quants have a channel axis, so is_per_channel decides
                    if getattr(q_or_s_obj, "is_per_channel", False):
                        per_channel_used = True
                elif isinstance(q_or_s_obj, ObserverBase):
                    # an observer with a channel axis is per-channel
                    per_channel_used = True
                else:
                    raise ValueError("Should be either observer or fake quant")

            per_channel_info[fqn] = {
                self.PER_CHAN_SUPPORTED_KEY: True,
                self.PER_CHAN_USED_KEY: per_channel_used,
                self.BACKEND_KEY: self.backend_chosen,
            }

        return per_channel_info

    def generate_detector_report(self, model: nn.Module) -> Tuple[str, Dict[str, Any]]:
        r"""Checks if any Linear or Conv layers in the model utilize per_channel quantization.
        Only Linear and Conv layers can use per_channel as of now so only these two are currently checked.

        Looks at q_config format and backend to determine if per_channel can be utilized.
        Uses the DEFAULT_BACKEND_PER_CHANNEL_SUPPORTED_MODULES structure to determine support

        Args:
            model: The prepared and calibrated model we want to check if using per_channel

        Returns a tuple with two elements:
            String report of potential actions to improve model (if per_channel quantization is available in backend)
            Dictionary mapping per_channel quantizable elements to:
                whether per_channel quantization is supported by the backend
                if it is being utilized in the current model
        """
        per_channel_info = self._detect_per_channel_helper(model)

        # String to let the user know of further optimizations
        further_optims_str = f"Further Optimizations for backend {self.backend_chosen}: \n"

        # modules where per-channel is supported but not currently used
        candidate_fqns = [
            fqn for fqn, fqn_dict in per_channel_info.items()
            if fqn_dict[self.PER_CHAN_SUPPORTED_KEY] and not fqn_dict[self.PER_CHAN_USED_KEY]
        ]
        for fqn in candidate_fqns:
            further_optims_str += f"Module {fqn} can be configured to use per_channel quantization.\n"

        if candidate_fqns:
            further_optims_str += (
                "To use per_channel quantization, make sure the qconfig has a per_channel weight observer."
            )
        else:
            further_optims_str += "No further per_channel optimizations possible."

        # return the string and the dictionary form of same information
        return (further_optims_str, per_channel_info)
373
+
374
+
375
+ class DynamicStaticDetector(DetectorBase):
376
+ r"""
377
+ Determines whether dynamic or static quantization is more appropriate for a given module.
378
+
379
+ Takes advantage of the ModelReportObserver that records range information.
380
+ Stationary distribution of data are strictly above tolerance level for the comparison statistic:
381
+
382
+ S = average_batch_activation_range/epoch_activation_range
383
+
384
+ Nonstationary distributions are below or at the tolerance level for this metric.
385
+
386
+ If the distribution of data right after the module is non-stationary, recommend dynamic quantization
387
+ Otherwise recommend static quantization
388
+
389
+ Args:
390
+ tolerance (float, optional): The threshold where S metric is stationary above and non-stationary otherwise. Default: 0.5
391
+ """
392
+ # names for the pre and post observers that are inserted
393
+ DEFAULT_PRE_OBSERVER_NAME = "model_report_pre_observer"
394
+ DEFAULT_POST_OBSERVER_NAME = "model_report_post_observer"
395
+
396
+ # naming conventions for stationary vs non-stationary data
397
+ STATIONARY_STR = "stationary"
398
+ NON_STATIONARY_STR = "non-stationary"
399
+
400
+ # naming for activation
401
+ INPUT_ACTIVATION_PREFIX = "input_activation_"
402
+ OUTPUT_ACTIVATION_PREFIX = "output_activation_"
403
+
404
+ # naming conventions for the keys of the return module info
405
+ TOLERANCE_KEY = "dynamic_static_tolerance"
406
+ DEFAULT_DYNAMIC_REC_KEY = "dynamic_recommended"
407
+ PRE_OBS_COMP_STAT_KEY = INPUT_ACTIVATION_PREFIX + "dynamic_static_comp_stat"
408
+ POST_OBS_COMP_STAT_KEY = OUTPUT_ACTIVATION_PREFIX + "dynamic_static_comp_stat"
409
+ PRE_OBS_DATA_DIST_KEY = INPUT_ACTIVATION_PREFIX + "dynamic_static_data_classification"
410
+ POST_OBS_DATA_DIST_KEY = OUTPUT_ACTIVATION_PREFIX + "dynamic_static_data_classification"
411
+ IS_CURRENTLY_SUPPORTED_KEY = "is_dynamic_supported"
412
+
413
+ # modules that are supported both dynamic and static for this report function
414
+ DEFAULT_DYNAMIC_STATIC_CHECK_SUPPORTED = {nn.Linear}
415
+
416
+ # modules that will be supported soon for both
417
+ DEFAULT_DYNAMIC_STATIC_FUTURE_SUPPORTED = {nn.Conv1d, nn.Conv2d, nn.Conv3d}
418
+
419
+ def __init__(self, tolerance=0.5):
420
+ super().__init__()
421
+
422
+ # set tolerance level and initialize a set to keep track of useful fqn locations
423
+ self.tolerance = tolerance
424
+ self.useful_observer_fqns: Set[str] = set()
425
+
426
+ def determine_observer_insert_points(self, prepared_fx_model: GraphModule) -> Dict[str, Dict[str, Any]]:
427
+ r"""
428
+ Determines where observers need to be inserted for the Dynamic vs Static detector.
429
+ For this detector, we want to place observers on either side of linear layers in the model.
430
+
431
+ Currently inserts observers for:
432
+ linear layers
433
+
434
+ Args:
435
+ prepared_fx_model (GraphModule): The prepared Fx GraphModule
436
+
437
+ Returns a Dict mapping from unique observer fqns (where we want to insert them) to a Dict with:
438
+ key "target_node" -> the node we are trying to observe with this observer (torch.fx.node.Node)
439
+ key "observer_to_insert" -> the observer we wish to insert (ObserverBase)
440
+ key "is_post_observer" -> True if this is meant to be a post-observer for target_node, False if pre-observer
441
+ key "observer_args" -> The arguments that are meant to be passed into the observer
442
+ """
443
+
444
+ # observer for this detector is ModelReportObserver
445
+ obs_ctr = ModelReportObserver
446
+
447
+ # return dict
448
+ obs_fqn_to_info: Dict[str, Dict[str, Any]] = {}
449
+
450
+ for fqn, module in prepared_fx_model.named_modules():
451
+ # make sure module is supported
452
+ if self._is_supported(module, insert=True):
453
+ # if it's a supported type, we want to get node and add observer insert locations
454
+ targeted_node = self._get_targeting_node(prepared_fx_model, fqn)
455
+
456
+ # add entry for pre-observer
457
+ pre_obs_fqn = fqn + "." + self.DEFAULT_PRE_OBSERVER_NAME
458
+
459
+ obs_fqn_to_info[pre_obs_fqn] = {
460
+ DETECTOR_TARGET_NODE_KEY: targeted_node,
461
+ DETECTOR_OBS_TO_INSERT_KEY: obs_ctr(),
462
+ DETECTOR_IS_POST_OBS_KEY: False,
463
+ DETECTOR_OBS_ARGS_KEY: targeted_node.args
464
+ }
465
+
466
+ # add entry for post-observer
467
+ post_obs_fqn = fqn + "." + self.DEFAULT_POST_OBSERVER_NAME
468
+
469
+ obs_fqn_to_info[post_obs_fqn] = {
470
+ DETECTOR_TARGET_NODE_KEY: targeted_node,
471
+ DETECTOR_OBS_TO_INSERT_KEY: obs_ctr(),
472
+ DETECTOR_IS_POST_OBS_KEY: True,
473
+ DETECTOR_OBS_ARGS_KEY: (targeted_node,)
474
+ }
475
+
476
+ return obs_fqn_to_info
477
+
478
+ def get_detector_name(self) -> str:
479
+ r""" returns the string name of this detector"""
480
+ return "dynamic_vs_static_detector"
481
+
482
+
483
+ def get_qconfig_info(self, model) -> Dict[str, DetectorQConfigInfo]:
484
+ r""" Returns the DetectorQConfigInfo for each module_fqn relevant
485
+ Args
486
+ model (nn.Module or subclass): model to find observer insertion points
487
+
488
+ Returns a Dict mapping from unique observer fqns (where we want to insert them) to:
489
+ A DetectorQConfigInfo with the information to generate a QConfig for a specific module
490
+ """
491
+ # run the helper function to populate the dictionary
492
+ dynamic_static_info = self._generate_dict_info(model)
493
+
494
+ # we actually have a qconfig info object we are populating
495
+ module_fqn_to_detector_qconfig_info = {}
496
+
497
+ for module_fqn in dynamic_static_info:
498
+ # create a detector info instance
499
+ detector_qconfig_info = DetectorQConfigInfo(module_fqn)
500
+
501
+ # see if per channel quantization is supported
502
+ dynamic_static_recommended: bool = dynamic_static_info[module_fqn][self.DEFAULT_DYNAMIC_REC_KEY]
503
+ detector_qconfig_info.is_activation_dynamic = dynamic_static_recommended
504
+ module_fqn_to_detector_qconfig_info[module_fqn] = detector_qconfig_info
505
+
506
+ return module_fqn_to_detector_qconfig_info
507
+
508
+ def _is_supported(self, module: nn.Module, insert: bool = False) -> bool:
509
+ r"""Returns whether the given module is supported for observers
510
+
511
+ Args
512
+ module: The module to check and ensure is supported
513
+ insert: True if this is check for observer insertion, false if for report gen
514
+
515
+ Returns True if the module is supported by observer, False otherwise
516
+ """
517
+ # check to see if module is of a supported type
518
+ is_supported_type = sum([isinstance(module, x) for x in self.DEFAULT_DYNAMIC_STATIC_CHECK_SUPPORTED]) > 0
519
+
520
+ # check if it will be supported
521
+ future_supported_type = sum([isinstance(module, x) for x in self.DEFAULT_DYNAMIC_STATIC_FUTURE_SUPPORTED]) > 0
522
+
523
+ # supported
524
+ supported = is_supported_type or future_supported_type
525
+
526
+ # this is check for observer insertion
527
+ if insert:
528
+ return supported
529
+ else:
530
+ # this is for report gen and we also need to check if it contains observers
531
+ has_obs = hasattr(module, self.DEFAULT_PRE_OBSERVER_NAME) and hasattr(module, self.DEFAULT_POST_OBSERVER_NAME)
532
+ return supported and has_obs
533
+
534
    def _generate_dict_info(self, model: GraphModule) -> Dict[str, Any]:
        r"""
        Helper function for generate_detector_report that does the generation of the dictionary.
        This process is done as specified in generate_detector_report documentation

        Args:
            model (GraphModule): The prepared and calibrated GraphModule with inserted ModelReportObservers

        Returns a Dictionary mapping modules with ModelReportObservers around them to:
            whether dynamic quantization is recommended
            their S metric of input to module
            whether input to module is stationary or non-stationary
            their S metric of output of module
            whether output of module is stationary or non-stationary
            the tolerance level to decide whether input/output is stationary or non-stationary
            whether it is currently supported or planned for the future
        """
        # store modules dynamic vs static information
        module_dynamic_static_info = {}

        # This for loop goes through the modules, and extracts all relevant information into module_dynamic_static_info
        # This information primarily includes whether the data distributions around a supported module are stationary or not
        # Based on this, it is recorded whether dynamic or static quantization is recommended

        # loop through all submodules, including nested ones
        for fqn, module in model.named_modules():
            # only consider modules of a supported type that have the ModelReportObservers attached
            if self._is_supported(module):
                # get pre and post observers for the module
                pre_obs = getattr(module, self.DEFAULT_PRE_OBSERVER_NAME)
                post_obs = getattr(module, self.DEFAULT_POST_OBSERVER_NAME)

                # get the S statistic (avg batch range / epoch range) for each side of the module
                pre_stat = pre_obs.get_batch_to_epoch_ratio()
                post_stat = post_obs.get_batch_to_epoch_ratio()

                # record module, pre and post stat, and whether to do dynamic or static based off it
                # true if post observer data distribution is non-stationary, false if it's stationary
                dynamic_recommended = post_stat <= self.tolerance

                # specify the classifications for whether data distributions considered stationary or non-stationary
                pre_obs_dist_classif = self.STATIONARY_STR if pre_stat > self.tolerance else self.NON_STATIONARY_STR
                post_obs_dist_classif = self.STATIONARY_STR if post_stat > self.tolerance else self.NON_STATIONARY_STR

                # check if current support or future support (future-only types reach here via _is_supported)
                is_supported_type = sum([isinstance(module, x) for x in self.DEFAULT_DYNAMIC_STATIC_CHECK_SUPPORTED]) > 0

                # store the set of important information for this module
                module_info = {
                    self.TOLERANCE_KEY: self.tolerance,
                    self.DEFAULT_DYNAMIC_REC_KEY: dynamic_recommended,
                    self.PRE_OBS_COMP_STAT_KEY: pre_stat,
                    self.PRE_OBS_DATA_DIST_KEY: pre_obs_dist_classif,
                    self.POST_OBS_COMP_STAT_KEY: post_stat,
                    self.POST_OBS_DATA_DIST_KEY: post_obs_dist_classif,
                    self.IS_CURRENTLY_SUPPORTED_KEY: is_supported_type,
                }

                module_dynamic_static_info[fqn] = module_info

        return module_dynamic_static_info
595
+
596
    def generate_detector_report(self, model: GraphModule) -> Tuple[str, Dict[str, Any]]:
        r"""
        Determines whether dynamic or static quantization is more appropriate for a given module.

        Takes advantage of the ModelReportObserver that records range information.
        Stationary distribution of data are strictly above tolerance level for the comparison statistic:

            S = average_batch_activation_range/epoch_activation_range

        Nonstationary distributions are below or at the tolerance level for this metric.

        If the distribution of data right after the module is non-stationary, recommend dynamic quantization
            Otherwise recommend static quantization

        This will then generate suggestions for dynamic vs static quantization focused around Linear.

        Args:
            model (GraphModule): The prepared and calibrated GraphModule with inserted ModelReportObservers

        Returns a tuple with two elements:
            String report of whether dynamic or static quantization is recommended for certain modules
            Dictionary mapping modules with ModelReportObservers around them to:
                whether dynamic quantization is recommended
                their S metric of input to module
                whether input to module is stationary or non-stationary
                their S metric of output of module
                whether output of module is stationary or non-stationary
                the tolerance level to decide whether input/output is stationary or non-stationary
                whether it is currently supported or planned for the future
        """

        # get the dictionary of the information to format the string report
        module_dynamic_static_info = self._generate_dict_info(model)

        dynamic_vs_static_string = "Dynamic vs. Static Quantization suggestions: \n"

        modules_added: bool = False  # check to make sure at least 1 module added.

        dynamic_benefit = " You will get more accurate results if you use dynamic quantization"
        static_benefit = " You can increase model efficiency if you use static quantization"
        future_support_str = ". This layer is not yet supported for dynamic quantization"
        # This for loop goes through the information collected in module_dynamic_static_info and:
        #   Populates the string based report with the information from module_dynamic_static_info
        #   Compiles the complete report by appending relevant formatted strings

        for module_fqn in module_dynamic_static_info.keys():

            # there is at least 1 module for suggestion
            modules_added = True
            module_info = module_dynamic_static_info[module_fqn]
            suggestion_string_template = "For module {} it is suggested to use {} quantization because {}.\n"

            # decide what string formatting values will be
            quantization_type = ""
            quantization_reasoning = "the distribution of data before {} is {} and the distribution after is {}."

            benefit_str = ""

            # strings for if dynamic quantized per tensor is needed
            recommend_per_tensor = ". We recommend to add a {} before this module if it is static."
            rec_lay_to_add = "dynamic quantize per tensor layer"
            dynamic_per_tensor_string = recommend_per_tensor.format(rec_lay_to_add)
            dynamic_per_tensor_reasoning_string = (
                " This is because the input to this module has a non-stationary distribution"
            )

            # start composing explanation
            if module_info[self.DEFAULT_DYNAMIC_REC_KEY]:
                quantization_type = "dynamic"
                # check if currently supported or future supported
                benefit_str = dynamic_benefit
                if not module_info[self.IS_CURRENTLY_SUPPORTED_KEY]:
                    benefit_str += future_support_str
            else:
                quantization_type = "static"
                benefit_str = static_benefit

            # now set the quantization explanation string
            quantization_reasoning = (
                quantization_reasoning.format(
                    module_fqn, module_info[self.PRE_OBS_DATA_DIST_KEY], module_info[self.POST_OBS_DATA_DIST_KEY]
                )
                + benefit_str
            )

            # if we have a non-stationary input -> linear -> stationary we suggested static
            # however, we want to also recommend they add a dynamic quantize per tensor right if this change is made
            if (
                module_info[self.PRE_OBS_DATA_DIST_KEY] == self.NON_STATIONARY_STR
                and module_info[self.POST_OBS_DATA_DIST_KEY] == self.STATIONARY_STR
            ):
                quantization_reasoning = (
                    quantization_reasoning + dynamic_per_tensor_string + dynamic_per_tensor_reasoning_string
                )

            # format the overall suggestion string with the specific inputs
            module_suggestion_string = suggestion_string_template.format(
                module_fqn, quantization_type, quantization_reasoning
            )

            # append to overall suggestion
            dynamic_vs_static_string += module_suggestion_string

        if not modules_added:
            dynamic_vs_static_string += "No applicable layers for suggestions. Only linear and conv are valid.\n"

        # return the string as well as the dictionary of information
        return (dynamic_vs_static_string, module_dynamic_static_info)
704
+
705
+
706
class InputWeightEqualizationDetector(DetectorBase):
    r"""
    Determines whether input-weight equalization can help improve quantization for certain modules.

    Specifically, this list of modules includes:
        linear
        conv

    Determines whether input-weight equalization is recommended based on the comp stat:
        s_c = sqrt(w_c/W)/sqrt(i_c/I)
        where:
            w_c is range of weight for channel c, W is range of weight over all channels
            i_c is range of input for channel c, I is range of input over all channels

        if s_c >= threshold or <= 1 / threshold, recommends input-weight equalization

    Args:
        ratio_threshold (float): The threshold for s_c to determine if input-weight equalization is suggested
            Should be between 0 and 1 (both non-inclusive)
        ch_axis (int, optional): The channel axis being observed to determine input weight equalization
            Default: 1

    * :attr:`ratio_threshold`: The threshold for s_c to determine if input-weight equalization is suggested
        Should be between 0 and 1

    * :attr:`ch_axis`: The channel axis being observed to determine input weight equalization

    * :attr:`SUPPORTED_MODULES`: This specifies the modules that are supported for input-weight equalization

    * :attr:`DEFAULT_PRE_OBSERVER_NAME`: The name of the pre-observer to be inserted for this detector
    """

    # modules (float and QAT variants) this detector knows how to analyze
    SUPPORTED_MODULES: Set[Callable] = {nn.Linear,
                                        nn.Conv1d,
                                        nn.Conv2d,
                                        nn.Conv3d,
                                        nnqat.Linear,
                                        nnqat.Conv1d,
                                        nnqat.Conv2d,
                                        nnqat.Conv3d}

    # names for the pre and post observers that are inserted
    DEFAULT_PRE_OBSERVER_NAME: str = "model_report_pre_observer"

    # weight / activation prefix for each of the below info
    WEIGHT_PREFIX = "weight_"
    ACTIVATION_PREFIX = "input_activation_"

    # string names for keys of info dictionaries
    PER_CHANNEL_MAX_KEY = "per_channel_max"
    PER_CHANNEL_MIN_KEY = "per_channel_min"
    GLOBAL_MAX_KEY = "global_max"
    GLOBAL_MIN_KEY = "global_min"

    # keys for return dict of recommendations
    RECOMMENDED_KEY = "input_weight_equalization_recommended"
    COMP_METRIC_KEY = "input_weight_channel_comparison_metrics"
    THRESHOLD_KEY = "input_weight_threshold"
    CHANNEL_KEY = "input_weight_channel_axis"

    # default weight and info strings
    WEIGHT_STR = "weight"
    INPUT_STR = "input"

    # default for what fraction of channels must benefit before we recommend input-weight equalization
    DEFAULT_RECOMMEND_INPUT_WEIGHT_CHANNEL_RATIO = 0.4

    def __init__(self, ratio_threshold: float, ch_axis: int = 1):
        # ensure passed in inputs are valid
        if ratio_threshold <= 0 or ratio_threshold >= 1:
            raise ValueError("Make sure threshold is > 0 and < 1")

        # initialize attributes based on args
        self.ratio_threshold: float = ratio_threshold
        self.ch_axis: int = ch_axis

    def _is_supported(self, module: nn.Module, insert: bool = False) -> bool:
        r"""Returns whether the given module is supported for observers

        Args
            module: The module to check and ensure is supported
            insert: True if this is check for observer insertion, false if for report gen

        Returns True if the module is supported by observer, False otherwise
        """
        # check to see if module is of a supported type
        # note: exact type match (not isinstance) so subclasses are deliberately excluded
        is_supported_type = sum([type(module) is x for x in self.SUPPORTED_MODULES]) > 0

        # this is check for observer insertion
        if insert:
            return is_supported_type
        else:
            # this is for report gen and we also need to check if it contains observers
            has_obs = hasattr(module, self.DEFAULT_PRE_OBSERVER_NAME)
            return is_supported_type and has_obs

    def get_qconfig_info(self, model) -> Dict[str, DetectorQConfigInfo]:
        r""" Returns the DetectorQConfigInfo for each module_fqn relevant
        Args
            model (nn.Module or subclass): model to find observer insertion points

        Returns a Dict mapping from unique observer fqns (where we want to insert them) to:
            A DetectorQConfigInfo with the information to generate a QConfig for a specific module
        """
        # run the helper function to populate the dictionary
        # find the range of inputs
        input_values: Dict[str, Dict] = self._extract_input_info(model)

        # find the range of weights
        weight_values: Dict[str, Dict] = self._extract_weight_info(model)

        # calculate per_channel comparison statistic s_c
        comp_stats: Dict[str, torch.Tensor] = self._generate_comparison_values(input_values, weight_values)

        # generate the return dictionary
        input_weight_equalization_info: Dict[str, Dict] = self._generate_dict_info(input_values, weight_values, comp_stats)

        # we actually have a qconfig info object we are populating
        module_fqn_to_detector_qconfig_info = {}

        for module_fqn in input_weight_equalization_info:
            # create a detector info instance
            detector_qconfig_info = DetectorQConfigInfo(module_fqn)

            # see if input-weight equalization is recommended
            # NOTE(review): RECOMMENDED_KEY maps to a per-channel list of bools
            # (see _generate_dict_info), not a single bool — confirm downstream
            # consumers of is_equalization_recommended handle a list
            input_weight_recommended: list = input_weight_equalization_info[module_fqn][self.RECOMMENDED_KEY]
            detector_qconfig_info.is_equalization_recommended = input_weight_recommended
            module_fqn_to_detector_qconfig_info[module_fqn] = detector_qconfig_info

        return module_fqn_to_detector_qconfig_info

    def determine_observer_insert_points(self, prepared_fx_model: GraphModule) -> Dict[str, Dict[str, Any]]:
        r"""Determines where observers need to be inserted for the Input Weight Equalization Detector.
        For this detector, we want to place observers in front of supported layers.

        Currently inserts observers for:
            linear layers
            conv layers

        Args:
            prepared_fx_model (GraphModule):  The prepared Fx GraphModule

        Returns a Dict mapping from unique observer fqns (where we want to insert them) to a Dict with:
            key "target_node" -> the node we are trying to observe with this observer (torch.fx.node.Node)
            key "observer_to_insert" -> the observer we wish to insert (ObserverBase)
            key "is_post_observer" -> True if this is meant to be a post-observer for target_node, False if pre-observer
            key "observer_args" -> The arguments that are meant to be passed into the observer
        """

        # observer for this detector is ModelReportObserver
        obs_ctr = ModelReportObserver

        # return dict
        obs_fqn_to_info: Dict[str, Dict[str, Any]] = {}

        for fqn, module in prepared_fx_model.named_modules():
            # check to see if module is of a supported type
            if self._is_supported(module, insert=True):
                # if it's a supported type, we want to get node and add observer insert locations
                targeted_node = self._get_targeting_node(prepared_fx_model, fqn)

                # add entry for pre-observer only; this detector needs no post-observer
                pre_obs_fqn = fqn + "." + self.DEFAULT_PRE_OBSERVER_NAME

                obs_fqn_to_info[pre_obs_fqn] = {
                    DETECTOR_TARGET_NODE_KEY: targeted_node,
                    DETECTOR_OBS_TO_INSERT_KEY: obs_ctr(ch_axis=self.ch_axis),
                    DETECTOR_IS_POST_OBS_KEY: False,
                    DETECTOR_OBS_ARGS_KEY: targeted_node.args,
                }

        return obs_fqn_to_info

    def get_detector_name(self) -> str:
        r"""Returns the name of this detector"""
        return "input_weight_equalization_detector"

    def _extract_input_info(self, model: GraphModule) -> Dict[str, Dict]:
        r"""
        Takes in a calibrated GraphModule and then finds the relevant observers.
        It then extracts the input information for each observer returns it

        Args
            model (GraphModule): The prepared and calibrated GraphModule with inserted ModelReportObservers

        Returns a dict mapping relevant module fqns (str) to a dict with keys:
            "input_activation_per_channel_max" : maps to the per_channel max values
            "input_activation_per_channel_min" : maps to the per_channel min values
            "input_activation_global_max" : maps to the global max recorded
            "input_activation_global_min" : maps to the global min recorded
        """

        # return dictionary mapping observer fqns to desired info
        input_info: Dict[str, Dict] = {}

        for fqn, module in model.named_modules():
            # if module is supported and it has a pre-observer
            if self._is_supported(module):
                # get pre observer for the module
                pre_obs = getattr(module, self.DEFAULT_PRE_OBSERVER_NAME)

                # global max/min are reduced from the observer's per-channel extrema
                input_info[fqn] = {
                    self.ACTIVATION_PREFIX + self.PER_CHANNEL_MAX_KEY: pre_obs.max_val,
                    self.ACTIVATION_PREFIX + self.PER_CHANNEL_MIN_KEY: pre_obs.min_val,
                    self.ACTIVATION_PREFIX + self.GLOBAL_MAX_KEY: max(pre_obs.max_val),
                    self.ACTIVATION_PREFIX + self.GLOBAL_MIN_KEY: min(pre_obs.min_val),
                }

        return input_info

    def _extract_weight_info(self, model: GraphModule) -> Dict[str, Dict]:
        r"""
        Takes in a calibrated GraphModule and then finds the relevant observers.
        It then extracts the weight information for each layer an observer is attached to.

        Args
            model (GraphModule): The prepared and calibrated GraphModule with inserted ModelReportObservers

        Returns a dict mapping module fqns (str) to a dict with keys:
            "per_channel_max" : maps to the per_channel max values
            "per_channel_min" : maps to the per_channel min values
            "global_max" : maps to the global max recorded
            "global_min" : maps to the global min recorded
        """
        # return dictionary mapping observer fqns to desired info
        weight_info: Dict[str, Dict] = {}

        for fqn, module in model.named_modules():
            # if module is supported and it has a pre-observer
            if self._is_supported(module):
                # we don't need actual observer, just the module weights
                # calculate min and max vals
                device = module.weight.device
                min_val: torch.Tensor = torch.tensor([float('inf')], device=device)
                max_val: torch.Tensor = torch.tensor([float('-inf')], device=device)
                x_copy = module.weight
                x_dim = x_copy.size()

                # move ch_axis to the front so we can flatten all other dims per channel
                new_axis_list = [i for i in range(len(x_dim))]  # noqa: C416
                new_axis_list[self.ch_axis] = 0
                new_axis_list[0] = self.ch_axis
                y = x_copy.permute(new_axis_list)

                # Need to match dtype of min/max because the updates to buffers
                # are done in place and types need to match for comparisons
                y = y.to(min_val.dtype)
                y = torch.flatten(y, start_dim=1)
                # NOTE(review): min_val/max_val are initialized with one element,
                # so numel() == 0 never holds here and the else branch always runs;
                # harmless since min/max against +/-inf is the identity — confirm intent
                if min_val.numel() == 0 or max_val.numel() == 0:
                    min_val, max_val = torch.aminmax(y, dim=1)
                else:
                    min_val_cur, max_val_cur = torch.aminmax(y, dim=1)
                    min_val = torch.min(min_val_cur, min_val)
                    max_val = torch.max(max_val_cur, max_val)

                weight_info[fqn] = {
                    self.WEIGHT_PREFIX + self.PER_CHANNEL_MAX_KEY: max_val,
                    self.WEIGHT_PREFIX + self.PER_CHANNEL_MIN_KEY: min_val,
                    self.WEIGHT_PREFIX + self.GLOBAL_MAX_KEY: max(max_val),
                    self.WEIGHT_PREFIX + self.GLOBAL_MIN_KEY: min(min_val),
                }

        return weight_info

    def _calculate_range_ratio(self, info_dict: Dict, info_str: str, module_fqn: str) -> torch.Tensor:
        r"""
        Takes in an info dict and calculates the per-channel to global range ratio.

        Args:
            info_dict (dict): A dictionary of either input or weight range info
            info_str (str): A str describing whether currently looking at weight or input info
                Either "weight" or "input"
            module_fqn (str): The fqn of the module we are looking at

        Returns a tensor of values, where each value is the range ratio for a different channel

        Raises:
            ValueError: if the global range is 0 (constant-valued data)
        """
        # calculate the ratios of the info
        # get the prefix str
        prefix_str = self.ACTIVATION_PREFIX if info_str == self.INPUT_STR else self.WEIGHT_PREFIX

        per_channel_range = info_dict[prefix_str + self.PER_CHANNEL_MAX_KEY] - info_dict[prefix_str + self.PER_CHANNEL_MIN_KEY]
        global_range = info_dict[prefix_str + self.GLOBAL_MAX_KEY] - info_dict[prefix_str + self.GLOBAL_MIN_KEY]

        # a zero global range means every value is identical; the ratio would be undefined
        if global_range == 0:
            range_zero_explanation = "We recommend removing this channel as it doesn't provide any useful information."
            raise ValueError(
                "The range of the {} data for module {} is 0, which means you have a constant value channel. {}".format(
                    info_str, module_fqn, range_zero_explanation
                )
            )

        ratio = per_channel_range / global_range

        return ratio

    def _generate_comparison_values(self, input_info: Dict, weight_info: Dict) -> Dict[str, torch.Tensor]:
        r"""
        Takes in the information on the min and max values of the inputs and weights and:
            Calculates the comp stat for each channel: s_c = sqrt(w_c/W)/sqrt(i_c/I)

        Args:
            input_info (dict): A dict mapping each observer to input range information
            weight_info (dict): A dict mapping each observer to weight range information

        Returns a dict mapping relevant observer fqns (str) to a 1-D tensor.
            Each value is a different s_c value for a different channel

        Raises:
            KeyError: if a module present in input_info is missing from weight_info
        """
        # create return dictionary for each observer
        module_fqn_to_channel: Dict[str, torch.Tensor] = {}

        # for each module (both passed in dicts should have same keys)
        for module_fqn in input_info:

            # raise error if not in weight info
            if module_fqn not in weight_info:
                raise KeyError(f"Unable to find weight range stats for module {module_fqn}")

            # calculate the ratios of the weight info and input info
            weight_ratio = self._calculate_range_ratio(weight_info[module_fqn], self.WEIGHT_STR, module_fqn)
            input_ratio = self._calculate_range_ratio(input_info[module_fqn], self.INPUT_STR, module_fqn)

            # if mismatched size, because of grouping, we want to replicate weight enough times
            weight_channels = len(weight_ratio)
            input_channels = len(input_ratio)
            if weight_channels != input_channels:
                # we try to replicate
                assert input_channels % weight_channels == 0, "input channels should be divisible by weight channels."
                # get replication factor
                rep_factor: int = input_channels // weight_channels

                # weight ratio is (n,), input ratio is (k,), we just repeat weight ratio k // n
                weight_ratio = weight_ratio.repeat(rep_factor)

            # calculate the s metric per channel
            s = torch.sqrt(weight_ratio) / torch.sqrt(input_ratio)
            module_fqn_to_channel[module_fqn] = s

        # return compiled observer ratios
        return module_fqn_to_channel

    def _generate_dict_info(self, input_info: Dict, weight_info: Dict, comp_stats: Dict) -> Dict[str, Dict]:
        r"""
        Helper function for generate_detector_report that does the generation of the dictionary.
        This process is done as specified in generate_detector_report documentation

        Args:
            input_info (dict): A dict mapping each module to input range information
            weight_info (dict): A dict mapping each module to weight range information
            comp_stats (dict): A dict mapping each module to its corresponding comp stat

        Returns a dictionary mapping each module with relevant ModelReportObservers around them to:
            whether input weight equalization is recommended
            their s_c metric compared to the threshold
            the threshold used to make the recommendation
            the channel used for recording data
            the input channel range info
            the weight channel range info
        """
        # store modules input weight equalization info
        input_weight_equalization_info: Dict[str, Dict] = {}

        # for each module we add separate set of suggestions
        for module_fqn in input_info:

            # get relevant info for this module
            mod_input_info: Dict = input_info[module_fqn]
            mod_weight_info: Dict = weight_info[module_fqn]
            mod_comp_stat: torch.Tensor = comp_stats[module_fqn]

            # decide if each channel should have input weight equalization or not
            channel_rec_vals: list = []

            for val in mod_comp_stat:
                float_rep: float = val.item()

                # recommend equalization when s_c lies within [threshold, 1/threshold]
                recommended: bool = float_rep >= self.ratio_threshold and float_rep <= 1 / self.ratio_threshold
                channel_rec_vals.append(recommended)

            # build the return dict input
            # also unpack input and weight dicts into it
            input_weight_equalization_info[module_fqn] = {
                self.RECOMMENDED_KEY: channel_rec_vals,
                self.COMP_METRIC_KEY: mod_comp_stat,
                self.THRESHOLD_KEY: self.ratio_threshold,
                self.CHANNEL_KEY: self.ch_axis,
                **mod_input_info,
                **mod_weight_info,
            }

        # return our compiled info for each module
        return input_weight_equalization_info

    def generate_detector_report(self, model: GraphModule) -> Tuple[str, Dict[str, Any]]:
        r"""
        Determines whether input weight equalization is appropriate for a given module.

        Takes advantage of the ModelReport Observer which records per channel information of input range
        It then uses the passed in weight info in conjunction to compute the desired ratio
        Finally, it gives suggestions based on this information for each module of interest

        Args:
            model (GraphModule): The prepared and calibrated GraphModule with inserted ModelReportObservers

        Returns a tuple with two elements:
            String report of whether input weight equalization is recommended for certain modules
            Dictionary mapping modules of interest to:
                whether input weight equalization is recommended
                their s_c metric compared to the threshold
                the threshold used to make the recommendation
                the channel used for recording data
                the input channel range info
                the weight channel range info
        """

        # find the range of inputs
        input_values: Dict[str, Dict] = self._extract_input_info(model)

        # find the range of weights
        weight_values: Dict[str, Dict] = self._extract_weight_info(model)

        # calculate per_channel comparison statistic s_c
        comp_stats: Dict[str, torch.Tensor] = self._generate_comparison_values(input_values, weight_values)

        # generate the return dictionary
        input_weight_equalization_info: Dict[str, Dict] = self._generate_dict_info(input_values, weight_values, comp_stats)

        # now we can generate report based on this information
        input_weight_string = "Input-Weight Equalization suggestions: \n"

        # some strings to be formatted depending on module we are adding
        module_suggestion_str = "For Module {} looked at with axis {}: \n"
        channel_suggestion_str = "\tWe suggest {} input weight equalization because {}\n"
        use_str = "to use"
        no_use_str = "to not use"
        input_weight_benefit_str = "{}/{} channels would benefit and we expect significant reduction in quantization error."
        input_weight_non_benefit_reasoning = "{}/{} channels benefitting from input-weight equalization being applied."
        input_weight_non_benefit_str = "we don't expect much improvement from input-weight equalization based on {}"

        # added module check
        added_module: bool = False

        # compile the suggestion string
        for module_fqn in input_weight_equalization_info:
            # we added at least 1 module
            added_module = True
            # add the module level description
            input_weight_string += module_suggestion_str.format(module_fqn, self.ch_axis)

            mod_info: Dict[str, Any] = input_weight_equalization_info[module_fqn]

            # gather info on how many channels would benefit from input weight equalization
            # note: RECOMMENDED_KEY holds the per-channel list of bools built in _generate_dict_info
            recommendation_per_channel: list = mod_info[self.RECOMMENDED_KEY]
            num_recs = sum(recommendation_per_channel)

            # recommend only when enough channels benefit (default 40% of channels)
            if num_recs / len(recommendation_per_channel) >= self.DEFAULT_RECOMMEND_INPUT_WEIGHT_CHANNEL_RATIO:
                input_benefit_formatted = input_weight_benefit_str.format(num_recs, len(recommendation_per_channel))
                channel_str = channel_suggestion_str.format(use_str, input_benefit_formatted)
                input_weight_string += channel_str
            else:
                non_benefit_reason_formatted = input_weight_non_benefit_reasoning.format(num_recs, len(recommendation_per_channel))
                non_benefit_str = input_weight_non_benefit_str.format(non_benefit_reason_formatted)
                channel_str = channel_suggestion_str.format(no_use_str, non_benefit_str)
                input_weight_string += channel_str

        # if no modules looked at, amend return string
        if not added_module:
            input_weight_string += "No applicable layers for suggestions. Only linear and conv valid.\n"

        # return a tuple with the string explanation and the compiled dict info
        return (input_weight_string, input_weight_equalization_info)
1176
+
1177
+
1178
+ class OutlierDetector(DetectorBase):
1179
+ r"""
1180
+ Determines whether there are significant outliers in activation data around a certain layer.
1181
+
1182
+ This is ideally used in conjunction with information on stationary vs. non-stationary distribution:
1183
+ If the data is stationary, and there are significant outliers, then we want to flag them
1184
+ We want to do this on a per channel basis for detecting outliers
1185
+
1186
+ Determines whether activation data is flagged as outlier based on if data is stationary and:
1187
+ p_r = avg(100th percentile / "reference_percentile"th percentile)
1188
+ where:
1189
+ p_r is average percentile ratio across all batches in the epoch
1190
+ reference_percentile is a percentile values between 0 and 100 exclusive
1191
+
1192
+ if p_r is above some threshold, then we consider the activations to have significant outliers
1193
+
1194
+ Args:
1195
+ ratio_threshold (float, optional): The threshold for p_r to determine if there are outliers in activations
1196
+ Should be >= 1
1197
+ Default: 3.5
1198
+ reference_percentile (float, optional): The denominator to find the relative scale of the 100th percentile
1199
+ Should be between 0 and 1
1200
+ Default: 0.975
1201
+ fraction_batches_used_threshold (float, optional): Threshold of fraction of batches per channel to determine outlier
1202
+ If fraction is below this, we deem number of samples used to calculate outliers as insignificant and alert user
1203
+ regardless of whether we detected outliers or not in channel to take a closer look at channel results
1204
+ Should be between 0 and 1
1205
+ Default: 0.95
1206
+ ch_axis (int, optional): The channel axis being observed to determine input weight equalization
1207
+ Default: 1
1208
+
1209
+ * :attr:`ratio_threshold`: The threshold for p_r to determine if there are outliers in activations
1210
+ The p_r value (average ratio of 100th percentile/reference_percentile) is compared to ratio_threshold
1211
+ If it is significantly greater, then we consider it an outlier
1212
+ This threshold was calculated based on the ratio of the percentiles in a normal distribution
1213
+ The calculations behind value choice: https://drive.google.com/file/d/1N2wdtXWI-kOH8S7HH4-PYB_NmqzZil4p/view?usp=sharing
1214
+
1215
+ * :attr:`reference_percentile`: The denominator of the top fraction to find the relative scale of the 100th percentile
1216
+ Should be between 0 and 1
1217
+ The calculations behind value choice: https://drive.google.com/file/d/1N2wdtXWI-kOH8S7HH4-PYB_NmqzZil4p/view?usp=sharing
1218
+
1219
+ * :attr:`fraction_batches_used_threshold`: The fraction of batches to determine outliers for each channel should be above this
1220
+ Some batches may not be used because of 0-based errors, so this is to ensure a good amount of the total batches are used
1221
+ Should be between 0 and 1
1222
+
1223
+ * :attr:`ch_axis`: The channel axis being observed to determine outliers
1224
+
1225
+ * :attr:`DEFAULT_PRE_OBSERVER_NAME`: The name of the pre-observer to be inserted for this detector
1226
+ """
1227
+
1228
+ # names for the pre observers that are inserted
1229
+ DEFAULT_PRE_OBSERVER_NAME: str = "model_report_pre_observer"
1230
+
1231
+ # pre activation prefix
1232
+ INPUT_ACTIVATION_PREFIX = "input_activation_"
1233
+
1234
+ # names for dict keys
1235
+ OUTLIER_KEY = "outliers_detected"
1236
+ NUM_BATCHES_KEY = "outlier_detection_batches_used"
1237
+ IS_SUFFICIENT_BATCHES_KEY = "outlier_detection_is_sufficient_batches"
1238
+ COMP_METRIC_KEY = "outlier_detection_percentile_ratios"
1239
+ RATIO_THRES_KEY = "outlier_detection_ratio_threshold"
1240
+ REF_PERCENTILE_KEY = "outlier_detection_reference_percentile"
1241
+ CHANNEL_AXIS_KEY = "outlier_detection_channel_axis"
1242
+ MAX_VALS_KEY = INPUT_ACTIVATION_PREFIX + "per_channel_max"
1243
+ CONSTANT_COUNTS_KEY = "constant_batch_counts"
1244
+
1245
+ def __init__(
1246
+ self,
1247
+ ratio_threshold: float = 3.5,
1248
+ reference_percentile: float = 0.975,
1249
+ fraction_batches_used_threshold: float = 0.95,
1250
+ ch_axis: int = 1,
1251
+ ):
1252
+ # initialize the variables of interest
1253
+ self.ratio_threshold = ratio_threshold
1254
+
1255
+ # make sure passed in percentile is valid
1256
+ assert reference_percentile >= 0 and reference_percentile <= 1
1257
+ assert fraction_batches_used_threshold >= 0 and fraction_batches_used_threshold <= 1
1258
+ self.reference_percentile = reference_percentile
1259
+ self.fraction_batches_used_threshold = fraction_batches_used_threshold
1260
+ self.ch_axis = ch_axis
1261
+
1262
+ def get_detector_name(self) -> str:
1263
+ r"""Returns the name of this detector"""
1264
+ return "outlier_detector"
1265
+
1266
+ def _supports_insertion(self, module: nn.Module) -> bool:
1267
+ r"""Returns whether the given module is supported for observers insertion
1268
+
1269
+ Any module that doesn't have children and isn't an observer itself is supported
1270
+
1271
+ Args
1272
+ module: The module to check and ensure is supported
1273
+
1274
+ Returns True if the module is supported by observer, False otherwise
1275
+ """
1276
+ # case for insertion of module
1277
+ # check if the module has any children and isn't observer
1278
+ num_children = len(list(module.children()))
1279
+ return num_children == 0 and not _is_activation_post_process(module)
1280
+
1281
+ def get_qconfig_info(self, model) -> Dict[str, DetectorQConfigInfo]:
1282
+ r""" Returns the DetectorQConfigInfo for each module_fqn relevant
1283
+ Args
1284
+ model (nn.Module or subclass): model to find observer insertion points
1285
+
1286
+ Returns a Dict mapping from unique observer fqns (where we want to insert them) to:
1287
+ A DetectorQConfigInfo with the information to generate a QConfig for a specific module
1288
+ """
1289
+ # currently doesn't do anything for outlier detector
1290
+ return {}
1291
+
1292
+ def _supports_report_gen(self, module: nn.Module) -> bool:
1293
+ r"""Returns whether the given module is supported for report generation
1294
+
1295
+ Any module that has a model report pre-observer is supported
1296
+
1297
+ Args
1298
+ module: The module to check and ensure is supported
1299
+
1300
+ Returns True if the module is supported by observer, False otherwise
1301
+ """
1302
+ return hasattr(module, self.DEFAULT_PRE_OBSERVER_NAME)
1303
+
1304
+ def determine_observer_insert_points(self, prepared_fx_model: GraphModule) -> Dict[str, Dict[str, Any]]:
1305
+ r""" Determines where observers need to be inserted for the Outlier Detector.
1306
+
1307
+ For this detector, we want to place observers in front of supported layers.
1308
+
1309
+ Currently inserts observers for:
1310
+ all layers that do not have children (leaf level layers)
1311
+
1312
+ Args:
1313
+ prepared_fx_model (GraphModule): The prepared Fx GraphModule
1314
+
1315
+ Returns a Dict mapping from unique observer fqns (where we want to insert them) to a Dict with:
1316
+ key "target_node" -> the node we are trying to observe with this observer (torch.fx.node.Node)
1317
+ key "observer_to_insert" -> the observer we wish to insert (ObserverBase)
1318
+ key "is_post_observer" -> True if this is meant to be a post-observer for target_node, False if pre-observer
1319
+ key "observer_args" -> The arguments that are meant to be passed into the observer
1320
+ """
1321
+ # observer for this detector is ModelReportObserver
1322
+ obs_ctr = ModelReportObserver
1323
+
1324
+ # return dict
1325
+ obs_fqn_to_info: Dict[str, Dict[str, Any]] = {}
1326
+
1327
+ for fqn, module in prepared_fx_model.named_modules():
1328
+ # check to see if module is of a supported type
1329
+ if self._supports_insertion(module):
1330
+ # if it's a supported type, we want to get node and add observer insert locations
1331
+ targeted_node = self._get_targeting_node(prepared_fx_model, fqn)
1332
+
1333
+ # add entry for pre-observer
1334
+ pre_obs_fqn = fqn + "." + self.DEFAULT_PRE_OBSERVER_NAME
1335
+
1336
+ obs_fqn_to_info[pre_obs_fqn] = {
1337
+ DETECTOR_TARGET_NODE_KEY: targeted_node,
1338
+ DETECTOR_OBS_TO_INSERT_KEY: obs_ctr(ch_axis=self.ch_axis, comp_percentile=self.reference_percentile),
1339
+ DETECTOR_IS_POST_OBS_KEY: False,
1340
+ DETECTOR_OBS_ARGS_KEY: targeted_node.args,
1341
+ }
1342
+
1343
+ return obs_fqn_to_info
1344
+
1345
+ def _calculate_outlier_info(
1346
+ self,
1347
+ percentile_ratios: torch.Tensor,
1348
+ counted_batches: torch.Tensor,
1349
+ total_batches: int,
1350
+ ) -> Dict[str, List[bool]]:
1351
+ r"""
1352
+ Gives info on whether the percentile ratios calculated would be considered outliers
1353
+ Also gives information on whether the collected data is statistically significant to make this claim
1354
+
1355
+ Args:
1356
+ percentile_ratios (torch.Tensor): The average percentile_ratios per channel calculated by the observer
1357
+ counted_batches (torch.Tensor): The number of batches used for average calculation per tensor
1358
+ total_batches (int): The total number of batches that passed through observer in this epoch
1359
+
1360
+ Returns a dictionary mapping:
1361
+ "outliers_detected" : list of bools per channel that are true if it is considered an outlier
1362
+ "is_sufficient_batches": if o_r was >= fraction_batches_used_threshold:
1363
+ where o_r = counted_batches / total_batches
1364
+ """
1365
+ outlier_dict: Dict[str, List[bool]] = {self.OUTLIER_KEY: [], self.IS_SUFFICIENT_BATCHES_KEY: []}
1366
+
1367
+ # get both as flattened lists for easy mapping
1368
+ ratios_list: List = percentile_ratios.tolist()
1369
+ num_batches_list: List = counted_batches.tolist()
1370
+
1371
+ # calculate whether channels were statistically significant
1372
+ significant_size = [
1373
+ batch_size / total_batches >= self.fraction_batches_used_threshold for batch_size in num_batches_list
1374
+ ]
1375
+ outlier_dict[self.IS_SUFFICIENT_BATCHES_KEY] = significant_size
1376
+
1377
+ # calculate for each channel whether it's an outlier or not based on ratio
1378
+ outlier_detected = [ratio > self.ratio_threshold for ratio in ratios_list]
1379
+ outlier_dict[self.OUTLIER_KEY] = outlier_detected
1380
+
1381
+ # return the dictionary with the two lists
1382
+ return outlier_dict
1383
+
1384
+ def _generate_info_dict(self, model: GraphModule) -> Dict[str, Dict]:
1385
+ r"""
1386
+ Helper function for generate_detector_report that does the generation of the dictionary.
1387
+ This process is done as specified in generate_detector_report documentation
1388
+
1389
+ Args:
1390
+ model (GraphModule): The prepared and calibrated GraphModule with inserted ModelReportObservers
1391
+
1392
+ Returns a dict mapping relevant module fqns to:
1393
+ whether there were outliers found in activation before
1394
+ the number of batches used for each channel
1395
+ whether fraction of applicable batches used is above fraction_batches_used_threshold
1396
+ their p_r metric compared to the threshold
1397
+ the threshold used to make the recommendation
1398
+ the reference_percentile used to make the recommendation
1399
+ the channel axis used to determine individual channels
1400
+ the constant batch counts per channel
1401
+ the per channel max values
1402
+ """
1403
+ # return dictionary mapping observer fqns to desired info
1404
+ info_dict: Dict[str, Dict] = {}
1405
+
1406
+ for fqn, module in model.named_modules():
1407
+ # if module is supported and it has a pre-observer
1408
+ if self._supports_report_gen(module):
1409
+ # get pre observer for the module
1410
+ pre_obs: ModelReportObserver = getattr(module, self.DEFAULT_PRE_OBSERVER_NAME)
1411
+
1412
+ # get the number of batches and calculated ratio thresholds
1413
+ num_batches: torch.Tensor = pre_obs.percentile_batches_tracked
1414
+ average_ratios: torch.Tensor = pre_obs.average_percentile_ratio
1415
+ channel_batch_cnts: torch.Tensor = pre_obs.constant_channels
1416
+ total_batches: int = pre_obs.num_batches_tracked
1417
+
1418
+ # also get the max values
1419
+ max_vals: torch.Tensor = pre_obs.max_val
1420
+
1421
+ # we have to specifically modify how we are recording negative ratio for pre-relu layers
1422
+ for index, ratio_val in enumerate(average_ratios):
1423
+ # check if we have a negative ratio
1424
+ # a ratio might be negative if we have a situation where the 100th percentile is
1425
+ # > 0 while the nth percentile is < 0, in which case this would not be detected
1426
+ # as an outlier. Since we care more about magnitude, we make it positive.
1427
+ if ratio_val.item() < 0:
1428
+ # first make it positive
1429
+ average_ratios[index] = -ratio_val
1430
+
1431
+ if ratio_val.item() < 1:
1432
+ # if it's less than 1 we have the flip it as well
1433
+ average_ratios[index] = 1 / ratio_val
1434
+
1435
+ outlier_calcs = self._calculate_outlier_info(average_ratios, num_batches, total_batches)
1436
+
1437
+ # calculate whether ratios were outliers
1438
+ info_dict[fqn] = {
1439
+ self.CHANNEL_AXIS_KEY: self.ch_axis,
1440
+ self.REF_PERCENTILE_KEY: self.reference_percentile,
1441
+ self.RATIO_THRES_KEY: self.ratio_threshold,
1442
+ self.COMP_METRIC_KEY: average_ratios,
1443
+ self.NUM_BATCHES_KEY: num_batches,
1444
+ self.OUTLIER_KEY: outlier_calcs[self.OUTLIER_KEY],
1445
+ self.IS_SUFFICIENT_BATCHES_KEY: outlier_calcs[self.IS_SUFFICIENT_BATCHES_KEY],
1446
+ self.CONSTANT_COUNTS_KEY: channel_batch_cnts,
1447
+ self.MAX_VALS_KEY: max_vals
1448
+ }
1449
+
1450
+ return info_dict
1451
+
1452
+ def generate_detector_report(self, model: GraphModule) -> Tuple[str, Dict[str, Any]]:
1453
+ r"""
1454
+ Determines whether input weight equalization is appropriate for a given module.
1455
+
1456
+ Takes advantage of the ModelReport Observer which records the relevant percentile information
1457
+
1458
+ Args:
1459
+ model (GraphModule): The prepared and calibrated GraphModule with inserted ModelReportObservers
1460
+
1461
+ Returns a tuple with two elements:
1462
+ String report of of whether there are outliers in the activations around certain modules
1463
+ Dictionary mapping modules of interest to:
1464
+ whether there were outliers found in activation before
1465
+ the number of batches used for each channel
1466
+ whether fraction of applicable batches used is above fraction_batches_used_threshold
1467
+ their p_r metric compared to the threshold
1468
+ the threshold used to make the recommendation
1469
+ the reference_percentile used to make the recommendation
1470
+ the channel axis used to determine individual channels
1471
+ the constant batch counts per channel
1472
+ the per channel max values
1473
+ """
1474
+ # generate the information dictionary of outlier information
1475
+ info_dict = self._generate_info_dict(model)
1476
+
1477
+ # now we can generate report based on this information
1478
+ outlier_string = "Outlier detection report: \n"
1479
+
1480
+ # added module check
1481
+ added_module: bool = False
1482
+
1483
+ # some strings to be formatted depending on module we are adding
1484
+ module_suggestion_str = "For Module {} looked at with axis {}: \n"
1485
+ channel_suggestion_str = "\tFor channel {}, we found outliers in the preceding activation data with {}.\n"
1486
+ channel_max_value_str = "a max value across all batches of {}"
1487
+ note_string = "Note: outlier detection is only reliable for {}. We recommend {} to ensure the most accurate results."
1488
+ note_distribution = "stationary distributions"
1489
+ note_rec = "running the static vs. dynamic detector to ensure activation data before modules above is stationary"
1490
+
1491
+ # suggestion for constant batch check since that can make it no outliers
1492
+ constant_str = "\tFor channel {}, we found {} constant value batches. {}\n"
1493
+ constant_suggestion = "We recommend taking a look at the dict and data to see how frequent this occurred and why."
1494
+
1495
+ # compile the suggestion string
1496
+ for module_fqn in info_dict:
1497
+ # get module specific info
1498
+ mod_info: Dict[str, Any] = info_dict[module_fqn]
1499
+ # check to see if we already added high level model desc
1500
+ added_model_desc = False
1501
+ # look at each individual channel and add a suggestion
1502
+ for index, outlier_detected in enumerate(mod_info[self.OUTLIER_KEY]):
1503
+ if outlier_detected:
1504
+ # we found at least 1 outlier
1505
+ if not added_model_desc:
1506
+ # add the module level description
1507
+ outlier_string += module_suggestion_str.format(module_fqn, self.ch_axis)
1508
+ added_model_desc = True
1509
+
1510
+ # we mark that we found at least one outlier
1511
+ added_module = True
1512
+ max_value_found_str = channel_max_value_str.format(mod_info[self.MAX_VALS_KEY][index])
1513
+ channel_str = channel_suggestion_str.format(index, max_value_found_str)
1514
+ outlier_string += channel_str
1515
+
1516
+ # also check if we found constant batch
1517
+ if mod_info[self.CONSTANT_COUNTS_KEY][index] != 0:
1518
+ # make sure we add a module level highlight.
1519
+ if not added_model_desc:
1520
+ # add the module level description
1521
+ outlier_string += module_suggestion_str.format(module_fqn, self.ch_axis)
1522
+ added_model_desc = True
1523
+
1524
+ constant_values_for_channel = mod_info[self.CONSTANT_COUNTS_KEY][index]
1525
+ formatted_str = constant_str.format(index, constant_values_for_channel, constant_suggestion)
1526
+ outlier_string += formatted_str
1527
+ # we also added at least one thing to description
1528
+ added_module = True
1529
+
1530
+
1531
+ # if found outlier, give suggestion, else give default response
1532
+ if added_module:
1533
+ # compose the note string
1534
+ note_composed = note_string.format(note_distribution, note_rec)
1535
+ outlier_string += note_composed
1536
+ else:
1537
+ outlier_string += "There were no outliers found in the activations.\n"
1538
+
1539
+ return (outlier_string, info_dict)
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/model_report_visualizer.py ADDED
@@ -0,0 +1,666 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from typing import Any, Set, Dict, List, Tuple, OrderedDict
3
+ from collections import OrderedDict as OrdDict
4
+
5
+ # try to import tablate
6
+ got_tabulate = True
7
+ try:
8
+ from tabulate import tabulate
9
+ except ImportError:
10
+ got_tabulate = False
11
+
12
+
13
+ # var to see if we could import matplotlib
14
+ got_matplotlib = True
15
+ try:
16
+ import matplotlib.pyplot as plt
17
+ except ImportError:
18
+ got_matplotlib = False
19
+
20
+ class ModelReportVisualizer:
21
+ r"""
22
+ The ModelReportVisualizer class aims to provide users a way to visualize some of the statistics
23
+ that were generated by the ModelReport API. However, at a higher level, the class aims to provide
24
+ some level of visualization of statistics to PyTorch in order to make it easier to parse data and
25
+ diagnose any potential issues with data or a specific model. With respect to the visualizations,
26
+ the ModelReportVisualizer class currently supports several methods of visualizing data.
27
+
28
+ Supported Visualization Methods Include:
29
+ - Table format
30
+ - Plot format (line graph)
31
+ - Histogram format
32
+
33
+ For all of the existing visualization methods, there is the option to filter data based on:
34
+ - A module fqn prefix
35
+ - Feature [required for the plot and histogram]
36
+
37
+ * :attr:`generated_reports` The reports generated by the ModelReport class in the structure below
38
+ Ensure sure that features that are the same across different report contain the same name
39
+ Ensure that objects representing the same features are the same type / dimension (where applicable)
40
+
41
+ Note:
42
+ Currently, the ModelReportVisualizer class supports visualization of data generated by the
43
+ ModelReport class. However, this structure is extensible and should allow the visualization of
44
+ other information as long as the information is structured in the following general format:
45
+
46
+ Report Structure
47
+ -- module_fqn [module with attached detectors]
48
+ |
49
+ -- feature keys [not every detector extracts same information]
50
+ [same collected info has same keys, unless can be specific to detector]
51
+
52
+
53
+ The goal behind the class is that the generated visualizations can be used in conjunction with the generated
54
+ report for people to get a better understanding of issues and what the fix might be. It is also just to provide
55
+ a good visualization platform, since it might be hard to parse through the ModelReport returned dictionary as
56
+ that grows in size.
57
+
58
+ General Use Flow Expected
59
+ 1.) Initialize ModelReport object with reports of interest by passing in initialized detector objects
60
+ 2.) Prepare your model with prepare_fx
61
+ 3.) Call model_report.prepare_detailed_calibration on your model to add relevant observers
62
+ 4.) Callibrate your model with data
63
+ 5.) Call model_report.generate_report on your model to generate report and optionally remove added observers
64
+ 6.) Use output of model_report.generate_report to initialize ModelReportVisualizer instance
65
+ 7.) Use instance to view different views of data as desired, applying filters as needed
66
+ 8.) Either see the super detailed information or just the actual printed or shown table / plot / histogram
67
+
68
+ """
69
+
70
+ # keys for table dict
71
+ TABLE_TENSOR_KEY = "tensor_level_info"
72
+ TABLE_CHANNEL_KEY = "channel_level_info"
73
+
74
+ # Constants for header vals
75
+ NUM_NON_FEATURE_TENSOR_HEADERS = 2
76
+ NUM_NON_FEATURE_CHANNEL_HEADERS = 3
77
+
78
+ # Constants for row index in header
79
+ CHANNEL_NUM_INDEX = 2
80
+
81
+ def __init__(self, generated_reports: OrderedDict[str, Any]):
82
+ r"""
83
+ Initializes the ModelReportVisualizer instance with the necessary reports.
84
+
85
+ Args:
86
+ generated_reports (Dict[str, Any]): The reports generated by the ModelReport class
87
+ can also be a dictionary generated in another manner, as long as format is same
88
+ """
89
+ self.generated_reports = generated_reports
90
+
91
+ def get_all_unique_module_fqns(self) -> Set[str]:
92
+ r"""
93
+ The purpose of this method is to provide a user the set of all module_fqns so that if
94
+ they wish to use some of the filtering capabilities of the ModelReportVisualizer class,
95
+ they don't need to manually parse the generated_reports dictionary to get this information.
96
+
97
+ Returns all the unique module fqns present in the reports the ModelReportVisualizer
98
+ instance was initialized with.
99
+ """
100
+ # returns the keys of the ordered dict
101
+ return set(self.generated_reports.keys())
102
+
103
+ def get_all_unique_feature_names(self, plottable_features_only: bool = True) -> Set[str]:
104
+ r"""
105
+ The purpose of this method is to provide a user the set of all feature names so that if
106
+ they wish to use the filtering capabilities of the generate_table_view(), or use either of
107
+ the generate_plot_view() or generate_histogram_view(), they don't need to manually parse
108
+ the generated_reports dictionary to get this information.
109
+
110
+ Args:
111
+ plottable_features_only (bool): True if the user is only looking for plottable features,
112
+ False otherwise
113
+ plottable features are those that are tensor values
114
+ Default: True (only return those feature names that are plottable)
115
+
116
+ Returns all the unique module fqns present in the reports the ModelReportVisualizer
117
+ instance was initialized with.
118
+ """
119
+ unique_feature_names = set()
120
+ for module_fqn in self.generated_reports:
121
+ # get dict of the features
122
+ feature_dict: Dict[str, Any] = self.generated_reports[module_fqn]
123
+
124
+ # loop through features
125
+ for feature_name in feature_dict:
126
+ # if we need plottable, ensure type of val is tensor
127
+ if not plottable_features_only or type(feature_dict[feature_name]) == torch.Tensor:
128
+ unique_feature_names.add(feature_name)
129
+
130
+ # return our compiled set of unique feature names
131
+ return unique_feature_names
132
+
133
+ def _get_filtered_data(self, feature_filter: str, module_fqn_filter: str) -> OrderedDict[str, Any]:
134
+ r"""
135
+ Filters the data and returns it in the same ordered dictionary format so the relevant views can be displayed.
136
+
137
+ Args:
138
+ feature_filter (str): The feature filter, if we want to filter the set of data to only include
139
+ a certain set of features that include feature_filter
140
+ If feature = "", then we do not filter based on any features
141
+ module_fqn_filter (str): The filter on prefix for the module fqn. All modules that have fqn with
142
+ this prefix will be included
143
+ If module_fqn_filter = "" we do not filter based on module fqn, and include all modules
144
+
145
+ First, the data is filtered based on module_fqn, and then filtered based on feature
146
+ Returns an OrderedDict (sorted in order of model) mapping:
147
+ module_fqns -> feature_names -> values
148
+ """
149
+ # create return dict
150
+ filtered_dict: OrderedDict[str, Any] = OrdDict()
151
+
152
+ for module_fqn in self.generated_reports:
153
+ # first filter based on module
154
+ if module_fqn_filter == "" or module_fqn_filter in module_fqn:
155
+ # create entry for module and loop through features
156
+ filtered_dict[module_fqn] = {}
157
+ module_reports = self.generated_reports[module_fqn]
158
+ for feature_name in module_reports:
159
+ # check if filtering on features and do so if desired
160
+ if feature_filter == "" or feature_filter in feature_name:
161
+ filtered_dict[module_fqn][feature_name] = module_reports[feature_name]
162
+
163
+ # we have populated the filtered dict, and must return it
164
+
165
+ return filtered_dict
166
+
167
+ def _generate_tensor_table(
168
+ self,
169
+ filtered_data: OrderedDict[str, Dict[str, Any]],
170
+ tensor_features: List[str]
171
+ ) -> Tuple[List, List]:
172
+ r"""
173
+ Takes in the filtered data and features list and generates the tensor headers and table
174
+
175
+ Currently meant to generate the headers and table for both the tensor information.
176
+
177
+ Args:
178
+ filtered_data (OrderedDict[str, Dict[str, Any]]): An OrderedDict (sorted in order of model) mapping:
179
+ module_fqns -> feature_names -> values
180
+ tensor_features (List[str]): A list of the tensor level features
181
+
182
+ Returns a tuple with:
183
+ A list of the headers of the tensor table
184
+ A list of lists containing the table information row by row
185
+ The 0th index row will contain the headers of the columns
186
+ The rest of the rows will contain data
187
+ """
188
+ # now we compose the tensor information table
189
+ tensor_table: List[List[Any]] = []
190
+ tensor_headers: List[str] = []
191
+
192
+ # append the table row to the table only if we have features
193
+ if len(tensor_features) > 0:
194
+ # now we add all the data
195
+ for index, module_fqn in enumerate(filtered_data):
196
+ # we make a new row for the tensor table
197
+ tensor_table_row = [index, module_fqn]
198
+ for feature in tensor_features:
199
+ # we iterate in same order of added features
200
+
201
+ if feature in filtered_data[module_fqn]:
202
+ # add value if applicable to module
203
+ feature_val = filtered_data[module_fqn][feature]
204
+ else:
205
+ # add that it is not applicable
206
+ feature_val = "Not Applicable"
207
+
208
+ # if it's a tensor we want to extract val
209
+ if isinstance(feature_val, torch.Tensor):
210
+ feature_val = feature_val.item()
211
+
212
+ # we add to our list of values
213
+ tensor_table_row.append(feature_val)
214
+
215
+ tensor_table.append(tensor_table_row)
216
+
217
+ # add row of headers of we actually have something, otherwise just empty
218
+ if len(tensor_table) != 0:
219
+ tensor_headers = ["idx", "layer_fqn"] + tensor_features
220
+
221
+ return (tensor_headers, tensor_table)
222
+
223
+ def _generate_channels_table(
224
+ self,
225
+ filtered_data: OrderedDict[str, Any],
226
+ channel_features: List[str],
227
+ num_channels: int
228
+ ) -> Tuple[List, List]:
229
+ r"""
230
+ Takes in the filtered data and features list and generates the channels headers and table
231
+
232
+ Currently meant to generate the headers and table for both the channels information.
233
+
234
+ Args:
235
+ filtered_data (OrderedDict[str, Any]): An OrderedDict (sorted in order of model) mapping:
236
+ module_fqns -> feature_names -> values
237
+ channel_features (List[str]): A list of the channel level features
238
+ num_channels (int): Number of channels in the channel data
239
+
240
+ Returns a tuple with:
241
+ A list of the headers of the channel table
242
+ A list of lists containing the table information row by row
243
+ The 0th index row will contain the headers of the columns
244
+ The rest of the rows will contain data
245
+ """
246
+ # now we compose the table for the channel information table
247
+ channel_table: List[List[Any]] = []
248
+ channel_headers: List[str] = []
249
+
250
+ # counter to keep track of number of entries in
251
+ channel_table_entry_counter: int = 0
252
+
253
+ if len(channel_features) > 0:
254
+ # now we add all channel data
255
+ for module_fqn in filtered_data:
256
+ # we iterate over all channels
257
+ for channel in range(num_channels):
258
+ # we make a new row for the channel
259
+ new_channel_row = [channel_table_entry_counter, module_fqn, channel]
260
+ for feature in channel_features:
261
+ if feature in filtered_data[module_fqn]:
262
+ # add value if applicable to module
263
+ feature_val = filtered_data[module_fqn][feature][channel]
264
+ else:
265
+ # add that it is not applicable
266
+ feature_val = "Not Applicable"
267
+
268
+ # if it's a tensor we want to extract val
269
+ if type(feature_val) is torch.Tensor:
270
+ feature_val = feature_val.item()
271
+
272
+ # add value to channel specific row
273
+ new_channel_row.append(feature_val)
274
+
275
+ # add to table and increment row index counter
276
+ channel_table.append(new_channel_row)
277
+ channel_table_entry_counter += 1
278
+
279
+ # add row of headers of we actually have something, otherwise just empty
280
+ if len(channel_table) != 0:
281
+ channel_headers = ["idx", "layer_fqn", "channel"] + channel_features
282
+
283
+ return (channel_headers, channel_table)
284
+
285
    def generate_filtered_tables(self, feature_filter: str = "", module_fqn_filter: str = "") -> Dict[str, Tuple[List, List]]:
        r"""
        Takes in optional filter values and generates two tables with desired information.

        The generated tables are presented in both a list-of-lists format

        The reason for the two tables are that they handle different things:
        1.) the first table handles all tensor level information
        2.) the second table handles and displays all channel based information

        The reasoning for this is that having all the info in one table can make it ambiguous which collected
        statistics are global, and which are actually per-channel, so it's better to split it up into two
        tables. This also makes the information much easier to digest given the plethora of statistics collected

        Tensor table columns:
         idx  layer_fqn  feature_1   feature_2   feature_3   .... feature_n
        ----  ---------  ---------   ---------   ---------        ---------

        Per-Channel table columns:
         idx  layer_fqn  channel  feature_1   feature_2   feature_3   .... feature_n
        ----  ---------  -------  ---------   ---------   ---------        ---------

        Args:
            feature_filter (str, optional): Filters the features presented to only those that
                contain this filter substring
                Default = "", results in all the features being printed
            module_fqn_filter (str, optional): Only includes modules that contains this string
                Default = "", results in all the modules in the reports to be visible in the table

        Returns a dictionary with two keys:
            (Dict[str, Tuple[List, List]]) A dict containing two keys:
            "tensor_level_info", "channel_level_info"
                Each key maps to a tuple with:
                    A list of the headers of each table
                    A list of lists containing the table information row by row
                    The 0th index row will contain the headers of the columns
                    The rest of the rows will contain data

        Example Use:
            >>> # xdoctest: +SKIP("undefined variables")
            >>> mod_report_visualizer.generate_filtered_tables(
            ...     feature_filter = "per_channel_min",
            ...     module_fqn_filter = "block1"
            ... ) # generates table with per_channel_min info for all modules in block 1 of the model
        """
        # first get the filtered data
        filtered_data: OrderedDict[str, Any] = self._get_filtered_data(feature_filter, module_fqn_filter)

        # now we split into tensor and per-channel data
        tensor_features: Set[str] = set()
        channel_features: Set[str] = set()

        # keep track of the number of channels we have
        num_channels: int = 0

        for module_fqn in filtered_data:
            for feature_name in filtered_data[module_fqn]:
                # get the data for that specific feature
                feature_data = filtered_data[module_fqn][feature_name]

                # check if not zero dim tensor
                is_tensor: bool = isinstance(feature_data, torch.Tensor)
                is_not_zero_dim: bool = is_tensor and len(feature_data.shape) != 0

                if is_not_zero_dim or isinstance(feature_data, list):
                    # a multi-element tensor or a list holds one value per channel
                    channel_features.add(feature_name)
                    # NOTE(review): this keeps the length of the *last* per-channel
                    # feature seen; assumes all per-channel features share the same
                    # channel count -- TODO confirm
                    num_channels = len(feature_data)
                else:
                    # scalar (zero-dim tensor or non-tensor) means per-tensor data
                    tensor_features.add(feature_name)

        # we make them lists for iteration purposes (sorted for stable column order)
        tensor_features_list: List[str] = sorted(tensor_features)
        channel_features_list: List[str] = sorted(channel_features)

        # get the tensor info
        tensor_headers, tensor_table = self._generate_tensor_table(filtered_data, tensor_features_list)

        # get the channel info
        channel_headers, channel_table = self._generate_channels_table(
            filtered_data, channel_features_list, num_channels
        )

        # let's now create the dictionary to return
        table_dict = {
            self.TABLE_TENSOR_KEY : (tensor_headers, tensor_table),
            self.TABLE_CHANNEL_KEY : (channel_headers, channel_table)
        }

        # return the two tables
        return table_dict
377
+
378
+ def generate_table_visualization(self, feature_filter: str = "", module_fqn_filter: str = ""):
379
+ r"""
380
+ Takes in optional filter values and prints out formatted tables of the information.
381
+
382
+ The reason for the two tables printed out instead of one large one are that they handle different things:
383
+ 1.) the first table handles all tensor level information
384
+ 2.) the second table handles and displays all channel based information
385
+
386
+ The reasoning for this is that having all the info in one table can make it ambiguous which collected
387
+ statistics are global, and which are actually per-channel, so it's better to split it up into two
388
+ tables. This also makes the information much easier to digest given the plethora of statistics collected
389
+
390
+ Tensor table columns:
391
+ idx layer_fqn feature_1 feature_2 feature_3 .... feature_n
392
+ ---- --------- --------- --------- --------- ---------
393
+
394
+ Per-Channel table columns:
395
+
396
+ idx layer_fqn channel feature_1 feature_2 feature_3 .... feature_n
397
+ ---- --------- ------- --------- --------- --------- ---------
398
+
399
+ Args:
400
+ feature_filter (str, optional): Filters the features presented to only those that
401
+ contain this filter substring
402
+ Default = "", results in all the features being printed
403
+ module_fqn_filter (str, optional): Only includes modules that contains this string
404
+ Default = "", results in all the modules in the reports to be visible in the table
405
+
406
+ Example Use:
407
+ >>> # xdoctest: +SKIP("undefined variables")
408
+ >>> mod_report_visualizer.generate_table_visualization(
409
+ ... feature_filter = "per_channel_min",
410
+ ... module_fqn_filter = "block1"
411
+ ... )
412
+ >>> # prints out neatly formatted table with per_channel_min info
413
+ >>> # for all modules in block 1 of the model
414
+ """
415
+ # see if we got tabulate
416
+ if not got_tabulate:
417
+ print("Make sure to install tabulate and try again.")
418
+ return None
419
+
420
+ # get the table dict and the specific tables of interest
421
+ table_dict = self.generate_filtered_tables(feature_filter, module_fqn_filter)
422
+ tensor_headers, tensor_table = table_dict[self.TABLE_TENSOR_KEY]
423
+ channel_headers, channel_table = table_dict[self.TABLE_CHANNEL_KEY]
424
+
425
+ # get the table string and print it out
426
+ # now we have populated the tables for each one
427
+ # let's create the strings to be returned
428
+ table_str = ""
429
+ # the tables will have some headers columns that are non-feature
430
+ # ex. table index, module name, channel index, etc.
431
+ # we want to look at header columns for features, that come after those headers
432
+ if len(tensor_headers) > self.NUM_NON_FEATURE_TENSOR_HEADERS:
433
+ # if we have at least one tensor level feature to be added we add tensor table
434
+ table_str += "Tensor Level Information \n"
435
+ table_str += tabulate(tensor_table, headers=tensor_headers)
436
+ if len(channel_headers) > self.NUM_NON_FEATURE_CHANNEL_HEADERS:
437
+ # if we have at least one channel level feature to be added we add tensor table
438
+ table_str += "\n\n Channel Level Information \n"
439
+ table_str += tabulate(channel_table, headers=channel_headers)
440
+
441
+ # if no features at all, let user know
442
+ if table_str == "":
443
+ table_str = "No data points to generate table with."
444
+
445
+ print(table_str)
446
+
447
+ def _get_plottable_data(self, feature_filter: str, module_fqn_filter: str) -> Tuple[List, List[List], bool]:
448
+ r"""
449
+ Takes in the feature filters and module filters and outputs the x and y data for plotting
450
+
451
+ Args:
452
+ feature_filter (str): Filters the features presented to only those that
453
+ contain this filter substring
454
+ module_fqn_filter (str): Only includes modules that contains this string
455
+
456
+ Returns a tuple of three elements
457
+ The first is a list containing relevant x-axis data
458
+ The second is a list containing the corresponding y-axis data
459
+ If the data is per channel
460
+ """
461
+ # get the table dict and the specific tables of interest
462
+ table_dict = self.generate_filtered_tables(feature_filter, module_fqn_filter)
463
+ tensor_headers, tensor_table = table_dict[self.TABLE_TENSOR_KEY]
464
+ channel_headers, channel_table = table_dict[self.TABLE_CHANNEL_KEY]
465
+
466
+ # make sure it is only 1 feature that is being plotted
467
+ # get the number of features in each of these
468
+ tensor_info_features_count = len(tensor_headers) - ModelReportVisualizer.NUM_NON_FEATURE_TENSOR_HEADERS
469
+ channel_info_features_count = len(channel_headers) - ModelReportVisualizer.NUM_NON_FEATURE_CHANNEL_HEADERS
470
+
471
+ # see if valid tensor or channel plot
472
+ is_valid_per_tensor_plot: bool = tensor_info_features_count == 1
473
+ is_valid_per_channel_plot: bool = channel_info_features_count == 1
474
+
475
+ # offset should either be one of tensor or channel table or neither
476
+ feature_column_offset = ModelReportVisualizer.NUM_NON_FEATURE_TENSOR_HEADERS
477
+ table = tensor_table
478
+
479
+ # if a per_channel plot, we have different offset and table
480
+ if is_valid_per_channel_plot:
481
+ feature_column_offset = ModelReportVisualizer.NUM_NON_FEATURE_CHANNEL_HEADERS
482
+ table = channel_table
483
+
484
+ x_data: List = []
485
+ y_data: List[List] = []
486
+ # the feature will either be a tensor feature or channel feature
487
+ if is_valid_per_tensor_plot:
488
+ for table_row_num, row in enumerate(table):
489
+ # get x_value to append
490
+ x_val_to_append = table_row_num
491
+ # the index of the feature will the 0 + num non feature columns
492
+ tensor_feature_index = feature_column_offset
493
+ row_value = row[tensor_feature_index]
494
+ if not type(row_value) == str:
495
+ x_data.append(x_val_to_append)
496
+ y_data.append(row_value)
497
+ elif is_valid_per_channel_plot:
498
+ # gather the x_data and multiple y_data
499
+ # calculate the number of channels
500
+ num_channels: int = max(row[self.CHANNEL_NUM_INDEX] for row in table) + 1
501
+ for channel in range(num_channels):
502
+ y_data.append([]) # separate data list per channel
503
+
504
+ for table_row_num, row in enumerate(table):
505
+ # get x_value to append
506
+ x_val_to_append = table_row_num
507
+ current_channel = row[self.CHANNEL_NUM_INDEX] # initially chose current channel
508
+ new_module_index: int = table_row_num // num_channels
509
+ x_val_to_append = new_module_index
510
+
511
+ # the index of the feature will the 0 + num non feature columns
512
+ tensor_feature_index = feature_column_offset
513
+ row_value = row[tensor_feature_index]
514
+ if not type(row_value) == str:
515
+ # only append if new index we are appending
516
+ if len(x_data) == 0 or x_data[-1] != x_val_to_append:
517
+ x_data.append(x_val_to_append)
518
+
519
+ # append value for that channel
520
+ y_data[current_channel].append(row_value)
521
+ else:
522
+ # more than one feature was chosen
523
+ error_str = "Make sure to pick only a single feature with your filter to plot a graph."
524
+ error_str += " We recommend calling get_all_unique_feature_names() to find unique feature names."
525
+ error_str += " Pick one of those features to plot."
526
+ raise ValueError(error_str)
527
+
528
+ # return x, y values, and if data is per-channel
529
+ return (x_data, y_data, is_valid_per_channel_plot)
530
+
531
+ def generate_plot_visualization(self, feature_filter: str, module_fqn_filter: str = ""):
532
+ r"""
533
+ Takes in a feature and optional module_filter and plots of the desired data.
534
+
535
+ For per channel features, it averages the value across the channels and plots a point
536
+ per module. The reason for this is that for models with hundreds of channels, it can
537
+ be hard to differentiate one channel line from another, and so the point of generating
538
+ a single average point per module is to give a sense of general trends that encourage
539
+ further deep dives.
540
+
541
+ Note:
542
+ Only features in the report that have tensor value data are plottable by this class
543
+ When the tensor information is plotted, it will plot:
544
+ idx as the x val, feature value as the y_val
545
+ When the channel information is plotted, it will plot:
546
+ the first idx of each module as the x val, feature value as the y_val [for each channel]
547
+ The reason for this is that we want to be able to compare values across the
548
+ channels for same layer, and it will be hard if values are staggered by idx
549
+ This means each module is represented by only 1 x value
550
+ Args:
551
+ feature_filter (str): Filters the features presented to only those that
552
+ contain this filter substring
553
+ module_fqn_filter (str, optional): Only includes modules that contains this string
554
+ Default = "", results in all the modules in the reports to be visible in the table
555
+
556
+ Example Use:
557
+ >>> # xdoctest: +SKIP("undefined variables")
558
+ >>> mod_report_visualizer.generate_plot_visualization(
559
+ ... feature_filter = "per_channel_min",
560
+ ... module_fqn_filter = "block1"
561
+ ... )
562
+ >>> # outputs line plot of per_channel_min information for all
563
+ >>> # modules in block1 of model each channel gets it's own line,
564
+ >>> # and it's plotted across the in-order modules on the x-axis
565
+ """
566
+ # checks if we have matplotlib and let's user know to install it if don't
567
+ if not got_matplotlib:
568
+ print("make sure to install matplotlib and try again.")
569
+ return None
570
+
571
+ # get the x and y data and if per channel
572
+ x_data, y_data, data_per_channel = self._get_plottable_data(feature_filter, module_fqn_filter)
573
+
574
+ # plot based on whether data is per channel or not
575
+ ax = plt.subplot()
576
+ ax.set_ylabel(feature_filter)
577
+ ax.set_title(feature_filter + " Plot")
578
+ plt.xticks(x_data) # only show ticks for actual points
579
+
580
+ if data_per_channel:
581
+ ax.set_xlabel("First idx of module")
582
+ # set the legend as well
583
+ # plot a single line that is average of the channel values
584
+ num_modules = len(y_data[0]) # all y_data have same length, so get num modules
585
+ num_channels = len(y_data) # we want num channels to be able to calculate average later
586
+
587
+ avg_vals = [sum(y_data[:][index]) / num_channels for index in range(num_modules)]
588
+
589
+ # plot the three things we measured
590
+ ax.plot(x_data, avg_vals, label=f"Average Value Across {num_channels} Channels")
591
+ ax.legend(loc='upper right')
592
+ else:
593
+ ax.set_xlabel("idx")
594
+ ax.plot(x_data, y_data)
595
+
596
+ # actually show the plot
597
+ plt.show()
598
+
599
+ def generate_histogram_visualization(self, feature_filter: str, module_fqn_filter: str = "", num_bins: int = 10):
600
+ r"""
601
+ Takes in a feature and optional module_filter and plots the histogram of desired data.
602
+
603
+ Note:
604
+ Only features in the report that have tensor value data can be viewed as a histogram
605
+ If you want to plot a histogram from all the channel values of a specific feature for
606
+ a specific model, make sure to specify both the model and the feature properly
607
+ in the filters and you should be able to see a distribution of the channel data
608
+
609
+ Args:
610
+ feature_filter (str, optional): Filters the features presented to only those that
611
+ contain this filter substring
612
+ Default = "", results in all the features being printed
613
+ module_fqn_filter (str, optional): Only includes modules that contains this string
614
+ Default = "", results in all the modules in the reports to be visible in the table
615
+ num_bins (int, optional): The number of bins to create the histogram with
616
+ Default = 10, the values will be split into 10 equal sized bins
617
+
618
+ Example Use:
619
+ >>> # xdoctest: +SKIP
620
+ >>> mod_report_visualizer.generategenerate_histogram_visualization_plot_visualization(
621
+ ... feature_filter = "per_channel_min",
622
+ ... module_fqn_filter = "block1"
623
+ ... )
624
+ # outputs histogram of per_channel_min information for all modules in block1 of model
625
+ information is gathered across all channels for all modules in block 1 for the
626
+ per_channel_min and is displayed in a histogram of equally sized bins
627
+ """
628
+ # checks if we have matplotlib and let's user know to install it if don't
629
+ if not got_matplotlib:
630
+ print("make sure to install matplotlib and try again.")
631
+ return None
632
+
633
+ # get the x and y data and if per channel
634
+ x_data, y_data, data_per_channel = self._get_plottable_data(feature_filter, module_fqn_filter)
635
+
636
+ # for histogram, we just care about plotting the y data
637
+ # plot based on whether data is per channel or not
638
+ ax = plt.subplot()
639
+ ax.set_xlabel(feature_filter)
640
+ ax.set_ylabel("Frequency")
641
+ ax.set_title(feature_filter + " Histogram")
642
+
643
+ if data_per_channel:
644
+ # set the legend as well
645
+ # combine all the data
646
+ all_data = []
647
+ for channel_info in y_data:
648
+ all_data.extend(channel_info)
649
+
650
+ val, bins, _ = plt.hist(
651
+ all_data,
652
+ bins=num_bins,
653
+ stacked=True,
654
+ rwidth=0.8,
655
+ )
656
+ plt.xticks(bins)
657
+ else:
658
+ val, bins, _ = plt.hist(
659
+ y_data,
660
+ bins=num_bins,
661
+ stacked=False,
662
+ rwidth=0.8,
663
+ )
664
+ plt.xticks(bins)
665
+
666
+ plt.show()
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/quantize_handler.py ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC
2
+ from typing import Callable, Dict, List, Optional, Type
3
+
4
+ import torch
5
+
6
+ from torch.ao.quantization.backend_config import (
7
+ BackendConfig,
8
+ DTypeConfig,
9
+ ObservationType,
10
+ )
11
+ from torch.ao.quantization.utils import NodePattern, Pattern, QuantizerCls
12
+ from torch.fx.graph import Node
13
+
14
+ from .utils import all_node_args_have_no_tensors
15
+
16
+
17
+ __all__ = [
18
+ "QuantizeHandler",
19
+ "BinaryOpQuantizeHandler",
20
+ "CatQuantizeHandler",
21
+ "ConvReluQuantizeHandler",
22
+ "LinearReLUQuantizeHandler",
23
+ "BatchNormQuantizeHandler",
24
+ "EmbeddingQuantizeHandler",
25
+ "RNNDynamicQuantizeHandler",
26
+ "DefaultNodeQuantizeHandler",
27
+ "FixedQParamsOpQuantizeHandler",
28
+ "CopyNodeQuantizeHandler",
29
+ "GeneralTensorShapeOpQuantizeHandler",
30
+ "CustomModuleQuantizeHandler",
31
+ "StandaloneModuleQuantizeHandler",
32
+ ]
33
+
34
+ def _default_root_node_getter(node_pattern):
35
+ if node_pattern is None:
36
+ return node_pattern
37
+ while not isinstance(node_pattern, Node):
38
+ node_pattern = node_pattern[-1]
39
+ return node_pattern
40
+
41
# Base Pattern Handler
class QuantizeHandler(ABC):  # noqa: B024
    """ Base handler class for the quantizer patterns
    """
    def __init__(
            self,
            node_pattern: NodePattern,
            modules: Dict[str, torch.nn.Module],
            root_node_getter: Optional[Callable] = None,
            is_custom_module=False,
            is_standalone_module=False):
        """ Records pattern information in __init__, which will be used
        in convert
        """
        self.node_pattern = node_pattern
        self.modules = modules
        getter = _default_root_node_getter if root_node_getter is None else root_node_getter
        self.root_node = getter(node_pattern)
        self.is_custom_module_ = is_custom_module
        self.is_standalone_module_ = is_standalone_module
        # count how many of the root node's args are Tensors (versus scalars);
        # this distinguishes things like "x + y" from "x + 2" or "2 + x"
        self.num_tensor_args = 0
        if isinstance(self.root_node, Node):
            no_tensor_cache: Dict[Node, bool] = {}
            for candidate in self.root_node.args:
                is_tensor_arg = isinstance(candidate, Node) and not all_node_args_have_no_tensors(
                    candidate, self.modules, no_tensor_cache)
                if is_tensor_arg:
                    self.num_tensor_args += 1

    def is_general_tensor_value_op(self) -> bool:
        """
        Returns True if the operator works for both floating point and
        quantized input, and does some computation based on the input Tensor,
        or the ops that only re-arranges the Tensor values or query some metadata
        about the Tensor
        so we need to insert observer/fake_quant for the output of the
        operator (same observer instance as input)
        since the distribution of values is different for input and output
        Tensors (for HistogramObserver) while they share the same quantization
        parameters
        Example operator: avgpool2d, reshape, transpose, maxpool2d
        Example observed operator:
        observer_0 - avgpool2d - observer_0 (same observer instance as input)
        """
        return False

    def is_custom_module(self):
        # flag recorded at construction time
        return self.is_custom_module_

    def is_standalone_module(self):
        # flag recorded at construction time
        return self.is_standalone_module_
96
+
97
+ def _get_quantize_handler_cls(
98
+ observation_type: ObservationType,
99
+ dtype_configs: List[DTypeConfig],
100
+ num_tensor_args_to_observation_type: Dict[int, ObservationType]) -> Type[QuantizeHandler]:
101
+ """
102
+ Return a configurable QuantizeHandler that matches the given specifications from the backend.
103
+ """
104
+
105
+ class ConfigurableQuantizeHandler(QuantizeHandler):
106
+ def __init__(
107
+ self,
108
+ node_pattern: NodePattern,
109
+ modules: Dict[str, torch.nn.Module],
110
+ root_node_getter: Optional[Callable] = None):
111
+ super().__init__(node_pattern, modules, root_node_getter)
112
+ if num_tensor_args_to_observation_type:
113
+ assert self.num_tensor_args in num_tensor_args_to_observation_type, \
114
+ f"Must provide observation_type config for tensor number {self.num_tensor_args}" \
115
+ f" in num_tensor_args_to_observation_type for {node_pattern}"
116
+ self.observation_type = num_tensor_args_to_observation_type[self.num_tensor_args]
117
+ else:
118
+ self.observation_type = observation_type
119
+ self.dtype_configs = dtype_configs
120
+
121
+ def is_general_tensor_value_op(self) -> bool:
122
+ return self.observation_type == ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT
123
+
124
+ return ConfigurableQuantizeHandler
125
+
126
def _get_pattern_to_quantize_handlers(backend_config: BackendConfig) -> Dict[Pattern, QuantizerCls]:
    """
    Note: Quantize handler is just a holder for some check methods like
    (should_insert_observer_for_output), maybe this can be a enum as well,
    we can refactor this after we convert the path for fbgemm/qnnpack fully to the
    new path, this is not exposed to backend developers
    """
    # one configurable handler class per pattern registered on the backend
    return {
        pattern: _get_quantize_handler_cls(
            config.observation_type,
            config.dtype_configs,
            config._num_tensor_args_to_observation_type,
        )
        for pattern, config in backend_config._pattern_complex_format_to_config.items()
    }
144
+
145
# The classes below are no-op subclasses of QuantizeHandler. Per the TODOs,
# they appear to remain only because they are still exposed through the
# torch.ao.quantization namespace -- verify before removing.

# TODO: remove this class, this is still exposed in torch.ao.quantization
# but we should be able to break bc
class BinaryOpQuantizeHandler(QuantizeHandler):
    pass

class CatQuantizeHandler(QuantizeHandler):
    pass

# TODO: remove this class
class ConvReluQuantizeHandler(QuantizeHandler):
    pass

# TODO: remove this class
class LinearReLUQuantizeHandler(QuantizeHandler):
    pass

# TODO: remove this class
class BatchNormQuantizeHandler(QuantizeHandler):
    pass

# TODO: remove this class
class EmbeddingQuantizeHandler(QuantizeHandler):
    pass

# TODO: remove this class
class RNNDynamicQuantizeHandler(QuantizeHandler):
    pass

# TODO: remove this class
class DefaultNodeQuantizeHandler(QuantizeHandler):
    """ Common quantized op, first input and first output will be quantized
    """
    pass

# TODO: remove this class
class FixedQParamsOpQuantizeHandler(QuantizeHandler):
    pass

# TODO: remove
class CopyNodeQuantizeHandler(QuantizeHandler):
    pass

# TODO: remove
class GeneralTensorShapeOpQuantizeHandler(QuantizeHandler):
    pass

# TODO: not used, can be removed after torch.ao.quantization namespace is deprecated
class CustomModuleQuantizeHandler(QuantizeHandler):
    pass

# TODO: not used, can be removed after torch.ao.quantization namespace is deprecated
class StandaloneModuleQuantizeHandler(QuantizeHandler):
    pass
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/utils.py ADDED
@@ -0,0 +1,885 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ import torch
3
+ import torch.nn as nn
4
+ from torch.ao.quantization import (
5
+ QConfigAny,
6
+ QuantType,
7
+ )
8
+ from torch.ao.quantization.backend_config import (
9
+ DTypeWithConstraints,
10
+ )
11
+ from torch.ao.quantization.fake_quantize import (
12
+ FakeQuantizeBase,
13
+ FixedQParamsFakeQuantize,
14
+ )
15
+ from torch.ao.quantization.observer import (
16
+ FixedQParamsObserver,
17
+ ObserverBase,
18
+ )
19
+ from torch.ao.quantization.qconfig import (
20
+ float16_static_qconfig,
21
+ float16_dynamic_qconfig,
22
+ qconfig_equals,
23
+ )
24
+ from torch.ao.quantization.stubs import DeQuantStub
25
+ from torch.ao.quantization.utils import (
26
+ activation_is_statically_quantized,
27
+ )
28
+ from torch.ao.quantization.observer import _is_activation_post_process
29
+ from torch.ao.quantization.qconfig_mapping import QConfigMapping
30
+
31
+ from torch.fx import GraphModule, map_arg
32
+
33
+ from torch.fx.graph import (
34
+ Graph,
35
+ Node,
36
+ )
37
+ from .custom_config import PrepareCustomConfig
38
+ # importing the lib so that the quantized_decomposed ops are registered
39
+ from ._decomposed import quantized_decomposed_lib # noqa: F401
40
+
41
+ from typing import Callable, Optional, List, Dict, Any, Set, Tuple, Union, Type
42
+ from dataclasses import dataclass
43
+ from collections import namedtuple
44
+ import operator
45
+ import warnings
46
+
47
+ # TODO: revisit this list. Many helper methods shouldn't be public
48
+ __all__ = [
49
+ "all_node_args_except_first",
50
+ "all_node_args_have_no_tensors",
51
+ "assert_and_get_unique_device",
52
+ "collect_producer_nodes",
53
+ "create_getattr_from_value",
54
+ "create_node_from_old_node_preserve_meta",
55
+ "EMPTY_ARG_DICT",
56
+ "get_custom_module_class_keys",
57
+ "get_linear_prepack_op_for_dtype",
58
+ "get_new_attr_name_with_prefix",
59
+ "get_non_observable_arg_indexes_and_types",
60
+ "get_qconv_prepack_op",
61
+ "get_skipped_module_name_and_classes",
62
+ "graph_module_from_producer_nodes",
63
+ "maybe_get_next_module",
64
+ "NodeInfo",
65
+ "node_arg_is_bias",
66
+ "node_arg_is_weight",
67
+ "NON_OBSERVABLE_ARG_DICT",
68
+ "NON_QUANTIZABLE_WEIGHT_OPS",
69
+ "return_arg_list",
70
+ "ObservedGraphModuleAttrs",
71
+ ]
72
+
73
# Functional normalization ops whose ``weight`` argument is an affine scale,
# not a conv/linear-style weight, so it is not quantizable.
NON_QUANTIZABLE_WEIGHT_OPS = {torch.nn.functional.layer_norm, torch.nn.functional.group_norm, torch.nn.functional.instance_norm}
74
+
75
@dataclass
class ObservedGraphModuleAttrs:
    """Bookkeeping attached to a GraphModule after observation/prepare.

    NOTE(review): field semantics inferred from names and usage elsewhere in
    the quantization flow -- confirm against the prepare/convert callers.
    """
    # per-node qconfig resolved during prepare
    node_name_to_qconfig: Dict[str, QConfigAny]
    # node name -> (module scope fqn, module type)
    node_name_to_scope: Dict[str, Tuple[str, type]]
    prepare_custom_config: PrepareCustomConfig
    equalization_node_name_to_qconfig: Dict[str, Any]
    qconfig_mapping: QConfigMapping
    is_qat: bool
    observed_node_names: Set[str]
    # the three fields below are only meaningful for observed standalone modules
    is_observed_standalone_module: bool = False
    standalone_module_input_quantized_idxs: Optional[List[int]] = None
    standalone_module_output_quantized_idxs: Optional[List[int]] = None
87
+
88
def node_arg_is_weight(node: Node, arg: Any) -> bool:
    """Returns if node arg is weight

    ``arg`` is the weight when either:
      * the node's "target_dtype_info" meta declares a positional
        ``weight_index`` and ``node.args[weight_index]`` is ``arg``, or
      * ``arg`` was passed as the ``weight`` keyword argument.
    """
    weight_index = None
    if "target_dtype_info" in node.meta:
        weight_index = node.meta["target_dtype_info"].get("weight_index", None)
    if weight_index is not None and weight_index < len(node.args) and node.args[weight_index] is arg:
        return True
    # BUGFIX: check key membership explicitly; with the previous
    # ``node.kwargs.get("weight") is arg``, a node without a "weight" kwarg
    # wrongly reported True for arg=None (None is None).
    return "weight" in node.kwargs and node.kwargs["weight"] is arg
96
+
97
def node_arg_is_bias(node: Node, arg: Any) -> bool:
    """Returns if node arg is bias

    ``arg`` is the bias when either:
      * the node's "target_dtype_info" meta declares a positional
        ``bias_index`` and ``node.args[bias_index]`` is ``arg``, or
      * ``arg`` was passed as the ``bias`` keyword argument.
    """
    bias_index = None
    if "target_dtype_info" in node.meta:
        bias_index = node.meta["target_dtype_info"].get("bias_index", None)
    if bias_index is not None and bias_index < len(node.args) and node.args[bias_index] is arg:
        return True
    # BUGFIX: check key membership explicitly; with the previous
    # ``node.kwargs.get("bias") is arg``, a node without a "bias" kwarg
    # wrongly reported True for arg=None (None is None).
    return "bias" in node.kwargs and node.kwargs["bias"] is arg
105
+
106
def get_custom_module_class_keys(custom_module_mapping: Dict[QuantType, Dict[Type, Type]]) -> List[Any]:
    r""" Get all the unique custom module keys in the custom config dict
    e.g.
    Input:
    {
        QuantType.STATIC: {
            CustomModule1: ObservedCustomModule
        },
        QuantType.DYNAMIC: {
            CustomModule2: DynamicObservedCustomModule
        },
        QuantType.WEIGHT_ONLY: {
            CustomModule3: WeightOnlyObservedCustomModule
        },
    }

    Output:
    # extract the keys across all inner STATIC, DYNAMIC, and WEIGHT_ONLY dicts
    [CustomModule1, CustomModule2, CustomModule3]
    """
    # a set collapses classes that appear under more than one quant mode
    seen_classes: Set[Any] = set()
    for quant_mode in (QuantType.STATIC, QuantType.DYNAMIC, QuantType.WEIGHT_ONLY):
        seen_classes.update(custom_module_mapping.get(quant_mode, {}).keys())
    return list(seen_classes)
133
+
134
def get_linear_prepack_op_for_dtype(dtype):
    """Return the quantized linear prepack op matching the weight dtype.

    Supports ``torch.float16`` and ``torch.qint8``; anything else raises.
    """
    if dtype == torch.qint8:
        return torch.ops.quantized.linear_prepack
    if dtype == torch.float16:
        return torch.ops.quantized.linear_prepack_fp16
    raise Exception("can't get linear prepack op for dtype:", dtype)
141
+
142
def get_qconv_prepack_op(conv_op: Callable) -> Callable:
    """Return the quantized prepack op for a functional conv / conv_transpose.

    Raises an AssertionError when ``conv_op`` is not one of the supported
    1d/2d/3d conv or conv_transpose functionals.
    """
    prepack_for = {
        torch.nn.functional.conv1d: torch.ops.quantized.conv1d_prepack,
        torch.nn.functional.conv2d: torch.ops.quantized.conv2d_prepack,
        torch.nn.functional.conv3d: torch.ops.quantized.conv3d_prepack,
        torch.nn.functional.conv_transpose1d: torch.ops.quantized.conv_transpose1d_prepack,
        torch.nn.functional.conv_transpose2d: torch.ops.quantized.conv_transpose2d_prepack,
        torch.nn.functional.conv_transpose3d: torch.ops.quantized.conv_transpose3d_prepack,
    }
    matched = prepack_for.get(conv_op, None)
    assert matched, f"Didn't find prepack op for {conv_op}"
    return matched
154
+
155
# Returns a function that can get a new attribute name for module with given
# prefix, for example,
# >> get_new_observer_name = get_new_attr_name_with_prefix('_observer')
# >> new_name = get_new_observer_name(module)
# new_name will be an unused attribute name on module, e.g. `_observer_1`
def get_new_attr_name_with_prefix(prefix: str) -> Callable:
    # dots are not valid in attribute names, so fold them into underscores
    sanitized = prefix.replace(".", "_")

    def get_new_attr_name(module: torch.nn.Module):
        # probe prefix0, prefix1, ... until an unused name is found
        index = 0
        while hasattr(module, f"{sanitized}{index}"):
            index += 1
        return f"{sanitized}{index}"
    return get_new_attr_name
173
+
174
def collect_producer_nodes(node: Node) -> Optional[List[Node]]:
    r''' Starting from a target node, trace back until we hit input or
    getattr node. This is used to extract the chain of operators
    starting from getattr to the target node, for example
    def forward(self, x):
        observed = self.observer(self.weight)
        return F.linear(x, observed)
    collect_producer_nodes(observed) will either return a list of nodes that
    produces the observed node or None if we can't extract a self contained
    graph without free variables(inputs of the forward function).
    '''
    nodes = [node]
    frontier = [node]
    # Breadth-ish traversal over producers: pop a node, inspect all of its
    # inputs (positional and keyword), and keep expanding until only
    # getattr leaves remain.
    while frontier:
        node = frontier.pop()
        all_args = list(node.args) + list(node.kwargs.values())
        for arg in all_args:
            if not isinstance(arg, Node):
                continue
            if arg.op == 'placeholder':
                # hit input, can't fold in this case
                return None
            nodes.append(arg)
            # getattr call_function nodes are the leaves of the chain; do not
            # expand past them.
            if not (arg.op == 'call_function' and arg.target == getattr):
                frontier.append(arg)
    return nodes
200
+
201
def graph_module_from_producer_nodes(
        root: GraphModule, producer_nodes: List[Node]) -> GraphModule:
    r''' Construct a graph module from extracted producer nodes
    from `collect_producer_nodes` function
    Args:
      root: the root module for the original graph
      producer_nodes: a list of nodes we use to construct the graph
    Return:
      A graph module constructed from the producer nodes
    '''
    assert len(producer_nodes) > 0, 'list of producer nodes can not be empty'
    # since we traced back from node to getattr
    producer_nodes.reverse()
    graph = Graph()
    # Maps original nodes to their copies in the new graph.
    env: Dict[Any, Any] = {}

    def load_arg(a):
        # Remap any Node args to the already-copied nodes in the new graph.
        return map_arg(a, lambda node: env[node])
    for producer_node in producer_nodes:
        env[producer_node] = graph.node_copy(producer_node, load_arg)
    # After the reverse above, the last producer node is the original target
    # node; it becomes the output of the extracted graph.
    graph.output(load_arg(producer_nodes[-1]))
    graph_module = GraphModule(root, graph)
    return graph_module
224
+
225
def assert_and_get_unique_device(module: torch.nn.Module) -> Any:
    """
    Returns the unique device for a module, or None if no device is found.
    Throws an error if multiple devices are detected.
    """
    param_devices = {p.device for p in module.parameters()}
    buffer_devices = {b.device for b in module.buffers()}
    devices = param_devices | buffer_devices
    # As a temp workaround for AIMP HHC publish we added CPU check.
    # Remove it later. T163614564
    if devices == {torch.device("cpu"), torch.device("meta")}:
        warnings.warn("Both 'meta' and 'cpu' are present in the list of devices. Module can have one device. We Select 'cpu'.")
        devices = {torch.device("cpu")}
    assert len(devices) <= 1, (
        "prepare only works with cpu or single-device CUDA modules, "
        f"but got devices {devices}"
    )
    # Empty set means the module has no parameters/buffers at all.
    return next(iter(devices), None)
245
+
246
def create_getattr_from_value(module: torch.nn.Module, graph: Graph, prefix: str, value: Any) -> Node:
    """
    Given a value of any type, creates a getattr node corresponding to the value and
    registers the value as a buffer to the module.
    """
    attr_name = get_new_attr_name_with_prefix(prefix)(module)
    device = assert_and_get_unique_device(module)
    if isinstance(value, torch.Tensor):
        buffer_value = value.clone().detach()
    else:
        # Non-tensor values are wrapped in a tensor on the module's device.
        buffer_value = torch.tensor(value, device=device)
    module.register_buffer(attr_name, buffer_value)
    # Create get_attr with value
    return graph.create_node("get_attr", attr_name)
260
+
261
def all_node_args_have_no_tensors(node: Node, modules: Dict[str, torch.nn.Module], cache: Dict[Node, bool]) -> bool:
    """
    If we know for sure that all of this node's args have no
    tensors (are primitives), return True. If we either
    find a tensor or are not sure, return False. Note: this
    function is not exact.
    """
    # `cache` memoizes results per node; an empty dict is treated the same as
    # no cache because of the truthiness check.
    if cache and node in cache:
        return cache[node]

    result = False  # will be overwritten
    if not isinstance(node, Node):
        result = True
    elif node.op == 'placeholder':
        result = False
    elif node.op == 'call_module':
        assert isinstance(node.target, str)
        if _is_activation_post_process(modules[node.target]):
            # Observers are pass-through: look through them at their input.
            result = all_node_args_have_no_tensors(node.args[0], modules, cache)  # type: ignore[arg-type]
    elif node.op == 'call_module':
        # NOTE(review): this branch is unreachable — the preceding
        # `elif node.op == 'call_module'` already matches these nodes (a
        # non-observer call_module keeps the initial result=False).
        result = False
    elif node.op == 'call_function' and node.target is operator.getitem:
        result = all_node_args_have_no_tensors(node.args[0], modules, cache)  # type: ignore[arg-type]
    elif node.op == 'get_attr':
        result = False
    elif node.target is getattr and node.args[1] in ['ndim', 'shape']:
        # x1 = x0.ndim
        result = True
    elif node.op == 'call_method' and node.target == 'size':
        # x1 = x0.size(0)
        result = True
    else:
        # Generic case: recurse into Node args (including Nodes inside list
        # args); ints are primitives, anything else is conservatively treated
        # as a tensor.
        found_one_tensor = False
        for arg in node.args:
            if isinstance(arg, list):
                for list_el in arg:
                    if isinstance(list_el, Node):
                        this_list_el_args_have_no_tensors = \
                            all_node_args_have_no_tensors(list_el, modules, cache)
                        found_one_tensor = found_one_tensor or \
                            (not this_list_el_args_have_no_tensors)
                        # If found_one_tensor is True, there is no point in
                        # recursing further as the end result will always
                        # be True.
                        # TODO(future PR): remove this entire function and
                        # change to dtype inference without recursion.
                        if found_one_tensor:
                            result = not found_one_tensor
                            if cache:
                                cache[node] = result
                            return result
            elif isinstance(arg, int):
                pass
            else:
                if isinstance(arg, Node):
                    this_arg_args_have_no_tensors = all_node_args_have_no_tensors(arg, modules, cache)
                    found_one_tensor = found_one_tensor or \
                        (not this_arg_args_have_no_tensors)
                    # If found_one_tensor is True, there is no point in
                    # recursing further as the end result will always
                    # be True.
                    # TODO(future PR): remove this entire function and
                    # change to dtype inference without recursion.
                    if found_one_tensor:
                        result = not found_one_tensor
                        if cache:
                            cache[node] = result
                        return result
                else:
                    found_one_tensor = True
        result = not found_one_tensor
    if cache:
        cache[node] = result
    return result
335
+
336
def all_node_args_except_first(node: Node) -> List[int]:
    """
    Returns all node arg indices after first
    """
    arg_count = len(node.args)
    return [idx for idx in range(arg_count) if idx > 0]
341
+
342
def return_arg_list(arg_indices: List[int]) -> Callable[[Node], List[int]]:
    """
    Constructs a function that takes a node as arg and returns the arg_indices
    that are valid for node.args
    """
    def arg_indices_func(node: Node) -> List[int]:
        # Drop any requested index that falls past the end of node.args.
        arg_count = len(node.args)
        return [idx for idx in arg_indices if idx < arg_count]
    return arg_indices_func
350
+
351
# Lightweight key identifying a node by its op kind and target.
NodeInfo = namedtuple("NodeInfo", "op target")

# this dict identifies which indices of a node are non tensors
# so that they can be propagated correctly since inserting observers
# for them would cause errors

NON_OBSERVABLE_ARG_DICT: Dict[NodeInfo, Dict[Union[type, torch.dtype], Callable[[Node], List[int]]]] = {
    NodeInfo("call_method", "masked_fill") : {
        torch.bool: return_arg_list([1]),
        float: return_arg_list([2])
    },
    NodeInfo("call_method", "permute") : {
        int: all_node_args_except_first
    },
    NodeInfo("call_method", "repeat") : {
        int: all_node_args_except_first
    },
    NodeInfo("call_method", "reshape") : {
        int: all_node_args_except_first
    },
    NodeInfo("call_method", "size") : {
        int: return_arg_list([1])
    },
    NodeInfo("call_method", "transpose") : {
        int: all_node_args_except_first
    },
    # NOTE(review): the `torch.transpose` / `torch.unsqueeze` entries below are
    # keyed with op "call_method", but a bare torch.* function target would
    # normally appear as a "call_function" node — confirm these entries are
    # actually reachable.
    NodeInfo("call_method", torch.transpose) : {
        int: all_node_args_except_first
    },
    NodeInfo("call_method", "unsqueeze") : {
        int: return_arg_list([1])
    },
    NodeInfo("call_method", "unsqueeze_") : {
        int: return_arg_list([1])
    },
    NodeInfo("call_method", torch.unsqueeze) : {
        int: return_arg_list([1])
    },
    NodeInfo("call_method", "view") : {
        int: all_node_args_except_first
    },
}

# Fallback returned when a node has no non-observable-arg entry.
EMPTY_ARG_DICT: Dict[Union[type, torch.dtype], Callable[[Node], List[int]]] = {}
395
+
396
def get_non_observable_arg_indexes_and_types(node: Node) -> Dict[Union[type, torch.dtype], Callable[[Node], List[int]]]:
    """
    Returns a dict with of non float tensor types as keys and values which correspond to a
    function to retrieve the list (which takes the node as an argument)
    """
    key = NodeInfo(node.op, node.target)
    return NON_OBSERVABLE_ARG_DICT.get(key, EMPTY_ARG_DICT)
404
+
405
def maybe_get_next_module(
    node: Node,
    modules: Dict[str, nn.Module],
    target_module_type: Optional[Type[nn.Module]] = None,
    target_functional_type: Any = None,
) -> Optional[Node]:
    """ Gets the next module that matches what is needed in
    is_target_module_type if it exists

    Args:
        node: The node whose users we want to look at
        target_module_type: Module type that we want to check
        target_functional_type: Functional type that we want to check
    """
    for user in node.users.keys():
        module_matches = (
            user.op == 'call_module'
            and target_module_type is not None
            and isinstance(modules[str(user.target)], target_module_type)
        )
        if module_matches:
            return user
        functional_matches = (
            user.op == 'call_function'
            and target_functional_type is not None
            and user.target == target_functional_type
        )
        if functional_matches:
            return user
    # No consumer matched either the module or the functional criterion.
    return None
429
+
430
def create_node_from_old_node_preserve_meta(
    quantized_graph: Graph,
    create_node_args: Tuple[Any, ...],
    old_node: Node,
) -> Node:
    """
    Creates `new_node` and copies the necessary metadata to it from `old_node`.
    """
    node = quantized_graph.create_node(*create_node_args)
    # Preserve the stack trace so debugging info survives the rewrite.
    node.stack_trace = old_node.stack_trace
    return node
441
+
442
def get_skipped_module_name_and_classes(
        prepare_custom_config: PrepareCustomConfig,
        is_standalone_module: bool) -> Tuple[List[str], List[Type[Any]]]:
    """Collect the module names and classes to skip when tracing/preparing."""
    skipped_names = copy.copy(prepare_custom_config.non_traceable_module_names)
    skipped_classes = copy.copy(prepare_custom_config.non_traceable_module_classes)
    if not is_standalone_module:
        # standalone module and custom module config are applied in top level module
        skipped_names.extend(prepare_custom_config.standalone_module_names.keys())
        skipped_classes.extend(prepare_custom_config.standalone_module_classes.keys())
        skipped_classes.extend(get_custom_module_class_keys(prepare_custom_config.float_to_observed_mapping))

    return skipped_names, skipped_classes
454
+
455
def _is_custom_module_lstm(
        node: Node,
        named_modules: Dict[str, torch.nn.Module],
        qconfig: QConfigAny = None,
        # QuantizeHandler, but we cannot include the type here due to circular imports
        qhandler: Optional[Any] = None,
) -> bool:
    """
    Return whether this refers to the custom module LSTM flow.
    """
    mod = _get_module(node, named_modules)
    if qconfig is None or qhandler is None:
        # No qconfig/qhandler provided: check for the already-converted
        # quantizable LSTM module instead.
        return isinstance(mod, torch.ao.nn.quantizable.LSTM)
    assert isinstance(qhandler, torch.ao.quantization.fx.quantize_handler.QuantizeHandler)  # type: ignore[attr-defined]
    return (
        isinstance(mod, torch.nn.LSTM)
        and activation_is_statically_quantized(qconfig)
        and qhandler.is_custom_module()
    )
473
+
474
def _is_custom_module_mha(
        node: Node,
        named_modules: Dict[str, torch.nn.Module],
        qconfig: QConfigAny = None,
        # QuantizeHandler, but we cannot include the type here due to circular imports
        qhandler: Optional[Any] = None,
) -> bool:
    """
    Return whether this refers to the custom module MultiheadAttention flow.
    """
    mod = _get_module(node, named_modules)
    if qconfig is None or qhandler is None:
        # No qconfig/qhandler provided: check for the already-converted
        # quantizable MultiheadAttention module instead.
        return isinstance(mod, torch.ao.nn.quantizable.MultiheadAttention)
    assert isinstance(qhandler, torch.ao.quantization.fx.quantize_handler.QuantizeHandler)  # type: ignore[attr-defined]
    return (
        isinstance(mod, torch.nn.MultiheadAttention)
        and activation_is_statically_quantized(qconfig)
        and qhandler.is_custom_module()
    )
492
+
493
def _get_module(node: Node, named_modules: Dict[str, torch.nn.Module]) -> Optional[torch.nn.Module]:
    """
    If `node` refers to a call_module node, return the module, else None.
    """
    if node.op != "call_module":
        return None
    # dict.get returns None when the target is not a known module, matching
    # the explicit membership check of the original.
    return named_modules.get(str(node.target))
501
+
502
def _insert_dequant_stub(
    node: Node,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
) -> Node:
    """
    Attach a `DeQuantStub` to the model and create a node that calls this
    `DeQuantStub` on the output of `node`, similar to how observers are inserted.
    """
    name_factory = get_new_attr_name_with_prefix("dequant_stub_")
    stub_name = name_factory(model)
    stub = DeQuantStub()
    # Register the stub on both the model and the name -> module index.
    setattr(model, stub_name, stub)
    named_modules[stub_name] = stub
    with graph.inserting_after(node):
        return graph.call_module(stub_name, (node,))
520
+
521
def _insert_dequant_stubs_for_custom_module_lstm_output(
    node: Node,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
) -> Node:
    """
    Insert DeQuantStubs after each internal output node of custom module LSTM.

    Custom module LSTM outputs are nested tuples of the structure (output, (hidden0, hidden1)),
    Since we cannot dequantize a tuple as a whole, we must first break down the tuple into its
    components through `getitem`. This function transforms the graph as follows:

      (1) Split the LSTM node into (output, (hidden0, hidden1))
      (2) Insert a DeQuantStub after each internal node
      (3) Recombine the DeQuantStubs into the same structure as before
      (4) Reroute all consumers of the original LSTM node and its sub-nodes
          (e.g. lstm[0])

    Before:
                   lstm_output
                        |
                        v
                  original_user(s)
    After:
                   lstm_output
                  /           \\
                 /  (getitem)  \\
                /               \\
               v                 v
             output            hidden
               |               /   \\
         (DeQuantStub)        (getitem)
               |             /       \\
               v            v         v
           output_dq     hidden0    hidden1
               |            |         |
               |    (DeQuantStub) (DeQuantStub)
               |            |         |
               |            v         v
               |      hidden0_dq  hidden1_dq
               |            \\       /
               |              (tuple)
               |              \\   /
               |               v  v
               |             hidden_dq
               \\               /
                \\   (tuple)   /
                 v            v
                 lstm_output_dq
                       |
                       v
                original_user(s)

    For step (4), reroute all users of the original LSTM node(s) as follows:
      lstm_output -> lstm_output_dq
      lstm_output[0] -> output_dq
      lstm_output[1] -> hidden_dq
      lstm_output[1][0] -> hidden0_dq
      lstm_output[1][1] -> hidden1_dq

    Return the node `lstm_output_dq`.
    """
    # (1) Split the LSTM node into (output, (hidden0, hidden1))
    # (2) Insert a DeQuantStub after each internal node
    with graph.inserting_after(node):
        output = graph.call_function(operator.getitem, (node, 0))
        output_dq = _insert_dequant_stub(output, model, named_modules, graph)
    with graph.inserting_after(output_dq):
        hidden = graph.call_function(operator.getitem, (node, 1))
    with graph.inserting_after(hidden):
        hidden0 = graph.call_function(operator.getitem, (hidden, 0))
        hidden0_dq = _insert_dequant_stub(hidden0, model, named_modules, graph)
    with graph.inserting_after(hidden0_dq):
        hidden1 = graph.call_function(operator.getitem, (hidden, 1))
        hidden1_dq = _insert_dequant_stub(hidden1, model, named_modules, graph)

    # (3) Recombine the DeQuantStubs into the same structure as before
    with graph.inserting_after(hidden1_dq):
        hidden_dq = graph.call_function(tuple, ([hidden0_dq, hidden1_dq],))
    with graph.inserting_after(hidden_dq):
        lstm_output_dq = graph.call_function(tuple, ([output_dq, hidden_dq],))

    # (4) Reroute all consumers of the original LSTM node and its sub-nodes
    # Skip `output` and `hidden`: those are the getitem nodes created above
    # and must keep consuming the raw LSTM node.
    for user in list(node.users.keys()):
        if user != output and user != hidden:
            user.replace_input_with(node, lstm_output_dq)
    # The getitem and tuple nodes we added here may interfere with reference quantized
    # pattern matching, so we need to redirect the consumers of internal nodes to the
    # corresponding nodes with DeQuantStubs (e.g. lstm_output_dq[0] -> output_dq) attached,
    # in order to preserve reference patterns like "dequantize - consumer - quantize".
    _reroute_tuple_getitem_pattern(graph)
    return lstm_output_dq
614
+
615
def _maybe_get_custom_module_lstm_from_node_arg(
    arg: Node,
    named_modules: Dict[str, torch.nn.Module],
) -> Optional[Node]:
    """
    Given an argument of a node, if the argument refers to the path through which the node
    is a consumer of custom module LSTM, return the custom module LSTM node, or None otherwise.

    This is used to determine whether a node is a consumer of custom module LSTM, and, if so,
    skip inserting input observers for this node. This is because custom module LSTM produces
    quantized outputs, so inserting an input observer for the consumer of custom module LSTM
    would unnecessarily quantize the outputs again.

      lstm -> consumer

    In practice, however, custom module LSTM outputs a tuple (output, (hidden0, hidden1)) with
    DeQuantStubs attached to each internal node (see `_insert_dequant_stubs_for_custom_module_lstm_output`).
    This tuple can be consumed in one of four ways:

      lstm -> getitem -> DeQuantStub -> consumer                       # consume lstm[0]
      lstm -> getitem -> getitem -> DeQuantStub -> tuple -> consumer   # consume lstm[1]
      lstm -> getitem -> getitem -> DeQuantStub -> consumer            # consume lstm[1][0] or lstm[1][1]
      lstm -> getitem -> DeQuantStub -> tuple -> consumer              # consume lstm

    Thus, we must match against the above patterns instead of simply checking the parent node
    to determine whether this node is a consumer of a custom module LSTM.
    """
    def match_dq(a):
        return isinstance(_get_module(a, named_modules), DeQuantStub)

    def match_lstm(a):
        # Called without qconfig/qhandler, so this matches the converted
        # quantizable LSTM module (see `_is_custom_module_lstm`).
        return _is_custom_module_lstm(a, named_modules)

    def match_getitem(a):
        return a.op == "call_function" and a.target == operator.getitem

    def match_tuple(a):
        return a.op == "call_function" and a.target == tuple

    def _match_pattern(match_pattern: List[Callable]) -> Optional[Node]:
        """
        Traverse up the graph and match the args one by one.
        If there is a match, return the last matched node, or None otherwise.
        """
        a = arg
        for i, match in enumerate(match_pattern):
            if not match(a):
                return None
            # Match next arg, for tuple the arg is a tuple of a list, e.g. ([dq_1, other_node],)
            if i < len(match_pattern) - 1:
                if match == match_tuple:
                    a = a.args[0][0]  # type: ignore[assignment,index]
                else:
                    a = a.args[0]  # type: ignore[assignment]
        return a

    # Patterns are written consumer-first; matching walks upwards towards
    # the LSTM node, which is returned on success.
    all_match_patterns = [
        [match_dq, match_getitem, match_lstm],
        [match_tuple, match_dq, match_getitem, match_getitem, match_lstm],
        [match_dq, match_getitem, match_getitem, match_lstm],
        [match_tuple, match_dq, match_getitem, match_lstm],
    ]

    for p in all_match_patterns:
        matched_node = _match_pattern(p)
        if matched_node is not None:
            return matched_node
    return None
683
+
684
def _reroute_tuple_getitem_pattern(graph: Graph):
    """
    Search for patterns where N consecutive `tuple` call_function nodes are followed by
    N consecutive `getitem` call_function nodes that are "reverses" of the `tuple` nodes.
    If we find this pattern, reroute the consumers of the last `getitem` to skip these
    N `tuple` and `getitem` nodes.

    Before:

        a   b     c
        |   \\   /
        \\   tuple
         \\   /
          tuple
            |
        getitem(1)
            |
        getitem(0)
            |
            d

    After:

        b
        |
        d
    """
    def find_patterns(
            node: Node,
            index_stack: List[int],
            current_pattern: List[Node],
            matched_patterns: List[List[Node]],
            seen: Set[Tuple[Node, Tuple[int, ...]]]):
        """
        Traverse the graph recursively to match for the N-tuple - N-getitem patterns,
        starting at the given node.

        We use a stack to keep track of the expected `getitem` indices, since these are
        reversed from the `tuple` indices. In the above example, the stack after
        (b -> tuple -> tuple) will be [0, 1], which will be popped by getitem(1) first
        and then by getitem(0).

        TODO: traverse upwards from the output and handle the case when tuple is not a
        separate node, e.g. graph.call_function(operator.getitem, args=(a, (b, c)))
        """
        # An empty index stack with a non-empty pattern means every tuple index
        # was matched by a getitem: record the completed pattern.
        if len(index_stack) == 0 and len(current_pattern) > 0:
            matched_patterns.append(copy.copy(current_pattern))
            current_pattern.clear()

        # Avoid duplicating work
        state = (node, tuple(index_stack))
        if state in seen:
            return
        seen.add(state)

        # Iterate through users of this node to find tuple/getitem nodes to match
        for user in node.users:
            if user.op == "call_function" and user.target == tuple:
                for i, user_arg in enumerate(user.args[0]):  # type: ignore[arg-type]
                    if user_arg == node:
                        index_stack.append(i)
                        current_pattern.append(user)
                        find_patterns(user, index_stack, current_pattern, matched_patterns, seen)
            elif user.op == "call_function" and user.target == operator.getitem:
                if len(index_stack) > 0:
                    # Only a getitem whose index mirrors the most recent tuple
                    # index continues the pattern; otherwise we stop here.
                    if user.args[1] == index_stack[-1]:
                        index_stack.pop()
                        current_pattern.append(user)
                        find_patterns(user, index_stack, current_pattern, matched_patterns, seen)
        # NOTE(review): this return value is unused at the call site below;
        # results are accumulated through the `matched_patterns` argument.
        return matched_patterns

    # Collect all matched patterns
    matched_patterns: List[List[Node]] = []
    seen: Set[Tuple[Node, Tuple[int, ...]]] = set()  # (node, index_stack)
    for node in graph.nodes:
        find_patterns(node, [], [], matched_patterns, seen)

    # For each pattern, redirect all consumers of the last getitem node to the correct input
    # of the first tuple node
    for pattern in matched_patterns:
        first_tuple = pattern[0]
        last_getitem = pattern[-1]
        assert first_tuple.op == "call_function" and first_tuple.target == tuple
        assert last_getitem.op == "call_function" and last_getitem.target == operator.getitem
        last_getitem_index = last_getitem.args[1]
        new_input = first_tuple.args[0][last_getitem_index]  # type: ignore[index]
        for user in list(last_getitem.users.keys()):
            user.replace_input_with(last_getitem, new_input)
772
+
773
def _get_observer_from_activation_post_process(
    activation_post_process: Union[ObserverBase, FakeQuantizeBase],
) -> ObserverBase:
    """
    If `activation_post_process` is an observer, return the observer.
    If `activation_post_process` is a fake quantize, return the internal observer.
    """
    if not isinstance(activation_post_process, ObserverBase):
        assert isinstance(activation_post_process, FakeQuantizeBase)
        # A fake quantize wraps an observer; unwrap and return it.
        return activation_post_process.activation_post_process  # type: ignore[return-value]
    return activation_post_process
785
+
786
def _qconfig_satisfies_dtype_config_constraints(
        qconfig: QConfigAny,
        dtype_with_constraints: DTypeWithConstraints,
        is_activation: bool = True) -> bool:
    """
    Return whether `qconfig` satisfies the following constraints from the backend,
    specified through the activation and weight DTypeWithConstraints.

        1. QConfig specified a quantization range that falls within the backend's, if any
        2. QConfig specified a min scale value that is >= the backend's, if any
        3. QConfig specified a FixedQParamsObserver or FixedQParamsFakeQuantize that has
           scale and zero point that match the backend's, if any

    If `is_activation` is True, we check `qconfig.activation`, else we check `qconfig.weight`.
    If `qconfig` or `dtype_with_constraints.dtype` is None, or the dtypes do not match, return True.
    """
    # TODO: log warnings only when the user enabled a debug flag
    def _activation_post_process_satisfies_dtype_config_constraints(
            activation_post_process: Union[ObserverBase, FakeQuantizeBase],
            dtype_with_constraints: DTypeWithConstraints,
            debug_string: str) -> bool:
        # Compare the observer's qparams against each backend constraint in
        # turn; any violation warns and rejects the qconfig.
        observer = _get_observer_from_activation_post_process(activation_post_process)
        app_quant_min = getattr(observer, "quant_min", None)
        app_quant_max = getattr(observer, "quant_max", None)
        # TODO: for now, just use the existing eps value as scale_min. In the future, we should
        # resolve the differences between the two, either by renaming eps or some other way
        app_scale_min = getattr(observer, "eps", None)
        backend_quant_min = dtype_with_constraints.quant_min_lower_bound
        backend_quant_max = dtype_with_constraints.quant_max_upper_bound
        backend_scale_min = dtype_with_constraints.scale_min_lower_bound
        backend_scale_exact_match = dtype_with_constraints.scale_exact_match
        backend_zero_point_exact_match = dtype_with_constraints.zero_point_exact_match
        # check quantization ranges
        if backend_quant_min is not None and backend_quant_max is not None:
            if app_quant_min is None or app_quant_max is None:
                warnings.warn(f"QConfig {debug_string} must specify 'quant_min' and 'quant_max', ignoring {qconfig}")
                return False
            elif app_quant_min < backend_quant_min or app_quant_max > backend_quant_max:
                warnings.warn(
                    f"QConfig {debug_string} quantization range must fall within the backend's:\n"
                    f"QConfig range = ({app_quant_min}, {app_quant_max}), "
                    f"BackendConfig range = ({backend_quant_min}, {backend_quant_max}), "
                    f"ignoring {qconfig}"
                )
                return False
        # check scale min
        if backend_scale_min is not None:
            if app_scale_min is None:
                warnings.warn(f"QConfig {debug_string} must specify 'eps', ignoring {qconfig}")
                return False
            if app_scale_min < backend_scale_min:
                warnings.warn(
                    f"QConfig {debug_string} eps ({app_scale_min}) must be greater than or equal to "
                    f"the backend's min scale value ({backend_scale_min}), ignoring {qconfig}"
                )
                return False
        # check fixed scale and zero point
        if backend_scale_exact_match is not None and backend_zero_point_exact_match is not None:
            # For tests only, accept the following qconfigs for now
            # TODO: handle fp16 qconfigs properly
            for accepted_qconfig in [float16_static_qconfig, float16_dynamic_qconfig]:
                if qconfig_equals(qconfig, accepted_qconfig):
                    return True
            suggestion_str = (
                "Please use torch.ao.quantization.get_default_qconfig_mapping or "
                "torch.ao.quantization.get_default_qat_qconfig_mapping. Example:\n"
                "    qconfig_mapping = get_default_qconfig_mapping(\"fbgemm\")\n"
                "    model = prepare_fx(model, qconfig_mapping, example_inputs)"
            )
            if not isinstance(activation_post_process, FixedQParamsObserver) and \
                    not isinstance(activation_post_process, FixedQParamsFakeQuantize):
                warnings.warn(
                    f"QConfig must specify a FixedQParamsObserver or a FixedQParamsFakeQuantize "
                    f"for fixed qparams ops, ignoring {qconfig}.\n{suggestion_str}"
                )
                return False
            if observer.scale != backend_scale_exact_match or observer.zero_point != backend_zero_point_exact_match:
                warnings.warn(
                    f"QConfig fixed scale ({observer.scale}) and zero point ({observer.zero_point}) "
                    f"do not match the backend's ({backend_scale_exact_match} and {backend_zero_point_exact_match}), "
                    f"ignoring {qconfig}.\n{suggestion_str}"
                )
                return False
        return True

    # Trivially satisfied when there is no qconfig or no backend dtype to
    # check against.
    if qconfig is None or dtype_with_constraints.dtype is None:
        return True

    activation_post_process_ctr = qconfig.activation if is_activation else qconfig.weight
    debug_string = "activation" if is_activation else "weight"
    satisfies_constraints = True
    if activation_post_process_ctr is not None:
        # Instantiate the observer/fake-quantize from its constructor to
        # inspect its qparams.
        activation_post_process = activation_post_process_ctr()
        assert _is_activation_post_process(activation_post_process)
        # If dtypes don't match, don't check the activation_post_process and return True early
        if activation_post_process.dtype != dtype_with_constraints.dtype:
            return True
        satisfies_constraints = _activation_post_process_satisfies_dtype_config_constraints(
            activation_post_process, dtype_with_constraints, debug_string)
    return satisfies_constraints
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__init__.py ADDED
File without changes
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/export_utils.cpython-311.pyc ADDED
Binary file (9.73 kB). View file