Add files using upload-large-folder tool
Browse files. This view is limited to 50 files because the change set contains too many files.
See raw diff
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/__init__.py +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/__pycache__/autograd.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/__pycache__/functional.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/__pycache__/impl.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/functional.py +187 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/_conversions.py +118 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/linalg/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/nn/__init__.py +3 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/nn/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/nn/functional/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/special/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__init__.py +9 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/functional_modules.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/normalization.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/batchnorm.py +106 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/utils.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/linear.py +57 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/dynamic/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/ns/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/ns/__pycache__/_numeric_suite.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/ns/fx/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__init__.py +189 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/quantization_mappings.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/quantize_fx.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/_correct_bias.py +144 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/_common_operator_config_utils.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/backend_config.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/observation_type.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/onednn.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/observation_type.py +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/qnnpack.py +160 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/tensorrt.py +81 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/utils.py +279 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/x86.py +113 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fake_quantize.py +546 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/model_report.py +606 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/prepare.py +1880 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/duplicate_dq_pass.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/prepare.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/qat_utils.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/utils.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/duplicate_dq_pass.py +83 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/export_utils.py +211 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/prepare.py +489 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/representation/__init__.py +5 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/qconfig_mapping.py +350 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quantizer/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quantizer/__pycache__/xnnpack_quantizer.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/export/__pycache__/__init__.cpython-311.pyc +0 -0
- tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/export/__pycache__/_remove_auto_functionalized_pass.cpython-311.pyc +0 -0
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/__init__.py
ADDED
|
File without changes
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/__pycache__/autograd.cpython-311.pyc
ADDED
|
Binary file (16 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/__pycache__/functional.cpython-311.pyc
ADDED
|
Binary file (10.4 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/__pycache__/impl.cpython-311.pyc
ADDED
|
Binary file (50 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_custom_op/functional.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import weakref
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torch.utils._pytree as pytree
|
| 5 |
+
from torch._C import _ExcludeDispatchKeyGuard, DispatchKey, DispatchKeySet
|
| 6 |
+
from torch._ops import OpOverload
|
| 7 |
+
from torch.library import Library
|
| 8 |
+
from torchgen.model import (
|
| 9 |
+
BaseTy,
|
| 10 |
+
BaseType,
|
| 11 |
+
FunctionSchema,
|
| 12 |
+
OperatorName,
|
| 13 |
+
OptionalType,
|
| 14 |
+
SchemaKind,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
from .autograd import autograd_not_implemented
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def register_functional_op(
    lib: Library,
    new_op_name: str,
    mutable_op: OpOverload,
) -> None:
    """Register a functional (non-mutating) variant of ``mutable_op``.

    The functional variant is defined on ``lib`` under ``new_op_name`` and is
    linked to the mutable operator for the purposes of functionalization: a
    ``Functionalize`` kernel is installed on ``mutable_op`` that calls the
    functional variant under the hood.

    Arguments:
        lib (Library): Should be a torch.library.Library object that has
            the same namespace as ``mutable_op``'s namespace.
            lib will be used to register the new functional op as well
            as a functionalization kernel for the ``mutable_op``
            If you don't have a library handy, use
            ``torch.library.Library(ns, 'FRAGMENT')`` to construct one.
        new_op_name (str): The name of the functional operator (without the
            namespace). If no namespace, the new functional variant will be
            accessible under ``torch.ops.{lib.ns}.new_op_name``.
        mutable_op (OpOverload): The mutable custom operator. Note
            that you may need to add a `.default` to it, like
            `torch.ops.aten.abs_.default`.
    """
    validate(mutable_op)

    # Define the functional op and give it a composite implementation that
    # clones the would-be-mutated inputs.
    lib.define(functional_schema(new_op_name, mutable_op))
    lib.impl(new_op_name, construct_functional_impl(mutable_op), 'CompositeExplicitAutograd')

    functional_op = getattr(getattr(torch.ops, lib.ns), new_op_name).default

    # There's no easy way for us to generate an autograd kernel, so we use
    # autograd_not_implemented. This also prevents the user from registering
    # an autograd formula themselves — fine as long as they don't call the
    # functional op directly in their program, but may need revisiting.
    lib.impl(new_op_name, autograd_not_implemented(functional_op), 'Autograd')

    # Teach functionalization to rewrite the mutable op into the functional
    # one. weakref.proxy avoids the kernel closure keeping mutable_op alive.
    functionalize_kernel = construct_functionalization_kernel(
        weakref.proxy(mutable_op), functional_op)
    lib.impl(mutable_op, functionalize_kernel, 'Functionalize')
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def construct_functional_impl(mutable_op):
    """Build the CompositeExplicitAutograd impl for the functional variant.

    The returned impl clones every input that ``mutable_op`` would mutate,
    runs ``mutable_op`` on the clones, and appends the (now mutated) clones
    to the outputs, so callers observe no in-place changes.
    """
    def functional_impl(*args):
        call_args = []
        mutated_clones = []
        for writes, arg in zip(mutable_args(mutable_op), args):
            if not writes:
                call_args.append(arg)
                continue
            # Optional tensor inputs may be absent (None); only clone real ones.
            clone = arg.clone() if arg is not None else None
            call_args.append(clone)
            mutated_clones.append(clone)
        out = mutable_op(*call_args)
        # Normalize: real outputs first, then the mutated clones.
        if out is None:
            return tuple(mutated_clones)
        if isinstance(out, tuple):
            return (*out, *mutated_clones)
        return (out, *mutated_clones)
    return functional_impl
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def construct_functionalization_kernel(mutable_op, functional_op):
    """Build the ``Functionalize`` kernel that redirects ``mutable_op``.

    The kernel unwraps FunctionalTensorWrapper inputs, runs the pure
    ``functional_op`` on the unwrapped tensors, then writes the extra
    "mutated clone" outputs back into the wrappers so the mutation becomes
    visible to functionalization.
    """
    def kernel(*args):
        # There's nothing to be functionalized!
        # We can still end up here because DispatchKey::Functionalize is a mode key
        if pytree.tree_all_only(torch.Tensor, lambda x: not torch._is_functional_tensor(x), args):
            with _ExcludeDispatchKeyGuard(DispatchKeySet(DispatchKey.Functionalize)):
                return mutable_op(*args)

        # NB: This differs from the codegen -- codegen handles cases where there
        # are mixed FunctionalTensorWrapper and non-FunctionalTensorWrapper.
        # This only really matters for XLA (mixed CPU-XLA tensors) and
        # running functionalization without the PT2 stack (which guarantees to us that
        # all tensors are FunctionalTensorWrapper).
        if not pytree.tree_all_only(torch.Tensor, torch._is_functional_tensor, args):
            # BUGFIX: this message was missing its f-prefix, so it printed the
            # literal text "{mutable_op}" instead of the op name.
            raise RuntimeError(f"{mutable_op}: expected all args to be FunctionalTensorWrapper")

        # Sync and unwrap every functional tensor input.
        unwrapped_args = []
        for arg in args:
            if isinstance(arg, torch.Tensor) and torch._is_functional_tensor(arg):
                torch._sync(arg)
                unwrapped_args.append(torch._from_functional_tensor(arg))
            else:
                unwrapped_args.append(arg)

        with _ExcludeDispatchKeyGuard(DispatchKeySet(DispatchKey.Functionalize)):
            output = functional_op(*unwrapped_args)

        # The functional op returns the real outputs first, followed by one
        # "new value" per mutated input (see construct_functional_impl).
        num_actual_output = len(mutable_op._schema.returns)
        actual_output = pytree.tree_map(
            torch._to_functional_tensor, output[:num_actual_output])

        new_values_to_propagate = output[num_actual_output:]
        inputs_to_replace = [arg for is_write, arg in zip(mutable_args(mutable_op), args)
                             if is_write]
        assert len(new_values_to_propagate) == len(inputs_to_replace)
        for new_value, arg in zip(new_values_to_propagate, inputs_to_replace):
            # An absent optional input has nothing to write back.
            if arg is None and new_value is None:
                continue
            # BUGFIX: the original `continue`d whenever BOTH sides were
            # present, so mutations were never written back into the wrappers
            # (and the only fall-through case -- exactly one side None --
            # would have crashed below). Propagate whenever there is a value
            # to write back.
            torch._C._propagate_xla_data(arg, new_value)
            torch._C._replace_(arg, new_value)
            torch._C._commit_update(arg)
            torch._sync(arg)

        # Match the mutable op's return convention: scalar-like single
        # return, None for no returns, tuple otherwise.
        if len(actual_output) == 1:
            return actual_output[0]
        elif len(actual_output) == 0:
            return None
        return actual_output

    return kernel
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def validate(mutable_op: OpOverload):
    """Reject operators that register_functional_op cannot handle."""
    if not isinstance(mutable_op, OpOverload):
        raise TypeError(
            f"register_functional_op(mutable_op): expected mutable_op to be instance of "
            f"OpOverload but got {type(mutable_op)}")

    # There are generally three types of "in-place" or "mutable" ops.
    # Each of them have their own conventions:
    # - inplace (first input modified in-place and returned as only output)
    # - out= (some args modified in-place and returned as outputs)
    # - mutable (some args modified in-place but none of those returned as outputs)
    # In theory we can support all three, but we'll just support the last
    # option right now for simplicity.
    schema = FunctionSchema.parse(str(mutable_op._schema))
    if schema.kind() != SchemaKind.mutable:
        raise RuntimeError("Expected op to be mutable (as opposed to functional, inplace or out)")

    # construct_functionalization_kernel assumes no returned value aliases
    # or is mutated.
    if any(ret.annotation is not None for ret in schema.returns):
        raise NotImplementedError(
            "NYI: register_functional_op(op) where op returns a mutated or aliased value. "
            "Please file an issue (and as a workaround, modify your operator to "
            "not return the mutated value or aliases)")

    # construct_functionalization_kernel also assumes every tensor-like
    # argument is a plain Tensor or Tensor? (no tensor lists).
    plain_tensor_types = (BaseType(BaseTy.Tensor), OptionalType(BaseType(BaseTy.Tensor)))
    for arg in schema.arguments.flat_all:
        if arg.type.is_tensor_like() and arg.type not in plain_tensor_types:
            raise NotImplementedError(
                "NYI: register_functional_op(op) where op has a List[Tensor] input."
                "Please file an issue.")
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def functional_schema(new_op_name, op: OpOverload):
|
| 180 |
+
schema = FunctionSchema.parse(str(op._schema))
|
| 181 |
+
schema = schema.signature().with_name(OperatorName.parse(new_op_name))
|
| 182 |
+
return str(schema)
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def mutable_args(op: OpOverload):
    """Per-argument booleans: True iff the op writes to that argument."""
    return tuple(
        arg.alias_info is not None and arg.alias_info.is_write
        for arg in op._schema.arguments
    )
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/_conversions.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch._prims_common as utils
|
| 3 |
+
|
| 4 |
+
# Utilities should come BEFORE this import
|
| 5 |
+
from torch._decomp import register_decomposition
|
| 6 |
+
|
| 7 |
+
from torch._prims_common import TensorLikeType
|
| 8 |
+
from torch._prims_common.wrappers import out_wrapper
|
| 9 |
+
from torch._refs import _broadcast_shapes
|
| 10 |
+
|
| 11 |
+
# Data conversion references.
|
| 12 |
+
#
|
| 13 |
+
# Note: this module breaks the usual _refs to torch naming scheme where
|
| 14 |
+
# _refs.foo.bar is a ref for torch.foo.bar. The following definitions are not
|
| 15 |
+
# part of _refs/__init__.py to avoid name clashes with Python builtin types
|
| 16 |
+
# (like int).
|
| 17 |
+
|
| 18 |
+
# Public names: Tensor.<dtype>()-style conversion helpers plus the complex
# constructors. Order mirrors the definitions below.
__all__ = [
    # dtypes
    "bfloat16", "bool", "byte", "cdouble", "cfloat", "chalf", "char",
    "double", "float", "half", "int", "long", "short",
    # misc
    "complex", "polar",
]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _make_conversion_method(name: str, dtype: torch.dtype):
|
| 40 |
+
def fn(
|
| 41 |
+
self: TensorLikeType, memory_format: torch.memory_format = torch.preserve_format
|
| 42 |
+
) -> TensorLikeType:
|
| 43 |
+
return self.to(dtype, memory_format=memory_format) # type: ignore[call-overload]
|
| 44 |
+
|
| 45 |
+
fn.__name__ = name
|
| 46 |
+
return fn
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# One conversion reference per dtype, mirroring the Tensor method names.
# NOTE: several of these (bool/int/float) intentionally shadow Python
# builtins; this module is looked up by attribute, not star-imported.
bfloat16 = _make_conversion_method("bfloat16", torch.bfloat16)
bool = _make_conversion_method("bool", torch.bool)
byte = _make_conversion_method("byte", torch.uint8)
cdouble = _make_conversion_method("cdouble", torch.cdouble)
cfloat = _make_conversion_method("cfloat", torch.cfloat)
chalf = _make_conversion_method("chalf", torch.complex32)
char = _make_conversion_method("char", torch.int8)
double = _make_conversion_method("double", torch.double)
float = _make_conversion_method("float", torch.float)
half = _make_conversion_method("half", torch.half)
int = _make_conversion_method("int", torch.int)
long = _make_conversion_method("long", torch.long)
short = _make_conversion_method("short", torch.short)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
@register_decomposition(torch._ops.ops.aten.complex)
# Note: complex has type promotion tests disabled due to different semantics.
# exact_dtype is for compat with complex_check_dtype from core.
@out_wrapper(exact_dtype=True)
def complex(real: TensorLikeType, imag: TensorLikeType) -> TensorLikeType:
    """Reference for torch.complex: build a complex tensor from its parts."""
    _supported = (torch.float32, torch.float64, torch.float16)
    torch._check(
        real.dtype in _supported and imag.dtype in _supported,
        lambda: (
            f"Expected both inputs to be Half, Float or Double tensors but got "
            f"{real.dtype} and {imag.dtype}"
        ),
    )
    torch._check(
        real.dtype == imag.dtype,
        lambda: (
            f"Expected object of scalar type {real.dtype} but got "
            f"scalar type {imag.dtype} for second argument"
        ),
    )
    out_dtype = utils.corresponding_complex_dtype(real.dtype)  # type: ignore[arg-type]
    out_shape = _broadcast_shapes(real.shape, imag.shape)
    # Allocate the complex result on real's device/layout and fill the parts.
    out = real.new_empty(
        out_shape,
        dtype=out_dtype,
        layout=real.layout,
        device=real.device,
        # pin_memory=real.is_pinned(),  # NYI
    )
    out.real = real
    out.imag = imag
    return out
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@register_decomposition(torch._ops.ops.aten.polar)
# Note: polar has type promotion tests disabled due to different semantics.
# exact_dtype is for compat with complex_check_dtype from core.
@out_wrapper(exact_dtype=True)
def polar(abs: TensorLikeType, angle: TensorLikeType) -> TensorLikeType:
    """Reference for torch.polar: complex tensor from magnitude and phase."""
    # torch.complex performs the dtype validation and allocates the complex
    # result; overwrite its parts with the polar->cartesian conversion.
    out = torch.complex(abs, angle)
    out.real = abs * torch.cos(angle)
    out.imag = abs * torch.sin(angle)
    return out
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/linalg/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (16.9 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/nn/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
|
| 3 |
+
__all__: List[str] = []  # torch._refs.nn exposes nothing directly; see submodules
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/nn/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (351 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/nn/functional/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (49 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/_refs/special/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (10.6 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantizable/modules/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .activation import MultiheadAttention
|
| 2 |
+
from .rnn import LSTM
|
| 3 |
+
from .rnn import LSTMCell
|
| 4 |
+
|
| 5 |
+
# Public quantizable modules re-exported from this package.
__all__ = ['LSTM', 'LSTMCell', 'MultiheadAttention']
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/functional_modules.cpython-311.pyc
ADDED
|
Binary file (13.5 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/__pycache__/normalization.cpython-311.pyc
ADDED
|
Binary file (11.9 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/modules/batchnorm.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.ao.nn.intrinsic as nni
|
| 3 |
+
|
| 4 |
+
# Only the concrete quantized batchnorm modules are public.
__all__ = ["BatchNorm2d", "BatchNorm3d"]
|
| 8 |
+
|
| 9 |
+
class _BatchNorm(torch.nn.modules.batchnorm._BatchNorm):
|
| 10 |
+
def __init__(self, num_features, eps=1e-5, momentum=0.1, device=None, dtype=None) -> None:
|
| 11 |
+
factory_kwargs = {'device': device, 'dtype': dtype}
|
| 12 |
+
super().__init__(num_features, eps, momentum, True, True, **factory_kwargs)
|
| 13 |
+
self.register_buffer('scale', torch.tensor(1.0, **factory_kwargs))
|
| 14 |
+
self.register_buffer('zero_point', torch.tensor(0, **factory_kwargs))
|
| 15 |
+
|
| 16 |
+
@staticmethod
|
| 17 |
+
def from_float(cls, mod):
|
| 18 |
+
activation_post_process = mod.activation_post_process
|
| 19 |
+
if type(mod) == cls._NNI_BN_RELU_MODULE:
|
| 20 |
+
mod = mod[0]
|
| 21 |
+
scale, zero_point = activation_post_process.calculate_qparams()
|
| 22 |
+
new_mod = cls(mod.num_features, mod.eps)
|
| 23 |
+
new_mod.weight = mod.weight
|
| 24 |
+
new_mod.bias = mod.bias
|
| 25 |
+
new_mod.running_mean = mod.running_mean
|
| 26 |
+
new_mod.running_var = mod.running_var
|
| 27 |
+
new_mod.scale = scale
|
| 28 |
+
new_mod.zero_point = zero_point
|
| 29 |
+
return new_mod
|
| 30 |
+
|
| 31 |
+
@classmethod
|
| 32 |
+
def from_reference(cls, bn, output_scale, output_zero_point):
|
| 33 |
+
qbn = cls(
|
| 34 |
+
bn.num_features,
|
| 35 |
+
bn.eps,
|
| 36 |
+
bn.momentum,
|
| 37 |
+
device=bn.weight.device,
|
| 38 |
+
dtype=bn.weight.dtype
|
| 39 |
+
)
|
| 40 |
+
qbn.weight = bn.weight
|
| 41 |
+
qbn.bias = bn.bias
|
| 42 |
+
qbn.running_mean = bn.running_mean
|
| 43 |
+
qbn.running_var = bn.running_var
|
| 44 |
+
qbn.scale = output_scale
|
| 45 |
+
qbn.zero_point = output_zero_point
|
| 46 |
+
return qbn
|
| 47 |
+
|
| 48 |
+
class BatchNorm2d(_BatchNorm):
    r"""This is the quantized version of :class:`~torch.nn.BatchNorm2d`.
    """

    # Fused module type recognized by from_float (BN followed by ReLU).
    _NNI_BN_RELU_MODULE = nni.BNReLU2d

    def __init__(self, num_features, eps=1e-5, momentum=0.1, device=None, dtype=None) -> None:
        super().__init__(num_features, eps, momentum, device=device, dtype=dtype)

    def _get_name(self):
        return 'QuantizedBatchNorm2d'

    def _check_input_dim(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 4:
            raise ValueError("Input shape must be `(N, C, H, W)`!")

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        # disabling this since this is not symbolically traceable
        # self._check_input_dim(input)
        qbn2d = torch.ops.quantized.batch_norm2d
        return qbn2d(input, self.weight, self.bias, self.running_mean,
                     self.running_var, self.eps, self.scale, self.zero_point)

    @classmethod
    def from_float(cls, mod):
        # Delegate to the shared base implementation.
        return _BatchNorm.from_float(cls, mod)
|
| 77 |
+
|
| 78 |
+
class BatchNorm3d(_BatchNorm):
    r"""This is the quantized version of :class:`~torch.nn.BatchNorm3d`.
    """

    # Fused module type recognized by from_float (BN followed by ReLU).
    _NNI_BN_RELU_MODULE = nni.BNReLU3d

    def __init__(self, num_features, eps=1e-5, momentum=0.1, device=None, dtype=None):
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__(num_features, eps, momentum, **factory_kwargs)

    def _get_name(self):
        return 'QuantizedBatchNorm3d'

    def _check_input_dim(self, input):
        # Temporarily using len(shape) instead of ndim due to JIT issue
        # https://github.com/pytorch/pytorch/issues/23890
        if len(input.shape) != 5:
            # BUGFIX: the message previously said `(N, C, H, W)` (the 4-D
            # shape) even though this module validates a 5-D input.
            raise ValueError("Input shape must be `(N, C, D, H, W)`!")

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        # disabling this since this is not symbolically traceable
        # self._check_input_dim(input)
        return torch.ops.quantized.batch_norm3d(
            input, self.weight, self.bias, self.running_mean,
            self.running_var, self.eps, self.scale, self.zero_point)

    @classmethod
    def from_float(cls, mod):
        # Delegate to the shared base implementation.
        return _BatchNorm.from_float(cls, mod)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (14.5 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/quantized/reference/modules/linear.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from typing import Optional, Dict, Any
|
| 5 |
+
from .utils import ReferenceQuantizedModule
|
| 6 |
+
|
| 7 |
+
__all__ = ['Linear']  # only the reference quantized Linear is public
|
| 8 |
+
|
| 9 |
+
class Linear(nn.Linear, ReferenceQuantizedModule):
    """Reference quantized linear module for the FX Graph Mode Quantization
    workflow.

    Activations stay floating point and the weight is stored in floating
    point too; ``forward`` quantize/dequantizes the weight before calling the
    float functional linear, so a backend can pattern-match and fuse the
    quant/dequant/linear sequence into a真正 quantized linear.
    """
    _IS_REFERENCE = True

    def __init__(
            self,
            in_features: int,
            out_features: int,
            bias_: bool = True,
            device: Optional[torch.device] = None,
            dtype: Optional[torch.dtype] = None,
            weight_qparams: Optional[Dict[str, Any]] = None):
        super().__init__(in_features, out_features, bias_, device, dtype)
        self._init_weight_qparams(weight_qparams, device)

    def _get_name(self):
        return "QuantizedLinear(Reference)"

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        we have:
        w(float) -- quant - dequant \
        x(float) ------------- F.linear ---

        In the full model, we will see
        w(float) -- quant - *dequant \
        x -- quant --- *dequant --  *F.linear --- *quant - dequant
        and the backend should be able to fuse the ops with `*` into a quantized linear
        """
        dequant_weight = self.get_weight()
        return F.linear(x, dequant_weight, self.bias)

    @classmethod
    def from_float(cls, float_linear, weight_qparams):
        # Copy the float module's parameters into a fresh reference module.
        ref = Linear(
            float_linear.in_features, float_linear.out_features,
            float_linear.bias is not None, device=float_linear.weight.device,
            dtype=float_linear.weight.dtype, weight_qparams=weight_qparams)
        ref.weight = torch.nn.Parameter(float_linear.weight.detach())
        if float_linear.bias is not None:
            ref.bias = torch.nn.Parameter(float_linear.bias.detach())
        return ref
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/nn/sparse/quantized/dynamic/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (318 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/ns/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (212 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/ns/__pycache__/_numeric_suite.cpython-311.pyc
ADDED
|
Binary file (26.9 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/ns/fx/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (215 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__init__.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# flake8: noqa: F403
|
| 2 |
+
|
| 3 |
+
from .fake_quantize import * # noqa: F403
|
| 4 |
+
from .fuse_modules import fuse_modules # noqa: F403
|
| 5 |
+
from .fuse_modules import fuse_modules_qat # noqa: F403
|
| 6 |
+
from .fuser_method_mappings import * # noqa: F403
|
| 7 |
+
from .observer import * # noqa: F403
|
| 8 |
+
from .qconfig import * # noqa: F403
|
| 9 |
+
from .qconfig_mapping import * # noqa: F403
|
| 10 |
+
from .quant_type import * # noqa: F403
|
| 11 |
+
from .quantization_mappings import * # type: ignore[no-redef]
|
| 12 |
+
from .quantize import * # noqa: F403
|
| 13 |
+
from .quantize_jit import * # noqa: F403
|
| 14 |
+
from .stubs import * # noqa: F403
|
| 15 |
+
from .pt2e.export_utils import _move_exported_model_to_eval as move_exported_model_to_eval
|
| 16 |
+
from .pt2e.export_utils import _move_exported_model_to_train as move_exported_model_to_train
|
| 17 |
+
from .pt2e.export_utils import _allow_exported_model_train_eval as allow_exported_model_train_eval
|
| 18 |
+
from .pt2e.generate_numeric_debug_handle import generate_numeric_debug_handle # noqa: F401
|
| 19 |
+
from typing import Union, List, Callable, Tuple, Optional
|
| 20 |
+
from torch import Tensor
|
| 21 |
+
import torch
|
| 22 |
+
|
| 23 |
+
ObserverOrFakeQuantize = Union[ObserverBase, FakeQuantizeBase]
|
| 24 |
+
ObserverOrFakeQuantize.__module__ = "torch.ao.quantization"
|
| 25 |
+
|
| 26 |
+
__all__ = [
|
| 27 |
+
"DeQuantStub",
|
| 28 |
+
"FakeQuantize",
|
| 29 |
+
"FakeQuantizeBase",
|
| 30 |
+
"FixedQParamsFakeQuantize",
|
| 31 |
+
"FixedQParamsObserver",
|
| 32 |
+
"FusedMovingAvgObsFakeQuantize",
|
| 33 |
+
"HistogramObserver",
|
| 34 |
+
"MatchAllNode",
|
| 35 |
+
"MinMaxObserver",
|
| 36 |
+
"MovingAverageMinMaxObserver",
|
| 37 |
+
"MovingAveragePerChannelMinMaxObserver",
|
| 38 |
+
"NoopObserver",
|
| 39 |
+
"ObserverBase",
|
| 40 |
+
"ObserverOrFakeQuantize",
|
| 41 |
+
"Pattern",
|
| 42 |
+
"PerChannelMinMaxObserver",
|
| 43 |
+
"PlaceholderObserver",
|
| 44 |
+
"QConfig",
|
| 45 |
+
"QConfigAny",
|
| 46 |
+
"QConfigDynamic",
|
| 47 |
+
"QConfigMapping",
|
| 48 |
+
"QuantStub",
|
| 49 |
+
"QuantType",
|
| 50 |
+
"QuantWrapper",
|
| 51 |
+
"RecordingObserver",
|
| 52 |
+
"ReuseInputObserver",
|
| 53 |
+
"UniformQuantizationObserverBase",
|
| 54 |
+
"add_quant_dequant",
|
| 55 |
+
"convert",
|
| 56 |
+
"convert_dynamic_jit",
|
| 57 |
+
"convert_jit",
|
| 58 |
+
"default_affine_fixed_qparams_fake_quant",
|
| 59 |
+
"default_affine_fixed_qparams_observer",
|
| 60 |
+
"default_debug_observer",
|
| 61 |
+
"default_dynamic_fake_quant",
|
| 62 |
+
"default_dynamic_quant_observer",
|
| 63 |
+
"default_embedding_fake_quant",
|
| 64 |
+
"default_embedding_fake_quant_4bit",
|
| 65 |
+
"default_eval_fn",
|
| 66 |
+
"default_fake_quant",
|
| 67 |
+
"default_fixed_qparams_range_0to1_fake_quant",
|
| 68 |
+
"default_fixed_qparams_range_0to1_observer",
|
| 69 |
+
"default_fixed_qparams_range_neg1to1_fake_quant",
|
| 70 |
+
"default_fixed_qparams_range_neg1to1_observer",
|
| 71 |
+
"default_float_qparams_observer",
|
| 72 |
+
"default_float_qparams_observer_4bit",
|
| 73 |
+
"default_fused_act_fake_quant",
|
| 74 |
+
"default_fused_per_channel_wt_fake_quant",
|
| 75 |
+
"default_fused_wt_fake_quant",
|
| 76 |
+
"default_histogram_fake_quant",
|
| 77 |
+
"default_histogram_observer",
|
| 78 |
+
"default_observer",
|
| 79 |
+
"default_per_channel_weight_fake_quant",
|
| 80 |
+
"default_per_channel_weight_observer",
|
| 81 |
+
"default_placeholder_observer",
|
| 82 |
+
"default_reuse_input_observer",
|
| 83 |
+
"default_symmetric_fixed_qparams_fake_quant",
|
| 84 |
+
"default_symmetric_fixed_qparams_observer",
|
| 85 |
+
"default_weight_fake_quant",
|
| 86 |
+
"default_weight_observer",
|
| 87 |
+
"disable_fake_quant",
|
| 88 |
+
"disable_observer",
|
| 89 |
+
"enable_fake_quant",
|
| 90 |
+
"enable_observer",
|
| 91 |
+
"fuse_conv_bn",
|
| 92 |
+
"fuse_conv_bn_jit",
|
| 93 |
+
"fuse_conv_bn_relu",
|
| 94 |
+
"fuse_convtranspose_bn",
|
| 95 |
+
"fuse_linear_bn",
|
| 96 |
+
"fuse_modules",
|
| 97 |
+
"fuse_modules_qat",
|
| 98 |
+
"fused_per_channel_wt_fake_quant_range_neg_127_to_127",
|
| 99 |
+
"fused_wt_fake_quant_range_neg_127_to_127",
|
| 100 |
+
"get_combined_dict",
|
| 101 |
+
"get_default_compare_output_module_list",
|
| 102 |
+
"get_default_custom_config_dict",
|
| 103 |
+
"get_default_dynamic_quant_module_mappings",
|
| 104 |
+
"get_default_dynamic_sparse_quant_module_mappings",
|
| 105 |
+
"get_default_float_to_quantized_operator_mappings",
|
| 106 |
+
"get_default_qat_module_mappings",
|
| 107 |
+
"get_default_qat_qconfig",
|
| 108 |
+
"get_default_qat_qconfig_dict",
|
| 109 |
+
"get_default_qat_qconfig_mapping",
|
| 110 |
+
"get_default_qconfig",
|
| 111 |
+
"get_default_qconfig_dict",
|
| 112 |
+
"get_default_qconfig_mapping",
|
| 113 |
+
"get_default_qconfig_propagation_list",
|
| 114 |
+
"get_default_static_quant_module_mappings",
|
| 115 |
+
"get_default_static_quant_reference_module_mappings",
|
| 116 |
+
"get_default_static_sparse_quant_module_mappings",
|
| 117 |
+
"get_dynamic_quant_module_class",
|
| 118 |
+
"get_embedding_qat_module_mappings",
|
| 119 |
+
"get_embedding_static_quant_module_mappings",
|
| 120 |
+
"get_fuser_method",
|
| 121 |
+
"get_fuser_method_new",
|
| 122 |
+
"get_observer_state_dict",
|
| 123 |
+
"get_quantized_operator",
|
| 124 |
+
"get_static_quant_module_class",
|
| 125 |
+
"load_observer_state_dict",
|
| 126 |
+
"move_exported_model_to_eval",
|
| 127 |
+
"move_exported_model_to_train",
|
| 128 |
+
"allow_exported_model_train_eval",
|
| 129 |
+
"no_observer_set",
|
| 130 |
+
"per_channel_weight_observer_range_neg_127_to_127",
|
| 131 |
+
"prepare",
|
| 132 |
+
"prepare_dynamic_jit",
|
| 133 |
+
"prepare_jit",
|
| 134 |
+
"prepare_qat",
|
| 135 |
+
"propagate_qconfig_",
|
| 136 |
+
"qconfig_equals",
|
| 137 |
+
"quantize",
|
| 138 |
+
"quantize_dynamic",
|
| 139 |
+
"quantize_dynamic_jit",
|
| 140 |
+
"quantize_jit",
|
| 141 |
+
"quantize_qat",
|
| 142 |
+
"script_qconfig",
|
| 143 |
+
"script_qconfig_dict",
|
| 144 |
+
"swap_module",
|
| 145 |
+
"weight_observer_range_neg_127_to_127",
|
| 146 |
+
"generate_numeric_debug_handle",
|
| 147 |
+
]
|
| 148 |
+
|
| 149 |
+
def default_eval_fn(model, calib_data):
    r"""Default evaluation function.

    Iterates ``calib_data`` — a torch.utils.data.Dataset or a list of
    (input, target) pairs — and runs ``model`` on each input. Targets are
    ignored; only the forward passes matter (e.g. for calibration).
    """
    for data, _target in calib_data:
        model(data)
|
| 157 |
+
|
| 158 |
+
class _DerivedObserverOrFakeQuantize(ObserverBase):
    r"""Observer whose quantization parameters are derived from other observers.

    This class collects no statistics of its own; ``calculate_qparams``
    simply delegates to the user-supplied ``derive_qparams_fn`` applied to
    the wrapped observers / fake quantizes.
    """

    def __init__(
        self,
        dtype: torch.dtype,
        obs_or_fqs: List[ObserverOrFakeQuantize],
        derive_qparams_fn: Callable[[List[ObserverOrFakeQuantize]], Tuple[Tensor, Tensor]],
        quant_min: Optional[int] = None,
        quant_max: Optional[int] = None,
        qscheme: Optional[torch.qscheme] = None,
        ch_axis: Optional[int] = None
    ):
        super().__init__(dtype)
        self.obs_or_fqs = obs_or_fqs
        self.derive_qparams_fn = derive_qparams_fn
        self.quant_min = quant_min
        self.quant_max = quant_max
        self.qscheme = qscheme
        self.ch_axis = ch_axis

        # Local import — presumably to avoid an import cycle at module load;
        # confirm before hoisting to the top of the file.
        from .utils import is_per_channel
        if is_per_channel(self.qscheme):
            assert self.ch_axis is not None, "Must provide a valid ch_axis if qscheme is per channel"

    def forward(self, x: Tensor) -> Tensor:
        # Pass-through: this observer never inspects the data it sees.
        return x

    def calculate_qparams(self):
        return self.derive_qparams_fn(self.obs_or_fqs)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/quantization_mappings.cpython-311.pyc
ADDED
|
Binary file (16.8 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/__pycache__/quantize_fx.cpython-311.pyc
ADDED
|
Binary file (34.4 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/_correct_bias.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.ao.nn.quantized as nnq
|
| 4 |
+
|
| 5 |
+
import torch.ao.quantization
|
| 6 |
+
import torch.ao.ns._numeric_suite as ns
|
| 7 |
+
|
| 8 |
+
__all__ = [
|
| 9 |
+
"get_module",
|
| 10 |
+
"parent_child_names",
|
| 11 |
+
"get_param",
|
| 12 |
+
"MeanShadowLogger",
|
| 13 |
+
"bias_correction",
|
| 14 |
+
]
|
| 15 |
+
|
| 16 |
+
_supported_modules = {nn.Linear, nn.Conv2d}
|
| 17 |
+
_supported_modules_quantized = {nnq.Linear, nnq.Conv2d}
|
| 18 |
+
|
| 19 |
+
def get_module(model, name):
    """Return the submodule of ``model`` registered under the full name ``name``.

    Raises ``KeyError`` if no submodule with that name exists.
    """
    name_to_module = {mod_name: mod for mod_name, mod in model.named_modules()}
    return name_to_module[name]
|
| 22 |
+
|
| 23 |
+
def parent_child_names(name):
    """Split a submodule's full name into (parent full name, submodule name).

    A top-level name has no parent, so the parent component is ``''``.
    """
    # rpartition splits on the last '.' and yields ('', '', name) when
    # there is no separator — exactly the two cases we need.
    parent, _sep, child = name.rpartition('.')
    return parent, child
|
| 30 |
+
|
| 31 |
+
def get_param(module, attr):
    """Fetch the parameter named ``attr`` from ``module``.

    Some modules expose the raw tensor directly, while others expose a
    zero-argument accessor that returns it; both cases are handled here.
    Returns ``None`` when the attribute does not exist.
    """
    raw_or_getter = getattr(module, attr, None)
    if callable(raw_or_getter):
        return raw_or_getter()
    return raw_or_getter
|
| 42 |
+
|
| 43 |
+
class MeanShadowLogger(ns.Logger):
    """Mean Logger for a Shadow module.

    A logger for a Shadow module whose purpose is to record the rolling mean
    of the data passed to the floating point and quantized models
    """

    def __init__(self):
        """Set up initial values for float and quantized stats, count, float sum, and quant sum."""
        super().__init__()
        # Rolling means, exposed via the base Logger's ``stats`` dict.
        self.stats["float"] = None
        self.stats["quantized"] = None
        # Number of forward calls folded into the running sums below.
        self.count = 0
        # Raw accumulators; the means above are recomputed from these on
        # every forward call.
        self.float_sum = None
        self.quant_sum = None

    def forward(self, x, y):
        """Compute the average of quantized and floating-point data from modules.

        The inputs x,y are output data from the quantized and floating-point modules.
        x is for the quantized module, y is for the floating point module
        """
        if x.is_quantized:
            x = x.dequantize()

        self.count += 1
        if self.stats["quantized"] is None:
            # First observation: the accumulator *is* the input tensor — no
            # copy is made. NOTE(review): later ``+=`` on tensors accumulates
            # in place, so the first x/y passed in may be mutated on
            # subsequent calls; confirm callers never reuse these tensors.
            self.stats["quantized"] = x
            self.quant_sum = x
        else:
            self.quant_sum += x
            self.stats["quantized"] = self.quant_sum / self.count

        if self.stats["float"] is None:
            self.stats["float"] = y
            self.float_sum = y
        else:
            self.float_sum += y
            self.stats["float"] = self.float_sum / self.count

    def clear(self):
        # Reset all accumulated state so the logger can be reused.
        self.stats["float"] = None
        self.stats["quantized"] = None
        self.count = 0
        self.float_sum = None
        self.quant_sum = None
|
| 89 |
+
|
| 90 |
+
def bias_correction(float_model, quantized_model, img_data, target_modules=_supported_modules_quantized, neval_batches=None):
    """Perform bias correction on a module.

    Using numeric suite shadow module, the expected output of the floating point and quantized modules
    is recorded. Using that data the bias of supported modules is shifted to compensate for the drift caused
    by quantization
    Paper reference: https://arxiv.org/pdf/1906.04721.pdf (Section 4.2)

    Args:
        float_model: a trained model that serves as a reference to what bias correction should aim for
        quantized_model: quantized form of float_model that bias correction is to applied to
        img_data: calibration data to estimate the expected output (used to find quantization error)
        target_modules: specifies what submodules in quantized_model need bias correction (can be extended to
                unquantized submodules)
        neval_batches: a cap to the number of batches you want to be used for estimating the expected output
    """
    # Attach shadow copies of the float modules (with MeanShadowLogger) onto
    # the quantized model so per-module outputs of both models are recorded.
    ns.prepare_model_with_stubs(float_model, quantized_model, _supported_modules, MeanShadowLogger)

    # Gather the submodules whose bias will be corrected.
    uncorrected_modules = {}
    for name, submodule in quantized_model.named_modules():
        if type(submodule) in target_modules:
            uncorrected_modules[name] = submodule

    # Correct each target module's bias in turn.
    for uncorrected_module in uncorrected_modules:
        quantized_submodule = get_module(quantized_model, uncorrected_module)
        bias = get_param(quantized_submodule, 'bias')
        if bias is not None:

            # Run calibration batches (capped at neval_batches when given) so
            # the shadow loggers accumulate float/quantized output means.
            count = 0
            for data in img_data:
                quantized_model(data[0])
                count += 1
                if count == neval_batches:
                    break
            ob_dict = ns.get_logger_dict(quantized_model)
            parent_name, _ = parent_child_names(uncorrected_module)

            # Recorded rolling means for this module's shadow logger.
            float_data = ob_dict[parent_name + '.stats']['float']
            quant_data = ob_dict[parent_name + '.stats']['quantized']

            # math for expected_error
            quantization_error = quant_data - float_data
            dims = list(range(quantization_error.dim()))
            # Note: we don't want to take the mean over the output channel dimension
            dims.remove(1)
            expected_error = torch.mean(quantization_error, dims)

            # Shift the bias to cancel the mean quantization drift.
            updated_bias = bias.data - expected_error

            bias.data = updated_bias

    # Resets the data contained in the loggers
    for name, submodule in quantized_model.named_modules():
        if isinstance(submodule, MeanShadowLogger):
            submodule.clear()
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/_common_operator_config_utils.cpython-311.pyc
ADDED
|
Binary file (32.9 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/backend_config.cpython-311.pyc
ADDED
|
Binary file (34.2 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/observation_type.cpython-311.pyc
ADDED
|
Binary file (245 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/__pycache__/onednn.cpython-311.pyc
ADDED
|
Binary file (20.9 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/observation_type.py
ADDED
|
File without changes
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/qnnpack.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from ._common_operator_config_utils import (
|
| 3 |
+
_get_binary_op_configs,
|
| 4 |
+
_get_bn_configs,
|
| 5 |
+
_get_cat_config,
|
| 6 |
+
_get_conv_configs,
|
| 7 |
+
_get_default_op_configs,
|
| 8 |
+
_get_embedding_op_configs,
|
| 9 |
+
_get_fixed_qparams_op_configs,
|
| 10 |
+
_get_linear_configs,
|
| 11 |
+
_get_rnn_op_configs,
|
| 12 |
+
_get_share_qparams_op_configs,
|
| 13 |
+
)
|
| 14 |
+
from .backend_config import BackendConfig, DTypeConfig, DTypeWithConstraints
|
| 15 |
+
|
| 16 |
+
__all__ = [
|
| 17 |
+
"get_qnnpack_backend_config",
|
| 18 |
+
]
|
| 19 |
+
|
| 20 |
+
# ===================
|
| 21 |
+
# | DTYPE CONFIGS |
|
| 22 |
+
# ===================
|
| 23 |
+
|
| 24 |
+
qnnpack_weighted_op_quint8_dtype_config = DTypeConfig(
|
| 25 |
+
input_dtype=torch.quint8,
|
| 26 |
+
output_dtype=torch.quint8,
|
| 27 |
+
weight_dtype=torch.qint8,
|
| 28 |
+
bias_dtype=torch.float,
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
qnnpack_default_op_quint8_dtype_config = DTypeConfig(
|
| 32 |
+
input_dtype=torch.quint8,
|
| 33 |
+
output_dtype=torch.quint8,
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
qnnpack_default_op_fp16_dtype_config = DTypeConfig(
|
| 37 |
+
input_dtype=torch.float16,
|
| 38 |
+
output_dtype=torch.float16,
|
| 39 |
+
weight_dtype=torch.float16,
|
| 40 |
+
bias_dtype=torch.float16,
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
qnnpack_default_dynamic_int8_dtype_config = DTypeConfig(
|
| 44 |
+
input_dtype=torch.quint8,
|
| 45 |
+
output_dtype=torch.float,
|
| 46 |
+
weight_dtype=torch.qint8,
|
| 47 |
+
bias_dtype=torch.float,
|
| 48 |
+
is_dynamic=True,
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
qnnpack_default_dynamic_float16_dtype_config = DTypeConfig(
|
| 52 |
+
input_dtype=torch.float16,
|
| 53 |
+
output_dtype=torch.float,
|
| 54 |
+
weight_dtype=torch.float16,
|
| 55 |
+
bias_dtype=torch.float,
|
| 56 |
+
is_dynamic=True,
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
+
qnnpack_weight_only_quint8_dtype_config = DTypeConfig(
|
| 60 |
+
input_dtype=torch.float,
|
| 61 |
+
output_dtype=torch.float,
|
| 62 |
+
weight_dtype=torch.quint8,
|
| 63 |
+
)
|
| 64 |
+
|
| 65 |
+
qnnpack_weight_only_quint4x2_dtype_config = DTypeConfig(
|
| 66 |
+
input_dtype=torch.float,
|
| 67 |
+
output_dtype=torch.float,
|
| 68 |
+
weight_dtype=torch.quint4x2,
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
# xnnpack compatible dtype configs
|
| 72 |
+
|
| 73 |
+
# We restrict scale values to be 2 ** -12 to ensure the
|
| 74 |
+
# requantization scale never falls below the xnnpack lower
|
| 75 |
+
# threshold. Additionally, for qint8 weight, we restrict
|
| 76 |
+
# the quantization values to [-127, +127], excluding -128.
|
| 77 |
+
# For more detail, refer to the description of
|
| 78 |
+
# `default_symmetric_qnnpack_qconfig`.
|
| 79 |
+
|
| 80 |
+
# TODO: add additional restriction on qscheme to ensure it
|
| 81 |
+
# is either per_tensor_symmetric or per_channel_symmetric
|
| 82 |
+
|
| 83 |
+
qnnpack_act_qint8_scale_min_2_neg_12 = DTypeWithConstraints(
|
| 84 |
+
dtype=torch.qint8,
|
| 85 |
+
scale_min_lower_bound=2 ** -12,
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
qnnpack_weight_qint8_neg_127_to_127_scale_min_2_neg_12 = DTypeWithConstraints(
|
| 89 |
+
dtype=torch.qint8,
|
| 90 |
+
quant_min_lower_bound=-127,
|
| 91 |
+
quant_max_upper_bound=127,
|
| 92 |
+
scale_min_lower_bound=2 ** -12,
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
qnnpack_weighted_op_qint8_symmetric_dtype_config = DTypeConfig(
|
| 96 |
+
input_dtype=qnnpack_act_qint8_scale_min_2_neg_12,
|
| 97 |
+
output_dtype=qnnpack_act_qint8_scale_min_2_neg_12,
|
| 98 |
+
weight_dtype=qnnpack_weight_qint8_neg_127_to_127_scale_min_2_neg_12,
|
| 99 |
+
bias_dtype=torch.float,
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
qnnpack_default_op_qint8_symmetric_dtype_config = DTypeConfig(
|
| 103 |
+
input_dtype=qnnpack_act_qint8_scale_min_2_neg_12,
|
| 104 |
+
output_dtype=qnnpack_act_qint8_scale_min_2_neg_12,
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# =====================
|
| 109 |
+
# | BACKEND CONFIGS |
|
| 110 |
+
# =====================
|
| 111 |
+
|
| 112 |
+
def get_qnnpack_backend_config() -> BackendConfig:
|
| 113 |
+
"""
|
| 114 |
+
Return the `BackendConfig` for PyTorch's native QNNPACK backend.
|
| 115 |
+
"""
|
| 116 |
+
conv_dtype_configs = [
|
| 117 |
+
qnnpack_weighted_op_qint8_symmetric_dtype_config,
|
| 118 |
+
qnnpack_weighted_op_quint8_dtype_config,
|
| 119 |
+
]
|
| 120 |
+
linear_dtype_configs = [
|
| 121 |
+
qnnpack_weighted_op_qint8_symmetric_dtype_config,
|
| 122 |
+
qnnpack_weighted_op_quint8_dtype_config,
|
| 123 |
+
qnnpack_default_dynamic_int8_dtype_config,
|
| 124 |
+
qnnpack_default_dynamic_float16_dtype_config,
|
| 125 |
+
]
|
| 126 |
+
binary_op_dtype_configs = [
|
| 127 |
+
qnnpack_default_op_qint8_symmetric_dtype_config,
|
| 128 |
+
qnnpack_default_op_quint8_dtype_config,
|
| 129 |
+
]
|
| 130 |
+
default_op_dtype_configs = [
|
| 131 |
+
qnnpack_default_op_qint8_symmetric_dtype_config,
|
| 132 |
+
qnnpack_default_op_quint8_dtype_config,
|
| 133 |
+
]
|
| 134 |
+
fixed_qparams_op_dtype_configs = [
|
| 135 |
+
qnnpack_default_op_qint8_symmetric_dtype_config,
|
| 136 |
+
qnnpack_default_op_quint8_dtype_config,
|
| 137 |
+
]
|
| 138 |
+
share_qparams_op_dtype_configs = [
|
| 139 |
+
qnnpack_default_op_qint8_symmetric_dtype_config,
|
| 140 |
+
qnnpack_default_op_quint8_dtype_config,
|
| 141 |
+
]
|
| 142 |
+
rnn_op_dtype_configs = [
|
| 143 |
+
qnnpack_default_dynamic_int8_dtype_config,
|
| 144 |
+
qnnpack_default_dynamic_float16_dtype_config,
|
| 145 |
+
]
|
| 146 |
+
embedding_op_dtype_configs = [
|
| 147 |
+
qnnpack_weight_only_quint8_dtype_config,
|
| 148 |
+
qnnpack_weight_only_quint4x2_dtype_config,
|
| 149 |
+
]
|
| 150 |
+
return BackendConfig("qnnpack") \
|
| 151 |
+
.set_backend_pattern_configs(_get_conv_configs(conv_dtype_configs)) \
|
| 152 |
+
.set_backend_pattern_configs(_get_linear_configs(linear_dtype_configs)) \
|
| 153 |
+
.set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtype_configs)) \
|
| 154 |
+
.set_backend_pattern_config(_get_cat_config(default_op_dtype_configs)) \
|
| 155 |
+
.set_backend_pattern_configs(_get_default_op_configs(default_op_dtype_configs)) \
|
| 156 |
+
.set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_op_dtype_configs)) \
|
| 157 |
+
.set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_op_dtype_configs)) \
|
| 158 |
+
.set_backend_pattern_configs(_get_bn_configs(default_op_dtype_configs)) \
|
| 159 |
+
.set_backend_pattern_configs(_get_rnn_op_configs(rnn_op_dtype_configs)) \
|
| 160 |
+
.set_backend_pattern_configs(_get_embedding_op_configs(embedding_op_dtype_configs))
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/tensorrt.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from .backend_config import (
|
| 3 |
+
BackendConfig,
|
| 4 |
+
BackendPatternConfig,
|
| 5 |
+
DTypeConfig,
|
| 6 |
+
ObservationType
|
| 7 |
+
)
|
| 8 |
+
from ._common_operator_config_utils import (
|
| 9 |
+
_get_binary_op_configs,
|
| 10 |
+
_get_linear_configs,
|
| 11 |
+
_get_conv_configs,
|
| 12 |
+
_get_share_qparams_op_configs,
|
| 13 |
+
_get_tensor_info_op_configs,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
__all__ = [
|
| 17 |
+
"get_tensorrt_backend_config",
|
| 18 |
+
"get_tensorrt_backend_config_dict",
|
| 19 |
+
]
|
| 20 |
+
|
| 21 |
+
def get_tensorrt_backend_config() -> BackendConfig:
|
| 22 |
+
"""
|
| 23 |
+
Return the `BackendConfig` for the TensorRT backend.
|
| 24 |
+
NOTE: Current api will change in the future, it's just to unblock experimentation for
|
| 25 |
+
new backends, please don't use it right now.
|
| 26 |
+
TODO: add a README when it's more stable
|
| 27 |
+
"""
|
| 28 |
+
# dtype configs
|
| 29 |
+
weighted_op_qint8_dtype_config = DTypeConfig(
|
| 30 |
+
input_dtype=torch.qint8,
|
| 31 |
+
output_dtype=torch.qint8,
|
| 32 |
+
weight_dtype=torch.qint8,
|
| 33 |
+
bias_dtype=torch.float,
|
| 34 |
+
)
|
| 35 |
+
non_weighted_op_qint8_dtype_config = DTypeConfig(
|
| 36 |
+
input_dtype=torch.qint8,
|
| 37 |
+
output_dtype=torch.qint8,
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
addmm_config = BackendPatternConfig(torch.addmm) \
|
| 41 |
+
.set_observation_type(ObservationType.OUTPUT_USE_DIFFERENT_OBSERVER_AS_INPUT) \
|
| 42 |
+
.add_dtype_config(weighted_op_qint8_dtype_config) \
|
| 43 |
+
._set_input_type_to_index({
|
| 44 |
+
"bias": 0,
|
| 45 |
+
"input": 1,
|
| 46 |
+
"weight": 2,
|
| 47 |
+
})
|
| 48 |
+
cat_config = BackendPatternConfig(torch.cat) \
|
| 49 |
+
.set_observation_type(ObservationType.OUTPUT_SHARE_OBSERVER_WITH_INPUT) \
|
| 50 |
+
.add_dtype_config(non_weighted_op_qint8_dtype_config)
|
| 51 |
+
conv_dtype_configs = [
|
| 52 |
+
weighted_op_qint8_dtype_config,
|
| 53 |
+
]
|
| 54 |
+
linear_dtype_configs = [
|
| 55 |
+
weighted_op_qint8_dtype_config,
|
| 56 |
+
]
|
| 57 |
+
binary_op_dtype_configs = [
|
| 58 |
+
weighted_op_qint8_dtype_config,
|
| 59 |
+
]
|
| 60 |
+
share_qparams_op_dtype_configs = [
|
| 61 |
+
non_weighted_op_qint8_dtype_config,
|
| 62 |
+
]
|
| 63 |
+
tensor_info_op_dtype_configs = [
|
| 64 |
+
non_weighted_op_qint8_dtype_config,
|
| 65 |
+
]
|
| 66 |
+
# there might be things not supported in fx2trt, but it will error out
|
| 67 |
+
# during fx2trt conversion and can support them after that
|
| 68 |
+
return BackendConfig("tensorrt") \
|
| 69 |
+
.set_backend_pattern_configs(_get_conv_configs(conv_dtype_configs)) \
|
| 70 |
+
.set_backend_pattern_config(addmm_config) \
|
| 71 |
+
.set_backend_pattern_config(cat_config) \
|
| 72 |
+
.set_backend_pattern_configs(_get_linear_configs(linear_dtype_configs)) \
|
| 73 |
+
.set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtype_configs)) \
|
| 74 |
+
.set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_op_dtype_configs)) \
|
| 75 |
+
.set_backend_pattern_configs(_get_tensor_info_op_configs(tensor_info_op_dtype_configs))
|
| 76 |
+
|
| 77 |
+
def get_tensorrt_backend_config_dict():
    """
    Return the `BackendConfig` for the TensorRT backend in dictionary form.
    """
    backend_config = get_tensorrt_backend_config()
    return backend_config.to_dict()
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/utils.py
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, Any, List, Callable, Union, Tuple, Type
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
from .backend_config import (
|
| 7 |
+
BackendConfig,
|
| 8 |
+
BackendPatternConfig,
|
| 9 |
+
DTypeConfig,
|
| 10 |
+
)
|
| 11 |
+
from ..utils import Pattern
|
| 12 |
+
from ..fuser_method_mappings import (
|
| 13 |
+
_reverse2,
|
| 14 |
+
_reverse3,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
__all__ = [
|
| 18 |
+
"get_pattern_to_dtype_configs",
|
| 19 |
+
"get_qat_module_classes",
|
| 20 |
+
"get_fused_module_classes",
|
| 21 |
+
"get_pattern_to_input_type_to_index",
|
| 22 |
+
"get_root_module_to_quantized_reference_module",
|
| 23 |
+
"get_fuser_method_mapping",
|
| 24 |
+
"get_module_to_qat_module",
|
| 25 |
+
"get_fusion_pattern_to_root_node_getter",
|
| 26 |
+
"get_fusion_pattern_to_extra_inputs_getter",
|
| 27 |
+
"remove_boolean_dispatch_from_name",
|
| 28 |
+
"pattern_to_human_readable",
|
| 29 |
+
"entry_to_pretty_str",
|
| 30 |
+
]
|
| 31 |
+
|
| 32 |
+
def get_pattern_to_dtype_configs(backend_config: BackendConfig) -> Dict[Pattern, List[DTypeConfig]]:
|
| 33 |
+
pattern_to_dtype_configs: Dict[Pattern, List[DTypeConfig]] = {}
|
| 34 |
+
for pattern, config in backend_config._pattern_complex_format_to_config.items():
|
| 35 |
+
pattern_to_dtype_configs[pattern] = config.dtype_configs
|
| 36 |
+
return pattern_to_dtype_configs
|
| 37 |
+
|
| 38 |
+
def get_qat_module_classes(backend_config: BackendConfig) -> Tuple[type, ...]:
|
| 39 |
+
qat_module_classes = []
|
| 40 |
+
for config in backend_config.configs:
|
| 41 |
+
if config.qat_module is not None:
|
| 42 |
+
qat_module_classes.append(config.qat_module)
|
| 43 |
+
return tuple(set(qat_module_classes))
|
| 44 |
+
|
| 45 |
+
def get_fused_module_classes(backend_config: BackendConfig) -> Tuple[type, ...]:
|
| 46 |
+
fused_module_classes = []
|
| 47 |
+
for config in backend_config.configs:
|
| 48 |
+
if config.fused_module is not None:
|
| 49 |
+
fused_module_classes.append(config.fused_module)
|
| 50 |
+
return tuple(set(fused_module_classes))
|
| 51 |
+
|
| 52 |
+
def get_pattern_to_input_type_to_index(backend_config: BackendConfig) -> Dict[Pattern, Dict[str, int]]:
|
| 53 |
+
pattern_to_input_type_to_index: Dict[Pattern, Dict[str, int]] = {}
|
| 54 |
+
for pattern, config in backend_config._pattern_complex_format_to_config.items():
|
| 55 |
+
pattern_to_input_type_to_index[pattern] = config._input_type_to_index
|
| 56 |
+
return pattern_to_input_type_to_index
|
| 57 |
+
|
| 58 |
+
def get_root_module_to_quantized_reference_module(
|
| 59 |
+
backend_config: BackendConfig) -> Dict[Type[torch.nn.Module], Type[torch.nn.Module]]:
|
| 60 |
+
mapping: Dict[Type[torch.nn.Module], Type[torch.nn.Module]] = {}
|
| 61 |
+
for config in backend_config.configs:
|
| 62 |
+
if config.root_module is not None and config.reference_quantized_module is not None:
|
| 63 |
+
mapping[config.root_module] = config.reference_quantized_module
|
| 64 |
+
return mapping
|
| 65 |
+
|
| 66 |
+
def get_fuser_method_mapping(backend_config: BackendConfig) -> Dict[Pattern, Union[nn.Sequential, Callable]]:
|
| 67 |
+
fuser_method_mapping : Dict[Pattern, Union[nn.Sequential, Callable]] = {}
|
| 68 |
+
for pattern, config in backend_config._pattern_complex_format_to_config.items():
|
| 69 |
+
if config.fuser_method is not None:
|
| 70 |
+
# Note: both the fuser method and the pattern are specified in forward order in the
|
| 71 |
+
# BackendConfig, but the internal pattern matching code uses the reversed nested tuple
|
| 72 |
+
# format, so we need to convert both to the internal format
|
| 73 |
+
fuser_method = _get_fuser_method_in_reversed_nested_tuple_format(config)
|
| 74 |
+
fuser_method_mapping[pattern] = fuser_method
|
| 75 |
+
return fuser_method_mapping
|
| 76 |
+
|
| 77 |
+
def get_module_to_qat_module(backend_config: BackendConfig) -> Dict[Pattern, Type[torch.nn.Module]]:
|
| 78 |
+
module_to_qat_module: Dict[Pattern, Type[torch.nn.Module]] = {}
|
| 79 |
+
for pattern, config in backend_config._pattern_complex_format_to_config.items():
|
| 80 |
+
if config.qat_module is not None:
|
| 81 |
+
module_to_qat_module[pattern] = config.qat_module
|
| 82 |
+
return module_to_qat_module
|
| 83 |
+
|
| 84 |
+
def get_fusion_pattern_to_root_node_getter(backend_config: BackendConfig) -> Dict[Pattern, Callable]:
|
| 85 |
+
""" Get a map from fusion pattern to a function that returns the root node
|
| 86 |
+
from the fusion pattern, e.g. the most common one is:
|
| 87 |
+
def get_root_node(node_pattern):
|
| 88 |
+
while not isinstance(node_pattern[-1], Node):
|
| 89 |
+
node_pattern = node_pattern[-1]
|
| 90 |
+
return node_pattern[-1]
|
| 91 |
+
This can work for all patterns whose root node is the "last node" in the pattern,
|
| 92 |
+
e.g. (torch.add, MatchAllNode, (torch.ReLU, torch.Conv2d))
|
| 93 |
+
"""
|
| 94 |
+
root_node_getter_mapping: Dict[Pattern, Callable] = {}
|
| 95 |
+
for pattern, config in backend_config._pattern_complex_format_to_config.items():
|
| 96 |
+
if config._root_node_getter is not None:
|
| 97 |
+
root_node_getter_mapping[pattern] = config._root_node_getter
|
| 98 |
+
return root_node_getter_mapping
|
| 99 |
+
|
| 100 |
+
def get_fusion_pattern_to_extra_inputs_getter(backend_config: BackendConfig) -> Dict[Pattern, Callable]:
|
| 101 |
+
""" Get a map from fusion pattern to a function that returns extra input nodes
|
| 102 |
+
from the fusion pattern, in the order required by the root node. This is optional,
|
| 103 |
+
if not specified, we will not copy over any extra inputs for the root node.
|
| 104 |
+
Example:
|
| 105 |
+
# Let's say we have the pattern (torch.add, MatchAllNode, (torch.nn.BatchNorm2d, torch.nn.Conv2d))
|
| 106 |
+
# and root node is torch.nn.Conv2d, and the node in MatchAllNode would be an extra
|
| 107 |
+
# argument to the fused module, we can unpack the pattern and return the node at
|
| 108 |
+
# MatchAllNode here
|
| 109 |
+
# we can implement extra_inputs_getter as follows:
|
| 110 |
+
def extra_inputs_getter(pattern) -> List[Any]:
|
| 111 |
+
add, extra_input, conv_pattern = pattern
|
| 112 |
+
return [extra_input]
|
| 113 |
+
"""
|
| 114 |
+
extra_inputs_getter_mapping: Dict[Pattern, Callable] = {}
|
| 115 |
+
for pattern, config in backend_config._pattern_complex_format_to_config.items():
|
| 116 |
+
if config._extra_inputs_getter is not None:
|
| 117 |
+
extra_inputs_getter_mapping[pattern] = config._extra_inputs_getter
|
| 118 |
+
return extra_inputs_getter_mapping
|
| 119 |
+
|
| 120 |
+
def remove_boolean_dispatch_from_name(p) -> Any:
|
| 121 |
+
"""
|
| 122 |
+
Some ops have a default string representation such as
|
| 123 |
+
'<function boolean_dispatch.<locals>.fn at 0x7ff1106bf280>',
|
| 124 |
+
this function replaces them with the hardcoded function names.
|
| 125 |
+
"""
|
| 126 |
+
if p is F.fractional_max_pool2d:
|
| 127 |
+
return "torch.nn.functional.fractional_max_pool2d"
|
| 128 |
+
elif p is F.fractional_max_pool3d:
|
| 129 |
+
return "torch.nn.functional.fractional_max_pool3d"
|
| 130 |
+
elif p is F.max_pool1d:
|
| 131 |
+
return "torch.nn.functional.max_pool1d"
|
| 132 |
+
elif p is F.max_pool2d:
|
| 133 |
+
return "torch.nn.functional.max_pool2d"
|
| 134 |
+
elif p is F.max_pool3d:
|
| 135 |
+
return "torch.nn.functional.max_pool3d"
|
| 136 |
+
elif p is F.adaptive_max_pool1d:
|
| 137 |
+
return "torch.nn.functional.adaptive_max_pool1d"
|
| 138 |
+
elif p is F.adaptive_max_pool2d:
|
| 139 |
+
return "torch.nn.functional.adaptive_max_pool2d"
|
| 140 |
+
elif p is F.adaptive_max_pool3d:
|
| 141 |
+
return "torch.nn.functional.adaptive_max_pool3d"
|
| 142 |
+
assert "boolean_dispatch" not in str(p), \
|
| 143 |
+
f"{p} does not have a human readable representation in " + \
|
| 144 |
+
"quantization documentation"
|
| 145 |
+
return p
|
| 146 |
+
|
| 147 |
+
def pattern_to_human_readable(p) -> Any:
|
| 148 |
+
if isinstance(p, tuple):
|
| 149 |
+
# nested patterns, recurse
|
| 150 |
+
return tuple(pattern_to_human_readable(inner_p) for inner_p in p)
|
| 151 |
+
elif isinstance(p, str):
|
| 152 |
+
# method names are already human readable
|
| 153 |
+
return p
|
| 154 |
+
else:
|
| 155 |
+
p = remove_boolean_dispatch_from_name(p)
|
| 156 |
+
return p
|
| 157 |
+
|
| 158 |
+
# TODO(future PR): move backend_config_dict to use dataclass and move this logic to
|
| 159 |
+
# the corresponding __str__ function
|
| 160 |
+
def entry_to_pretty_str(entry) -> str:
|
| 161 |
+
"""
|
| 162 |
+
Given a backend_config_dict entry, returns a string with the human readable
|
| 163 |
+
representation of it.
|
| 164 |
+
"""
|
| 165 |
+
s = "{\n"
|
| 166 |
+
|
| 167 |
+
# always output the pattern first
|
| 168 |
+
if "pattern" in entry:
|
| 169 |
+
pattern_str = pattern_to_human_readable(entry["pattern"])
|
| 170 |
+
|
| 171 |
+
s += f" 'pattern': {pattern_str},\n"
|
| 172 |
+
|
| 173 |
+
# custom output for dtype_configs to make it look nice
|
| 174 |
+
if "dtype_configs" in entry:
|
| 175 |
+
s += " 'dtype_configs': [\n"
|
| 176 |
+
for dtype_config in entry["dtype_configs"]:
|
| 177 |
+
s += " {\n"
|
| 178 |
+
for k, v in dtype_config.items():
|
| 179 |
+
s += f" '{k}': {v},\n"
|
| 180 |
+
s += " },\n"
|
| 181 |
+
s += " ],\n"
|
| 182 |
+
|
| 183 |
+
# custom output for num_tensor_args_to_observation_type to make it look nice
|
| 184 |
+
if "num_tensor_args_to_observation_type" in entry:
|
| 185 |
+
s += " 'num_tensor_args_to_observation_type': {\n"
|
| 186 |
+
for k, v in entry["num_tensor_args_to_observation_type"].items():
|
| 187 |
+
s += f" {k}: {v},\n"
|
| 188 |
+
s += " },\n"
|
| 189 |
+
|
| 190 |
+
# output all the other fields
|
| 191 |
+
custom_handled_fields = [
|
| 192 |
+
"pattern",
|
| 193 |
+
"dtype_configs",
|
| 194 |
+
"num_tensor_args_to_observation_type",
|
| 195 |
+
]
|
| 196 |
+
for field_name in entry:
|
| 197 |
+
if field_name in custom_handled_fields:
|
| 198 |
+
continue
|
| 199 |
+
s += f" '{field_name}': {entry[field_name]},\n"
|
| 200 |
+
|
| 201 |
+
s += "}"
|
| 202 |
+
return s
|
| 203 |
+
|
| 204 |
+
def _get_pattern_in_reversed_nested_tuple_format(config: BackendPatternConfig) -> Pattern:
|
| 205 |
+
"""
|
| 206 |
+
Return the pattern specified in the given config in the reversed nested tuple format
|
| 207 |
+
used internally in the quantization pattern matching code.
|
| 208 |
+
|
| 209 |
+
If the pattern is not a tuple, or the pattern is already specified in the reversed
|
| 210 |
+
nested tuple format, return the pattern as is. Otherwise:
|
| 211 |
+
|
| 212 |
+
For 2-tuples (a, b), return (b, a).
|
| 213 |
+
For 3-tuples (a, b, c), return (c, (b, a)).
|
| 214 |
+
|
| 215 |
+
For example:
|
| 216 |
+
* Given nn.Linear, return nn.Linear
|
| 217 |
+
* Given (nn.Linear, nn.ReLU), return (nn.ReLU, nn.Linear)
|
| 218 |
+
* Given (nn.Conv2d, nn.BatchNorm2d, nn.ReLU), return
|
| 219 |
+
(nn.ReLU, (nn.BatchNorm2d, nn.Conv2d))
|
| 220 |
+
|
| 221 |
+
For context, the reason why this is needed is the user-facing BackendConfig
|
| 222 |
+
API accepts the flat 2-or-3-tuple format in forward order. While this simple
|
| 223 |
+
format handles the vast majority of use cases, it does not handle the more
|
| 224 |
+
complex ones, and so the internal pattern matching code for quantization uses
|
| 225 |
+
the following, more general reversed nested tuple format instead:
|
| 226 |
+
|
| 227 |
+
operator = module_type | functional | torch op | native op | MatchAllNode
|
| 228 |
+
Pattern = (operator, Pattern, Pattern, ...) | operator
|
| 229 |
+
|
| 230 |
+
In the future, we expect to replace the above complex format with the one used
|
| 231 |
+
by the subgraph rewriter in torch.fx, so we don't have to maintain our own
|
| 232 |
+
complex pattern matching code. Then we won't need this helper function anymore.
|
| 233 |
+
"""
|
| 234 |
+
if config._pattern_complex_format is not None:
|
| 235 |
+
return config._pattern_complex_format
|
| 236 |
+
if config.pattern is None:
|
| 237 |
+
raise ValueError("Either 'pattern' or 'pattern_complex_format' must be specified")
|
| 238 |
+
if not isinstance(config.pattern, tuple):
|
| 239 |
+
return config.pattern
|
| 240 |
+
|
| 241 |
+
# Pattern is specified in the simple tuple format, need to convert
|
| 242 |
+
if len(config.pattern) == 2:
|
| 243 |
+
(a, b) = config.pattern
|
| 244 |
+
return (b, a)
|
| 245 |
+
elif len(config.pattern) == 3:
|
| 246 |
+
(a, b, c) = config.pattern
|
| 247 |
+
return (c, (b, a))
|
| 248 |
+
else:
|
| 249 |
+
raise ValueError("Expected a tuple with 2 or 3 elements, got: ", config.pattern)
|
| 250 |
+
|
| 251 |
+
def _get_fuser_method_in_reversed_nested_tuple_format(config: BackendPatternConfig) -> Callable:
|
| 252 |
+
"""
|
| 253 |
+
Return the fuser method specified in the given config in the reversed nested
|
| 254 |
+
tuple format used internally in the quantization pattern matching code.
|
| 255 |
+
|
| 256 |
+
If pattern is specified in the reversed nested tuple format, we assume the
|
| 257 |
+
fuser method is also specified in this format and simply return it as is.
|
| 258 |
+
Otherwise, we convert the fuser method as follows:
|
| 259 |
+
|
| 260 |
+
* Given f(is_qat, conv, relu), return f'(is_qat, relu, conv)
|
| 261 |
+
* Given f(is_qat, conv, bn, relu), return f'(is_qat, relu, bn_conv),
|
| 262 |
+
where bn_conv is a 2-tuple (bn, conv)
|
| 263 |
+
|
| 264 |
+
The first argument of a fuser method is always `is_qat` and is not affected
|
| 265 |
+
in the conversion. We currently only support functions with 3 or 4 arguments.
|
| 266 |
+
"""
|
| 267 |
+
assert config.fuser_method is not None
|
| 268 |
+
if config._pattern_complex_format is not None:
|
| 269 |
+
return config.fuser_method
|
| 270 |
+
if not isinstance(config.pattern, tuple):
|
| 271 |
+
raise ValueError("Expected pattern to be a tuple, got: ", config.pattern)
|
| 272 |
+
|
| 273 |
+
# Pattern is specified in the simple tuple format, need to convert
|
| 274 |
+
if len(config.pattern) == 2:
|
| 275 |
+
return _reverse2(config.fuser_method)
|
| 276 |
+
elif len(config.pattern) == 3:
|
| 277 |
+
return _reverse3(config.fuser_method)
|
| 278 |
+
else:
|
| 279 |
+
raise ValueError("Expected a tuple with 2 or 3 elements, got: ", config.pattern)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/backend_config/x86.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from ._common_operator_config_utils import (
|
| 3 |
+
_get_binary_op_configs,
|
| 4 |
+
_get_bn_configs,
|
| 5 |
+
_get_cat_config,
|
| 6 |
+
_get_conv_configs,
|
| 7 |
+
_get_default_op_configs,
|
| 8 |
+
_get_embedding_op_configs,
|
| 9 |
+
_get_fixed_qparams_op_configs,
|
| 10 |
+
_get_linear_configs,
|
| 11 |
+
_get_rnn_op_configs,
|
| 12 |
+
_get_share_qparams_op_configs,
|
| 13 |
+
_get_tensor_info_op_configs,
|
| 14 |
+
)
|
| 15 |
+
from .backend_config import BackendConfig, DTypeConfig
|
| 16 |
+
|
| 17 |
+
__all__ = [
|
| 18 |
+
"get_x86_backend_config",
|
| 19 |
+
]
|
| 20 |
+
|
| 21 |
+
# ===================
|
| 22 |
+
# | DTYPE CONFIGS |
|
| 23 |
+
# ===================
|
| 24 |
+
|
| 25 |
+
# X86 aligns with FBGEMM for now
|
| 26 |
+
|
| 27 |
+
x86_weighted_op_int8_dtype_config = DTypeConfig(
|
| 28 |
+
input_dtype=torch.quint8,
|
| 29 |
+
output_dtype=torch.quint8,
|
| 30 |
+
weight_dtype=torch.qint8,
|
| 31 |
+
bias_dtype=torch.float,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
x86_default_op_quint8_dtype_config = DTypeConfig(
|
| 35 |
+
input_dtype=torch.quint8,
|
| 36 |
+
output_dtype=torch.quint8,
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
x86_default_op_fp16_dtype_config = DTypeConfig(
|
| 40 |
+
input_dtype=torch.float16,
|
| 41 |
+
output_dtype=torch.float16,
|
| 42 |
+
weight_dtype=torch.float16,
|
| 43 |
+
bias_dtype=torch.float16,
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
x86_default_dynamic_int8_dtype_config = DTypeConfig(
|
| 47 |
+
input_dtype=torch.quint8,
|
| 48 |
+
output_dtype=torch.float,
|
| 49 |
+
weight_dtype=torch.qint8,
|
| 50 |
+
bias_dtype=torch.float,
|
| 51 |
+
is_dynamic=True,
|
| 52 |
+
)
|
| 53 |
+
|
| 54 |
+
x86_default_dynamic_float16_dtype_config = DTypeConfig(
|
| 55 |
+
input_dtype=torch.float16,
|
| 56 |
+
output_dtype=torch.float,
|
| 57 |
+
weight_dtype=torch.float16,
|
| 58 |
+
bias_dtype=torch.float,
|
| 59 |
+
is_dynamic=True,
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
x86_weight_only_quint8_dtype_config = DTypeConfig(
|
| 63 |
+
input_dtype=torch.float,
|
| 64 |
+
output_dtype=torch.float,
|
| 65 |
+
weight_dtype=torch.quint8,
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
x86_weight_only_quint4x2_dtype_config = DTypeConfig(
|
| 69 |
+
input_dtype=torch.float,
|
| 70 |
+
output_dtype=torch.float,
|
| 71 |
+
weight_dtype=torch.quint4x2,
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# =====================
|
| 76 |
+
# | BACKEND CONFIGS |
|
| 77 |
+
# =====================
|
| 78 |
+
|
| 79 |
+
def get_x86_backend_config() -> BackendConfig:
|
| 80 |
+
"""
|
| 81 |
+
Return the `BackendConfig` for PyTorch's native x86 backend.
|
| 82 |
+
"""
|
| 83 |
+
conv_dtype_configs = [x86_weighted_op_int8_dtype_config]
|
| 84 |
+
linear_dtype_configs = [
|
| 85 |
+
x86_weighted_op_int8_dtype_config,
|
| 86 |
+
x86_default_dynamic_int8_dtype_config,
|
| 87 |
+
x86_default_dynamic_float16_dtype_config,
|
| 88 |
+
]
|
| 89 |
+
binary_op_dtype_configs = [x86_weighted_op_int8_dtype_config]
|
| 90 |
+
default_op_dtype_configs = [x86_default_op_quint8_dtype_config]
|
| 91 |
+
fixed_qparams_op_dtype_configs = [x86_weighted_op_int8_dtype_config]
|
| 92 |
+
share_qparams_op_dtype_configs = [x86_default_op_quint8_dtype_config]
|
| 93 |
+
tensor_info_op_dtype_configs = [x86_default_op_quint8_dtype_config]
|
| 94 |
+
rnn_op_dtype_configs = [
|
| 95 |
+
x86_default_dynamic_int8_dtype_config,
|
| 96 |
+
x86_default_dynamic_float16_dtype_config,
|
| 97 |
+
]
|
| 98 |
+
embedding_op_dtype_configs = [
|
| 99 |
+
x86_weight_only_quint8_dtype_config,
|
| 100 |
+
x86_weight_only_quint4x2_dtype_config,
|
| 101 |
+
]
|
| 102 |
+
return BackendConfig("x86") \
|
| 103 |
+
.set_backend_pattern_configs(_get_conv_configs(conv_dtype_configs)) \
|
| 104 |
+
.set_backend_pattern_configs(_get_linear_configs(linear_dtype_configs)) \
|
| 105 |
+
.set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtype_configs)) \
|
| 106 |
+
.set_backend_pattern_config(_get_cat_config(default_op_dtype_configs)) \
|
| 107 |
+
.set_backend_pattern_configs(_get_default_op_configs(default_op_dtype_configs)) \
|
| 108 |
+
.set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_op_dtype_configs)) \
|
| 109 |
+
.set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_op_dtype_configs)) \
|
| 110 |
+
.set_backend_pattern_configs(_get_tensor_info_op_configs(tensor_info_op_dtype_configs)) \
|
| 111 |
+
.set_backend_pattern_configs(_get_bn_configs(default_op_dtype_configs)) \
|
| 112 |
+
.set_backend_pattern_configs(_get_rnn_op_configs(rnn_op_dtype_configs)) \
|
| 113 |
+
.set_backend_pattern_configs(_get_embedding_op_configs(embedding_op_dtype_configs))
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fake_quantize.py
ADDED
|
@@ -0,0 +1,546 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Implements modules used to perform fake quantization."""
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
from torch.nn import Module
|
| 5 |
+
from torch.ao.quantization.observer import (
|
| 6 |
+
MovingAverageMinMaxObserver,
|
| 7 |
+
HistogramObserver,
|
| 8 |
+
MovingAveragePerChannelMinMaxObserver,
|
| 9 |
+
FixedQParamsObserver,
|
| 10 |
+
default_fixed_qparams_range_0to1_observer,
|
| 11 |
+
default_fixed_qparams_range_neg1to1_observer,
|
| 12 |
+
_with_args,
|
| 13 |
+
)
|
| 14 |
+
import re
|
| 15 |
+
from abc import ABC, abstractmethod
|
| 16 |
+
from typing import Any, Tuple
|
| 17 |
+
|
| 18 |
+
__all__ = [
|
| 19 |
+
"FakeQuantizeBase",
|
| 20 |
+
"FakeQuantize",
|
| 21 |
+
"FixedQParamsFakeQuantize",
|
| 22 |
+
"FusedMovingAvgObsFakeQuantize",
|
| 23 |
+
"disable_fake_quant",
|
| 24 |
+
"disable_observer",
|
| 25 |
+
"enable_fake_quant",
|
| 26 |
+
"enable_observer",
|
| 27 |
+
"default_fake_quant",
|
| 28 |
+
"default_weight_fake_quant",
|
| 29 |
+
"default_dynamic_fake_quant",
|
| 30 |
+
"default_fixed_qparams_range_neg1to1_fake_quant",
|
| 31 |
+
"default_fixed_qparams_range_0to1_fake_quant",
|
| 32 |
+
"default_symmetric_fixed_qparams_fake_quant",
|
| 33 |
+
"default_affine_fixed_qparams_fake_quant",
|
| 34 |
+
"default_per_channel_weight_fake_quant",
|
| 35 |
+
"default_embedding_fake_quant",
|
| 36 |
+
"default_embedding_fake_quant_4bit",
|
| 37 |
+
"default_histogram_fake_quant",
|
| 38 |
+
"default_fused_act_fake_quant",
|
| 39 |
+
"default_fused_wt_fake_quant",
|
| 40 |
+
"default_fused_per_channel_wt_fake_quant",
|
| 41 |
+
"fused_wt_fake_quant_range_neg_127_to_127",
|
| 42 |
+
"fused_per_channel_wt_fake_quant_range_neg_127_to_127",
|
| 43 |
+
]
|
| 44 |
+
|
| 45 |
+
def _is_per_channel(qscheme: 'torch.qscheme') -> bool:
|
| 46 |
+
return qscheme in [torch.per_channel_symmetric, torch.per_channel_affine, torch.per_channel_affine_float_qparams]
|
| 47 |
+
|
| 48 |
+
def _is_per_tensor(qscheme: 'torch.qscheme') -> bool:
|
| 49 |
+
return qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine]
|
| 50 |
+
|
| 51 |
+
def _is_symmetric_quant(qscheme: 'torch.qscheme') -> bool:
|
| 52 |
+
return qscheme in [torch.per_tensor_symmetric, torch.per_channel_symmetric]
|
| 53 |
+
|
| 54 |
+
def _is_float_qparams(qscheme: 'torch.qscheme') -> bool:
|
| 55 |
+
return qscheme in [torch.per_channel_affine_float_qparams, ]
|
| 56 |
+
|
| 57 |
+
class FakeQuantizeBase(ABC, Module):
|
| 58 |
+
r"""Base fake quantize module.
|
| 59 |
+
|
| 60 |
+
Base fake quantize module
|
| 61 |
+
Any fake quantize implementation should derive from this class.
|
| 62 |
+
|
| 63 |
+
Concrete fake quantize module should follow the same API. In forward, they will update
|
| 64 |
+
the statistics of the observed Tensor and fake quantize the input. They should also provide a
|
| 65 |
+
`calculate_qparams` function that computes the quantization parameters given
|
| 66 |
+
the collected statistics.
|
| 67 |
+
|
| 68 |
+
"""
|
| 69 |
+
|
| 70 |
+
fake_quant_enabled: torch.Tensor
|
| 71 |
+
observer_enabled: torch.Tensor
|
| 72 |
+
|
| 73 |
+
def __init__(self):
|
| 74 |
+
"""Set fake_quant_enabled and observer_enabled."""
|
| 75 |
+
super().__init__()
|
| 76 |
+
# fake_quant_enabled and observer_enabled are buffers to support their
|
| 77 |
+
# replication in DDP. Data type is uint8 because NCCL does not support
|
| 78 |
+
# bool tensors.
|
| 79 |
+
self.register_buffer('fake_quant_enabled', torch.tensor([1], dtype=torch.uint8))
|
| 80 |
+
self.register_buffer('observer_enabled', torch.tensor([1], dtype=torch.uint8))
|
| 81 |
+
|
| 82 |
+
@abstractmethod
|
| 83 |
+
def forward(self, x):
|
| 84 |
+
pass
|
| 85 |
+
|
| 86 |
+
@abstractmethod
|
| 87 |
+
def calculate_qparams(self, **kwargs):
|
| 88 |
+
pass
|
| 89 |
+
|
| 90 |
+
@torch.jit.export
|
| 91 |
+
def enable_fake_quant(self, enabled: bool = True) -> None:
|
| 92 |
+
self.fake_quant_enabled[0] = 1 if enabled else 0
|
| 93 |
+
|
| 94 |
+
@torch.jit.export
|
| 95 |
+
def disable_fake_quant(self):
|
| 96 |
+
self.enable_fake_quant(False)
|
| 97 |
+
|
| 98 |
+
@torch.jit.export
|
| 99 |
+
def enable_observer(self, enabled: bool = True) -> None:
|
| 100 |
+
self.observer_enabled[0] = 1 if enabled else 0
|
| 101 |
+
|
| 102 |
+
@torch.jit.export
|
| 103 |
+
def disable_observer(self):
|
| 104 |
+
self.enable_observer(False)
|
| 105 |
+
|
| 106 |
+
@classmethod
|
| 107 |
+
def with_args(cls, **kwargs):
|
| 108 |
+
fake_quant_constructor = _with_args(cls, **kwargs)
|
| 109 |
+
# need to assign the correct module to fake_quantize
|
| 110 |
+
# constructors to satisfy public v private requirements
|
| 111 |
+
fake_quant_constructor.__module__ = "torch.ao.quantization.fake_quantize"
|
| 112 |
+
return fake_quant_constructor
|
| 113 |
+
|
| 114 |
+
class FakeQuantize(FakeQuantizeBase):
|
| 115 |
+
r"""Simulate the quantize and dequantize operations in training time.
|
| 116 |
+
|
| 117 |
+
The output of this module is given by::
|
| 118 |
+
|
| 119 |
+
x_out = (
|
| 120 |
+
clamp(round(x/scale + zero_point), quant_min, quant_max) - zero_point
|
| 121 |
+
) * scale
|
| 122 |
+
|
| 123 |
+
* :attr:`is_dynamic` indicates whether the fake quantie is a placeholder for dynamic quantization
|
| 124 |
+
operators (choose_qparams -> q -> dq) or static quantization operators (q -> dq)
|
| 125 |
+
|
| 126 |
+
* :attr:`scale` defines the scale factor used for quantization.
|
| 127 |
+
|
| 128 |
+
* :attr:`zero_point` specifies the quantized value to which 0 in floating point maps to
|
| 129 |
+
|
| 130 |
+
* :attr:`fake_quant_enabled` controls the application of fake quantization on tensors, note that
|
| 131 |
+
statistics can still be updated.
|
| 132 |
+
|
| 133 |
+
* :attr:`observer_enabled` controls statistics collection on tensors
|
| 134 |
+
|
| 135 |
+
* :attr:`dtype` specifies the quantized dtype that is being emulated with fake-quantization,
|
| 136 |
+
allowable values are torch.qint8 and torch.quint8.
|
| 137 |
+
|
| 138 |
+
Args:
|
| 139 |
+
|
| 140 |
+
observer (module): Module for observing statistics on input tensors and calculating scale
|
| 141 |
+
and zero-point.
|
| 142 |
+
observer_kwargs (optional): Arguments for the observer module
|
| 143 |
+
|
| 144 |
+
Attributes:
|
| 145 |
+
activation_post_process (Module): User provided module that collects statistics on the input tensor and
|
| 146 |
+
provides a method to calculate scale and zero-point.
|
| 147 |
+
|
| 148 |
+
"""
|
| 149 |
+
|
| 150 |
+
scale: torch.Tensor
|
| 151 |
+
zero_point: torch.Tensor
|
| 152 |
+
|
| 153 |
+
def __init__(self, observer=MovingAverageMinMaxObserver, quant_min=None, quant_max=None, is_dynamic=False, **observer_kwargs):
|
| 154 |
+
super().__init__()
|
| 155 |
+
# Populate quant_min/quant_max to observer_kwargs if valid
|
| 156 |
+
if quant_min is not None and quant_max is not None:
|
| 157 |
+
assert quant_min <= quant_max, \
|
| 158 |
+
'quant_min must be less than or equal to quant_max'
|
| 159 |
+
dtype = observer_kwargs.get("dtype", torch.quint8)
|
| 160 |
+
if hasattr(observer, "p"):
|
| 161 |
+
# In case observer is _PartialWrapper, dtype can be stored in
|
| 162 |
+
# observer.p.keywords["dtype"]
|
| 163 |
+
dtype = getattr(getattr(observer, "p", {}), "keywords", {}).get(
|
| 164 |
+
"dtype", dtype
|
| 165 |
+
)
|
| 166 |
+
assert torch.iinfo(dtype).min <= quant_min, 'quant_min out of bound'
|
| 167 |
+
assert quant_max <= torch.iinfo(dtype).max, 'quant_max out of bound'
|
| 168 |
+
observer_kwargs.update({"quant_min": quant_min, "quant_max": quant_max})
|
| 169 |
+
observer_kwargs["is_dynamic"] = is_dynamic
|
| 170 |
+
self.activation_post_process = observer(**observer_kwargs)
|
| 171 |
+
# TODO: keeping self.quant_min/max for BC; remove after a couple releases
|
| 172 |
+
# Users should use self.activation_post_process.quant_min
|
| 173 |
+
self.quant_min = self.activation_post_process.quant_min
|
| 174 |
+
self.quant_max = self.activation_post_process.quant_max
|
| 175 |
+
self.is_dynamic = self.activation_post_process.is_dynamic
|
| 176 |
+
if _is_float_qparams(self.activation_post_process.qscheme):
|
| 177 |
+
zero_point_dtype = torch.float
|
| 178 |
+
else:
|
| 179 |
+
zero_point_dtype = torch.int
|
| 180 |
+
self.register_buffer('scale', torch.tensor([1.0], dtype=torch.float))
|
| 181 |
+
self.register_buffer('zero_point', torch.tensor([0], dtype=zero_point_dtype))
|
| 182 |
+
self.dtype = self.activation_post_process.dtype
|
| 183 |
+
self.qscheme = self.activation_post_process.qscheme
|
| 184 |
+
self.ch_axis = self.activation_post_process.ch_axis \
|
| 185 |
+
if hasattr(self.activation_post_process, 'ch_axis') else -1
|
| 186 |
+
assert _is_per_channel(self.qscheme) or \
|
| 187 |
+
_is_per_tensor(self.qscheme), \
|
| 188 |
+
'Only per channel and per tensor quantization are supported in fake quantize' + \
|
| 189 |
+
' got qscheme: ' + str(self.qscheme)
|
| 190 |
+
self.is_per_channel = _is_per_channel(self.qscheme)
|
| 191 |
+
|
| 192 |
+
@torch.jit.export
def calculate_qparams(self):
    """Delegate qparam computation to the attached observer.

    Returns the (scale, zero_point) pair computed by
    ``self.activation_post_process`` from the statistics it has
    collected so far.
    """
    return self.activation_post_process.calculate_qparams()
|
| 195 |
+
|
| 196 |
+
def forward(self, X):
    """Observe ``X`` (if observation is enabled) and fake-quantize it (if enabled).

    Two independent toggles control behavior:
    - ``observer_enabled[0] == 1``: feed a detached copy of ``X`` to the
      observer and refresh the ``scale`` / ``zero_point`` buffers from it.
    - ``fake_quant_enabled[0] == 1``: apply per-channel or per-tensor
      fake quantization to ``X`` using the current buffers.
    Returns ``X`` unchanged when fake quant is disabled.
    """
    if self.observer_enabled[0] == 1:
        # detach so observation never contributes gradients
        self.activation_post_process(X.detach())
        _scale, _zero_point = self.calculate_qparams()
        # observer may live on a different device than the registered buffers
        _scale, _zero_point = _scale.to(self.scale.device), _zero_point.to(self.zero_point.device)
        if self.scale.shape != _scale.shape:
            # buffers start out with shape [1]; per-channel qparams need a resize
            # before copy_ can succeed
            self.scale.resize_(_scale.shape)
            self.zero_point.resize_(_zero_point.shape)
        self.scale.copy_(_scale)
        self.zero_point.copy_(_zero_point)

    if self.fake_quant_enabled[0] == 1:
        if self.is_per_channel:
            X = torch.fake_quantize_per_channel_affine(
                X, self.scale, self.zero_point,
                self.ch_axis, self.activation_post_process.quant_min, self.activation_post_process.quant_max)
        else:
            X = torch.fake_quantize_per_tensor_affine(
                X, self.scale, self.zero_point,
                self.activation_post_process.quant_min, self.activation_post_process.quant_max)
    return X
|
| 217 |
+
|
| 218 |
+
@torch.jit.export
def extra_repr(self):
    """Summarize the module's quantization state for ``repr`` output."""
    template = (
        'fake_quant_enabled={}, observer_enabled={}, '
        'quant_min={}, quant_max={}, dtype={}, qscheme={}, ch_axis={}, '
        'scale={}, zero_point={}'
    )
    post_process = self.activation_post_process
    return template.format(
        self.fake_quant_enabled, self.observer_enabled,
        post_process.quant_min, post_process.quant_max,
        self.dtype, self.qscheme, self.ch_axis,
        self.scale, self.zero_point,
    )
|
| 226 |
+
|
| 227 |
+
def _save_to_state_dict(self, destination, prefix, keep_vars):
    """Serialize scale and zero_point manually.

    Scalar values cannot currently be registered as buffers, so they are
    written into the state dict here in addition to the parent's output.
    """
    super()._save_to_state_dict(destination, prefix, keep_vars)
    for name, value in (('scale', self.scale), ('zero_point', self.zero_point)):
        destination[prefix + name] = value
|
| 233 |
+
|
| 234 |
+
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
                          missing_keys, unexpected_keys, error_msgs):
    """Load state while tolerating size changes in the scale/zero_point buffers.

    These buffers start out with numel 0/1 and only take their final shape
    after the first forward pass, so the default loading code would raise a
    size-mismatch error. We resize them to match the incoming tensors first,
    then let the parent implementation perform the actual copy.
    """
    # Removing this function throws an error that the size of the loaded tensor does not match the original size
    # i.e., These buffers start out with numel 0 and become numel 1 once they have their first forward pass.
    local_state = ['scale', 'zero_point']
    for name in local_state:
        key = prefix + name
        if key in state_dict:
            val = state_dict[key]
            # Custom handling to allow loading scale and zero_point
            # of size N into uninitialized buffers of size 0. The
            # buffers are resized here, and the values are copied in
            # the default state_dict loading code of the parent.
            if name == 'scale':
                self.scale.resize_(val.shape)
            else:
                assert name == 'zero_point'
                self.zero_point.resize_(val.shape)
            # For torchscript module we need to update the attributes here since we do not
            # call the `_load_from_state_dict` function defined module.py
            if torch.jit.is_scripting():
                if name == 'scale':
                    self.scale.copy_(val)
                else:
                    assert name == 'zero_point'
                    self.zero_point.copy_(val)
        elif strict:
            missing_keys.append(key)
    super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                  missing_keys, unexpected_keys, error_msgs)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
class FixedQParamsFakeQuantize(FakeQuantize):
    """Simulate quantize and dequantize in training time.

    Simulate quantize and dequantize with fixed quantization
    parameters in training time. Only per tensor quantization
    is supported.
    """

    # TODO: rename observer to observer_ctr
    def __init__(self, observer):
        """Build a fake-quant whose scale/zero_point come from a FixedQParamsObserver.

        ``observer`` is a constructor/partial that must produce exactly a
        FixedQParamsObserver (checked with ``type(...) ==``, so subclasses
        are rejected as well).
        """
        super().__init__(observer=observer)
        assert type(self.activation_post_process) == FixedQParamsObserver, \
            f"{self.__class__.__name__}'s observer must be a {FixedQParamsObserver.__name__}"
        self._observer_ctr = observer
        # qparams are fixed: mirror them from the observer once, up front
        self.scale = self.activation_post_process.scale
        self.zero_point = self.activation_post_process.zero_point
        assert _is_per_tensor(self.qscheme), 'Only per tensor quantization is supported' + \
            ' FixedQParamsFakeQuantize module, got qscheme:' + str(self.qscheme)

    @torch.jit.export
    def calculate_qparams(self):
        # qparams never change, so return the cached values directly
        return self.scale, self.zero_point

    @torch.jit.export
    def extra_repr(self):
        """Define a string representation of the object's attributes."""
        return 'fake_quant_enabled={}, observer_enabled={}, scale={}, zero_point={}, ' \
               'dtype={}, quant_min={}, quant_max={}, qscheme={}'.format(
                   self.fake_quant_enabled, self.observer_enabled,
                   self.scale, self.zero_point, self.dtype,
                   self.activation_post_process.quant_min, self.activation_post_process.quant_max, self.qscheme)
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
class FusedMovingAvgObsFakeQuantize(FakeQuantize):
    r"""Define a fused module to observe the tensor.

    Fused module that is used to observe the input tensor (compute min/max), compute
    scale/zero_point and fake_quantize the tensor.
    This module uses calculation similar MovingAverageMinMaxObserver for the inputs,
    to compute the min/max values in order to compute the scale/zero_point.
    The qscheme input in the observer is used to differentiate between symmetric/affine
    quantization scheme.

    The output of this module is given by
    x_out = (clamp(round(x/scale + zero_point), quant_min, quant_max)-zero_point)*scale

    Similar to :class:`~torch.ao.quantization.FakeQuantize`, and accepts the same attributes as the
    base class.

    """

    def __init__(
        self,
        observer: Any = MovingAverageMinMaxObserver,
        quant_min: int = 0,
        quant_max: int = 255,
        **observer_kwargs: Any
    ) -> None:
        super().__init__(observer, quant_min, quant_max, **observer_kwargs)
        # only moving-average observers are supported by the fused kernel
        assert isinstance(self.activation_post_process, (MovingAverageMinMaxObserver, MovingAveragePerChannelMinMaxObserver)), \
            "Fused observer+fake_quant module only works with MovingAverageMinMaxObserver"
        # re-register toggles as long tensors; the fused ATen op reads them directly
        self.register_buffer("fake_quant_enabled", torch.tensor([1], dtype=torch.long))
        self.register_buffer("observer_enabled", torch.tensor([1], dtype=torch.long))
        self.is_symmetric_quant = _is_symmetric_quant(self.activation_post_process.qscheme)

    @torch.jit.export
    def calculate_qparams(self) -> Tuple[torch.Tensor, torch.Tensor]:
        """Return (scale, zero_point) computed by the underlying observer."""
        return self.activation_post_process.calculate_qparams()

    @torch.jit.export
    def extra_repr(self) -> str:
        """Summarize the fused module's quantization state for ``repr`` output."""
        return (
            "fake_quant_enabled={}, observer_enabled={}, scale={}, zero_point={}, "
            "dtype={}, quant_min={}, quant_max={}, qscheme={}, reduce_range={}".format(
                self.fake_quant_enabled,
                self.observer_enabled,
                self.scale,
                self.zero_point,
                self.dtype,
                self.activation_post_process.quant_min,
                self.activation_post_process.quant_max,
                self.qscheme,
                self.activation_post_process.reduce_range,
            )
        )

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Observe and fake-quantize ``X`` in a single fused ATen kernel call."""
        return torch.fused_moving_avg_obs_fake_quant(
            X,
            self.observer_enabled,
            self.fake_quant_enabled,
            self.activation_post_process.min_val,
            self.activation_post_process.max_val,
            self.scale,
            self.zero_point,
            self.activation_post_process.averaging_constant,
            self.activation_post_process.quant_min,
            self.activation_post_process.quant_max,
            self.ch_axis,
            self.is_per_channel,
            self.is_symmetric_quant,
        )
|
| 368 |
+
|
| 369 |
+
# ---------------------------------------------------------------------------
# Pre-configured fake-quantize constructors (partials created via with_args).
# Each is a factory, not an instance; calling it builds a fresh module.
# ---------------------------------------------------------------------------

default_fake_quant = FakeQuantize.with_args(observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255,
                                            dtype=torch.quint8, qscheme=torch.per_tensor_affine, reduce_range=True)
"""
Default fake_quant for activations.
"""

default_weight_fake_quant = FakeQuantize.with_args(observer=MovingAverageMinMaxObserver, quant_min=-128, quant_max=127,
                                                   dtype=torch.qint8, qscheme=torch.per_tensor_symmetric, reduce_range=False)
"""
Default fake_quant for weights.
Observer is memoryless since averaging_constant is 1.
"""

default_dynamic_fake_quant = FakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver, quant_min=0, quant_max=255, is_dynamic=True,
    dtype=torch.quint8, averaging_constant=1)
"""
Default dynamic fake_quant for activations.
"""

default_fixed_qparams_range_neg1to1_fake_quant = (
    FixedQParamsFakeQuantize.with_args(observer=default_fixed_qparams_range_neg1to1_observer)
)
default_fixed_qparams_range_0to1_fake_quant = (
    FixedQParamsFakeQuantize.with_args(observer=default_fixed_qparams_range_0to1_observer)
)
# TODO: the following 2 variables are kept for backwards compatibility; remove after a few releases
default_symmetric_fixed_qparams_fake_quant = default_fixed_qparams_range_neg1to1_fake_quant
default_affine_fixed_qparams_fake_quant = default_fixed_qparams_range_0to1_fake_quant

default_per_channel_weight_fake_quant = FakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver,
                                                               quant_min=-128,
                                                               quant_max=127,
                                                               dtype=torch.qint8,
                                                               qscheme=torch.per_channel_symmetric,
                                                               reduce_range=False,
                                                               ch_axis=0)
"""
Default fake_quant for per-channel weights.
Observer is memoryless since averaging_constant is 1.
"""
default_embedding_fake_quant = FakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver,
                                                      qscheme=torch.per_channel_affine_float_qparams,
                                                      dtype=torch.quint8,
                                                      quant_min=0,
                                                      quant_max=255,
                                                      ch_axis=0,
                                                      averaging_constant=1)
"""
Default fake_quant for embeddings.
Observer is memoryless since averaging_constant is 1.
"""

# 4-bit variant of the embedding fake-quant above (packed quint4x2 dtype)
default_embedding_fake_quant_4bit = FakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver,
                                                           qscheme=torch.per_channel_affine_float_qparams,
                                                           ch_axis=0,
                                                           dtype=torch.quint4x2,
                                                           averaging_constant=1)

default_histogram_fake_quant = FakeQuantize.with_args(observer=HistogramObserver,
                                                      quant_min=0,
                                                      quant_max=255,
                                                      dtype=torch.quint8,
                                                      qscheme=torch.per_tensor_affine,
                                                      reduce_range=True)
"""
Fake_quant for activations using a histogram..
"""


default_fused_act_fake_quant = FusedMovingAvgObsFakeQuantize.with_args(observer=MovingAverageMinMaxObserver,
                                                                       quant_min=0,
                                                                       quant_max=255,
                                                                       dtype=torch.quint8,)

"""
Fused version of `default_fake_quant`, with improved performance.
"""


default_fused_wt_fake_quant = FusedMovingAvgObsFakeQuantize.with_args(observer=MovingAverageMinMaxObserver,
                                                                      quant_min=-128,
                                                                      quant_max=127,
                                                                      dtype=torch.qint8,
                                                                      qscheme=torch.per_tensor_symmetric)
"""
Fused version of `default_weight_fake_quant`, with improved performance.
"""

default_fused_per_channel_wt_fake_quant = FusedMovingAvgObsFakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver,
                                                                                  quant_min=-128,
                                                                                  quant_max=127,
                                                                                  dtype=torch.qint8,
                                                                                  qscheme=torch.per_channel_symmetric)
"""
Fused version of `default_per_channel_weight_fake_quant`, with improved performance.
"""

fused_wt_fake_quant_range_neg_127_to_127 = FusedMovingAvgObsFakeQuantize.with_args(observer=MovingAverageMinMaxObserver,
                                                                                   quant_min=-127,
                                                                                   quant_max=127,
                                                                                   dtype=torch.qint8,
                                                                                   qscheme=torch.per_tensor_symmetric,
                                                                                   eps=2 ** -12)
"""
Fused version of `default_weight_fake_quant`, with the 8-bit values restricted to [-127, +127], excluding -128.
"""

fused_per_channel_wt_fake_quant_range_neg_127_to_127 = \
    FusedMovingAvgObsFakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver,
                                            quant_min=-127,
                                            quant_max=127,
                                            dtype=torch.qint8,
                                            qscheme=torch.per_channel_symmetric,
                                            eps=2 ** -12)

"""
Fused version of `default_per_channel_weight_fake_quant`, with the 8-bit values restricted to [-127, +127], excluding -128.
"""
| 489 |
+
|
| 490 |
+
def _is_fake_quant_script_module(mod):
|
| 491 |
+
"""Return true if given mod is an instance of FakeQuantize script module."""
|
| 492 |
+
if isinstance(mod, torch.jit.RecursiveScriptModule):
|
| 493 |
+
# qualified name looks like '__torch__.torch.ao.quantization.fake_quantize.___torch_mangle_2.FakeQuantize'
|
| 494 |
+
suffix = mod._c.qualified_name.split('.', 1)[1]
|
| 495 |
+
name = re.sub(r'\.___torch_mangle_\d+', '', suffix)
|
| 496 |
+
return name == 'torch.ao.quantization.fake_quantize.FakeQuantize' or \
|
| 497 |
+
name == 'torch.ao.quantization.fake_quantize.FusedMovingAvgObsFakeQuantize'
|
| 498 |
+
return False
|
| 499 |
+
|
| 500 |
+
def disable_fake_quant(mod):
    """Disable fake quantization for the module.

    Disable fake quantization for this module, if applicable. Example usage::

        # model is any PyTorch model
        model.apply(torch.ao.quantization.disable_fake_quant)

    """
    applicable = isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod)
    if applicable:
        mod.disable_fake_quant()
| 512 |
+
def enable_fake_quant(mod):
    """Enable fake quantization for the module.

    Enable fake quantization for this module, if applicable. Example usage::

        # model is any PyTorch model
        model.apply(torch.ao.quantization.enable_fake_quant)

    """
    applicable = isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod)
    if applicable:
        mod.enable_fake_quant()
| 524 |
+
def disable_observer(mod):
    """Disable observation for this module.

    Disable observation for this module, if applicable. Example usage::

        # model is any PyTorch model
        model.apply(torch.ao.quantization.disable_observer)

    """
    applicable = isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod)
    if applicable:
        mod.disable_observer()
| 536 |
+
def enable_observer(mod):
    """Enable observation for this module.

    Enable observation for this module, if applicable. Example usage::

        # model is any PyTorch model
        model.apply(torch.ao.quantization.enable_observer)

    """
    applicable = isinstance(mod, FakeQuantizeBase) or _is_fake_quant_script_module(mod)
    if applicable:
        mod.enable_observer()
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/_model_report/model_report.py
ADDED
|
@@ -0,0 +1,606 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Dict, Set, Tuple, Callable
|
| 2 |
+
from collections import OrderedDict
|
| 3 |
+
import torch
|
| 4 |
+
from torch.ao.quantization.fx._model_report.detector import (
|
| 5 |
+
DetectorBase,
|
| 6 |
+
DETECTOR_OBS_ARGS_KEY,
|
| 7 |
+
DETECTOR_OBS_TO_INSERT_KEY,
|
| 8 |
+
DETECTOR_IS_POST_OBS_KEY,
|
| 9 |
+
DETECTOR_TARGET_NODE_KEY,
|
| 10 |
+
DetectorQConfigInfo
|
| 11 |
+
)
|
| 12 |
+
from torch.ao.quantization.fx._model_report.model_report_visualizer import ModelReportVisualizer
|
| 13 |
+
from torch.ao.quantization.fx.graph_module import GraphModule
|
| 14 |
+
from torch.ao.quantization.observer import ObserverBase
|
| 15 |
+
from torch.ao.quantization.qconfig_mapping import QConfigMapping, QConfig
|
| 16 |
+
from torch.ao.quantization.fx._equalize import EqualizationQConfig
|
| 17 |
+
|
| 18 |
+
class ModelReport:
|
| 19 |
+
r"""
|
| 20 |
+
The ModelReport class aims to provide users an easy way to diagnose issues that they run into
|
| 21 |
+
with their models. The class works with all traceable GraphModules to help diagnose issues,
|
| 22 |
+
though the requirements on the type of model more-so depends on the specific report the user
|
| 23 |
+
is trying to generate. With respect to the reports, the ModelReport class is initialized with
|
| 24 |
+
a set of Detector classes, each of which generate reports on quantization configuration
|
| 25 |
+
issues a use might have.
|
| 26 |
+
|
| 27 |
+
Currently supports generating reports on:
|
| 28 |
+
- Suggestions for per-channel vs. per-tensor quantization (nn.Module)
|
| 29 |
+
- Suggestions for dynamic vs static quantization for linear layers (Graph Modules)
|
| 30 |
+
- Suggestions for input-weight equalization for linear and conv layers (Graph Modules)
|
| 31 |
+
- Suggestions for outlier detection for all layers (Graph Modules)
|
| 32 |
+
|
| 33 |
+
The ModelReport class has the primary functionality of inserting observers (primarily the ModelReportObserver)
|
| 34 |
+
where needed for each detector to gather the information it needs, and then after callibration, the ModelReport
|
| 35 |
+
class compiles the report generated by each Detector class into a single report to return to the user. It also
|
| 36 |
+
has the capability to remove all the observers it inserted as well.
|
| 37 |
+
|
| 38 |
+
* :attr:`_model` The model we wish to generate the report for. Must be a traceable GraphModule
|
| 39 |
+
|
| 40 |
+
* :attr:`_desired_report_detectors` The set of Detectors representing desired reports from the ModelReport class
|
| 41 |
+
Make sure that these are all unique types of detectors [do not have more than 1 of the same class]
|
| 42 |
+
|
| 43 |
+
* :attr:`_desired_detector_names` The set of detector names of the _desired_report_detectors.
|
| 44 |
+
This set is generated by calling the get_detector_name() of each detector
|
| 45 |
+
|
| 46 |
+
* :attr:`_detector_name_to_observer_fqns` The mapping from each detector to fqns of observers of interest
|
| 47 |
+
The purpose of this is to keep track of what observers were inserted for each detector, so that they
|
| 48 |
+
can be removed at the end if desired
|
| 49 |
+
|
| 50 |
+
* :attr:`_prepared_flag` A boolean flag that keeps track of whether we have prepared the model or not
|
| 51 |
+
This is to ensure we only insert observers once with the ModelReport instance
|
| 52 |
+
|
| 53 |
+
* :attr:`_removed_observers` A boolean to track if we have removed observers already
|
| 54 |
+
The purpose is to ensure we don't attempt to remove observers twice with the same ModelReport
|
| 55 |
+
instance. This also allows the functionality where we can generate the report multiple times
|
| 56 |
+
as long as we haven't removed the observers yet.
|
| 57 |
+
|
| 58 |
+
Note:
|
| 59 |
+
This class was initially designed to work with the Fx Graph Mode workflow in mind. However,
|
| 60 |
+
full functionality is available as long as there is a traceable GraphModule that is being used.
|
| 61 |
+
One method to get a traceable GraphModule without going through the Fx workflow is to use
|
| 62 |
+
the QuantizationTracer class.
|
| 63 |
+
|
| 64 |
+
General Flow for Fx workflow:
|
| 65 |
+
1.) Initialize ModelReport object with reports of interest by passing in initialized detector objects and model
|
| 66 |
+
2.) Prepare your model with prepare_fx
|
| 67 |
+
3.) Call model_report.prepare_detailed_calibration to add relevant observers
|
| 68 |
+
4.) Callibrate your model with data
|
| 69 |
+
5.) Call model_report.generate_report on your model to generate report and optionally remove added observers
|
| 70 |
+
Optional
|
| 71 |
+
6.) Call model_report.generate_visualizer to get a ModelReportVisualizer instance
|
| 72 |
+
7.) To help in parsing report information and debugging, view report info as a:
|
| 73 |
+
- Table
|
| 74 |
+
- Histogram
|
| 75 |
+
- Line plot
|
| 76 |
+
8.) Call model_report.generate_qconfigs to generate the qconfigs based on the report suggestions
|
| 77 |
+
|
| 78 |
+
Example (with QuantizationTracer):
|
| 79 |
+
>>> # xdoctest: +SKIP
|
| 80 |
+
>>> # get the necessary qconfig
|
| 81 |
+
>>> config = PrepareCustomConfig()
|
| 82 |
+
>>> skipped_module_names, skipped_module_classes = get_skipped_module_name_and_classes(config, False)
|
| 83 |
+
|
| 84 |
+
>>> # initialize our model and get GraphModule
|
| 85 |
+
>>> model = SomeModel()
|
| 86 |
+
>>> tracer = QuantizationTracer(skipped_module_names, skipped_module_classes)
|
| 87 |
+
>>> graph_module = GraphModule(model, tracer.trace(model))
|
| 88 |
+
|
| 89 |
+
>>> # get our set of detectors and ModelReport instance
|
| 90 |
+
>>> detector_set = set([DynamicStaticDetector(tolerance=0.5), InputWeightEqualizationDetector(ratio_threshold=0.7)])
|
| 91 |
+
>>> tracer_reporter = ModelReport(graph_module, tracer_detector_set)
|
| 92 |
+
|
| 93 |
+
>>> # now we insert the observers and callibrate the model
|
| 94 |
+
>>> tracer_model_with_observers = tracer_reporter.prepare_detailed_calibration()
|
| 95 |
+
>>> for i in range(num_callibration_batches):
|
| 96 |
+
>>> example_input = get_callibration_input()
|
| 97 |
+
>>> tracer_model_with_observers(example_input)
|
| 98 |
+
|
| 99 |
+
>>> # finally we generate the reports and optionally remove the observers we inserted
|
| 100 |
+
>>> reports = tracer_reporter.generate_model_report(remove_inserted_observers=True)
|
| 101 |
+
|
| 102 |
+
>>> # Optional: we can generate the qconfig mapping based on the suggestions
|
| 103 |
+
>>> qconfigs = model_report.generate_qconfig_mapping()
|
| 104 |
+
|
| 105 |
+
>>> # Optional: we can generate the equalization mapping based on the suggestions
|
| 106 |
+
>>> qconfigs = model_report.generate_equalization_mapping()
|
| 107 |
+
|
| 108 |
+
>>> # Optional: we get a ModelReportVisualizer instance to do any visualizations desired
|
| 109 |
+
>>> model_report_visualizer = tracer_reporter.generate_visualizer()
|
| 110 |
+
|
| 111 |
+
"""
|
| 112 |
+
|
| 113 |
+
def __init__(self, model: GraphModule, desired_report_detectors: Set[DetectorBase]):
    """Initialize the report generator for ``model``.

    Args:
        model: traceable GraphModule the reports will be generated for
        desired_report_detectors: non-empty set of detector instances,
            one per desired report (detector types should be unique)

    Raises:
        ValueError: if ``desired_report_detectors`` is empty
    """
    if len(desired_report_detectors) == 0:
        raise ValueError("Should include at least 1 desired report")

    # keep track of the model we wish to generate report for
    self._model: GraphModule = model

    # keep the reports private so they can't be modified
    self._desired_report_detectors = desired_report_detectors
    self._desired_detector_names = {detector.get_detector_name() for detector in desired_report_detectors}

    # keep a mapping of desired reports to observers of interest
    # this is to get the readings, and to remove them, can create a large set
    # this set can then be used to traverse the graph and remove added observers
    self._detector_name_to_observer_fqns: Dict[str, Set[str]] = {}

    # initialize each report to have empty set of observers of interest
    for desired_report in self._desired_detector_names:
        self._detector_name_to_observer_fqns[desired_report] = set()

    # flags to ensure that we can only prepare and remove observers once
    self._prepared_flag = False
    self._removed_observers = False

    # store the reports that we generated for visualization purposes
    # initially empty since no reports generated
    self._generated_reports: Dict[str, Dict] = {}
|
| 141 |
+
|
| 142 |
+
def get_desired_reports_names(self) -> Set[str]:
    """ Returns a copy of the desired reports for viewing """
    # hand back a fresh set so callers cannot mutate internal state
    return set(self._desired_detector_names)
|
| 145 |
+
|
| 146 |
+
def get_observers_of_interest(self) -> Dict[str, Set[str]]:
    """ Returns a copy of the observers of interest for viewing """
    # shallow copy: mapping is fresh, the contained sets are shared
    return dict(self._detector_name_to_observer_fqns)
|
| 149 |
+
|
| 150 |
+
def prepare_detailed_calibration(self) -> GraphModule:
    r"""
    Takes in a graph model and inserts the following observers:
    - ModelReportObserver

    Each observer is inserted based on the desired_reports into the relevant locations

    Right now, each report in self._desired_detector_names has independent insertions
    However, if a module already has a Observer of the same type, the insertion will not occur
    This is because all of the same type of Observer collect same information, so redundant

    Returns the same GraphModule with the observers inserted

    Raises:
        ValueError: if this instance has already prepared the model once
    """

    # if already prepared once, cannot prepare again
    if self._prepared_flag:
        raise ValueError("Already ran preparing detailed callibration. Run the report generation next after callibration.")

    # loop through each detector, find where placements should be, and keep track
    insert_observers_fqns: Dict[str, Any] = {}

    for detector in self._desired_report_detectors:
        # determine observer points for each detector
        obs_fqn_to_info = detector.determine_observer_insert_points(self._model)
        # map each insert point to the observer to use
        # NOTE: later detectors overwrite earlier entries for the same fqn
        insert_observers_fqns.update(obs_fqn_to_info)
        # update the set of observers this report cares about
        self._detector_name_to_observer_fqns[detector.get_detector_name()] = set(obs_fqn_to_info.keys())

    # now insert all the observers at their desired locations
    for observer_fqn in insert_observers_fqns:
        target_node = insert_observers_fqns[observer_fqn][DETECTOR_TARGET_NODE_KEY]
        insert_obs = insert_observers_fqns[observer_fqn][DETECTOR_OBS_TO_INSERT_KEY]
        insert_post = insert_observers_fqns[observer_fqn][DETECTOR_IS_POST_OBS_KEY]
        observer_args = insert_observers_fqns[observer_fqn][DETECTOR_OBS_ARGS_KEY]
        self._insert_observer_around_module(
            observer_fqn, target_node, insert_obs, observer_args, insert_post
        )

    self._prepared_flag = True

    return self._model
|
| 192 |
+
|
| 193 |
+
def _insert_observer_around_module(
|
| 194 |
+
self,
|
| 195 |
+
obs_fqn: str,
|
| 196 |
+
target_node: torch.fx.node.Node,
|
| 197 |
+
obs_to_insert: ObserverBase,
|
| 198 |
+
observer_args: Tuple,
|
| 199 |
+
insert_post: bool
|
| 200 |
+
):
|
| 201 |
+
r"""
|
| 202 |
+
Helper function that inserts the observer into both the graph structure and the module of the model
|
| 203 |
+
|
| 204 |
+
Args
|
| 205 |
+
node_fqn (str): The fully qualified name of the observer we want to insert
|
| 206 |
+
target_node (torch.fx.node.Node): The node in model we are inserting observers around
|
| 207 |
+
obs_to_insert (ObserverBase): The observer we are inserting around target_node
|
| 208 |
+
observer_args (Tuple): The arguments we want to pass into the observer
|
| 209 |
+
insert_post (bool): whether this is meant to be a post observer for this node
|
| 210 |
+
"""
|
| 211 |
+
# if we are inserting post, then our target node is the next node
|
| 212 |
+
if insert_post:
|
| 213 |
+
target_node = target_node.next
|
| 214 |
+
|
| 215 |
+
with self._model.graph.inserting_before(target_node):
|
| 216 |
+
self._model.add_submodule(obs_fqn, obs_to_insert)
|
| 217 |
+
self._model.graph.create_node(op="call_module", target=obs_fqn, args=observer_args)
|
| 218 |
+
|
| 219 |
+
# recompile model after inserts are made
|
| 220 |
+
self._model.recompile()
|
| 221 |
+
|
| 222 |
+
def _get_node_from_fqn(self, node_fqn: str) -> torch.fx.node.Node:
|
| 223 |
+
r"""
|
| 224 |
+
Takes in a node fqn and returns the node based on the fqn
|
| 225 |
+
|
| 226 |
+
Args
|
| 227 |
+
node_fqn (str): The fully qualified name of the node we want to find in model
|
| 228 |
+
|
| 229 |
+
Returns the Node object of the given node_fqn otherwise returns None
|
| 230 |
+
"""
|
| 231 |
+
node_to_return = None
|
| 232 |
+
for node in self._model.graph.nodes:
|
| 233 |
+
# if the target matches the fqn, it's the node we are looking for
|
| 234 |
+
if node.target == node_fqn:
|
| 235 |
+
node_to_return = node
|
| 236 |
+
break
|
| 237 |
+
|
| 238 |
+
if node_to_return is None:
|
| 239 |
+
raise ValueError("The node_fqn is was not found within the module.")
|
| 240 |
+
|
| 241 |
+
# assert for MyPy
|
| 242 |
+
assert isinstance(node_to_return, torch.fx.node.Node)
|
| 243 |
+
|
| 244 |
+
return node_to_return
|
| 245 |
+
|
| 246 |
+
def generate_model_report(
|
| 247 |
+
self, remove_inserted_observers: bool
|
| 248 |
+
) -> Dict[str, Tuple[str, Dict]]:
|
| 249 |
+
r"""
|
| 250 |
+
Generates all the requested reports.
|
| 251 |
+
|
| 252 |
+
Note:
|
| 253 |
+
You should have callibrated the model with relevant data before calling this
|
| 254 |
+
|
| 255 |
+
The reports generated are specified by the desired_reports specified in desired_reports
|
| 256 |
+
|
| 257 |
+
Can optionally remove all the observers inserted by the ModelReport instance
|
| 258 |
+
|
| 259 |
+
Args:
|
| 260 |
+
remove_inserted_observers (bool): True to remove the observers inserted by this ModelReport instance
|
| 261 |
+
|
| 262 |
+
Returns a mapping of each desired report name to a tuple with:
|
| 263 |
+
The textual summary of that report information
|
| 264 |
+
A dictionary containing relevant statistics or information for that report
|
| 265 |
+
|
| 266 |
+
Note:
|
| 267 |
+
Throws exception if we try to generate report on model we already removed observers from
|
| 268 |
+
Throws exception if we try to generate report without preparing for callibration
|
| 269 |
+
"""
|
| 270 |
+
# if we haven't prepped model for callibration, then we shouldn't generate report yet
|
| 271 |
+
if not self._prepared_flag:
|
| 272 |
+
raise Exception("Cannot generate report without preparing model for callibration")
|
| 273 |
+
|
| 274 |
+
# if we already removed the observers, we cannot generate report
|
| 275 |
+
if self._removed_observers:
|
| 276 |
+
raise Exception("Cannot generate report on model you already removed observers from")
|
| 277 |
+
|
| 278 |
+
# keep track of all the reports of interest and their outputs
|
| 279 |
+
reports_of_interest = {}
|
| 280 |
+
|
| 281 |
+
for detector in self._desired_report_detectors:
|
| 282 |
+
# generate the individual report for the detector
|
| 283 |
+
report_output = detector.generate_detector_report(self._model)
|
| 284 |
+
reports_of_interest[detector.get_detector_name()] = report_output
|
| 285 |
+
|
| 286 |
+
# if user wishes to remove inserted observers, go ahead and remove
|
| 287 |
+
if remove_inserted_observers:
|
| 288 |
+
self._removed_observers = True
|
| 289 |
+
# get the set of all Observers inserted by this instance of ModelReport
|
| 290 |
+
all_observers_of_interest: Set[str] = set()
|
| 291 |
+
for desired_report in self._detector_name_to_observer_fqns:
|
| 292 |
+
observers_of_interest = self._detector_name_to_observer_fqns[desired_report]
|
| 293 |
+
all_observers_of_interest.update(observers_of_interest)
|
| 294 |
+
|
| 295 |
+
# go through all_observers_of_interest and remove them from the graph and model
|
| 296 |
+
for observer_fqn in all_observers_of_interest:
|
| 297 |
+
# remove the observer from the model
|
| 298 |
+
self._model.delete_submodule(observer_fqn)
|
| 299 |
+
|
| 300 |
+
# remove the observer from the graph structure
|
| 301 |
+
node_obj = self._get_node_from_fqn(observer_fqn)
|
| 302 |
+
|
| 303 |
+
if node_obj:
|
| 304 |
+
self._model.graph.erase_node(node_obj)
|
| 305 |
+
else:
|
| 306 |
+
raise ValueError("Node no longer exists in GraphModule structure")
|
| 307 |
+
|
| 308 |
+
# remember to recompile the model
|
| 309 |
+
self._model.recompile()
|
| 310 |
+
|
| 311 |
+
# save the generated reports for visualization purposes
|
| 312 |
+
saved_reports: Dict[str, Dict] = {
|
| 313 |
+
report_name : report_tuple[1] for report_name, report_tuple in reports_of_interest.items()
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
self._generated_reports = saved_reports
|
| 317 |
+
|
| 318 |
+
# return the reports of interest
|
| 319 |
+
return reports_of_interest
|
| 320 |
+
|
| 321 |
+
def _is_same_info_for_same_key(self, info_dict_a: Dict, info_dict_b: Dict) -> bool:
|
| 322 |
+
r"""
|
| 323 |
+
Takes in two dictionaries and ensures that any common keys between the two have the same
|
| 324 |
+
values.
|
| 325 |
+
|
| 326 |
+
Args:
|
| 327 |
+
info_dict_a (Dict): First dictionary we wish to compare
|
| 328 |
+
info_dict_b (Dict): Second dictionary we wish to compare
|
| 329 |
+
|
| 330 |
+
Returns True if all shared keys have same values, false otherwise
|
| 331 |
+
"""
|
| 332 |
+
# get the set of keys for both
|
| 333 |
+
dict_a_keys: Set = set(info_dict_a.keys())
|
| 334 |
+
dict_b_keys: Set = set(info_dict_b.keys())
|
| 335 |
+
|
| 336 |
+
# get the insersection keys and check if same value for both dicts
|
| 337 |
+
intersecting_keys: Set = dict_a_keys.intersection(dict_b_keys)
|
| 338 |
+
|
| 339 |
+
for key in intersecting_keys:
|
| 340 |
+
dict_a_val = info_dict_a[key]
|
| 341 |
+
dict_b_val = info_dict_b[key]
|
| 342 |
+
|
| 343 |
+
# if it's a tensor we have to handle separately
|
| 344 |
+
if type(dict_a_val) == torch.Tensor:
|
| 345 |
+
# if dict_b_val not tensor, automatically false
|
| 346 |
+
if type(dict_b_val) != torch.Tensor or sum(dict_a_val != dict_b_val) != 0:
|
| 347 |
+
return False
|
| 348 |
+
else:
|
| 349 |
+
# for non-tensor vals
|
| 350 |
+
if dict_a_val != dict_b_val:
|
| 351 |
+
return False
|
| 352 |
+
|
| 353 |
+
# if no non matching shared keys found, return true
|
| 354 |
+
return True
|
| 355 |
+
|
| 356 |
+
def _reformat_reports_for_visualizer(self) -> OrderedDict:
|
| 357 |
+
r"""
|
| 358 |
+
Takes the generated reports and reformats them into the format that is desired by the
|
| 359 |
+
ModelReportVisualizer
|
| 360 |
+
|
| 361 |
+
Returns an OrderedDict mapping module_fqns to their features
|
| 362 |
+
"""
|
| 363 |
+
# we want to reorder and reformat the information so it is ordered in terms of order
|
| 364 |
+
# found in the model
|
| 365 |
+
|
| 366 |
+
# first create new dict with all modules as keys and features under respective module
|
| 367 |
+
module_fqns_to_features: Dict[str, Dict] = {}
|
| 368 |
+
|
| 369 |
+
for report_name in self._generated_reports:
|
| 370 |
+
# get mod -> feature dict and go through
|
| 371 |
+
module_info = self._generated_reports[report_name]
|
| 372 |
+
|
| 373 |
+
for module_fqn in module_info:
|
| 374 |
+
# check if already in our accumulation dict
|
| 375 |
+
if module_fqn in module_fqns_to_features:
|
| 376 |
+
# we merge all the features together
|
| 377 |
+
new_info: Dict = module_info[module_fqn]
|
| 378 |
+
present_info: Dict = module_fqns_to_features[module_fqn]
|
| 379 |
+
|
| 380 |
+
# merge them together into the new unioned dict
|
| 381 |
+
# same features keys -> same info, so okay if override
|
| 382 |
+
|
| 383 |
+
# do safety check to make sure shared keys have same info
|
| 384 |
+
if self._is_same_info_for_same_key(new_info, present_info):
|
| 385 |
+
module_fqns_to_features[module_fqn] = {**new_info, **present_info}
|
| 386 |
+
else:
|
| 387 |
+
error_str = "You have the same key with different values across detectors. "
|
| 388 |
+
error_str += "Someone incorrectly implemented a detector with conflicting keys to existing detectors."
|
| 389 |
+
raise ValueError(error_str)
|
| 390 |
+
else:
|
| 391 |
+
# we just set it
|
| 392 |
+
module_fqns_to_features[module_fqn] = module_info[module_fqn]
|
| 393 |
+
|
| 394 |
+
# our ordered dict so that modules can be ordered in order of how they appear in model
|
| 395 |
+
features_by_module: OrderedDict[str, Dict] = OrderedDict()
|
| 396 |
+
|
| 397 |
+
# we loop through modules in graph in order
|
| 398 |
+
for fqn, module in self._model.named_modules():
|
| 399 |
+
# find that fqn in fqns_to_features
|
| 400 |
+
if fqn in module_fqns_to_features:
|
| 401 |
+
# add it to our ordered dict
|
| 402 |
+
features_by_module[fqn] = module_fqns_to_features[fqn]
|
| 403 |
+
|
| 404 |
+
# return the ordered dict of info we created
|
| 405 |
+
return features_by_module
|
| 406 |
+
|
| 407 |
+
def generate_visualizer(self) -> ModelReportVisualizer:
|
| 408 |
+
r"""
|
| 409 |
+
Generates a ModelReportVisualizer instance using the reports generated
|
| 410 |
+
by the generate_model_report() method.
|
| 411 |
+
|
| 412 |
+
Returns the generated ModelReportVisualizer instance initialized
|
| 413 |
+
|
| 414 |
+
Note:
|
| 415 |
+
Throws exception if attempt to get visualizers without generating report
|
| 416 |
+
"""
|
| 417 |
+
# check if user has generated reports at least once
|
| 418 |
+
if len(self._generated_reports) == 0:
|
| 419 |
+
raise Exception("Unable to generate visualizers without first generating reports")
|
| 420 |
+
|
| 421 |
+
# get the ordered dict mapping modules to their full set of collected features / stats
|
| 422 |
+
module_fqns_to_features: OrderedDict = self._reformat_reports_for_visualizer()
|
| 423 |
+
|
| 424 |
+
# create and return ModelReportVisualizer instance
|
| 425 |
+
visualizer: ModelReportVisualizer = ModelReportVisualizer(module_fqns_to_features)
|
| 426 |
+
|
| 427 |
+
return visualizer
|
| 428 |
+
|
| 429 |
+
def _generate_qconfig_mapping_helper(
|
| 430 |
+
self,
|
| 431 |
+
detector_qconfig_info_combined: Dict[str, DetectorQConfigInfo],
|
| 432 |
+
generation_function: Callable
|
| 433 |
+
) -> QConfigMapping:
|
| 434 |
+
r"""
|
| 435 |
+
This helper takes in the compiled detector qconfig info that
|
| 436 |
+
has been compiled together and merges it into a QConfigMapping
|
| 437 |
+
"""
|
| 438 |
+
# keep track of the qconfigmapping
|
| 439 |
+
qconfig_mapping = QConfigMapping()
|
| 440 |
+
|
| 441 |
+
# loop through each module / fqn and attempt to create QConfigMapping
|
| 442 |
+
for fqn, module in self._model.named_modules():
|
| 443 |
+
# if we have a qconfig info for this module
|
| 444 |
+
if fqn in detector_qconfig_info_combined:
|
| 445 |
+
qconfig_info_compiled = detector_qconfig_info_combined[fqn]
|
| 446 |
+
|
| 447 |
+
# now generate the qconfig and add it to the mapping
|
| 448 |
+
generated_qconfig = generation_function(qconfig_info_compiled, module)
|
| 449 |
+
|
| 450 |
+
# add to our config
|
| 451 |
+
qconfig_mapping.set_module_name(fqn, generated_qconfig)
|
| 452 |
+
|
| 453 |
+
# return compiled mapping
|
| 454 |
+
return qconfig_mapping
|
| 455 |
+
|
| 456 |
+
def _update_detector_quantizaiton_qconfig_info(self, combined_info: DetectorQConfigInfo, new_info: DetectorQConfigInfo):
|
| 457 |
+
r"""
|
| 458 |
+
Takes in the old and new information and updates the combined information.
|
| 459 |
+
|
| 460 |
+
Args:
|
| 461 |
+
combined_info (DetectorQConfigInfo): The DetectorQConfigInfo we are compiling all of the information in
|
| 462 |
+
new_info (DetectorQConfigInfo): The DetectorQConfigInfo with the information we are trying to merge the new info
|
| 463 |
+
into it
|
| 464 |
+
"""
|
| 465 |
+
combined_info.is_activation_dynamic = combined_info.is_activation_dynamic or new_info.is_activation_dynamic
|
| 466 |
+
combined_info.is_weight_per_channel = combined_info.is_weight_per_channel or new_info.is_weight_per_channel
|
| 467 |
+
|
| 468 |
+
def _update_detector_equalization_qconfig_info(self, combined_info: DetectorQConfigInfo, new_info: DetectorQConfigInfo):
|
| 469 |
+
r"""
|
| 470 |
+
Takes in the old and new information and updates the combined information.
|
| 471 |
+
|
| 472 |
+
Args:
|
| 473 |
+
combined_info (DetectorQConfigInfo): The DetectorQConfigInfo we are compiling all of the information in
|
| 474 |
+
new_info (DetectorQConfigInfo): The DetectorQConfigInfo with the information we are trying to merge the new info
|
| 475 |
+
into it
|
| 476 |
+
"""
|
| 477 |
+
is_equalization_recommended = combined_info.is_equalization_recommended or new_info.is_equalization_recommended
|
| 478 |
+
combined_info.is_equalization_recommended = is_equalization_recommended
|
| 479 |
+
|
| 480 |
+
def _generate_module_fqn_to_detector_info_mapping(
|
| 481 |
+
self,
|
| 482 |
+
update_qconfig_info_function: Callable
|
| 483 |
+
) -> Dict[str, DetectorQConfigInfo]:
|
| 484 |
+
r"""
|
| 485 |
+
Generates a QConfigMapping based on the suggestions of the
|
| 486 |
+
ModelReport API. The generated mapping encompasses all the
|
| 487 |
+
different types of feedback from the different detectors
|
| 488 |
+
all into one place.
|
| 489 |
+
|
| 490 |
+
These configs are based on the suggestions provided by the ModelReport API
|
| 491 |
+
and can only be generated once the reports have been generated.
|
| 492 |
+
|
| 493 |
+
Args:
|
| 494 |
+
update_qconfig_info_function (Callable) takes in a function that takes in two DetectorQConfigInfo
|
| 495 |
+
and updates the one that is being compiled
|
| 496 |
+
|
| 497 |
+
Returns a Dict mapping module_fqns to DetectorQConfigInfo objects
|
| 498 |
+
|
| 499 |
+
Note:
|
| 500 |
+
Throws exception if we try to generate mapping on model we already removed observers from
|
| 501 |
+
Throws exception if we try to generate mapping without preparing for callibration
|
| 502 |
+
"""
|
| 503 |
+
# if we haven't prepped model for callibration, then we shouldn't generate mapping yet
|
| 504 |
+
if not self._prepared_flag:
|
| 505 |
+
raise Exception("Cannot generate report without preparing model for callibration")
|
| 506 |
+
|
| 507 |
+
# if we already removed the observers, we cannot mapping
|
| 508 |
+
if self._removed_observers:
|
| 509 |
+
raise Exception("Cannot generate report on model you already removed observers from")
|
| 510 |
+
|
| 511 |
+
# keep track of qconfig info for each module across detectors
|
| 512 |
+
detector_qconfig_info_combined: Dict[str, DetectorQConfigInfo] = {}
|
| 513 |
+
|
| 514 |
+
for detector in self._desired_report_detectors:
|
| 515 |
+
# get the info from the detector
|
| 516 |
+
detector_info: Dict[str, DetectorQConfigInfo] = detector.get_qconfig_info(self._model)
|
| 517 |
+
|
| 518 |
+
# we go through the modules
|
| 519 |
+
for module_fqn in detector_info:
|
| 520 |
+
# see if we already have info on it
|
| 521 |
+
if module_fqn in detector_qconfig_info_combined:
|
| 522 |
+
# we combine the current options with what is there
|
| 523 |
+
current_options = detector_qconfig_info_combined[module_fqn]
|
| 524 |
+
detector_options = detector_info[module_fqn]
|
| 525 |
+
|
| 526 |
+
update_qconfig_info_function(current_options, detector_options)
|
| 527 |
+
else:
|
| 528 |
+
# we just use this for now
|
| 529 |
+
detector_qconfig_info_combined[module_fqn] = detector_info[module_fqn]
|
| 530 |
+
|
| 531 |
+
return detector_qconfig_info_combined
|
| 532 |
+
|
| 533 |
+
def generate_qconfig_mapping(self) -> QConfigMapping:
|
| 534 |
+
r"""
|
| 535 |
+
Generates a QConfigMapping based on the suggestions of the
|
| 536 |
+
ModelReport API. The generated mapping encompasses all the
|
| 537 |
+
different types of feedback from the different detectors
|
| 538 |
+
all into one place.
|
| 539 |
+
|
| 540 |
+
These configs are based on the suggestions provided by the ModelReport API
|
| 541 |
+
and can only be generated once the reports have been generated.
|
| 542 |
+
|
| 543 |
+
Returns a QConfigMapping for the quantization configuration
|
| 544 |
+
|
| 545 |
+
Note:
|
| 546 |
+
Throws exception if we try to generate mapping on model we already removed observers from
|
| 547 |
+
Throws exception if we try to generate mapping without preparing for callibration
|
| 548 |
+
"""
|
| 549 |
+
# get the mapping info
|
| 550 |
+
detector_qconfig_info_combined = self._generate_module_fqn_to_detector_info_mapping(
|
| 551 |
+
self._update_detector_quantizaiton_qconfig_info
|
| 552 |
+
)
|
| 553 |
+
|
| 554 |
+
# we will do a bit of processing and remove fqns that don't have input weight recommended
|
| 555 |
+
|
| 556 |
+
# now we generate the QConfig for each of the options
|
| 557 |
+
mapping: QConfigMapping = self._generate_qconfig_mapping_helper(
|
| 558 |
+
detector_qconfig_info_combined,
|
| 559 |
+
self._quantization_config_generator
|
| 560 |
+
)
|
| 561 |
+
|
| 562 |
+
# return the generated mapping
|
| 563 |
+
return mapping
|
| 564 |
+
|
| 565 |
+
def _quantization_config_generator(self, detector_qconfig_info: DetectorQConfigInfo, module: torch.nn.Module) -> QConfig:
|
| 566 |
+
r"""
|
| 567 |
+
Returns the quantization configuration generated by the DetectorQConfigInfo object
|
| 568 |
+
"""
|
| 569 |
+
return detector_qconfig_info.generate_quantization_qconfig(module)
|
| 570 |
+
|
| 571 |
+
def _equalization_config_generator(
|
| 572 |
+
self,
|
| 573 |
+
detector_qconfig_info: DetectorQConfigInfo,
|
| 574 |
+
module: torch.nn.Module
|
| 575 |
+
) -> EqualizationQConfig:
|
| 576 |
+
r"""
|
| 577 |
+
We ignore the module argument here, and only focus on thedetector_qconfig_info
|
| 578 |
+
|
| 579 |
+
Returns the equalization configuration generated by the DetectorQConfigInfo object
|
| 580 |
+
"""
|
| 581 |
+
return detector_qconfig_info.generate_equalization_qconfig()
|
| 582 |
+
|
| 583 |
+
def generate_equalization_mapping(self) -> QConfigMapping:
|
| 584 |
+
r"""
|
| 585 |
+
Generates a QConfigMapping based on the suggestions of the
|
| 586 |
+
ModelReport API for equalization. The generated mapping encompasses all the
|
| 587 |
+
different types of feedback from the input-weight equalization detector.
|
| 588 |
+
|
| 589 |
+
These configs are based on the suggestions provided by the ModelReport API
|
| 590 |
+
and can only be generated once the reports have been generated.
|
| 591 |
+
|
| 592 |
+
Returns a QConfigMapping for the equalization configuration
|
| 593 |
+
"""
|
| 594 |
+
# get the mapping info
|
| 595 |
+
detector_qconfig_info_combined = self._generate_module_fqn_to_detector_info_mapping(
|
| 596 |
+
self._update_detector_equalization_qconfig_info
|
| 597 |
+
)
|
| 598 |
+
|
| 599 |
+
# now we generate the QConfig for each of the options
|
| 600 |
+
mapping: QConfigMapping = self._generate_qconfig_mapping_helper(
|
| 601 |
+
detector_qconfig_info_combined,
|
| 602 |
+
self._equalization_config_generator
|
| 603 |
+
)
|
| 604 |
+
|
| 605 |
+
# return the generated mapping
|
| 606 |
+
return mapping
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/fx/prepare.py
ADDED
|
@@ -0,0 +1,1880 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import copy
|
| 2 |
+
import torch
|
| 3 |
+
import warnings
|
| 4 |
+
from torch.fx import (
|
| 5 |
+
GraphModule,
|
| 6 |
+
)
|
| 7 |
+
from torch.fx.graph import (
|
| 8 |
+
Graph,
|
| 9 |
+
Node,
|
| 10 |
+
)
|
| 11 |
+
from torch.fx.node import Argument
|
| 12 |
+
|
| 13 |
+
from ..quantize import (
|
| 14 |
+
propagate_qconfig_,
|
| 15 |
+
)
|
| 16 |
+
from ..observer import (
|
| 17 |
+
_is_activation_post_process,
|
| 18 |
+
_PartialWrapper,
|
| 19 |
+
)
|
| 20 |
+
from ..qconfig import (
|
| 21 |
+
_is_reuse_input_qconfig,
|
| 22 |
+
QConfigAny,
|
| 23 |
+
)
|
| 24 |
+
from ..qconfig_mapping import (
|
| 25 |
+
QConfigMapping,
|
| 26 |
+
)
|
| 27 |
+
from .qconfig_mapping_utils import (
|
| 28 |
+
_generate_node_name_to_qconfig,
|
| 29 |
+
_update_qconfig_for_fusion,
|
| 30 |
+
_get_flattened_qconfig_dict,
|
| 31 |
+
_update_qconfig_for_qat,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
from .quantize_handler import (
|
| 35 |
+
_default_root_node_getter,
|
| 36 |
+
_get_pattern_to_quantize_handlers,
|
| 37 |
+
QuantizeHandler,
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
from torch.ao.quantization import (
|
| 41 |
+
ObserverBase,
|
| 42 |
+
FixedQParamsObserver,
|
| 43 |
+
FixedQParamsFakeQuantize,
|
| 44 |
+
_DerivedObserverOrFakeQuantize,
|
| 45 |
+
)
|
| 46 |
+
|
| 47 |
+
from torch.ao.quantization.utils import (
|
| 48 |
+
Pattern,
|
| 49 |
+
NodePattern,
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
from ._equalize import (
|
| 53 |
+
is_equalization_observer,
|
| 54 |
+
node_supports_equalization,
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
from .pattern_utils import (
|
| 58 |
+
_sorted_patterns_dict,
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
from .match_utils import (
|
| 62 |
+
_MatchResultWithQConfig,
|
| 63 |
+
_find_matches,
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
from .utils import (
|
| 67 |
+
_insert_dequant_stubs_for_custom_module_lstm_output,
|
| 68 |
+
_is_custom_module_lstm,
|
| 69 |
+
_maybe_get_custom_module_lstm_from_node_arg,
|
| 70 |
+
_qconfig_satisfies_dtype_config_constraints,
|
| 71 |
+
get_custom_module_class_keys,
|
| 72 |
+
all_node_args_have_no_tensors,
|
| 73 |
+
assert_and_get_unique_device,
|
| 74 |
+
get_non_observable_arg_indexes_and_types,
|
| 75 |
+
get_new_attr_name_with_prefix,
|
| 76 |
+
node_arg_is_weight,
|
| 77 |
+
node_arg_is_bias,
|
| 78 |
+
NON_QUANTIZABLE_WEIGHT_OPS,
|
| 79 |
+
ObservedGraphModuleAttrs,
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
from torch.ao.quantization import (
|
| 83 |
+
PlaceholderObserver
|
| 84 |
+
)
|
| 85 |
+
from torch.ao.quantization.quantize import (
|
| 86 |
+
convert
|
| 87 |
+
)
|
| 88 |
+
|
| 89 |
+
from ..utils import (
|
| 90 |
+
_parent_name,
|
| 91 |
+
get_qconfig_dtypes,
|
| 92 |
+
get_swapped_custom_module_class,
|
| 93 |
+
)
|
| 94 |
+
|
| 95 |
+
from ..backend_config.utils import (
|
| 96 |
+
get_pattern_to_dtype_configs,
|
| 97 |
+
get_module_to_qat_module,
|
| 98 |
+
get_fusion_pattern_to_root_node_getter,
|
| 99 |
+
)
|
| 100 |
+
from ..backend_config import (
|
| 101 |
+
BackendConfig,
|
| 102 |
+
DTypeConfig,
|
| 103 |
+
get_native_backend_config,
|
| 104 |
+
)
|
| 105 |
+
from .custom_config import (
|
| 106 |
+
PrepareCustomConfig,
|
| 107 |
+
StandaloneModuleConfigEntry,
|
| 108 |
+
)
|
| 109 |
+
from torch.ao.quantization.quantizer import (
|
| 110 |
+
EdgeOrNode,
|
| 111 |
+
QuantizationSpec,
|
| 112 |
+
QuantizationSpecBase,
|
| 113 |
+
FixedQParamsQuantizationSpec,
|
| 114 |
+
SharedQuantizationSpec,
|
| 115 |
+
DerivedQuantizationSpec,
|
| 116 |
+
)
|
| 117 |
+
from torch.ao.quantization import ObserverOrFakeQuantize
|
| 118 |
+
|
| 119 |
+
from torch._subclasses import FakeTensor
|
| 120 |
+
|
| 121 |
+
from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union
|
| 122 |
+
from dataclasses import asdict
|
| 123 |
+
|
| 124 |
+
# Public surface of this module; every other name is a private helper.
__all__ = [
    "insert_observers_for_model",
    "prepare",
    "propagate_dtypes_for_known_nodes",
]


# list of dtypes to not add observers to
_DO_NOT_OBS_DTYPE_LIST = [int, float, torch.bool, None]
# dtypes for which an observer/fake-quant may be inserted
_OBS_DTYPE_LIST = [
    torch.quint8,
    torch.qint8,
    torch.qint32,
    torch.float16,
    torch.uint8,
    torch.int8,
    torch.int16,
    torch.int32
]

# fallback observer constructor used when a node carries no explicit dtype info
_DEFAULT_FP32_OBS_OR_FQ_CTR = PlaceholderObserver.with_args(dtype=torch.float)

# note: the following default target dtype info dicts are temporary,
# should be moved to the new programmable API class soon
_DEFAULT_FP32_QCONFIG_FOR_TARGET_DTYPE_INFO = {
    "input_act_obs_or_fq_ctr": torch.ao.quantization.qconfig._default_fp32_placeholder_qconfig.activation,
    "output_act_obs_or_fq_ctr": torch.ao.quantization.qconfig._default_fp32_placeholder_qconfig.activation
}

_DEFAULT_QUINT8_QCONFIG_FOR_TARGET_DTYPE_INFO = {
    "input_act_obs_or_fq_ctr": torch.ao.quantization.qconfig._default_quint8_placeholder_qconfig.activation,
    "output_act_obs_or_fq_ctr": torch.ao.quantization.qconfig._default_quint8_placeholder_qconfig.activation
}
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def _get_observer_kwargs(quant_spec: Union[QuantizationSpec, FixedQParamsQuantizationSpec]):
|
| 160 |
+
kwargs_dict = asdict(quant_spec)
|
| 161 |
+
return copy.deepcopy(kwargs_dict)
|
| 162 |
+
|
| 163 |
+
def _get_qspec_for_arg(
    arg: Node,
    input_qspec_map: Dict[Node, QuantizationSpecBase],
    named_modules: Dict[str, torch.nn.Module]
) -> Optional[QuantizationSpecBase]:
    """Look up the quantization spec configured for `arg`, skipping over any
    observer/fake-quant nodes already inserted between the producer and `arg`.

    Returns None when no spec is configured for the (de-observed) argument.
    """
    current = arg
    while _is_activation_post_process_node(current, named_modules):
        current = current.args[0]  # type: ignore[assignment]
    return input_qspec_map.get(current)
| 171 |
+
|
| 172 |
+
def _create_obs_or_fq_from_qspec(
|
| 173 |
+
quantization_spec: Optional[QuantizationSpecBase],
|
| 174 |
+
obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
|
| 175 |
+
is_qat: bool,
|
| 176 |
+
):
|
| 177 |
+
""" Create observer or fake quantize objects based on quantization spec
|
| 178 |
+
|
| 179 |
+
Args:
|
| 180 |
+
quantization_spec: used to store parameters to create the observer or fake quantizer
|
| 181 |
+
obs_or_fq_map: this is a map from edge/output to the corresponding observer/fake_quant
|
| 182 |
+
instance, it may be reused for different edge/output depending on configuration
|
| 183 |
+
"""
|
| 184 |
+
if quantization_spec is None:
|
| 185 |
+
return None
|
| 186 |
+
if isinstance(quantization_spec, SharedQuantizationSpec):
|
| 187 |
+
edge_or_node = quantization_spec.edge_or_node
|
| 188 |
+
assert edge_or_node in obs_or_fq_map, \
|
| 189 |
+
"please make sure only refer to edge or node that has " \
|
| 190 |
+
f"observer/fake_quant inserted: '{edge_or_node}' not in\n{obs_or_fq_map.keys()}"
|
| 191 |
+
return obs_or_fq_map[edge_or_node]
|
| 192 |
+
elif isinstance(quantization_spec, DerivedQuantizationSpec):
|
| 193 |
+
# can't use asdict, so not calling get_observer_kwargs here
|
| 194 |
+
kwargs = {
|
| 195 |
+
"dtype": quantization_spec.dtype,
|
| 196 |
+
"derive_qparams_fn": quantization_spec.derive_qparams_fn,
|
| 197 |
+
"quant_min": quantization_spec.quant_min,
|
| 198 |
+
"quant_max": quantization_spec.quant_max,
|
| 199 |
+
"qscheme": quantization_spec.qscheme,
|
| 200 |
+
"ch_axis": quantization_spec.ch_axis,
|
| 201 |
+
}
|
| 202 |
+
edge_or_nodes = quantization_spec.derived_from
|
| 203 |
+
obs_or_fqs = [obs_or_fq_map[k] for k in edge_or_nodes]
|
| 204 |
+
kwargs["obs_or_fqs"] = obs_or_fqs
|
| 205 |
+
return _DerivedObserverOrFakeQuantize.with_args(**kwargs)()
|
| 206 |
+
elif isinstance(quantization_spec, FixedQParamsQuantizationSpec):
|
| 207 |
+
kwargs = _get_observer_kwargs(quantization_spec)
|
| 208 |
+
observer_ctr = FixedQParamsObserver.with_args(**kwargs)
|
| 209 |
+
if is_qat:
|
| 210 |
+
return FixedQParamsFakeQuantize.with_args(observer=observer_ctr)
|
| 211 |
+
else:
|
| 212 |
+
return observer_ctr()
|
| 213 |
+
|
| 214 |
+
assert isinstance(quantization_spec, QuantizationSpec)
|
| 215 |
+
observer_or_fake_quant_ctr = quantization_spec.observer_or_fake_quant_ctr
|
| 216 |
+
kwargs = _get_observer_kwargs(quantization_spec)
|
| 217 |
+
kwargs.pop("observer_or_fake_quant_ctr")
|
| 218 |
+
# we will remove is_dynamic from QuantizationSpec because
|
| 219 |
+
# it seems that dynamic range quantization
|
| 220 |
+
obs_or_fq_class = observer_or_fake_quant_ctr
|
| 221 |
+
if isinstance(observer_or_fake_quant_ctr, _PartialWrapper):
|
| 222 |
+
obs_or_fq_class = observer_or_fake_quant_ctr.p.func # type: ignore[union-attr, assignment]
|
| 223 |
+
if "PerChannel" not in obs_or_fq_class.__name__: # type: ignore[operator, union-attr]
|
| 224 |
+
kwargs.pop("ch_axis")
|
| 225 |
+
return observer_or_fake_quant_ctr.with_args(**kwargs)()
|
| 226 |
+
|
| 227 |
+
def _needs_obs_or_fq(
|
| 228 |
+
prev_output_dtype: Any,
|
| 229 |
+
prev_output_is_dynamic: bool,
|
| 230 |
+
cur_target_dtype: Any,
|
| 231 |
+
cur_target_is_dynamic: bool,
|
| 232 |
+
reuse_input_obs_or_fq: bool,
|
| 233 |
+
is_zeroth_arg: bool = False) -> bool:
|
| 234 |
+
"""
|
| 235 |
+
note: we will treat "not specified" as torch.float for now
|
| 236 |
+
utility function that checks if we should insert an observer or fake quant node
|
| 237 |
+
base on the requested dtype for the nodes from user
|
| 238 |
+
|
| 239 |
+
is_zeroth_arg: we only dynamically quantize the first arg of the node right now
|
| 240 |
+
this should be removed when we enable configuring dynamic quantization
|
| 241 |
+
for a specific argument, this can be removed if we deprecate fx graph mode
|
| 242 |
+
quantization
|
| 243 |
+
|
| 244 |
+
"""
|
| 245 |
+
|
| 246 |
+
# need to insert placeholder observer for dynamic quantization so that it can
|
| 247 |
+
# be converted to choose_qparams -> q -> dq in convert step
|
| 248 |
+
if cur_target_is_dynamic:
|
| 249 |
+
assert cur_target_dtype in _OBS_DTYPE_LIST, \
|
| 250 |
+
f"Expected cur_target_dtype to be torch.float, but got: {cur_target_dtype}"
|
| 251 |
+
assert prev_output_dtype not in _DO_NOT_OBS_DTYPE_LIST
|
| 252 |
+
return is_zeroth_arg
|
| 253 |
+
if reuse_input_obs_or_fq:
|
| 254 |
+
return False
|
| 255 |
+
# non dynamic quantization
|
| 256 |
+
if cur_target_dtype in _OBS_DTYPE_LIST:
|
| 257 |
+
return prev_output_dtype in _OBS_DTYPE_LIST + [torch.float] and cur_target_dtype != prev_output_dtype
|
| 258 |
+
|
| 259 |
+
# lots of error checking are skipped here for now
|
| 260 |
+
return False
|
| 261 |
+
|
| 262 |
+
def _is_activation_post_process_node(node: Node, named_modules: Dict[str, torch.nn.Module]) -> bool:
|
| 263 |
+
return isinstance(node, torch.fx.Node) and node.op == "call_module" and \
|
| 264 |
+
_is_activation_post_process(named_modules[str(node.target)])
|
| 265 |
+
|
| 266 |
+
def _get_dtype_and_is_dynamic(obs_or_fq: Optional[ObserverOrFakeQuantize]) -> Tuple[Optional[torch.dtype], bool]:
|
| 267 |
+
""" Given a constructor for observer or fake quant module, returns
|
| 268 |
+
a Tuple of dtype and is_dynamic
|
| 269 |
+
"""
|
| 270 |
+
# TODO: instead of instantiating the instance, we can use inspect to get the default args
|
| 271 |
+
if obs_or_fq is None:
|
| 272 |
+
return None, False
|
| 273 |
+
else:
|
| 274 |
+
return obs_or_fq.dtype, getattr(obs_or_fq, "is_dynamic", False) # type: ignore[return-value]
|
| 275 |
+
|
| 276 |
+
def _is_input_arg_dtype_supported_by_backend(
    arg: Argument,
    node: Node,
    qconfig: QConfigAny,
    dtype_config: DTypeConfig,
    backend_config: BackendConfig,
) -> bool:
    """ Check if the configured qconfig for the argument
    is supported by the backend or not
    """
    if isinstance(arg, (list, tuple)):
        # a container argument is supported only if every element is supported
        return all(_is_input_arg_dtype_supported_by_backend(
            a, node, qconfig,
            dtype_config, backend_config) for a in arg)
    if not isinstance(arg, Node):
        # non-Node arguments (python scalars, strings, ...) impose no constraint
        return True
    # TODO: support check for standalone module
    is_weight = node_arg_is_weight(node, arg)
    is_bias = node_arg_is_bias(node, arg)
    is_activation = not is_weight and not is_bias
    if is_activation:
        # instantiate the configured observer ctr (if any) to read its dtype/is_dynamic
        input_act_obs_or_fq_ctr = node.meta["target_dtype_info"].get("input_act_obs_or_fq_ctr")
        input_act_obs_or_fq = input_act_obs_or_fq_ctr() if input_act_obs_or_fq_ctr else None
        qconfig_dtype, qconfig_is_dynamic = _get_dtype_and_is_dynamic(input_act_obs_or_fq)
        # TODO(future PR): remove the cast to bool below after figuring
        # out why backend_config has is_dynamic set to None in some cases.
        return (dtype_config.input_dtype is None) or (
            dtype_config.input_dtype == qconfig_dtype and
            bool(dtype_config.is_dynamic) == bool(qconfig_is_dynamic) and
            _qconfig_satisfies_dtype_config_constraints(qconfig, dtype_config.input_dtype_with_constraints)
        )
    elif is_weight:
        # TODO: move dtype check into `_qconfig_satisfies_dtype_config_constraints` as well
        weight_obs_or_fq_ctr = node.meta["target_dtype_info"].get("weight_obs_or_fq_ctr", None)
        weight_obs_or_fq = weight_obs_or_fq_ctr() if weight_obs_or_fq_ctr else None
        qconfig_weight_dtype, _ = _get_dtype_and_is_dynamic(weight_obs_or_fq)
        backend_config_weight_dtype = dtype_config.weight_dtype
        dtype_matches = qconfig_weight_dtype == backend_config_weight_dtype
        qconfig_satisfies_constraints = _qconfig_satisfies_dtype_config_constraints(
            qconfig, dtype_config.weight_dtype_with_constraints, is_activation=False)
        # a backend with no weight dtype requirement accepts anything
        return backend_config_weight_dtype is None or (dtype_matches and qconfig_satisfies_constraints)
    else:  # bias
        # TODO: move dtype check into `_qconfig_satisfies_dtype_config_constraints` as well
        bias_obs_or_fq_ctr = node.meta["target_dtype_info"].get("bias_obs_or_fq_ctr", None)
        bias_obs_or_fq = bias_obs_or_fq_ctr() if bias_obs_or_fq_ctr else None
        qconfig_bias_dtype, _ = _get_dtype_and_is_dynamic(bias_obs_or_fq)
        backend_config_bias_dtype = dtype_config.bias_dtype
        return backend_config_bias_dtype is None or qconfig_bias_dtype == backend_config_bias_dtype
| 324 |
+
|
| 325 |
+
def _is_output_dtype_supported_by_backend(
    node: Node,
    qconfig: QConfigAny,
    dtype_config: DTypeConfig,
) -> bool:
    """ Check if the configured qconfig for the output
    is supported by the backend or not

    A backend with no output dtype requirement (`output_dtype is None`)
    accepts any configuration.
    """
    # TODO: move dtype check into `_qconfig_satisfies_dtype_config_constraints` as well
    backend_config_output_dtype = dtype_config.output_dtype
    # TODO: we should check is_dynamic here as well, the code from _is_input_arg_dtype_supported_by_backend
    # from input activation check can be reused here
    output_act_obs_or_fq_ctr = node.meta["target_dtype_info"].get("output_act_obs_or_fq_ctr", _DEFAULT_FP32_OBS_OR_FQ_CTR)
    output_act_obs_or_fq = output_act_obs_or_fq_ctr() if output_act_obs_or_fq_ctr else None
    # (removed a dead `qconfig_output_dtype = None` assignment that was
    # unconditionally overwritten by the line below)
    qconfig_output_dtype, qconfig_output_is_dynamic = _get_dtype_and_is_dynamic(output_act_obs_or_fq)
    # TODO: this is a hack because we can only specify one activation_obs_or_fq for
    # qconfig (qconfig.activation), and we are only supporting dynamically quantized
    # linear op which has fp32 output dtype, this should be removed if we generalize
    # the structure of qconfig in the future
    if qconfig_output_is_dynamic:
        qconfig_output_dtype = torch.float32
    dtype_matches = qconfig_output_dtype == backend_config_output_dtype
    qconfig_satisfies_constraints = _qconfig_satisfies_dtype_config_constraints(
        qconfig, dtype_config.output_dtype_with_constraints)
    return backend_config_output_dtype is None or (dtype_matches and qconfig_satisfies_constraints)
| 351 |
+
|
| 352 |
+
def _is_observer_in_same_graph(
    node: Node,
    named_modules: Dict[str, torch.nn.Module],
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat,
):
    """ Check if observer in same graph
    when the node output is not fp32 and input is 'placeholder'
    the input is assumed to be quantized, so it is observed
    in a different place rather than not observed.
    """
    output_dtype = _get_arg_target_dtype_as_output(node, named_modules, obs_or_fq_map, is_qat)
    first_arg = node.args[0] if len(node.args) > 0 else None
    quantized_input_from_placeholder = (
        isinstance(first_arg, Node)
        and output_dtype in [torch.quint8, torch.uint8]
        and first_arg.op == 'placeholder'
    )
    return not quantized_input_from_placeholder
| 368 |
+
|
| 369 |
+
def _is_pattern_dtype_config_and_qconfig_supported_by_backend(
    pattern: Optional[Pattern],
    matched_node_pattern: Optional[List[Node]],
    qconfig: QConfigAny,
    backend_config: BackendConfig,
) -> bool:
    """ Check if the dtype configuration of a pattern is supported by
    the backend or not, and whether the qconfig satisfies constraints
    specified in the corresponding dtype config.
    """
    if backend_config is None or pattern is None:
        return True
    assert matched_node_pattern is not None and len(matched_node_pattern) >= 1
    pattern_to_dtype_configs = get_pattern_to_dtype_configs(backend_config)
    dtype_configs: List[DTypeConfig] = pattern_to_dtype_configs.get(pattern, [])
    pattern_to_root_node_getter = get_fusion_pattern_to_root_node_getter(backend_config)

    root_node_getter = pattern_to_root_node_getter.get(pattern, _default_root_node_getter)
    root_node = root_node_getter(matched_node_pattern)
    # the root node of a fused pattern is the one that consumes the
    # pattern's external inputs
    input_node = root_node
    # matched_node_pattern is ordered with the last (output) node first
    output_node = matched_node_pattern[0]
    # the pattern is supported if ANY dtype config accepts both all of the
    # input dtypes and the output dtype
    for dtype_config in dtype_configs:
        # check if arg dtype are supported
        supported = True
        for arg in list(input_node.args) + list(input_node.kwargs.values()):
            supported = supported and _is_input_arg_dtype_supported_by_backend(
                arg, input_node, qconfig, dtype_config, backend_config)
        # check if output dtype is supported
        supported = supported and _is_output_dtype_supported_by_backend(
            output_node, qconfig, dtype_config)
        if supported:
            return True
    return False
| 402 |
+
|
| 403 |
+
def _get_standalone_module_configs(
|
| 404 |
+
node: Node,
|
| 405 |
+
named_modules: Dict[str, torch.nn.Module],
|
| 406 |
+
prepare_custom_config: PrepareCustomConfig,
|
| 407 |
+
parent_qconfig: QConfigAny,
|
| 408 |
+
parent_backend_config: Optional[BackendConfig],
|
| 409 |
+
) -> Tuple[QConfigMapping, Tuple[Any, ...], PrepareCustomConfig, Optional[BackendConfig]]:
|
| 410 |
+
"""
|
| 411 |
+
Returns the standalone module QConfigMapping and PrepareCustomConfig
|
| 412 |
+
for `node`, assuming that the module pointed to by `node` is
|
| 413 |
+
a standalone modules.
|
| 414 |
+
"""
|
| 415 |
+
module_name = str(node.target)
|
| 416 |
+
module_type = type(named_modules[module_name]) # type: ignore[index]
|
| 417 |
+
# name config has precedence over type config
|
| 418 |
+
config_entry = StandaloneModuleConfigEntry(None, (), None, None)
|
| 419 |
+
config_entry = prepare_custom_config.standalone_module_classes.get(module_type, config_entry)
|
| 420 |
+
config_entry = prepare_custom_config.standalone_module_names.get(module_name, config_entry)
|
| 421 |
+
# fallback to use parent module's qconfig if user didn't specify qconfig dict
|
| 422 |
+
qconfig_mapping = config_entry.qconfig_mapping or QConfigMapping().set_global(parent_qconfig)
|
| 423 |
+
example_inputs = config_entry.example_inputs
|
| 424 |
+
prepare_custom_config = config_entry.prepare_custom_config or PrepareCustomConfig()
|
| 425 |
+
backend_config = config_entry.backend_config or parent_backend_config
|
| 426 |
+
return (qconfig_mapping, example_inputs, prepare_custom_config, backend_config)
|
| 427 |
+
|
| 428 |
+
def _qat_swap_modules(
        root: torch.nn.Module,
        module_to_qat_module: Dict[Pattern, Type[torch.nn.Module]]) -> None:
    # In-place swap of float modules in `root` for their QAT counterparts per
    # `module_to_qat_module`. qconfig attributes are kept (remove_qconfig=False)
    # because later prepare passes still read them off the swapped modules.
    convert(root, mapping=module_to_qat_module, inplace=True, remove_qconfig=False)
| 432 |
+
|
| 433 |
+
def _add_matched_node_name_to_set(matched_node_pattern: NodePattern, s: Set[str]):
|
| 434 |
+
if isinstance(matched_node_pattern, Node):
|
| 435 |
+
s.add(matched_node_pattern.name)
|
| 436 |
+
elif isinstance(matched_node_pattern, (list, tuple)):
|
| 437 |
+
for maybe_node in matched_node_pattern:
|
| 438 |
+
_add_matched_node_name_to_set(maybe_node, s)
|
| 439 |
+
|
| 440 |
+
def _insert_obs_or_fq(
    node: Node,
    obs_or_fq: ObserverOrFakeQuantize,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
) -> Node:
    """
    Attaches `obs_or_fq` to `model`, and creates a node which calls
    `obs_or_fq` on the output of `node`.

    obs_or_fq: an instance of Observer or FakeQuantize module

    Returns the newly created call_module node.
    """
    # move the observer's buffers onto the model's (unique) device so the
    # observed statistics live alongside the model parameters
    model_device = assert_and_get_unique_device(model)
    if model_device:
        obs_or_fq.to(model_device)
    # add obs_or_fq module as attribute
    # equalization observers get a distinct, node-derived prefix so they can
    # be told apart from regular activation observers later
    if is_equalization_observer(obs_or_fq):
        prefix = node.name + '_equalization_process_'
    else:
        prefix = 'activation_post_process_'
    get_new_obs_or_fq_name = get_new_attr_name_with_prefix(prefix)
    obs_or_fq_name = get_new_obs_or_fq_name(model)
    setattr(model, obs_or_fq_name, obs_or_fq)
    # keep the caller's name->module map in sync with the model attribute
    named_modules[obs_or_fq_name] = obs_or_fq
    with graph.inserting_after(node):
        new_obs = graph.create_node(
            'call_module', obs_or_fq_name, (node,), {})
    return new_obs
| 469 |
+
|
| 470 |
+
def _set_target_dtype_info_for_matched_node_pattern(
    matched_node_pattern: NodePattern,
    last_node: Node,
    qconfig: QConfigAny,
    qhandler: Optional[QuantizeHandler],
    backend_config: BackendConfig,
    named_modules: Dict[str, torch.nn.Module],
    cache_for_no_tensor_check: Dict[Node, bool],
    processed_nodes: Set[Node],
) -> None:
    """ Sets the target_dtype_info for each node in matched_node_pattern
    Note: processed_nodes is used to ensure we only process each node once
    """
    if isinstance(matched_node_pattern, (list, tuple)):
        # recurse into nested patterns so every constituent node is handled
        for node_pattern in matched_node_pattern:
            _set_target_dtype_info_for_matched_node_pattern(
                node_pattern,
                last_node,
                qconfig,
                qhandler,
                backend_config,
                named_modules,
                cache_for_no_tensor_check,
                processed_nodes
            )

    # set target_dtype_info if matched_node_pattern is a Node
    # other types of matched object, e.g. int, float literals, are ignored
    elif isinstance(matched_node_pattern, Node):
        # for pyre
        assert isinstance(matched_node_pattern, Node)
        node = matched_node_pattern
        if node in processed_nodes:
            return
        processed_nodes.add(node)

        # a node without a qconfig is left unannotated
        if qconfig is None:
            return
        # TODO: refactor the following code in terms of apply a qconfig to a pattern
        # e.g. for a pattern with op1 -> op2 -> op3, and qconfig = QConfig(input_act=obs0, output_act=obs1)
        # we set the input_obs_or_fq_ctr for the arguments of op1 to based on qconfig.input_act,
        # and set output_obs_or_fq_ctr based on qconfig.output_act
        # this also requires we extend the structure of QConfig to support more fine
        # grained configurations
        target_dtype_info: Dict[str, Any] = (
            _get_target_activation_dtype_for_node(
                node,
                qconfig,
                qhandler,
                named_modules,
                backend_config,
                cache_for_no_tensor_check,
            )
        )
        node.meta["target_dtype_info"] = target_dtype_info
| 525 |
+
|
| 526 |
+
def _get_target_activation_dtype_for_node(
    node: Node,
    qconfig: QConfigAny,
    qhandler: Optional[QuantizeHandler],
    named_modules: Dict[str, torch.nn.Module],
    backend_config: BackendConfig,
    cache_for_no_tensor_check: Dict[Node, bool],
) -> Dict[str, Any]:
    """
    For each op attribute in the op's input activation, output activation,
    weight, bias - returns the settings of dtype and is_dynamic we expect
    for the `quantize` call in the reference model representation, or None
    if there is no `quantize` call needed.

    For example, if we have a node corresponding to `op0` in

      x0 -> op0 -> x1

    And we want a reference quantized representation to be

      x0 -> quant_static -> dequant -> op0 -> quant_dynamic -> dequant -> x1

    Then this function will return

      {
        "input_act_obs_or_fq_ctr": MinMaxObserver.with_args(dtype=torch.quint8, is_dynamic=False),
        "output_act_obs_or_fq_ctr": MinMaxObserver.with_args(dtype=torch.quint8, is_dynamic=False),
      }

    TODO(future PR, if needed): explicitly spell out the non-Tensor
    dtypes.
    """
    args_have_no_tensors = \
        all_node_args_have_no_tensors(
            node, named_modules, cache_for_no_tensor_check)
    if args_have_no_tensors:
        # nothing tensor-valued flows through this node, so nothing to observe
        return {
            "input_act_obs_or_fq_ctr": None,
            "output_act_obs_or_fq_ctr": None,
        }
    # get qconfig to determine the eventual dtype of this node
    if qconfig is not None:
        act_dtype, weight_dtype, input_act_is_dynamic = \
            get_qconfig_dtypes(qconfig)

        # Currently `QConfig` only has one `activation` field.
        # For static quantization, it is reused for both input
        # and output activation. For dynamic quantization, this
        # field is currently only used for the input activation,
        # with the output activation being in fp32.
        # In the future this may change as we add more fields
        # to the `QConfig` object.
        # (an unused local that derived the output dtype from the above was
        # removed; the returned ctrs carry the dtype information instead)

        # a fp16 bias is only meaningful when the whole op runs in fp16 statically
        bias_dtype = torch.float16 \
            if (
                act_dtype == torch.float16
                and weight_dtype == torch.float16
                and (not input_act_is_dynamic)
            ) else torch.float

        is_general_tensor_value_op = \
            (qhandler is not None and qhandler.is_general_tensor_value_op())

        _is_standalone_module = (
            qhandler is not None and qhandler.is_standalone_module()
        )

        # look up the weight/bias argument indices once (the two lookups
        # previously duplicated the same pattern-config membership test)
        weight_index = None
        bias_index = None
        if isinstance(node, Node) and node.op == "call_function" and \
                node.target in backend_config._pattern_complex_format_to_config:
            input_type_to_index = \
                backend_config._pattern_complex_format_to_config[node.target]._input_type_to_index
            weight_index = input_type_to_index.get("weight")
            bias_index = input_type_to_index.get("bias")

        return {
            "input_act_obs_or_fq_ctr": qconfig.activation,
            "weight_obs_or_fq_ctr": qconfig.weight,
            "bias_obs_or_fq_ctr": PlaceholderObserver.with_args(dtype=bias_dtype),
            "weight_index": weight_index,
            "bias_index": bias_index,
            "output_act_obs_or_fq_ctr": qconfig.activation,
            "reuse_input_obs_or_fq": _is_reuse_input_qconfig(qconfig),
            "input_output_share_observers": is_general_tensor_value_op,
            "_is_standalone_module": _is_standalone_module,
        }
    # no qconfig: default to fp32 placeholder observers on input and output
    return copy.copy(_DEFAULT_FP32_QCONFIG_FOR_TARGET_DTYPE_INFO)
| 617 |
+
|
| 618 |
+
def _get_output_act_obs_or_fq(
    arg: Node,
    named_modules: Dict[str, torch.nn.Module],
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
) -> ObserverOrFakeQuantize:
    """ Get the constructor for observer or fake quant object for
    the argument in the original graph as the output of previous node,
    skipping inserted observers

    We are assuming that the observers are inserted correctly, and the dtype for
    argument in quantized graph will match what is specified by the qconfig
    """
    assert isinstance(arg, Node)
    # pt2e path: the annotation on the node itself wins
    if "quantization_annotation" in arg.meta:
        return _create_obs_or_fq_from_qspec(arg.meta["quantization_annotation"].output_qspec, obs_or_fq_map, is_qat)

    # Custom module LSTM output is a tuple that we broke down into the internal nodes in order
    # to insert DeQuantStubs (see `_insert_dequant_stubs_for_custom_module_lstm_output`).
    # Since we modified the graph in this case, we must trace back from the args through
    # the specific nodes we added in order to reach the original LSTM node. Otherwise, we would
    # not be able to accurately detect whether this node is a consumer of custom module LSTM.
    custom_module_lstm_node = _maybe_get_custom_module_lstm_from_node_arg(arg, named_modules)
    output_act_obs_or_fq_ctr = None
    if custom_module_lstm_node is not None:
        # use the dtype info recorded on the original LSTM node
        output_act_obs_or_fq_ctr = custom_module_lstm_node.meta["target_dtype_info"]["output_act_obs_or_fq_ctr"]
        output_act_obs_or_fq = output_act_obs_or_fq_ctr() if output_act_obs_or_fq_ctr else None
    elif _is_activation_post_process_node(arg, named_modules):
        # `arg` is an inserted observer: look through it at the node it observes
        observed_arg = arg.args[0]
        assert isinstance(observed_arg, Node), "Currently we only support observing Node"
        if "quantization_annotation" in observed_arg.meta:
            output_act_obs_or_fq = \
                _create_obs_or_fq_from_qspec(
                    observed_arg.meta["quantization_annotation"].output_qspec, obs_or_fq_map, is_qat)
        else:
            assert "target_dtype_info" in observed_arg.meta
            output_act_obs_or_fq_ctr = observed_arg.meta["target_dtype_info"]["output_act_obs_or_fq_ctr"]
            output_act_obs_or_fq = output_act_obs_or_fq_ctr() if output_act_obs_or_fq_ctr else None
    else:
        # plain node: read its recorded dtype info, defaulting to fp32
        if "target_dtype_info" in arg.meta:
            output_act_obs_or_fq_ctr = \
                arg.meta["target_dtype_info"].get("output_act_obs_or_fq_ctr", _DEFAULT_FP32_OBS_OR_FQ_CTR)
        else:
            output_act_obs_or_fq_ctr = _DEFAULT_FP32_OBS_OR_FQ_CTR
        output_act_obs_or_fq = output_act_obs_or_fq_ctr() if output_act_obs_or_fq_ctr else None

    return output_act_obs_or_fq
| 665 |
+
|
| 666 |
+
def _get_arg_target_dtype_as_output(
    arg: Node,
    named_modules: Dict[str, torch.nn.Module],
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
) -> Optional[torch.dtype]:
    """Return the dtype `arg` is expected to carry as the *output* of its
    producer node, or None when no observer/fake-quant is configured."""
    producer_obs_or_fq = _get_output_act_obs_or_fq(arg, named_modules, obs_or_fq_map, is_qat)
    dtype, _ = _get_dtype_and_is_dynamic(producer_obs_or_fq)
    return dtype
| 675 |
+
|
| 676 |
+
def _get_arg_as_input_act_obs_or_fq(
    arg: Node,
    node: Node,
    named_modules: Dict[str, torch.nn.Module],
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
) -> Optional[ObserverOrFakeQuantize]:
    """ Get the observer or fake quant constructor for the Argument `arg`, as input
    to Node `node`
    """
    assert isinstance(arg, Node)
    # pt2e path: "input_qspec_map" is the more general design, a map from an
    # input argument node to its observer/fake-quant constructor. E.g. for
    #   x -> conv -> output
    # the conv node may be annotated as:
    # conv.meta[...] = QuantizationAnnotation("input_qspec_map": {x: MinMaxObserver.with_args(dtype=torch.qint8)}, ...)
    if "quantization_annotation" in node.meta:
        qspec_map = node.meta["quantization_annotation"].input_qspec_map
        arg_qspec = _get_qspec_for_arg(arg, qspec_map, named_modules)
        if arg_qspec is None:
            return _DEFAULT_FP32_OBS_OR_FQ_CTR()
        return _create_obs_or_fq_from_qspec(arg_qspec, obs_or_fq_map, is_qat)

    # Legacy path below; removable once fx graph mode quantization is
    # no longer used.
    weight_arg = node_arg_is_weight(node, arg)
    bias_arg = node_arg_is_bias(node, arg)
    ctr = None
    if not weight_arg and not bias_arg:
        # activation input
        ctr = node.meta["target_dtype_info"].get("input_act_obs_or_fq_ctr", _DEFAULT_FP32_OBS_OR_FQ_CTR)
    elif weight_arg:
        if node.target not in NON_QUANTIZABLE_WEIGHT_OPS:
            ctr = node.meta["target_dtype_info"].get("weight_obs_or_fq_ctr", _DEFAULT_FP32_OBS_OR_FQ_CTR)
    else:
        ctr = node.meta["target_dtype_info"].get("bias_obs_or_fq_ctr", _DEFAULT_FP32_OBS_OR_FQ_CTR)
    return ctr() if ctr else None
|
| 718 |
+
|
| 719 |
+
def _maybe_insert_input_observer_for_arg_or_kwarg(
    node: Union[Node, Any],
    arg: Argument,
    qconfig: QConfigAny,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
    qhandler: Optional[QuantizeHandler],
    prepare_custom_config: PrepareCustomConfig,
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
    backend_config: Optional[BackendConfig] = None,
) -> Argument:
    """
    Given a `node` and an `arg`, inserts an input observer between
    `node` and `arg` if necessary.

    Returns the argument to use in place of `arg`: either `arg` itself
    (no observation needed), a newly inserted observer node, or an
    existing compatible observer node that is reused.
    """
    # for ops such as torch.cat([x0, x1]),
    # traverse through the list
    if isinstance(arg, (list, tuple)):
        new_arg_to_return = []
        for inner_arg in arg:
            new_inner_arg = _maybe_insert_input_observer_for_arg_or_kwarg(
                node, inner_arg, qconfig, model, named_modules,
                graph,
                qhandler,
                prepare_custom_config,
                obs_or_fq_map,
                is_qat,
                backend_config)
            new_arg_to_return.append(new_inner_arg)
        # preserve the original container type (list vs tuple)
        return type(arg)(new_arg_to_return)

    if not isinstance(arg, Node):
        return arg
    assert isinstance(arg, Node)
    # default (no observer)
    new_arg = arg

    is_standalone_module = qhandler is not None and qhandler.is_standalone_module()
    # TODO: move this to a separate function
    if not is_standalone_module:
        # Note: qconfig can be None in this branch since we are getting act/fq from
        # node.meta now
        # regular flow for most nodes, except standalone modules

        if "quantization_annotation" in node.meta:
            reuse_input_obs_or_fq = node.meta["quantization_annotation"]._reuse_input_obs_or_fq
        else:
            assert "target_dtype_info" in node.meta
            # TODO: we are assuming "target_dtype_info" exists here, maybe
            # a default value also need to be provided here
            target_dtype_info = node.meta["target_dtype_info"]
            # for nodes that doesn't have `reuse_input_obs_or_fq` configured,
            # we'll default to False, this makes configuring this field optional for users
            reuse_input_obs_or_fq = target_dtype_info.get("reuse_input_obs_or_fq", False)
        # dtype/dynamism that `node` expects for this input ...
        arg_as_input_act_obs_or_fq = _get_arg_as_input_act_obs_or_fq(arg, node, named_modules, obs_or_fq_map, is_qat)
        arg_as_input_target_dtype, arg_as_input_target_is_dynamic = _get_dtype_and_is_dynamic(arg_as_input_act_obs_or_fq)

        # ... versus the dtype/dynamism that `arg` produces as an output;
        # an observer is needed when these disagree (decided by _needs_obs_or_fq)
        arg_as_output_act_obs_or_fq = _get_output_act_obs_or_fq(arg, named_modules, obs_or_fq_map, is_qat)
        arg_as_output_target_dtype, arg_as_output_target_is_dynamic = _get_dtype_and_is_dynamic(arg_as_output_act_obs_or_fq)


        needs_obs_or_fq = _needs_obs_or_fq(
            arg_as_output_target_dtype,
            arg_as_output_target_is_dynamic,
            arg_as_input_target_dtype,
            arg_as_input_target_is_dynamic,
            reuse_input_obs_or_fq,
            is_zeroth_arg=len(node.args) > 0 and arg is node.args[0],
        )

    else:
        assert qconfig is not None
        # custom flow for standalone modules
        _, _, sm_prepare_custom_config, _ = \
            _get_standalone_module_configs(
                node, named_modules, prepare_custom_config, qconfig, backend_config)
        sm_input_quantized_idxs = sm_prepare_custom_config.input_quantized_indexes

        # for args, this is set to the index of the current arg
        # for kwargs, this is left at None
        cur_input_idx = None
        for arg_idx, arg_to_check in enumerate(node.args):
            if arg_to_check is arg:
                cur_input_idx = arg_idx
                break

        if cur_input_idx is None:
            needs_obs_or_fq = False
        else:
            arg_as_output_target_dtype = _get_arg_target_dtype_as_output(arg, named_modules, obs_or_fq_map, is_qat)
            # only the indexes listed in input_quantized_indexes expect a
            # quantized input; everything else stays fp32
            arg_as_input_target_dtype = torch.quint8 if cur_input_idx in sm_input_quantized_idxs \
                else torch.float
            needs_obs_or_fq = (
                (arg_as_output_target_dtype != arg_as_input_target_dtype) and
                (arg_as_input_target_dtype != torch.float)
            )

        act_post_process_ctr = qconfig.activation
        arg_as_input_act_obs_or_fq = act_post_process_ctr() if act_post_process_ctr else None

    if needs_obs_or_fq:

        existing_obs_node = None

        # Before using the new observer, check if an observer
        # of the correct type already exists. If it does, use it.
        # This prevents duplicate observer insertions if a node is
        # used by multiple nodes.
        # TODO: this is looking into how the value is used in the future
        # we should remove this
        # removing this means we insert one observer for each use, even if they
        # have the same dtype, we can have an extra pass that removes the extra observers
        for maybe_obs_node in arg.users.keys():
            if maybe_obs_node.op == 'call_module':
                maybe_obs_mod = named_modules[maybe_obs_node.target]  # type: ignore[index]
                if (
                    type(maybe_obs_mod) == type(arg_as_input_act_obs_or_fq) and
                    maybe_obs_mod.dtype == arg_as_input_target_dtype  # type: ignore[possibly-undefined]
                ):
                    arg_as_input_act_obs_or_fq = maybe_obs_mod  # type: ignore[assignment]
                    existing_obs_node = maybe_obs_node
                    break

        assert arg_as_input_act_obs_or_fq is not None
        # record the observer for this (producer, consumer) edge
        obs_or_fq_map[(arg, node)] = arg_as_input_act_obs_or_fq
        if existing_obs_node is None:
            new_obs_node = _insert_obs_or_fq(
                arg, arg_as_input_act_obs_or_fq, model, named_modules, graph)
            # override this arg to be the observed arg
            new_arg = new_obs_node
        else:
            new_arg = existing_obs_node

    return new_arg
|
| 855 |
+
|
| 856 |
+
|
| 857 |
+
def _maybe_insert_input_observers_for_node(
    node: Node,
    qconfig: QConfigAny,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
    qhandler: Optional[QuantizeHandler],
    prepare_custom_config: PrepareCustomConfig,
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
    backend_config: Optional[BackendConfig] = None
) -> None:
    """
    If needed, inserts observers to the input args and kwargs of `node`.
    Note: modifies `node` inplace.

    For example, if cur_node needs an observer after prev_node, we change from

      prev_node -> cur_node

    To

      prev_node -> obs -> cur_node

    Note: backend_config only needed for standalone_module node
    """
    # Any input whose produced dtype disagrees with what `node` expects
    # gets an observer inserted in between.
    def _observed(input_arg: Argument) -> Argument:
        return _maybe_insert_input_observer_for_arg_or_kwarg(
            node, input_arg, qconfig, model, named_modules, graph,
            qhandler,
            prepare_custom_config,
            obs_or_fq_map,
            is_qat,
            backend_config)

    # compute everything first: the helper inspects the original node.args,
    # so we only assign back once all inputs have been processed
    observed_args = [_observed(a) for a in node.args]
    observed_kwargs = {key: _observed(val) for key, val in node.kwargs.items()}

    # assign the new args and kwargs to the node, inplace
    node.args = tuple(observed_args)
    node.kwargs = observed_kwargs
|
| 910 |
+
|
| 911 |
+
def _maybe_insert_input_equalization_observers_for_node(
    node: Node,
    equalization_qconfig: Any,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
    is_branch: bool,
) -> None:
    """
    If `node` needs to be equalized, find the input/weight observers it needs in
    `equalization_qconfig`, creates them, and inserts it into `graph`.

    If `node` does not need an equalization observer, returns None.
    """
    # guard: no equalization configured, or op doesn't support it
    if equalization_qconfig is None or not node_supports_equalization(node, named_modules):
        return

    # equalization cannot handle nodes whose input is shared with other ops
    if is_branch:
        warnings.warn(
            f"Cannot equalize {node} because it is part of a branch."
        )
        return

    updated_args = []
    for candidate in node.args:
        # bias args and non-Node args pass through unchanged
        if not isinstance(candidate, Node) or node_arg_is_bias(node, candidate):
            updated_args.append(candidate)
            continue

        # pick the weight- or activation-equalization observer constructor
        if node_arg_is_weight(node, candidate):
            eq_obs_ctr = equalization_qconfig.weight
        else:
            eq_obs_ctr = equalization_qconfig.input_activation

        eq_obs_node = _insert_obs_or_fq(
            candidate, eq_obs_ctr(), model, named_modules, graph)
        updated_args.append(eq_obs_node)

    # assign the new args and kwargs to the node, inplace
    node.args = tuple(updated_args)
|
| 953 |
+
|
| 954 |
+
def _maybe_insert_output_observer_for_node(
    node: Node,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
) -> Optional[Node]:
    """
    If `node` needs an output observer, creates it, inserts it into `graph`
    and returns it.

    If `node` does not need an output observer, returns None.

    Note: inserting dynamic quantization ops for output is not supported in fx graph mode
    quantization code path right now
    """
    assert node.op != 'output', 'observer insertion for outputs is handled elsewhere'

    is_standalone_module = False
    # pt2e path: observer/fq comes from the QuantizationAnnotation's output_qspec
    if "quantization_annotation" in node.meta:
        output_act_obs_or_fq = _create_obs_or_fq_from_qspec(
            node.meta["quantization_annotation"].output_qspec, obs_or_fq_map, is_qat
        )
    else:
        # legacy fx path: observer/fq ctr comes from node.meta["target_dtype_info"]
        assert "target_dtype_info" in node.meta
        is_standalone_module = node.meta["target_dtype_info"].get("_is_standalone_module", False)
        output_act_obs_or_fq_ctr = node.meta["target_dtype_info"].get("output_act_obs_or_fq_ctr")
        output_act_obs_or_fq = output_act_obs_or_fq_ctr() if output_act_obs_or_fq_ctr else None
    target_dtype, target_is_dynamic = _get_dtype_and_is_dynamic(output_act_obs_or_fq)
    # uncomment after we support reuse_input_obs_or_fq properly by having separate
    # implementations for this key instead of reusing the input_output_share_observers
    # code
    # reuse_input_obs_or_fq = node.meta["target_dtype_info"].get("reuse_input_obs_or_fq", False)
    # for now we set this to False since reuse_input_obs_or_fq for
    # the output of a node is implemented in the same code path as observer sharing,
    # we should refactor this part to make it clearer in the future
    # and we would be able to read this from config directly
    reuse_input_obs_or_fq = False

    # Note: prev_output_dtype = torch.float and prev_output_is_dynamic=False
    # because the prev_output is the output of an fp32 op, although technically
    # we should get the dtype of the output from node.meta["val"] in the future
    # if we deprecate fx graph mode quantization
    needs_obs_or_fq = _needs_obs_or_fq(torch.float, False, target_dtype, target_is_dynamic, reuse_input_obs_or_fq)
    # currently the activation in QConfig(activation=...,) is for both input
    # and output, and when the activation is configured to be dynamic quantization
    # e.g. PlaceholderObserver(dtype=torch.quint8, is_dynamic=True, ...), it means
    # the input should be dynamically quantized, but output should not be quantized
    #
    # there is no way we can specify different observer/fq for input and output
    # activation through QConfig today, this limitation is lifted in the
    # quantizer/annotation API in pytorch 2.0 export quantization code path,
    # but since this code is reused, annotating output to be dynamically quantized
    # would not work either for that.
    # we can change QConfig to support input/output activation if we want
    # to remove the following check, or if we can deprecate fx graph mode quantization
    if target_is_dynamic:
        needs_obs_or_fq = False

    # we never insert observers to output of standalone module, we assume
    # if needed, they are inserted inside the standalone module
    needs_obs_or_fq = needs_obs_or_fq and \
        (not is_standalone_module)

    if needs_obs_or_fq:
        # record the observer for this node's output edge before inserting it
        obs_or_fq_map[node] = output_act_obs_or_fq
        return _insert_obs_or_fq(node, output_act_obs_or_fq, model, named_modules, graph)
    else:
        return None
|
| 1024 |
+
|
| 1025 |
+
def _maybe_insert_observers_before_graph_output(
    graph_output_node: Node,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
) -> None:
    """
    If the output needs to be quantized and there are any nodes
    in the output which are not already observed, inserts observers
    for those nodes.

    Mutates `graph_output_node.args` in place; each Node found in the
    (possibly nested) output structure is replaced by its observer node
    when its produced dtype differs from the configured target dtype.
    """

    def _recursive_maybe_replace_node_with_obs(
        maybe_node: Argument,
        model: torch.nn.Module,
        named_modules: Dict[str, torch.nn.Module],
        graph: Graph,
    ) -> Argument:
        """
        Navigate an arbitrary data structure of lists, tuples, dicts.
        For each container type, recurse on all inputs. Once any Node
        is found, insert an observer if needed and do not recurse further.

        For example, given a structure of

          {'foo1': [[bar1]], 'foo2': {'foo3': [[[bar3]]]}}

        we recurse down to bar1 and bar3, observe them if necessary,
        and if we inserted an observer then replace the original node
        with its observer.

        Returns the data structure with all nodes needing observation being
        replaced by their observers.

        Raises:
            TypeError: if an unsupported type (not Node/list/tuple/dict/None)
                appears in the output structure.
        """
        if isinstance(maybe_node, Node):
            # dtype this node currently produces
            arg_as_output_target_dtype = _get_arg_target_dtype_as_output(
                maybe_node, named_modules, obs_or_fq_map, is_qat)
            observer_mod = None
            arg_as_input_target_dtype = torch.float
            if "target_dtype_info" in maybe_node.meta:
                observer_cls = maybe_node.meta["target_dtype_info"].get("input_act_obs_or_fq_ctr", None)
                if observer_cls is not None:
                    observer_mod = observer_cls()
                    arg_as_input_target_dtype = observer_mod.dtype
            # TODO: this does not handle dynamic quantization yet
            need_obs = (
                arg_as_output_target_dtype != arg_as_input_target_dtype and
                arg_as_input_target_dtype != torch.float
            )
            if need_obs:
                assert observer_mod is not None
                # insert observer and return it in place of the node
                return _insert_obs_or_fq(
                    maybe_node, observer_mod, model, named_modules, graph)
            return maybe_node
        elif isinstance(maybe_node, (list, tuple)):
            results = [
                _recursive_maybe_replace_node_with_obs(
                    inner_node, model, named_modules, graph)
                for inner_node in maybe_node
            ]
            # preserve list-ness vs tuple-ness of the original container
            return results if isinstance(maybe_node, list) else tuple(results)
        elif isinstance(maybe_node, dict):
            return {
                k: _recursive_maybe_replace_node_with_obs(
                    inner_v, model, named_modules, graph)
                for k, inner_v in maybe_node.items()
            }
        elif maybe_node is None:
            return None
        else:
            # TypeError (an Exception subclass, so existing `except Exception`
            # handlers still work) with a properly formatted message instead of
            # a tuple payload
            raise TypeError(f"Unhandled type for returned node: {maybe_node}")

    new_args = [
        _recursive_maybe_replace_node_with_obs(
            old_arg, model, named_modules, graph)
        for old_arg in graph_output_node.args
    ]

    graph_output_node.args = tuple(new_args)  # type: ignore[assignment]
|
| 1111 |
+
|
| 1112 |
+
|
| 1113 |
+
def _maybe_propagate_dtype_for_node(
    node: Node,
    target_dtype: Union[torch.dtype, type],
    node_name_to_match_result_with_qconfig: Dict[str, _MatchResultWithQConfig],
) -> None:
    """
    Assigns `target_dtype` to `node`, setting `is_dynamic` to False. If `node`
    is a general tensor shape op, also call this function recursively on
    the first argument, to propagate the dtype to the caller.
    """
    dtype_info = node.meta["target_dtype_info"]
    dtype_info["input_act_obs_or_fq_ctr"] = None
    dtype_info["output_act_obs_or_fq_ctr"] = None
    # copy nodes forward the dtype to their first input
    match_result = node_name_to_match_result_with_qconfig.get(
        node.name, (None, None, None, None, None))
    qhandler = match_result[3]
    # TODO: probably need to remove `is_general_tensor_value_op`
    if qhandler is not None and qhandler.is_general_tensor_value_op():
        source_node = node.args[0]
        if isinstance(source_node, Node):
            _maybe_propagate_dtype_for_node(
                source_node, target_dtype, node_name_to_match_result_with_qconfig)
|
| 1134 |
+
|
| 1135 |
+
def propagate_dtypes_for_known_nodes(
    graph: Graph,
    node_name_to_match_result_with_qconfig: Dict[str, _MatchResultWithQConfig],
) -> None:
    """
    Currently we assume that inputs to the graph are either `torch.float` or
    `torch.quint8`, which is not always correct. For ops such as
    `x.masked_fill(mask, value)`, we know that the dtype of `mask` is a
    `BoolTensor`. Propagate this information throughout the graph.

    Note: not all dtypes in the graph will be correct after this pass, but a
    higher percentage of them will be correct. Hopefully in the future we can
    replace this with a better way to reason about dtypes of tensors.
    """
    for node in graph.nodes:
        non_observable_arg_dict = get_non_observable_arg_indexes_and_types(node)

        for arg_type, index_getter in non_observable_arg_dict.items():
            for index in index_getter(node):
                arg = node.args[index]

                # a tuple/list argument does not show up as a separate node,
                # so its elements have to be visited manually
                if isinstance(arg, (tuple, list)):
                    args_to_visit = list(arg)
                else:
                    args_to_visit = [arg]

                for cur_arg in args_to_visit:
                    # hard coded arguments show up but aren't `Node` typed and do not need dtype propagated
                    if isinstance(cur_arg, torch.fx.node.Node):
                        _maybe_propagate_dtype_for_node(
                            cur_arg, arg_type, node_name_to_match_result_with_qconfig)
|
| 1170 |
+
|
| 1171 |
+
def _maybe_make_input_output_share_observers(
    node: Node,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
) -> bool:
    """
    Ensures that we share an observer
    for all input arguments as well as the output argument. In detail, given
    a graph of

      x0 -> obs0 -> op -> x2
                    /
      x1 -> obs1 /

    where node obs0 points to observer instance observer0,
    obs1 points to observer1 and obs2 points to observer2, we make nodes obs1
    and ob2 point to observer0.
    Returns: whether the operation succeeded or not
    """
    first_arg = None
    # find the first arg that can carry an observer: a Node, or a
    # list/tuple of Nodes (e.g. the input list of torch.cat)
    for i in range(len(node.args)):
        if isinstance(node.args[i], (Node, list, tuple)):
            first_arg = node.args[i]
            break

    # if there is no such arg, there is nothing to share; return directly
    if first_arg is None:
        return False

    if isinstance(first_arg, (list, tuple)):
        first_arg_arg = first_arg[0]
    elif isinstance(first_arg, Node):
        first_arg_arg = first_arg
    else:
        return False

    # if we have a graph such as
    # observed_node -> non_observed_node -> cat
    # we need to navigate up to the first observer
    iteration_guard = 0
    while not _is_activation_post_process_node(first_arg_arg, named_modules):
        if not isinstance(first_arg_arg, Node):
            return False
        # did not find an activation_post_process for the op
        if first_arg_arg.op == "placeholder":
            return False
        # trace back the args until we found the first Tensor/Node
        trace_back_node = None
        for i in range(len(first_arg_arg.args)):
            trace_back_node = first_arg_arg.args[i]
            if isinstance(trace_back_node, Node):
                break
        if trace_back_node is None:
            return False
        first_arg_arg = trace_back_node

        # guard against cycles / pathological graphs
        iteration_guard += 1
        if iteration_guard > 10000:
            raise AssertionError('Unable to find observer of previous node')

    assert isinstance(first_arg_arg, Node)
    target_to_use = first_arg_arg.target
    assert isinstance(target_to_use, str)
    # this is the observer module every other input/output observer node
    # will be re-pointed to
    obs_mod_to_use = named_modules[target_to_use]

    if isinstance(first_arg, (list, tuple)):
        # set all other input observer nodes to use that module
        for input_idx, input_arg in enumerate(first_arg):
            if input_idx == 0:
                continue
            iteration_guard = 0
            while not _is_activation_post_process_node(input_arg, named_modules):
                # failed to trace back since no input arg for the current node
                if len(input_arg.args) < 1:
                    return False
                input_arg = input_arg.args[0]
                iteration_guard += 1
                if iteration_guard > 10000:
                    raise AssertionError('Unable to find observer of previous node')

            parent_name, name = _parent_name(input_arg.target)
            setattr(named_modules[parent_name], name, obs_mod_to_use)

    # set the output observer node to use that module
    for output_obs_node in node.users.keys():
        assert _is_activation_post_process_node(output_obs_node, named_modules)
        parent_name, name = _parent_name(output_obs_node.target)
        setattr(named_modules[parent_name], name, obs_mod_to_use)

    # TODO(future PR): delete the orphaned observer modules
    return True
|
| 1263 |
+
|
| 1264 |
+
def _remove_output_observer(
        node: Node,
        model: torch.nn.Module,
        named_modules: Dict[str, torch.nn.Module]):
    """Erase the observer node(s) attached to `node`'s output, rewiring
    each observer's consumers back to `node` itself."""
    # snapshot the users first: erasing a node mutates `node.users`
    observer_nodes = list(node.users.keys())
    for obs_node in observer_nodes:
        assert _is_activation_post_process_node(obs_node, named_modules)
        obs_node.replace_all_uses_with(node)
        model.graph.erase_node(obs_node)  # type: ignore[union-attr, operator]
|
| 1273 |
+
|
| 1274 |
+
def _swap_custom_module_to_observed(
        node: Node,
        qconfig: QConfigAny,
        named_modules: Dict[str, torch.nn.Module],
        prepare_custom_config: PrepareCustomConfig):
    """Swap the float custom module referenced by `node` for its observed
    counterpart, as configured in `prepare_custom_config`."""
    float_custom_module = named_modules[node.target]  # type: ignore[index]
    mapping = prepare_custom_config.float_to_observed_mapping
    observed_cls = get_swapped_custom_module_class(
        float_custom_module, mapping, qconfig)
    observed_module = observed_cls.from_float(float_custom_module)
    # replace the module on its parent, in place
    parent_name, attr_name = _parent_name(node.target)
    setattr(named_modules[parent_name], attr_name, observed_module)
|
| 1288 |
+
|
| 1289 |
+
def insert_observers_for_model(
|
| 1290 |
+
model: GraphModule,
|
| 1291 |
+
node_name_to_match_result_with_qconfig: Dict[str, _MatchResultWithQConfig],
|
| 1292 |
+
node_name_to_qconfig: Dict[str, QConfigAny],
|
| 1293 |
+
prepare_custom_config: PrepareCustomConfig,
|
| 1294 |
+
equalization_config_map: Dict[str, Any],
|
| 1295 |
+
backend_config: BackendConfig,
|
| 1296 |
+
observed_node_names: Set[str],
|
| 1297 |
+
is_qat: bool,
|
| 1298 |
+
) -> Optional[Node]:
|
| 1299 |
+
"""
|
| 1300 |
+
Inserts observers, using the following high level algorithm:
|
| 1301 |
+
|
| 1302 |
+
For each node in the graph:
|
| 1303 |
+
1. determine the target dtype of this node in the quantized graph, and save
|
| 1304 |
+
it for future steps
|
| 1305 |
+
2. determine the target dtype or all args and kwargs of this node
|
| 1306 |
+
3. if any arg or kwarg's target dtype does not match the current node's
|
| 1307 |
+
dtype, insert an observer
|
| 1308 |
+
4. if the current node needs an output observer, insert it
|
| 1309 |
+
|
| 1310 |
+
For example:
|
| 1311 |
+
|
| 1312 |
+
- starting graph:
|
| 1313 |
+
x0 -> linear -> x1
|
| 1314 |
+
|
| 1315 |
+
- observed graph after processing x0:
|
| 1316 |
+
x0(fp32)
|
| 1317 |
+
|
| 1318 |
+
- observed graph after processing linear:
|
| 1319 |
+
x0(fp32) -> x0_obs0(int8) -> linear(int8) -> linear_obs0(int8)
|
| 1320 |
+
|
| 1321 |
+
- observed graph after processing x1:
|
| 1322 |
+
x0(fp32) -> x0_obs0(int8) -> linear(int8) -> linear_obs0(int8) -> x1
|
| 1323 |
+
|
| 1324 |
+
After a node is processed, the naive observer placement is guaranteed to be
|
| 1325 |
+
complete for that node and all of its predecessors. There can be future
|
| 1326 |
+
passes which optimize the graph by deduplicating observers, etc.
|
| 1327 |
+
"""
|
| 1328 |
+
|
| 1329 |
+
# node.meta["target_dtype_info"] stores the target dtype information
|
| 1330 |
+
# that's derived from qconfig for the Node, for example, if we have
|
| 1331 |
+
# a conv2d node that has a qconfig
|
| 1332 |
+
# qconfig = QConfig(activation=..., weight=...)
|
| 1333 |
+
# # information for input and bias node omitted
|
| 1334 |
+
# # for getattr node
|
| 1335 |
+
# # weight = getattr(self, 'weight')
|
| 1336 |
+
# weight.meta["target_dtype_info"] = {
|
| 1337 |
+
# 'output_act_obs_or_fq_ctr': qconfig.weight,
|
| 1338 |
+
# }
|
| 1339 |
+
# # for conv2d node
|
| 1340 |
+
# # conv2d = call_function[target=torch.nn.functional.conv2d](
|
| 1341 |
+
# # args=(input, weight, bias))
|
| 1342 |
+
# conv2d.meta["target_dtype_info"] = {
|
| 1343 |
+
# 'input_act_obs_or_fq_ctr': qconfig.activation
|
| 1344 |
+
# 'weight_obs_or_fq_ctr': qconfig.weight,
|
| 1345 |
+
# 'bias_obs_or_fq_ctr': PlaceholderObserver.with_args(dtype=torch.float32),
|
| 1346 |
+
# 'output_act_obs_or_fq_ctr': qconfig.activation,
|
| 1347 |
+
# }
|
| 1348 |
+
#
|
| 1349 |
+
cache_for_no_tensor_check: Dict[Node, bool] = {}
|
| 1350 |
+
|
| 1351 |
+
# first, populate the dtype map based only on qconfig and qhandler
|
| 1352 |
+
# this assumes:
|
| 1353 |
+
# graph inputs are fp32 by default, and int8 where overriden
|
| 1354 |
+
# other nodes output dtype is specified by the qconfig
|
| 1355 |
+
named_modules = dict(model.named_modules(remove_duplicate=False))
|
| 1356 |
+
|
| 1357 |
+
input_quantized_idxs: List[int] = prepare_custom_config.input_quantized_indexes
|
| 1358 |
+
output_quantized_idxs: List[int] = prepare_custom_config.output_quantized_indexes
|
| 1359 |
+
processed_nodes: Set[Node] = set()
|
| 1360 |
+
# initialize target_dtype_info
|
| 1361 |
+
for node in model.graph.nodes:
|
| 1362 |
+
node.meta["target_dtype_info"] = copy.copy(_DEFAULT_FP32_QCONFIG_FOR_TARGET_DTYPE_INFO)
|
| 1363 |
+
|
| 1364 |
+
inputs_seen_counter = 0
|
| 1365 |
+
outputs_seen_counter = 0
|
| 1366 |
+
placeholder_node_to_input_index: Dict[Node, int] = {}
|
| 1367 |
+
# TODO: we probably don't need this counter since each graph will only have
|
| 1368 |
+
# one output node?
|
| 1369 |
+
output_node_to_output_index: Dict[Node, int] = {}
|
| 1370 |
+
for node in model.graph.nodes:
|
| 1371 |
+
if node.op == "placeholder":
|
| 1372 |
+
placeholder_node_to_input_index[node] = inputs_seen_counter
|
| 1373 |
+
inputs_seen_counter += 1
|
| 1374 |
+
if node.op == "output":
|
| 1375 |
+
output_node_to_output_index[node] = outputs_seen_counter
|
| 1376 |
+
outputs_seen_counter += 1
|
| 1377 |
+
|
| 1378 |
+
# Step 1, set the observer or fake quantize module constructor for each node in the
|
| 1379 |
+
# matched_node_pattern
|
| 1380 |
+
|
| 1381 |
+
for match_res_with_qconfig in node_name_to_match_result_with_qconfig.values():
|
| 1382 |
+
last_node, matched_node_pattern, pattern, qhandler, qconfig = match_res_with_qconfig
|
| 1383 |
+
assert qhandler is not None
|
| 1384 |
+
_set_target_dtype_info_for_matched_node_pattern(
|
| 1385 |
+
matched_node_pattern,
|
| 1386 |
+
last_node,
|
| 1387 |
+
qconfig,
|
| 1388 |
+
qhandler,
|
| 1389 |
+
backend_config,
|
| 1390 |
+
named_modules,
|
| 1391 |
+
cache_for_no_tensor_check,
|
| 1392 |
+
processed_nodes
|
| 1393 |
+
)
|
| 1394 |
+
|
| 1395 |
+
# Step 2. Special cases for some operators, we might be able to remove them
|
| 1396 |
+
# in the future if we know dtype information of each node better
|
| 1397 |
+
|
| 1398 |
+
# Step 2.1. some settings are not based on patterns, we need to process each node
|
| 1399 |
+
# instead
|
| 1400 |
+
for node in model.graph.nodes:
|
| 1401 |
+
if node.op == "placeholder" and placeholder_node_to_input_index[node] in input_quantized_idxs:
|
| 1402 |
+
# users are not supposed to call calculate_qparams on PlaceholderObserver, and
|
| 1403 |
+
# this is OK because we are using this as a way to encode the dtypes of input
|
| 1404 |
+
# tensor, we won't actually insert these observers in the graph and won't
|
| 1405 |
+
# actually call calculate_qparams
|
| 1406 |
+
node.meta["target_dtype_info"] = copy.copy(_DEFAULT_QUINT8_QCONFIG_FOR_TARGET_DTYPE_INFO)
|
| 1407 |
+
elif node.op in ("call_module", "call_method", "call_function"):
|
| 1408 |
+
args_have_no_tensors = \
|
| 1409 |
+
all_node_args_have_no_tensors(
|
| 1410 |
+
node, named_modules, cache_for_no_tensor_check)
|
| 1411 |
+
if args_have_no_tensors:
|
| 1412 |
+
node.meta["target_dtype_info"] = {
|
| 1413 |
+
"input_act_obs_or_fq_ctr": None,
|
| 1414 |
+
"output_act_obs_or_fq_ctr": None,
|
| 1415 |
+
}
|
| 1416 |
+
elif node.op == "output" and output_node_to_output_index[node] in output_quantized_idxs:
|
| 1417 |
+
# TODO(future PR): update the output_quantized_idxs API to match
|
| 1418 |
+
# arbitrary data structures. There is always a single output, and
|
| 1419 |
+
# that output can have arbitrary nesting of values. List[int] is
|
| 1420 |
+
# not the right data type for this.
|
| 1421 |
+
|
| 1422 |
+
# TODO(future PR): support more dtypes in model outputs, if necessary
|
| 1423 |
+
node.meta["target_dtype_info"] = copy.copy(_DEFAULT_QUINT8_QCONFIG_FOR_TARGET_DTYPE_INFO)
|
| 1424 |
+
|
| 1425 |
+
# Step 2.2, for nodes with known input dtypes, propagate them throughout the
|
| 1426 |
+
# graph. For example, if there is a call such as
|
| 1427 |
+
# x1 = x0.masked_fill(mask, 1)
|
| 1428 |
+
# we propagate the type of mask to be torch.bool
|
| 1429 |
+
propagate_dtypes_for_known_nodes(model.graph, node_name_to_match_result_with_qconfig)
|
| 1430 |
+
|
| 1431 |
+
# Step 3, check if the requested target_dtype_info is supported by backend or not
|
| 1432 |
+
# if not, we'll reset the target_dtye_info to use the default (float Tensor)
|
| 1433 |
+
|
| 1434 |
+
# reset the counters and set of processed_nodes
|
| 1435 |
+
processed_nodes: Set[Node] = set()
|
| 1436 |
+
for match_res_with_qconfig in node_name_to_match_result_with_qconfig.values():
|
| 1437 |
+
last_node, matched_node_pattern, pattern, qhandler, qconfig = match_res_with_qconfig
|
| 1438 |
+
is_supported_by_backend = _is_pattern_dtype_config_and_qconfig_supported_by_backend(
|
| 1439 |
+
pattern, matched_node_pattern, qconfig, backend_config)
|
| 1440 |
+
assert qhandler is not None
|
| 1441 |
+
|
| 1442 |
+
# get output_act_dtype so that we don't also reset the special typed nodes
|
| 1443 |
+
# TODO: we might want to handle these more uniformly with the default path
|
| 1444 |
+
# this can be improved if we can use node.meta["val"]
|
| 1445 |
+
output_act_or_fq_ctr = node.meta["target_dtype_info"]["output_act_obs_or_fq_ctr"]
|
| 1446 |
+
output_act_or_fq = output_act_or_fq_ctr() if output_act_or_fq_ctr else None
|
| 1447 |
+
output_act_dtype, _ = _get_dtype_and_is_dynamic(output_act_or_fq)
|
| 1448 |
+
if not is_supported_by_backend and output_act_dtype not in [None, int, float, torch.bool]:
|
| 1449 |
+
# restore target_dtype_info to default if it is not supported by backend
|
| 1450 |
+
_set_target_dtype_info_for_matched_node_pattern(
|
| 1451 |
+
matched_node_pattern,
|
| 1452 |
+
last_node,
|
| 1453 |
+
torch.ao.quantization.qconfig._default_fp32_placeholder_qconfig,
|
| 1454 |
+
None,
|
| 1455 |
+
backend_config,
|
| 1456 |
+
named_modules,
|
| 1457 |
+
cache_for_no_tensor_check,
|
| 1458 |
+
processed_nodes
|
| 1459 |
+
)
|
| 1460 |
+
|
| 1461 |
+
# After this point, the current node and all of its arguments
|
| 1462 |
+
# have a target_dtype_info assigned. Now, we insert observers for inputs
|
| 1463 |
+
# of this node (if needed for this node), and the output of this node
|
| 1464 |
+
# (if needed for this node).
|
| 1465 |
+
|
| 1466 |
+
# Since we are mutating the graph as we go, we iterate over the original
|
| 1467 |
+
# nodes before observer insertion, instead of model.graph.nodes.
|
| 1468 |
+
nodes_before_observation = list(model.graph.nodes)
|
| 1469 |
+
|
| 1470 |
+
# Avoid duplicates custom module swaps for multiple nodes with same target.
|
| 1471 |
+
custom_module_names_already_swapped: Set[str] = set()
|
| 1472 |
+
|
| 1473 |
+
# TODO: reuse placeholder_node_to_input_index and output_node_to_output_index
|
| 1474 |
+
# reset inputs/outputs counters
|
| 1475 |
+
inputs_seen_counter = 0
|
| 1476 |
+
outputs_seen_counter = 0
|
| 1477 |
+
results_node = None
|
| 1478 |
+
obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize] = {}
|
| 1479 |
+
|
| 1480 |
+
# TODO: change this to insert obs/fq by pattern instead of by node
|
| 1481 |
+
for node in nodes_before_observation:
|
| 1482 |
+
|
| 1483 |
+
if node.op == 'placeholder':
|
| 1484 |
+
# if a graph input is in fp32, it does not need observation
|
| 1485 |
+
# if a graph input is in int8, we assume the observation happens
|
| 1486 |
+
# outside of the graph, and no additional observation is needed
|
| 1487 |
+
pass
|
| 1488 |
+
|
| 1489 |
+
elif node.op in ('call_module', 'call_method', 'call_function', 'output'):
|
| 1490 |
+
# check for matches
|
| 1491 |
+
last_node, matched_node_pattern, pattern, qhandler, qconfig = (
|
| 1492 |
+
node_name_to_match_result_with_qconfig.get(node.name, (None, None, None, None, None)) # type: ignore[assignment]
|
| 1493 |
+
)
|
| 1494 |
+
equalization_qconfig = equalization_config_map.get(node.name, None)
|
| 1495 |
+
|
| 1496 |
+
this_node_dtype_info = node.meta["target_dtype_info"]
|
| 1497 |
+
if "val" in node.meta:
|
| 1498 |
+
output_is_a_tensor = (
|
| 1499 |
+
this_node_dtype_info is not None and
|
| 1500 |
+
isinstance(node.meta["val"], FakeTensor)
|
| 1501 |
+
)
|
| 1502 |
+
else:
|
| 1503 |
+
output_is_a_tensor = this_node_dtype_info is not None
|
| 1504 |
+
|
| 1505 |
+
skip_inserting_observers = (
|
| 1506 |
+
(qconfig is None) or
|
| 1507 |
+
not output_is_a_tensor
|
| 1508 |
+
) and (
|
| 1509 |
+
not node.op == 'output'
|
| 1510 |
+
)
|
| 1511 |
+
|
| 1512 |
+
# TODO: take a closer look to see if we can remove this check
|
| 1513 |
+
# right now it is here because of `observed_node_names`, we are using
|
| 1514 |
+
# it as an indicator for swapping the modules to reference modules in
|
| 1515 |
+
# convert
|
| 1516 |
+
is_supported_by_backend = _is_pattern_dtype_config_and_qconfig_supported_by_backend(
|
| 1517 |
+
pattern, matched_node_pattern, qconfig, backend_config)
|
| 1518 |
+
|
| 1519 |
+
if not skip_inserting_observers and is_supported_by_backend:
|
| 1520 |
+
named_modules = dict(model.named_modules(remove_duplicate=False))
|
| 1521 |
+
if node.op != 'output':
|
| 1522 |
+
assert matched_node_pattern is not None
|
| 1523 |
+
# add matched nodes to the observed node name set
|
| 1524 |
+
_add_matched_node_name_to_set(matched_node_pattern, observed_node_names)
|
| 1525 |
+
|
| 1526 |
+
# This is currently only used for equalization.
|
| 1527 |
+
# Checks if the current node is in a branch in which the two
|
| 1528 |
+
# first layers are both being quantized.
|
| 1529 |
+
#
|
| 1530 |
+
# ex. conv2
|
| 1531 |
+
# /
|
| 1532 |
+
# x -> conv1
|
| 1533 |
+
#
|
| 1534 |
+
# If this is the case, we will not apply equalization to the
|
| 1535 |
+
# initial two layers.
|
| 1536 |
+
is_quantized_branch = False
|
| 1537 |
+
if (
|
| 1538 |
+
len(node.args) > 0 and
|
| 1539 |
+
isinstance(node.args[0], Node) and
|
| 1540 |
+
len(node.args[0].users) > 1
|
| 1541 |
+
):
|
| 1542 |
+
for user in node.args[0].users:
|
| 1543 |
+
# Checks if there exists another user being quantized
|
| 1544 |
+
is_user_quantized = (
|
| 1545 |
+
node_name_to_qconfig.get(user.name, None) is not None or
|
| 1546 |
+
(user.op == 'call_module' and isinstance(named_modules[str(user.target)], ObserverBase))
|
| 1547 |
+
)
|
| 1548 |
+
if user != node and is_user_quantized:
|
| 1549 |
+
is_quantized_branch = True
|
| 1550 |
+
|
| 1551 |
+
pattern_to_root_node_getter = get_fusion_pattern_to_root_node_getter(backend_config)
|
| 1552 |
+
root_node_getter = pattern_to_root_node_getter.get(pattern, _default_root_node_getter)
|
| 1553 |
+
root_node = root_node_getter(matched_node_pattern)
|
| 1554 |
+
is_input_node_of_the_pattern = node is root_node
|
| 1555 |
+
if is_input_node_of_the_pattern:
|
| 1556 |
+
# this modifies node inplace
|
| 1557 |
+
_maybe_insert_input_observers_for_node(
|
| 1558 |
+
node, qconfig, model, named_modules, model.graph,
|
| 1559 |
+
qhandler,
|
| 1560 |
+
prepare_custom_config,
|
| 1561 |
+
obs_or_fq_map,
|
| 1562 |
+
is_qat,
|
| 1563 |
+
backend_config)
|
| 1564 |
+
|
| 1565 |
+
# insert equalization input observers if needed
|
| 1566 |
+
_maybe_insert_input_equalization_observers_for_node(
|
| 1567 |
+
node, equalization_qconfig, model, named_modules, model.graph,
|
| 1568 |
+
is_quantized_branch)
|
| 1569 |
+
|
| 1570 |
+
is_last_node_of_pattern = node is last_node
|
| 1571 |
+
input_output_share_observers = node.meta["target_dtype_info"].get("input_output_share_observers", False)
|
| 1572 |
+
reuse_input_obs_or_fq = node.meta["target_dtype_info"].get("reuse_input_obs_or_fq", False)
|
| 1573 |
+
|
| 1574 |
+
if is_last_node_of_pattern:
|
| 1575 |
+
if _is_custom_module_lstm(node, named_modules, qconfig, qhandler):
|
| 1576 |
+
# Currently custom module outputs are assumed to be already quantized,
|
| 1577 |
+
# so we need to insert a DeQuantStub after the output. For custom module
|
| 1578 |
+
# LSTM specifically, the outputs are also a nested tuple, so we must first
|
| 1579 |
+
# break down the tuple to insert DeQuantStubs after the internal nodes.
|
| 1580 |
+
|
| 1581 |
+
# TODO: This currently diverges from how custom modules are handled today,
|
| 1582 |
+
# where we insert observers after the output instead of DeQuantStubs, and
|
| 1583 |
+
# replace these observers with "dequantize" nodes during convert. Conceptually,
|
| 1584 |
+
# these output observers are the same as DeQuantStubs. In the future, we
|
| 1585 |
+
# should resolve this inconsistency by inserting DeQuantStubs for all custom
|
| 1586 |
+
# modules, not just for LSTM.
|
| 1587 |
+
_insert_dequant_stubs_for_custom_module_lstm_output(node, model, named_modules, model.graph)
|
| 1588 |
+
if node.target not in custom_module_names_already_swapped:
|
| 1589 |
+
custom_module_names_already_swapped.add(node.target)
|
| 1590 |
+
_swap_custom_module_to_observed(node, qconfig, named_modules, prepare_custom_config)
|
| 1591 |
+
else:
|
| 1592 |
+
# this returns the new observer node if it was needed
|
| 1593 |
+
maybe_output_obs_node = _maybe_insert_output_observer_for_node(
|
| 1594 |
+
node, model, named_modules, model.graph, obs_or_fq_map, is_qat)
|
| 1595 |
+
|
| 1596 |
+
if maybe_output_obs_node is not None:
|
| 1597 |
+
# Update users of original node to use the output observer
|
| 1598 |
+
# instead. For example, change
|
| 1599 |
+
#
|
| 1600 |
+
# next_node
|
| 1601 |
+
# /
|
| 1602 |
+
# cur_node -> obs
|
| 1603 |
+
#
|
| 1604 |
+
# to
|
| 1605 |
+
#
|
| 1606 |
+
# next_node
|
| 1607 |
+
# /
|
| 1608 |
+
# cur_node -> obs
|
| 1609 |
+
#
|
| 1610 |
+
# We need to save orig users before updating uses because
|
| 1611 |
+
# the list of users will change as we update uses
|
| 1612 |
+
orig_users = list(node.users.keys())
|
| 1613 |
+
for user_node in orig_users:
|
| 1614 |
+
if user_node is maybe_output_obs_node:
|
| 1615 |
+
continue
|
| 1616 |
+
user_node.replace_input_with(node, maybe_output_obs_node)
|
| 1617 |
+
|
| 1618 |
+
_is_observer_in_same_graph_ = _is_observer_in_same_graph(
|
| 1619 |
+
node, named_modules, obs_or_fq_map, is_qat)
|
| 1620 |
+
|
| 1621 |
+
# for ops whose inputs and outputs share observer/fqs, we modify the graph
|
| 1622 |
+
# to make all inputs and outputs use the first input's
|
| 1623 |
+
# observer/fq
|
| 1624 |
+
if (input_output_share_observers and _is_observer_in_same_graph_) or \
|
| 1625 |
+
reuse_input_obs_or_fq:
|
| 1626 |
+
if not _maybe_make_input_output_share_observers(node, model, named_modules):
|
| 1627 |
+
_remove_output_observer(node, model, named_modules)
|
| 1628 |
+
|
| 1629 |
+
if qhandler is not None and qhandler.is_custom_module():
|
| 1630 |
+
if node.target not in custom_module_names_already_swapped:
|
| 1631 |
+
custom_module_names_already_swapped.add(node.target)
|
| 1632 |
+
_swap_custom_module_to_observed(node, qconfig, named_modules, prepare_custom_config)
|
| 1633 |
+
|
| 1634 |
+
else: # output
|
| 1635 |
+
_maybe_insert_observers_before_graph_output(node, model, named_modules, model.graph, obs_or_fq_map, is_qat)
|
| 1636 |
+
|
| 1637 |
+
#
|
| 1638 |
+
# After this point, the current node has input and output observers
|
| 1639 |
+
# that it needs for itself inserted.
|
| 1640 |
+
#
|
| 1641 |
+
|
| 1642 |
+
# increment the counters, so future inputs and outputs are assigned
|
| 1643 |
+
# correct dtypes
|
| 1644 |
+
if node.op == 'placeholder':
|
| 1645 |
+
inputs_seen_counter += 1
|
| 1646 |
+
elif node.op == 'output':
|
| 1647 |
+
outputs_seen_counter += 1
|
| 1648 |
+
results_node = node
|
| 1649 |
+
|
| 1650 |
+
return results_node
|
| 1651 |
+
|
| 1652 |
+
def _run_prepare_fx_on_standalone_modules(
|
| 1653 |
+
model: torch.nn.Module,
|
| 1654 |
+
is_qat: bool,
|
| 1655 |
+
named_modules: Dict[str, torch.nn.Module],
|
| 1656 |
+
node_name_to_match_result_with_qconfig: Any,
|
| 1657 |
+
prepare_custom_config: PrepareCustomConfig,
|
| 1658 |
+
backend_config: BackendConfig,
|
| 1659 |
+
) -> None:
|
| 1660 |
+
"""
|
| 1661 |
+
Runs prepare_fx on each standalone module. Note: this does
|
| 1662 |
+
not modify the graph, it just replaces the unobserved modules with
|
| 1663 |
+
their observed versions.
|
| 1664 |
+
"""
|
| 1665 |
+
for (root_node, _, pattern, qhandler, qconfig) in node_name_to_match_result_with_qconfig.values():
|
| 1666 |
+
if qhandler is None:
|
| 1667 |
+
continue
|
| 1668 |
+
elif not qhandler.is_standalone_module():
|
| 1669 |
+
continue
|
| 1670 |
+
|
| 1671 |
+
sm_qconfig_mapping, sm_example_inputs, sm_prepare_custom_config, \
|
| 1672 |
+
sm_backend_config = _get_standalone_module_configs(
|
| 1673 |
+
root_node, named_modules, prepare_custom_config, qconfig, backend_config)
|
| 1674 |
+
|
| 1675 |
+
standalone_module = named_modules[root_node.target]
|
| 1676 |
+
prepare = \
|
| 1677 |
+
torch.ao.quantization.quantize_fx._prepare_standalone_module_fx # type: ignore[attr-defined]
|
| 1678 |
+
observed_standalone_module = \
|
| 1679 |
+
prepare(
|
| 1680 |
+
standalone_module,
|
| 1681 |
+
sm_qconfig_mapping,
|
| 1682 |
+
is_qat,
|
| 1683 |
+
example_inputs=sm_example_inputs,
|
| 1684 |
+
prepare_custom_config=sm_prepare_custom_config,
|
| 1685 |
+
backend_config=sm_backend_config)
|
| 1686 |
+
parent_name, name = _parent_name(root_node.target)
|
| 1687 |
+
setattr(named_modules[parent_name], name, observed_standalone_module)
|
| 1688 |
+
named_modules[root_node.target] = observed_standalone_module
|
| 1689 |
+
|
| 1690 |
+
def _save_state(
|
| 1691 |
+
observed: GraphModule,
|
| 1692 |
+
node_name_to_qconfig: Dict[str, QConfigAny],
|
| 1693 |
+
node_name_to_scope: Dict[str, Tuple[str, type]],
|
| 1694 |
+
prepare_custom_config: PrepareCustomConfig,
|
| 1695 |
+
equalization_node_name_to_qconfig: Dict[str, Any],
|
| 1696 |
+
qconfig_mapping: QConfigMapping,
|
| 1697 |
+
is_qat: bool,
|
| 1698 |
+
observed_node_names: Set[str],
|
| 1699 |
+
) -> None:
|
| 1700 |
+
observed.meta["_observed_graph_module_attrs"] = (
|
| 1701 |
+
ObservedGraphModuleAttrs(
|
| 1702 |
+
node_name_to_qconfig=node_name_to_qconfig,
|
| 1703 |
+
node_name_to_scope=node_name_to_scope,
|
| 1704 |
+
prepare_custom_config=prepare_custom_config,
|
| 1705 |
+
equalization_node_name_to_qconfig=equalization_node_name_to_qconfig,
|
| 1706 |
+
qconfig_mapping=qconfig_mapping,
|
| 1707 |
+
is_qat=is_qat,
|
| 1708 |
+
observed_node_names=observed_node_names,
|
| 1709 |
+
)
|
| 1710 |
+
)
|
| 1711 |
+
|
| 1712 |
+
def prepare(
|
| 1713 |
+
model: GraphModule,
|
| 1714 |
+
qconfig_mapping: Union[QConfigMapping, Dict[str, Any]],
|
| 1715 |
+
is_qat: bool,
|
| 1716 |
+
node_name_to_scope: Dict[str, Tuple[str, type]],
|
| 1717 |
+
example_inputs: Tuple[Any, ...],
|
| 1718 |
+
prepare_custom_config: Union[PrepareCustomConfig, Dict[str, Any], None] = None,
|
| 1719 |
+
_equalization_config: Union[QConfigMapping, Dict[str, Any], None] = None,
|
| 1720 |
+
backend_config: Union[BackendConfig, Dict[str, Any], None] = None,
|
| 1721 |
+
is_standalone_module: bool = False) -> GraphModule:
|
| 1722 |
+
""" standalone_module means it a submodule that is not inlined in
|
| 1723 |
+
parent module, and will be quantized separately as one unit.
|
| 1724 |
+
|
| 1725 |
+
How the standalone module is observed is specified by `input_quantized_idxs` and
|
| 1726 |
+
`output_quantized_idxs` in the prepare_custom_config for the standalone module
|
| 1727 |
+
Args:
|
| 1728 |
+
node_name_to_scope: mapping from node name to the scope of the module which contains the node.
|
| 1729 |
+
The scope is a tuple of fully qualified path of the module and the type of the module
|
| 1730 |
+
Returns:
|
| 1731 |
+
model(GraphModule): prepared standalone module
|
| 1732 |
+
attributes related to standalone module
|
| 1733 |
+
in model.meta["_observed_graph_module_attrs"]:
|
| 1734 |
+
is_observed_standalone_module (bool): boolean value that shows whether the
|
| 1735 |
+
current model is a observed standalone module or not
|
| 1736 |
+
standalone_module_input_quantized_idxs(List[Int]): a list of
|
| 1737 |
+
indexes for the graph input that is expected to be quantized,
|
| 1738 |
+
same as input_quantized_idxs configuration provided
|
| 1739 |
+
for the standalone module
|
| 1740 |
+
standalone_module_output_quantized_idxs(List[Int]): a list of
|
| 1741 |
+
indexs for the graph output that is quantized
|
| 1742 |
+
same as input_quantized_idxs configuration provided
|
| 1743 |
+
for the standalone module
|
| 1744 |
+
"""
|
| 1745 |
+
if prepare_custom_config is None:
|
| 1746 |
+
prepare_custom_config = PrepareCustomConfig()
|
| 1747 |
+
if _equalization_config is None:
|
| 1748 |
+
_equalization_config = QConfigMapping()
|
| 1749 |
+
|
| 1750 |
+
if isinstance(qconfig_mapping, Dict):
|
| 1751 |
+
warnings.warn(
|
| 1752 |
+
"Passing a QConfig dictionary to prepare is deprecated and will not be supported "
|
| 1753 |
+
"in a future version. Please pass in a QConfigMapping instead.")
|
| 1754 |
+
qconfig_mapping = QConfigMapping.from_dict(qconfig_mapping)
|
| 1755 |
+
|
| 1756 |
+
if isinstance(_equalization_config, Dict):
|
| 1757 |
+
warnings.warn(
|
| 1758 |
+
"Passing a QConfig dictionary to prepare for equalization is deprecated and will not "
|
| 1759 |
+
"be supported in a future version. Please pass in a QConfigMapping instead.")
|
| 1760 |
+
_equalization_config = QConfigMapping.from_dict(_equalization_config)
|
| 1761 |
+
|
| 1762 |
+
if isinstance(prepare_custom_config, Dict):
|
| 1763 |
+
warnings.warn(
|
| 1764 |
+
"Passing a prepare_custom_config_dict to prepare is deprecated and will not be supported "
|
| 1765 |
+
"in a future version. Please pass in a PrepareCustomConfig instead.")
|
| 1766 |
+
prepare_custom_config = PrepareCustomConfig.from_dict(prepare_custom_config)
|
| 1767 |
+
|
| 1768 |
+
if isinstance(backend_config, Dict):
|
| 1769 |
+
warnings.warn(
|
| 1770 |
+
"Passing a backend_config_dict to prepare is deprecated and will not be supported "
|
| 1771 |
+
"in a future version. Please pass in a BackendConfig instead.")
|
| 1772 |
+
backend_config = BackendConfig.from_dict(backend_config)
|
| 1773 |
+
|
| 1774 |
+
assert isinstance(qconfig_mapping, QConfigMapping)
|
| 1775 |
+
assert isinstance(_equalization_config, QConfigMapping)
|
| 1776 |
+
qconfig_mapping = copy.deepcopy(qconfig_mapping)
|
| 1777 |
+
_equalization_config = copy.deepcopy(_equalization_config)
|
| 1778 |
+
|
| 1779 |
+
# mapping from a tuple of nodes in reverse order to uninitialized
|
| 1780 |
+
# QuantizeHandler subclass. For example,
|
| 1781 |
+
# {
|
| 1782 |
+
# # match a single node
|
| 1783 |
+
# (<class 'torch.nn.modules.conv.Conv3d'>:
|
| 1784 |
+
# <class 'torch.ao.quantization.fx.quantize.ConvRelu'>),
|
| 1785 |
+
# # match multiple nodes in reverse order
|
| 1786 |
+
# ((<function relu at 0x7f766a7360d0>, <built-in function add>):
|
| 1787 |
+
# <class 'torch.ao.quantization.fx.quantize.Add'>),
|
| 1788 |
+
# }
|
| 1789 |
+
|
| 1790 |
+
pattern_to_quantize_handler: Dict[Pattern, QuantizeHandler] = {}
|
| 1791 |
+
if backend_config is None:
|
| 1792 |
+
backend_config = get_native_backend_config()
|
| 1793 |
+
pattern_to_quantize_handler = _get_pattern_to_quantize_handlers(backend_config)
|
| 1794 |
+
pattern_to_quantize_handler = _sorted_patterns_dict(pattern_to_quantize_handler)
|
| 1795 |
+
|
| 1796 |
+
root_node_getter_mapping = \
|
| 1797 |
+
get_fusion_pattern_to_root_node_getter(backend_config)
|
| 1798 |
+
|
| 1799 |
+
_update_qconfig_for_fusion(model, qconfig_mapping)
|
| 1800 |
+
_update_qconfig_for_fusion(model, _equalization_config)
|
| 1801 |
+
flattened_qconfig_dict = _get_flattened_qconfig_dict(qconfig_mapping)
|
| 1802 |
+
# TODO: support regex as well
|
| 1803 |
+
propagate_qconfig_(model, flattened_qconfig_dict, prepare_custom_config.to_dict())
|
| 1804 |
+
|
| 1805 |
+
if is_qat:
|
| 1806 |
+
module_to_qat_module = get_module_to_qat_module(backend_config)
|
| 1807 |
+
_qat_swap_modules(model, module_to_qat_module)
|
| 1808 |
+
_update_qconfig_for_qat(qconfig_mapping, backend_config)
|
| 1809 |
+
|
| 1810 |
+
# mapping from fully qualified module name to module instance
|
| 1811 |
+
# for example,
|
| 1812 |
+
# {
|
| 1813 |
+
# '': Model(...),
|
| 1814 |
+
# 'linear': Linear(...),
|
| 1815 |
+
# 'linear.weight_fake_quant': PerChannelMinMaxObserver(...),
|
| 1816 |
+
# }
|
| 1817 |
+
named_modules = dict(model.named_modules(remove_duplicate=False))
|
| 1818 |
+
|
| 1819 |
+
# fill node_name_to_qconfig, a map from node name to qconfig, used in _find_matches
|
| 1820 |
+
equalization_node_name_to_qconfig = _generate_node_name_to_qconfig(
|
| 1821 |
+
model, named_modules, model.graph, _equalization_config, node_name_to_scope)
|
| 1822 |
+
node_name_to_qconfig = _generate_node_name_to_qconfig(model, named_modules, model.graph, qconfig_mapping, node_name_to_scope)
|
| 1823 |
+
|
| 1824 |
+
# match the patterns that will get quantized
|
| 1825 |
+
standalone_module_names = list(prepare_custom_config.standalone_module_names.keys())
|
| 1826 |
+
standalone_module_classes = list(prepare_custom_config.standalone_module_classes.keys())
|
| 1827 |
+
|
| 1828 |
+
custom_module_classes = get_custom_module_class_keys(prepare_custom_config.float_to_observed_mapping)
|
| 1829 |
+
matches_without_qconfig = _find_matches(
|
| 1830 |
+
model.graph, named_modules, pattern_to_quantize_handler, root_node_getter_mapping,
|
| 1831 |
+
standalone_module_names, standalone_module_classes, custom_module_classes)
|
| 1832 |
+
|
| 1833 |
+
# map qconfig instances to matches
|
| 1834 |
+
node_name_to_match_result_with_qconfig = {}
|
| 1835 |
+
for node_name, match_without_qconfig in matches_without_qconfig.items():
|
| 1836 |
+
match_with_qconfig = (*match_without_qconfig, node_name_to_qconfig[node_name])
|
| 1837 |
+
node_name_to_match_result_with_qconfig[node_name] = match_with_qconfig
|
| 1838 |
+
|
| 1839 |
+
_run_prepare_fx_on_standalone_modules(
|
| 1840 |
+
model, is_qat, named_modules, node_name_to_match_result_with_qconfig, prepare_custom_config, backend_config)
|
| 1841 |
+
|
| 1842 |
+
# record names for the set of observed node, so that in convert step
|
| 1843 |
+
# we know whether we need to convert a floating point module to reference
|
| 1844 |
+
# quantized module or not
|
| 1845 |
+
observed_node_names: Set[str] = set()
|
| 1846 |
+
|
| 1847 |
+
result_node = insert_observers_for_model(
|
| 1848 |
+
model,
|
| 1849 |
+
node_name_to_match_result_with_qconfig,
|
| 1850 |
+
node_name_to_qconfig,
|
| 1851 |
+
prepare_custom_config,
|
| 1852 |
+
equalization_node_name_to_qconfig,
|
| 1853 |
+
backend_config,
|
| 1854 |
+
observed_node_names,
|
| 1855 |
+
is_qat,
|
| 1856 |
+
)
|
| 1857 |
+
model = GraphModule(model, model.graph)
|
| 1858 |
+
|
| 1859 |
+
_save_state(model, node_name_to_qconfig, node_name_to_scope,
|
| 1860 |
+
prepare_custom_config, equalization_node_name_to_qconfig,
|
| 1861 |
+
qconfig_mapping, is_qat, observed_node_names)
|
| 1862 |
+
|
| 1863 |
+
if is_standalone_module:
|
| 1864 |
+
assert result_node is not None
|
| 1865 |
+
assert isinstance(result_node.args[0], Node), \
|
| 1866 |
+
"standalone module only supports returning simple value currently"\
|
| 1867 |
+
"(not tuple, dict etc.)"
|
| 1868 |
+
# these inputs are observed in parent
|
| 1869 |
+
# converting List[int] to Tensor since module attribute is
|
| 1870 |
+
# Union[Tensor, Module]
|
| 1871 |
+
input_quantized_idxs: List[int] = prepare_custom_config.input_quantized_indexes
|
| 1872 |
+
output_quantized_idxs: List[int] = prepare_custom_config.output_quantized_indexes
|
| 1873 |
+
observed_graph_module_attrs = model.meta["_observed_graph_module_attrs"]
|
| 1874 |
+
# inplace modification
|
| 1875 |
+
observed_graph_module_attrs.is_observed_standalone_module = True
|
| 1876 |
+
observed_graph_module_attrs.standalone_module_input_quantized_idxs = \
|
| 1877 |
+
input_quantized_idxs
|
| 1878 |
+
observed_graph_module_attrs.standalone_module_output_quantized_idxs = \
|
| 1879 |
+
output_quantized_idxs
|
| 1880 |
+
return model
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/duplicate_dq_pass.cpython-311.pyc
ADDED
|
Binary file (4.62 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/prepare.cpython-311.pyc
ADDED
|
Binary file (18.4 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/qat_utils.cpython-311.pyc
ADDED
|
Binary file (38 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/__pycache__/utils.cpython-311.pyc
ADDED
|
Binary file (26.8 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/duplicate_dq_pass.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import operator
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
|
| 6 |
+
from torch.ao.quantization.pt2e.utils import (
|
| 7 |
+
_filter_sym_size_users,
|
| 8 |
+
_is_valid_annotation,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
from torch.fx.node import map_arg
|
| 12 |
+
from torch.fx.passes.infra.pass_base import PassBase, PassResult
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Module-level logger; warnings and above only.
logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

__all__ = ["DuplicateDQPass"]

# Quantize ops (decomposed `quantized_decomposed` namespace) recognized when
# checking whether a dequantize node is fed by a quantize node.
_QUANTIZE_OPS = [
    torch.ops.quantized_decomposed.quantize_per_tensor.default,
    torch.ops.quantized_decomposed.quantize_per_tensor.tensor,
    torch.ops.quantized_decomposed.quantize_per_channel.default,
]

# Dequantize ops that are candidates for duplication by this pass.
_DEQUANTIZE_OPS = [
    torch.ops.quantized_decomposed.dequantize_per_tensor.default,
    torch.ops.quantized_decomposed.dequantize_per_tensor.tensor,
    torch.ops.quantized_decomposed.dequantize_per_channel.default,
]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _maybe_duplicate_dq(
|
| 34 |
+
gm: torch.fx.GraphModule, dq_node: torch.fx.Node, user: torch.fx.Node
|
| 35 |
+
):
|
| 36 |
+
annotation = user.meta.get("quantization_annotation", None)
|
| 37 |
+
if not _is_valid_annotation(annotation):
|
| 38 |
+
return
|
| 39 |
+
with gm.graph.inserting_after(dq_node):
|
| 40 |
+
new_node = gm.graph.node_copy(dq_node)
|
| 41 |
+
|
| 42 |
+
def maybe_replace_node(n: torch.fx.Node) -> torch.fx.Node:
|
| 43 |
+
if n == dq_node:
|
| 44 |
+
return new_node
|
| 45 |
+
else:
|
| 46 |
+
return n
|
| 47 |
+
|
| 48 |
+
new_args = map_arg(user.args, maybe_replace_node)
|
| 49 |
+
new_kwargs = map_arg(user.kwargs, maybe_replace_node)
|
| 50 |
+
user.args = new_args
|
| 51 |
+
user.kwargs = new_kwargs
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class DuplicateDQPass(PassBase):
    """Graph pass that duplicates a dequantize node once per (annotated) user.

    A dequantize node shared by several consumers prevents each consumer from
    being fused/lowered with its own q-dq pair; duplicating it makes every
    consumer self-contained. Dequantize nodes that terminate a dynamic
    quantization chain (choose_qparams -> getitem -> quantize -> dequantize)
    are intentionally left alone.
    """

    @staticmethod
    def _is_dynamic_quant_dq(dq_node: torch.fx.Node) -> bool:
        """Return True when `dq_node` sits at the end of the dynamic
        quantization pattern: choose_qparam - getitem - q - dq."""
        producer = dq_node.args[0]
        if not (producer.op == "call_function" and producer.target in _QUANTIZE_OPS):
            return False
        maybe_getitem = producer.args[1]
        if not (
            isinstance(maybe_getitem, torch.fx.node.Node)
            and maybe_getitem.op == "call_function"
            and maybe_getitem.target == operator.getitem
        ):
            return False
        maybe_choose = maybe_getitem.args[0]
        return (
            isinstance(maybe_choose, torch.fx.node.Node)
            and maybe_choose.op == "call_function"
            and maybe_choose.target
            == torch.ops.quantized_decomposed.choose_qparams.tensor
        )

    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
        for node in graph_module.graph.nodes:
            if node.op != "call_function" or node.target not in _DEQUANTIZE_OPS:
                continue
            dq_users = _filter_sym_size_users(node)
            # A single user needs no duplication.
            if len(dq_users) <= 1:
                continue
            # Do not duplicate dq for dynamic quantization
            # Pattern: choose_qparam - getitem - q - dq
            if self._is_dynamic_quant_dq(node):
                continue
            for user in dq_users:
                _maybe_duplicate_dq(graph_module, node, user)
        graph_module.graph.eliminate_dead_code()
        graph_module.recompile()
        return PassResult(graph_module, True)
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/export_utils.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import types
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import torch.nn.functional as F
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Public surface of this module.
# NOTE(review): `_WrapperModule` is underscore-prefixed yet exported —
# presumably consumed by sibling pt2e modules; confirm before renaming.
__all__ = [
    "model_is_exported",
    "_WrapperModule",
]
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class _WrapperModule(torch.nn.Module):
|
| 14 |
+
"""Class to wrap a callable in an :class:`torch.nn.Module`. Use this if you
|
| 15 |
+
are trying to export a callable.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
def __init__(self, fn):
|
| 19 |
+
super().__init__()
|
| 20 |
+
self.fn = fn
|
| 21 |
+
|
| 22 |
+
def forward(self, *args, **kwargs):
|
| 23 |
+
"""Simple forward that just calls the ``fn`` provided to :meth:`WrapperModule.__init__`."""
|
| 24 |
+
return self.fn(*args, **kwargs)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def model_is_exported(m: torch.nn.Module) -> bool:
    """
    Return True if the `torch.nn.Module` was exported, False otherwise
    (e.g. if the model was FX symbolically traced or not traced at all).
    """
    # Anything that is not a GraphModule was certainly not exported.
    if not isinstance(m, torch.fx.GraphModule):
        return False
    # Exported graphs carry fake-tensor values under the "val" meta key;
    # plain symbolic traces do not.
    return any("val" in node.meta for node in m.graph.nodes)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _replace_dropout(m: torch.fx.GraphModule, train_to_eval: bool):
    """
    Switch dropout patterns in the model between train and eval modes.

    Dropout has different behavior in train vs eval mode. For exported models,
    however, calling `model.train()` or `model.eval()` does not automatically switch
    the dropout behavior between the two modes, so here we need to rewrite the aten
    dropout patterns manually to achieve the same effect.

    See https://github.com/pytorch/pytorch/issues/103681.
    """
    # Avoid circular dependencies
    from .utils import get_aten_graph_module

    # Needed to ensure subgraph matches are self-contained
    m.graph.eliminate_dead_code()
    m.recompile()

    # Handle the out-of-place and inplace dropout variants separately, since
    # they trace to different aten patterns.
    for inplace in [False, True]:

        def dropout_train(x):
            return F.dropout(x, p=0.5, training=True, inplace=inplace)

        def dropout_eval(x):
            return F.dropout(x, p=0.5, training=False, inplace=inplace)

        # p=0.5 is only a placeholder literal for tracing the pattern; literal
        # values are not required to match because ignore_literals=True below.
        example_inputs = (torch.randn(1),)
        if train_to_eval:
            match_pattern = get_aten_graph_module(
                _WrapperModule(dropout_train), example_inputs
            )
            replacement_pattern = get_aten_graph_module(
                _WrapperModule(dropout_eval), example_inputs
            )
        else:
            # Reverse direction: rewrite eval-mode dropout back to train mode.
            match_pattern = get_aten_graph_module(
                _WrapperModule(dropout_eval), example_inputs
            )
            replacement_pattern = get_aten_graph_module(
                _WrapperModule(dropout_train), example_inputs
            )

        from torch.fx.subgraph_rewriter import replace_pattern_with_filters

        replace_pattern_with_filters(
            m,
            match_pattern,
            replacement_pattern,
            match_filters=[],
            ignore_literals=True,
        )
        m.recompile()
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _replace_batchnorm(m: torch.fx.GraphModule, train_to_eval: bool):
    """
    Switch batchnorm patterns in the model between train and eval modes.

    Batchnorm has different behavior in train vs eval mode. For exported models,
    however, calling `model.train()` or `model.eval()` does not automatically switch
    the batchnorm behavior between the two modes, so here we need to rewrite the aten
    batchnorm patterns manually to achieve the same effect.
    """
    # TODO(Leslie): This function still fails to support custom momentum and eps value.
    # Enable this support in future updates.

    # Avoid circular dependencies
    from .utils import get_aten_graph_module

    # Needed to ensure subgraph matches are self-contained
    m.graph.eliminate_dead_code()
    m.recompile()

    # Pattern endpoints: the same functional batch_norm call, differing only
    # in the `training` flag.
    def bn_train(
        x: torch.Tensor,
        bn_weight: torch.Tensor,
        bn_bias: torch.Tensor,
        bn_running_mean: torch.Tensor,
        bn_running_var: torch.Tensor,
    ):
        return F.batch_norm(
            x, bn_running_mean, bn_running_var, bn_weight, bn_bias, training=True
        )

    def bn_eval(
        x: torch.Tensor,
        bn_weight: torch.Tensor,
        bn_bias: torch.Tensor,
        bn_running_mean: torch.Tensor,
        bn_running_var: torch.Tensor,
    ):
        return F.batch_norm(
            x, bn_running_mean, bn_running_var, bn_weight, bn_bias, training=False
        )

    # Minimal shapes sufficient to trace the pattern; actual sizes in the
    # target graph are matched structurally, not by these literals.
    example_inputs = (
        torch.randn(1, 1, 3, 3),  # x
        torch.randn(1),  # bn_weight
        torch.randn(1),  # bn_bias
        torch.randn(1),  # bn_running_mean
        torch.randn(1),  # bn_running_var
    )
    if train_to_eval:
        match_pattern = get_aten_graph_module(_WrapperModule(bn_train), example_inputs)
        replacement_pattern = get_aten_graph_module(
            _WrapperModule(bn_eval), example_inputs
        )
    else:
        match_pattern = get_aten_graph_module(_WrapperModule(bn_eval), example_inputs)
        replacement_pattern = get_aten_graph_module(
            _WrapperModule(bn_train), example_inputs
        )

    from torch.fx.subgraph_rewriter import replace_pattern_with_filters

    replace_pattern_with_filters(
        m,
        match_pattern,
        replacement_pattern,
        match_filters=[],
        ignore_literals=True,
    )
    m.recompile()
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
# TODO: expose these under this namespace?
|
| 163 |
+
def _move_exported_model_to_eval(model: torch.fx.GraphModule):
    """
    Move an exported GraphModule to eval mode.

    This is equivalent to model.eval() but only for certain special ops like dropout, batchnorm.
    QAT users should call this before performing inference on the model.
    """
    # Rewrite each mode-sensitive op pattern from its train form to its eval form.
    for _rewrite in (_replace_dropout, _replace_batchnorm):
        _rewrite(model, train_to_eval=True)
    return model
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def _move_exported_model_to_train(model: torch.fx.GraphModule):
    """
    Move an exported GraphModule to train mode.

    This is equivalent to model.train() but only for certain special ops like dropout, batchnorm.
    QAT users should call this before performing training on the model.
    """
    # Rewrite each mode-sensitive op pattern from its eval form to its train form.
    for _rewrite in (_replace_dropout, _replace_batchnorm):
        _rewrite(model, train_to_eval=False)
    return model
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def _allow_exported_model_train_eval(model: torch.fx.GraphModule):
|
| 188 |
+
"""
|
| 189 |
+
Allow users to call `model.train()` and `model.eval()` on an exported model,
|
| 190 |
+
but with the effect of changing behavior between the two modes limited to special
|
| 191 |
+
ops only, which are currently dropout and batchnorm.
|
| 192 |
+
|
| 193 |
+
Note: This does not achieve the same effect as what `model.train()` and `model.eval()`
|
| 194 |
+
does in eager models, but only provides an approximation. In particular, user code
|
| 195 |
+
branching on `training` flag will not function correctly in general because the branch
|
| 196 |
+
is already specialized at export time. Additionally, other ops beyond dropout and batchnorm
|
| 197 |
+
that have different train/eval behavior will also not be converted properly.
|
| 198 |
+
"""
|
| 199 |
+
|
| 200 |
+
def _train(self, mode: bool = True):
|
| 201 |
+
if mode:
|
| 202 |
+
_move_exported_model_to_train(self)
|
| 203 |
+
else:
|
| 204 |
+
_move_exported_model_to_eval(self)
|
| 205 |
+
|
| 206 |
+
def _eval(self):
|
| 207 |
+
_move_exported_model_to_eval(self)
|
| 208 |
+
|
| 209 |
+
model.train = types.MethodType(_train, model) # type: ignore[method-assign]
|
| 210 |
+
model.eval = types.MethodType(_eval, model) # type: ignore[method-assign]
|
| 211 |
+
return model
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/prepare.py
ADDED
|
@@ -0,0 +1,489 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from torch._subclasses import FakeTensor
|
| 3 |
+
from torch.ao.quantization.fx.prepare import (
|
| 4 |
+
_insert_obs_or_fq,
|
| 5 |
+
_save_state,
|
| 6 |
+
_is_activation_post_process_node,
|
| 7 |
+
_create_obs_or_fq_from_qspec,
|
| 8 |
+
)
|
| 9 |
+
from torch.fx import (
|
| 10 |
+
GraphModule,
|
| 11 |
+
Graph,
|
| 12 |
+
Node,
|
| 13 |
+
)
|
| 14 |
+
from torch.fx.node import Argument
|
| 15 |
+
|
| 16 |
+
from torch.ao.quantization import QConfigMapping
|
| 17 |
+
from torch.ao.quantization.qconfig import QConfigAny
|
| 18 |
+
from torch.ao.quantization.fx.custom_config import PrepareCustomConfig
|
| 19 |
+
from typing import Dict, Tuple, Union, Any, Optional
|
| 20 |
+
from torch.ao.quantization.quantizer import (
|
| 21 |
+
EdgeOrNode,
|
| 22 |
+
SharedQuantizationSpec,
|
| 23 |
+
QuantizationSpecBase,
|
| 24 |
+
)
|
| 25 |
+
from torch.ao.quantization import ObserverOrFakeQuantize
|
| 26 |
+
|
| 27 |
+
# TODO: make pt2e folder private?
# `prepare` is the only public entry point of this module; everything else
# (union-find helpers, observer insertion) is internal.
__all__ = [
    "prepare",
]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _find_root_edge_or_node(edge_or_node: EdgeOrNode, shared_with_map: Dict[EdgeOrNode, EdgeOrNode]) -> EdgeOrNode:
|
| 34 |
+
"""Find the root node for the sharing tree
|
| 35 |
+
Args:
|
| 36 |
+
edge_or_node: edge/node that we want to find the root
|
| 37 |
+
shared_with_map: each edge/node points to the parent, the root node will points to itself
|
| 38 |
+
|
| 39 |
+
Returns:
|
| 40 |
+
root edge/node
|
| 41 |
+
"""
|
| 42 |
+
parent = shared_with_map[edge_or_node]
|
| 43 |
+
if parent == edge_or_node:
|
| 44 |
+
return edge_or_node
|
| 45 |
+
root = _find_root_edge_or_node(parent, shared_with_map)
|
| 46 |
+
# path compression
|
| 47 |
+
shared_with_map[edge_or_node] = root
|
| 48 |
+
return root
|
| 49 |
+
|
| 50 |
+
def _union(parent: EdgeOrNode, child: EdgeOrNode, shared_with_map: Dict[EdgeOrNode, EdgeOrNode]) -> None:
    """Merge the subtree for `child` with `parent`, the order is important here
    """
    # Resolve both roots (parent first, matching the documented ordering),
    # then hang the child's root off the parent's root.
    parent_root = _find_root_edge_or_node(parent, shared_with_map)
    shared_with_map[_find_root_edge_or_node(child, shared_with_map)] = parent_root
|
| 57 |
+
|
| 58 |
+
def _update_shared_with(child: EdgeOrNode, qspec: QuantizationSpecBase, shared_with_map: Dict[EdgeOrNode, EdgeOrNode]):
    """Update the `shared_with_map` based on the qspec, this applies the `SharedQuantizationSpec`
    configuration and established the relationship between `edge_or_node` with the edge/node that it
    is pointing to, we'll use this information in the end to get the group id
    """
    # Only SharedQuantizationSpec expresses a sharing relationship; any other
    # qspec kind leaves the map untouched.
    if not isinstance(qspec, SharedQuantizationSpec):
        return
    # qspec for `a` = SharedQuantizationSpec(b) means `a` points to `b`.
    _union(qspec.edge_or_node, child, shared_with_map)
|
| 68 |
+
|
| 69 |
+
def _unwrap_shared_qspec(
    qspec: QuantizationSpecBase,
    edge_or_node_to_qspec: Dict[EdgeOrNode, QuantizationSpecBase],
    shared_with_map: Dict[EdgeOrNode, EdgeOrNode]
) -> QuantizationSpecBase:
    """Unwraps qspec to get the final root qspec (non SharedQuantizationSpec)
    if qspec is SharedQuantizationSpec
    (1). tries to find the root edge or node for the node that the qspec points to
    (2). recursively find the root qspec based on the qspec for the root node
    """
    # Iterative unwrap: keep chasing SharedQuantizationSpec links until a
    # concrete (non-shared) qspec is reached.
    while isinstance(qspec, SharedQuantizationSpec):
        root = _find_root_edge_or_node(qspec.edge_or_node, shared_with_map)
        qspec = edge_or_node_to_qspec[root]
    return qspec
|
| 85 |
+
|
| 86 |
+
def _has_same_dtype(qspec_a: QuantizationSpecBase, qspec_b: QuantizationSpecBase):
|
| 87 |
+
return (
|
| 88 |
+
hasattr(qspec_a, "dtype") and
|
| 89 |
+
hasattr(qspec_b, "dtype") and
|
| 90 |
+
qspec_a.dtype == qspec_b.dtype
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
def _has_same_is_dynamic(qspec_a: QuantizationSpecBase, qspec_b: QuantizationSpecBase):
|
| 94 |
+
return (
|
| 95 |
+
hasattr(qspec_a, "is_dynamic") and
|
| 96 |
+
hasattr(qspec_b, "is_dynamic") and
|
| 97 |
+
qspec_a.is_dynamic == qspec_b.is_dynamic
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
def _get_edge_or_node_to_qspec(model: torch.fx.GraphModule) -> Dict[EdgeOrNode, QuantizationSpecBase]:
|
| 101 |
+
"""Get a map from EdgeOrNode to quantization spec based on annotations on the nodes
|
| 102 |
+
"""
|
| 103 |
+
edge_or_node_to_qspec: Dict[EdgeOrNode, QuantizationSpecBase] = {}
|
| 104 |
+
for n in model.graph.nodes:
|
| 105 |
+
if hasattr(n, "meta") and "quantization_annotation" in n.meta:
|
| 106 |
+
qa = n.meta["quantization_annotation"]
|
| 107 |
+
for input_to_n, qspec in qa.input_qspec_map.items():
|
| 108 |
+
input_edge = (input_to_n, n)
|
| 109 |
+
edge_or_node_to_qspec[input_edge] = qspec
|
| 110 |
+
if qa.output_qspec is not None:
|
| 111 |
+
output_node = n
|
| 112 |
+
qspec = qa.output_qspec
|
| 113 |
+
edge_or_node_to_qspec[output_node] = qspec
|
| 114 |
+
return edge_or_node_to_qspec
|
| 115 |
+
|
| 116 |
+
def _union_input_edge_with(input_edge, input_edge_root_qspec, edge_or_node, edge_or_node_to_qspec, shared_with_map):
    """Union input edge with another edge or node, used in implicit sharing to point the current input
    edge to other user edges of the producer node, or the output of producer node since these are
    referring to the same Tensor
    """
    # Nothing to share with when the candidate carries no annotation.
    if edge_or_node not in edge_or_node_to_qspec:
        return
    candidate_root_qspec = _unwrap_shared_qspec(
        edge_or_node_to_qspec[edge_or_node], edge_or_node_to_qspec, shared_with_map
    )
    # TODO: add assertions for types of root qspecs
    if (
        _has_same_dtype(candidate_root_qspec, input_edge_root_qspec)
        and _has_same_is_dynamic(candidate_root_qspec, input_edge_root_qspec)
    ):
        # the input arg to the node should reuse the existing output observer for arg
        # since dtype is the same (we may want to extend this to be a more strict check
        # in the future)
        # so we point from `input_edge` to `arg` (output of the argument)
        _union(edge_or_node, input_edge, shared_with_map)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _get_edge_or_node_to_group_id(edge_or_node_to_qspec: Dict[EdgeOrNode, QuantizationSpecBase]) -> Dict[EdgeOrNode, int]:
    """Map from edge/node to the group ID, generated from quantization annotations,
    edge/node with the same group ID should use the same observer/fake_quant instance

    This is applying SharedQuantizationSpec configuration and map each edge/node to a group
    There is another implicit sharing that's built in the quantization, when we have the following:
       * op1 -> op2
       * output of op1: int8_qspec
       * (op1 -> op2) input edge: int8_qspec
    we'll assume sharing between the output of op1 and input of (op1 -> op2) since these are the same Tensor.

    Figuring out the correct group ID for all edge/node is a standard union find problem:
    https://www.geeksforgeeks.org/introduction-to-disjoint-set-data-structure-or-union-find-algorithm/

    Args:
        edge_or_node_to_qspec: Dictionary from edge_or_node to the qspec, derived from annotations
    Returns:
        edge_or_node_to_group_id: Dictionary from edge_or_node to group_id (int), all edge or node that
        belongs to the same group should have the same id

    Example:
        op2 -> cat1 -> cat2
         op1 /        /
             op3
        edge_or_node_to_qspec: {
            op1: int8_qspec,
            op2: int8_qspec,
            (op1, cat1): int8_qspc,
            (op2, cat1): SharedQuantizationSpec((op1, cat1)),
            cat1: SharedQuantizationSpec((op1, cat1)),
            (op3, cat2): int8_qspec,
            (cat1, cat2): SharedQuantizationSpec((op3, cat2)),
            cat2: SharedQuantizationSpec((op3, cat2)),
        }

        edge_or_node_to_group_id = _get_edge_or_node_to_group_id(edge_or_node_to_qspec)
        edge_or_node_to_group_id: {
            op1: 1,
            op2: 1,
            (op1, cat1): 1,
            (op2, cat1): 1,
            cat1: 1,
            (op3, cat2): 1,
            (cat1, cat2): 1,
            cat2: 1,
        }
        # everything are in the same group because (cat1) and (cat1, cat2) are implicitly shared, which
        # connects the two sharing group around cat1 and cat2 op due to transitive sharing
    """
    # means the observer of key should be shared with observer with value, by default it will
    # be shared with itself
    shared_with_map: Dict[EdgeOrNode, EdgeOrNode] = {k: k for k in edge_or_node_to_qspec.keys()}
    for edge_or_node, qspec in edge_or_node_to_qspec.items():
        if isinstance(edge_or_node, torch.fx.Node):
            # A bare Node key is an op output annotation.
            output_node = edge_or_node
            _update_shared_with(output_node, qspec, shared_with_map)
        else:
            # A tuple key is an (arg, consumer) input edge annotation.
            input_edge = edge_or_node
            input_edge_root_qspec = _unwrap_shared_qspec(qspec, edge_or_node_to_qspec, shared_with_map)

            assert isinstance(input_edge, tuple)
            arg, n = input_edge
            if n.meta["quantization_annotation"].allow_implicit_sharing:
                # NOTE: the order is important here, we first share with other users and then share with previous
                # output because the reverse order could cause circular dependency
                # e.g node1 -> node2
                #          \ -> node3
                # when processing (node1, node2), if we first point (node1, node2) to node1
                # Step 1. shared_map = {(node1, node2): node1}
                # Step 2. after that, we point the (node1, node2) to its other user (node1, node3) ,
                # which means shared_map = {(node1, node2): node1, node1: (node1, node3)}
                # because we will point the root of (node1, node2) (in this case node1) to the root of (node1, node3)
                # Step 3. and when we process (node1, node3), it can try to point to node1 as well, then we'll
                # have a circular dependency
                # the following order works around this issue, but this does not allow arbitrary configuration
                # of sharing so it might break in a different case in the future, when it breaks
                # quantizer writer can check the notes here to debug the issue

                # sharing with other users of the producer node
                # (arg, user)
                if not isinstance(arg, Node) or not isinstance(n, Node):
                    raise Exception(f"Expected input_edge to have type Tuple[Node, Node], but got: {arg, n}")
                for user in arg.users:
                    if user is n:
                        continue
                    arg_to_user_edge = (arg, user)
                    _union_input_edge_with(
                        input_edge,
                        input_edge_root_qspec,
                        arg_to_user_edge,
                        edge_or_node_to_qspec,
                        shared_with_map
                    )

                # sharing with output of producer node
                _union_input_edge_with(input_edge, input_edge_root_qspec, arg, edge_or_node_to_qspec, shared_with_map)

            _update_shared_with(input_edge, qspec, shared_with_map)

    # now that we get the sharing relations between all edges and nodes, we can assign group ids
    cur_group_id = 0
    edge_or_node_to_group_id: Dict[EdgeOrNode, int] = {}
    for edge_or_node in shared_with_map.keys():
        root = _find_root_edge_or_node(edge_or_node, shared_with_map)
        if root not in edge_or_node_to_group_id:
            # First time this root is seen: allocate a fresh group id.
            edge_or_node_to_group_id[root] = cur_group_id
            cur_group_id += 1
        edge_or_node_to_group_id[edge_or_node] = edge_or_node_to_group_id[root]

    return edge_or_node_to_group_id
|
| 248 |
+
|
| 249 |
+
def _get_obs_or_fq_map(
    edge_or_node_to_group_id: Dict[EdgeOrNode, int],
    edge_or_node_to_qspec: Dict[EdgeOrNode, QuantizationSpecBase],
    is_qat: bool
) -> Dict[EdgeOrNode, ObserverOrFakeQuantize]:
    """Generates the EdgeOrNode to observer/fake_quant instances
    Makes sure that for EdgeOrNode that has the same group_id should have the same observer or fake quant
    instances
    """
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize] = {}
    # Cache of the single instance created per group id.
    instance_by_group: Dict[int, ObserverOrFakeQuantize] = {}
    for edge_or_node, qspec in edge_or_node_to_qspec.items():
        group_id = edge_or_node_to_group_id[edge_or_node]
        if group_id in instance_by_group:
            shared_instance = instance_by_group[group_id]
        else:
            # TODO: maybe edge_or_node_to_qspec should be edge_or_node_to_root_qspec, this will simplify
            # the implementation for _create_obs_or_fq_from_qspec
            shared_instance = _create_obs_or_fq_from_qspec(qspec, obs_or_fq_map, is_qat)
            instance_by_group[group_id] = shared_instance
        obs_or_fq_map[edge_or_node] = shared_instance
    return obs_or_fq_map
|
| 268 |
+
|
| 269 |
+
def _maybe_insert_input_observer_for_arg_or_kwarg(
    node: Union[Node, Any],
    arg: Argument,
    qconfig: QConfigAny,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
) -> Argument:
    """
    Given a `node` and an `arg`, inserts an input observer between
    `node` and `arg` if necessary.

    Returns the (possibly replaced) argument: either the original `arg`, an
    existing observer node that already wraps it, or a freshly inserted one.
    """
    # for ops such as torch.cat([x0, x1]),
    # traverse through the list
    if isinstance(arg, (list, tuple)):
        new_arg_to_return = []
        for inner_arg in arg:
            # Recurse on each element; container type is preserved below.
            new_inner_arg = _maybe_insert_input_observer_for_arg_or_kwarg(
                node, inner_arg, qconfig, model, named_modules, obs_or_fq_map, is_qat,
            )
            new_arg_to_return.append(new_inner_arg)
        return type(arg)(new_arg_to_return)

    # Non-Node arguments (constants, literals) are returned untouched.
    if not isinstance(arg, Node):
        return arg
    assert isinstance(arg, Node)
    # default (no observer)
    new_arg = arg

    # find the original `arg` node to the current node, skipping inserted observer/fake_quant nodes
    original_arg = arg
    while _is_activation_post_process_node(original_arg, named_modules):
        original_arg = original_arg.args[0]  # type: ignore[assignment]
    assert isinstance(original_arg, Node), f"expect original argument to be a Node, but got: {type(original_arg)}"

    # The sharing map is keyed on the *original* producer, not any observer
    # already layered on top of it.
    input_edge = (original_arg, node)
    if input_edge not in obs_or_fq_map:
        return new_arg
    # input_edge needs to be observed
    input_edge_obs_or_fq = obs_or_fq_map[input_edge]
    if input_edge_obs_or_fq is None:
        return new_arg

    arg_as_output_obs_or_fq = obs_or_fq_map.get(original_arg, None)
    # the arg is observed as the output and is using the same instance as the input_edge
    # we'll reuse the inserted observer/fake_quant
    if arg_as_output_obs_or_fq is not None and id(arg_as_output_obs_or_fq) == id(input_edge_obs_or_fq):
        return new_arg

    # otherwise, we'll insert a new observer/fake_quant node

    # NOTE(review): `existing_obs_node` is assigned but never used below —
    # candidate for cleanup.
    existing_obs_node = None
    # skip inserting new observers if the same observer instance is inserted before for another user
    # Example:
    # conv1 -> obs1 -> existing_obs -> conv2
    #                 \ -> conv3
    #
    # instead of inserting new observers we will have:
    # conv1 -> obs1 -> existing_obs -> conv2
    #                 \ -> conv3
    for maybe_obs_node in arg.users.keys():
        if not _is_activation_post_process_node(maybe_obs_node, named_modules):
            continue
        maybe_obs_mod = named_modules[maybe_obs_node.target]  # type: ignore[index]
        # Identity comparison: reuse only when it is literally the same
        # observer instance selected for this input edge.
        if id(maybe_obs_mod) == id(input_edge_obs_or_fq):
            return maybe_obs_node

    new_arg = _insert_obs_or_fq(arg, input_edge_obs_or_fq, model, named_modules, model.graph)
    return new_arg
|
| 339 |
+
|
| 340 |
+
def _maybe_insert_input_observers_for_node(
    node: Node,
    qconfig: QConfigAny,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
) -> None:
    """Insert observers/fake-quants in front of `node`'s inputs where required.

    Rewrites ``node`` in place: each positional argument that needs
    observation is rerouted through a (possibly newly inserted)
    observer/fake-quant node, i.e.

        prev_node -> cur_node   becomes   prev_node -> obs -> cur_node
    """
    # Per-occurrence list of (possibly observed) args, plus an old-arg ->
    # new-arg mapping used to retarget the numeric debug handle metadata.
    observed_args = []
    arg_replacements = {}
    for original_arg in node.args:
        observed = _maybe_insert_input_observer_for_arg_or_kwarg(
            node, original_arg, qconfig, model, named_modules, obs_or_fq_map, is_qat,
        )
        observed_args.append(observed)
        arg_replacements[original_arg] = observed

    if "numeric_debug_handle" in node.meta:
        # Re-key the debug handles so they follow the inserted observers.
        old_handle = node.meta["numeric_debug_handle"]
        node.meta["numeric_debug_handle"] = {
            arg_replacements.get(key, key): value for key, value in old_handle.items()
        }

    # Clone has a memory_format kwarg and zeros_like has a pin_memory kwarg
    # that persist in the exported graph; every other aten op in this IR is
    # expected to carry no kwargs.
    assert (
        node.target in (torch.ops.aten.clone.default, torch.ops.aten.zeros_like.default)
        or len(node.kwargs) == 0
    ), " expecting kwargs for aten op IR to be empty"

    # Assign the rewritten args back onto the node, in place.
    node.args = tuple(observed_args)
|
| 391 |
+
|
| 392 |
+
def _maybe_insert_output_observer_for_node(
    node: Node,
    model: torch.nn.Module,
    named_modules: Dict[str, torch.nn.Module],
    graph: Graph,
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
) -> Optional[Node]:
    """Insert an output observer/fake-quant after `node` when one is assigned.

    Returns the freshly inserted observer node, or ``None`` when `node`'s
    output does not appear in ``obs_or_fq_map``.
    """
    if node not in obs_or_fq_map:
        return None
    return _insert_obs_or_fq(node, obs_or_fq_map[node], model, named_modules, graph)
|
| 404 |
+
|
| 405 |
+
def _maybe_insert_input_and_output_observers_for_node(
    node: Node,
    model: torch.fx.GraphModule,
    obs_or_fq_map: Dict[EdgeOrNode, ObserverOrFakeQuantize],
    is_qat: bool,
):
    """Observe `node`'s inputs and, when it produces a tensor, its output."""
    # Nodes without a quantization annotation are left untouched.
    if node.meta.get("quantization_annotation") is None:
        return

    named_modules = dict(model.named_modules(remove_duplicate=False))
    _maybe_insert_input_observers_for_node(
        node,
        None,  # qconfig
        model,
        named_modules,
        obs_or_fq_map,
        is_qat,
    )

    # Only tensor-valued outputs ("val" holding a FakeTensor) can be observed.
    if not isinstance(node.meta.get("val"), FakeTensor):
        return

    # Returns the new observer node if one was needed, else None.
    output_obs_node = _maybe_insert_output_observer_for_node(
        node, model, named_modules, model.graph, obs_or_fq_map, is_qat)
    if output_obs_node is None:
        return

    # Reroute every original consumer of `node` (except the observer itself)
    # through the new observer, so downstream nodes read the observed value.
    # Snapshot the user list first, because it mutates as edges are rewritten.
    for consumer in list(node.users):
        if consumer is not output_obs_node:
            consumer.replace_input_with(node, output_obs_node)
|
| 455 |
+
|
| 456 |
+
def prepare(
    model: GraphModule,
    node_name_to_scope: Dict[str, Tuple[str, type]],
    is_qat: bool,
) -> GraphModule:
    """Insert observers/fake-quants into `model` and return the prepared module."""
    # Observer insertion mutates the graph, so walk a snapshot of the node
    # list taken before any rewriting happens instead of model.graph.nodes.
    original_nodes = list(model.graph.nodes)

    # Construct one observer/fake-quant instance per sharing group: every
    # edge/node in the same group maps to the same instance, so insertion
    # below is just a dictionary lookup.
    qspec_map = _get_edge_or_node_to_qspec(model)
    group_id_map = _get_edge_or_node_to_group_id(qspec_map)
    obs_or_fq_map = _get_obs_or_fq_map(group_id_map, qspec_map, is_qat)

    for node in original_nodes:
        # TODO: simplify logic for inserting observers
        _maybe_insert_input_and_output_observers_for_node(node, model, obs_or_fq_map, is_qat)

    # Re-trace so the freshly inserted submodules/nodes are materialized.
    model = GraphModule(model, model.graph)

    _save_state(
        model,
        {},  # node_name_to_qconfig
        node_name_to_scope,
        PrepareCustomConfig(),
        {},  # equalization_node_name_to_qconfig
        QConfigMapping(),
        is_qat,
        set(),  # observed_node_names
    )
    return model
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/pt2e/representation/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Re-export the reference-representation rewrite as this package's sole
# public API.
from .rewrite import reference_representation_rewrite

__all__ = [
    "reference_representation_rewrite",
]
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/qconfig_mapping.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from collections import OrderedDict
|
| 3 |
+
from typing import Any, Callable, Dict, Tuple, Union, List
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
|
| 7 |
+
from .fake_quantize import (
|
| 8 |
+
default_weight_fake_quant,
|
| 9 |
+
FixedQParamsFakeQuantize,
|
| 10 |
+
)
|
| 11 |
+
from .observer import (
|
| 12 |
+
_PartialWrapper,
|
| 13 |
+
default_fixed_qparams_range_0to1_observer,
|
| 14 |
+
default_fixed_qparams_range_neg1to1_observer,
|
| 15 |
+
default_placeholder_observer,
|
| 16 |
+
default_weight_observer,
|
| 17 |
+
)
|
| 18 |
+
from .qconfig import (
|
| 19 |
+
default_reuse_input_qconfig,
|
| 20 |
+
default_symmetric_qnnpack_qconfig,
|
| 21 |
+
default_symmetric_qnnpack_qat_qconfig,
|
| 22 |
+
get_default_qconfig,
|
| 23 |
+
get_default_qat_qconfig,
|
| 24 |
+
QConfig,
|
| 25 |
+
QConfigAny,
|
| 26 |
+
default_quint8_weight_qconfig
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# Public API of this module.
__all__ = [
    "get_default_qconfig_mapping",
    "get_default_qat_qconfig_mapping",
    "QConfigMapping",
]


# Dictionary keys used by QConfigMapping.to_dict() / from_dict().
# TODO: replace all usages with these constants
_GLOBAL_DICT_KEY = ""
_OBJECT_TYPE_DICT_KEY = "object_type"
_MODULE_NAME_REGEX_DICT_KEY = "module_name_regex"
_MODULE_NAME_DICT_KEY = "module_name"
_MODULE_NAME_OBJECT_TYPE_ORDER_DICT_KEY = "module_name_object_type_order"

# Modules, functionals, and Tensor-method names whose outputs are quantized
# with fixed quantization parameters, mapped to the observer carrying those
# fixed qparams: range [0, 1] for sigmoid/hardsigmoid/softmax, [-1, 1] for
# tanh (per the observer names).
# TODO: derive this map from the BackendConfig
_FIXED_QPARAMS_OP_TO_OBSERVER: Dict[Union[Callable, str], _PartialWrapper] = {
    torch.nn.Hardsigmoid: default_fixed_qparams_range_0to1_observer,
    torch.nn.functional.hardsigmoid: default_fixed_qparams_range_0to1_observer,
    "hardsigmoid": default_fixed_qparams_range_0to1_observer,
    "hardsigmoid_": default_fixed_qparams_range_0to1_observer,
    torch.nn.Sigmoid: default_fixed_qparams_range_0to1_observer,
    torch.sigmoid: default_fixed_qparams_range_0to1_observer,
    "sigmoid": default_fixed_qparams_range_0to1_observer,
    "sigmoid_": default_fixed_qparams_range_0to1_observer,
    torch.nn.Softmax: default_fixed_qparams_range_0to1_observer,
    torch.nn.Tanh: default_fixed_qparams_range_neg1to1_observer,
    torch.tanh: default_fixed_qparams_range_neg1to1_observer,
    "tanh": default_fixed_qparams_range_neg1to1_observer,
    "tanh_": default_fixed_qparams_range_neg1to1_observer,
}
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _get_default_qconfig_mapping(is_qat: bool, backend: str, version: int) -> QConfigMapping:
|
| 63 |
+
"""
|
| 64 |
+
Return the default QConfigMapping for the given quantization type and backend.
|
| 65 |
+
"""
|
| 66 |
+
if is_qat:
|
| 67 |
+
qconfig = get_default_qat_qconfig(backend, version)
|
| 68 |
+
else:
|
| 69 |
+
qconfig = get_default_qconfig(backend, version)
|
| 70 |
+
default_weight = default_weight_fake_quant if is_qat else default_weight_observer
|
| 71 |
+
|
| 72 |
+
# default_per_channel_weight_observer is not currently compatible with fbgemm backend
|
| 73 |
+
# so we have to modify the weight observer to default_weight_observer or another
|
| 74 |
+
# per tensor supported observer.
|
| 75 |
+
# see https://github.com/pytorch/pytorch/issues/47535
|
| 76 |
+
if backend in ("fbgemm", "x86"):
|
| 77 |
+
qconfig_transpose = QConfig(activation=qconfig.activation, weight=default_weight)
|
| 78 |
+
else:
|
| 79 |
+
qconfig_transpose = qconfig
|
| 80 |
+
|
| 81 |
+
# currently layernorm only supports float weights
|
| 82 |
+
# we have to add this because otherwise there will be a extra quantize-dequantize pair
|
| 83 |
+
qconfig_layernorm = QConfig(activation=qconfig.activation, weight=default_placeholder_observer)
|
| 84 |
+
|
| 85 |
+
qconfig_mapping = QConfigMapping() \
|
| 86 |
+
.set_global(qconfig) \
|
| 87 |
+
.set_object_type("reshape", default_reuse_input_qconfig) \
|
| 88 |
+
.set_object_type(torch.nn.ConvTranspose1d, qconfig_transpose) \
|
| 89 |
+
.set_object_type(torch.nn.ConvTranspose2d, qconfig_transpose) \
|
| 90 |
+
.set_object_type(torch.nn.ConvTranspose3d, qconfig_transpose) \
|
| 91 |
+
.set_object_type(torch.nn.functional.conv_transpose1d, qconfig_transpose) \
|
| 92 |
+
.set_object_type(torch.nn.functional.conv_transpose2d, qconfig_transpose) \
|
| 93 |
+
.set_object_type(torch.nn.functional.conv_transpose3d, qconfig_transpose) \
|
| 94 |
+
.set_object_type(torch.nn.functional.layer_norm, qconfig_layernorm) \
|
| 95 |
+
.set_object_type(torch.nn.LayerNorm, qconfig_layernorm) \
|
| 96 |
+
.set_object_type(torch.nn.PReLU, default_quint8_weight_qconfig) \
|
| 97 |
+
|
| 98 |
+
# Use special observers for ops with fixed qparams
|
| 99 |
+
fixed_qparams_observer_to_qconfig: Dict[Any, QConfigAny] = {}
|
| 100 |
+
for fixed_qparams_op, observer in _FIXED_QPARAMS_OP_TO_OBSERVER.items():
|
| 101 |
+
if observer in fixed_qparams_observer_to_qconfig:
|
| 102 |
+
fixed_qparams_qconfig = fixed_qparams_observer_to_qconfig[observer]
|
| 103 |
+
else:
|
| 104 |
+
if is_qat:
|
| 105 |
+
activation = FixedQParamsFakeQuantize.with_args(observer=observer)
|
| 106 |
+
else:
|
| 107 |
+
activation = observer
|
| 108 |
+
fixed_qparams_qconfig = QConfig(activation=activation, weight=default_weight)
|
| 109 |
+
fixed_qparams_observer_to_qconfig[observer] = fixed_qparams_qconfig
|
| 110 |
+
qconfig_mapping.set_object_type(fixed_qparams_op, fixed_qparams_qconfig)
|
| 111 |
+
|
| 112 |
+
# TODO Currently it's required that separate ops in a fused op/module have the same qconfig.
|
| 113 |
+
# Need to be able to support fusion of ops with different qconfigs
|
| 114 |
+
|
| 115 |
+
return qconfig_mapping
|
| 116 |
+
|
| 117 |
+
def get_default_qconfig_mapping(backend="x86", version=0) -> QConfigMapping:
    """Build the default QConfigMapping for post training quantization.

    Args:
        * ``backend`` (str) : the quantization backend for the default qconfig mapping, should be
           one of ["x86" (default), "fbgemm", "qnnpack", "onednn"]
        * ``version`` (int) : the version for the default qconfig mapping
    """
    # TODO: add assert for backend choices
    return _get_default_qconfig_mapping(is_qat=False, backend=backend, version=version)
|
| 128 |
+
|
| 129 |
+
def get_default_qat_qconfig_mapping(backend="x86", version=1) -> QConfigMapping:
    """Build the default QConfigMapping for quantization aware training.

    Args:
        * ``backend`` (str) : the quantization backend for the default qconfig mapping, should be
           one of ["x86" (default), "fbgemm", "qnnpack", "onednn"]
        * ``version`` (int) : the version for the default qconfig mapping
    """
    return _get_default_qconfig_mapping(is_qat=True, backend=backend, version=version)
|
| 139 |
+
|
| 140 |
+
def _get_symmetric_qnnpack_qconfig_mapping() -> QConfigMapping:
|
| 141 |
+
"""
|
| 142 |
+
Return a QConfigMapping that uses `torch.ao.quantization.default_symmetric_qnnpack_qconfig`
|
| 143 |
+
as the default QConfig.
|
| 144 |
+
"""
|
| 145 |
+
default_qconfig = default_symmetric_qnnpack_qconfig
|
| 146 |
+
return _get_default_qconfig_mapping_with_default_qconfig(False, "qnnpack", default_qconfig)
|
| 147 |
+
|
| 148 |
+
def _get_symmetric_qnnpack_qat_qconfig_mapping() -> QConfigMapping:
|
| 149 |
+
"""
|
| 150 |
+
Return a QConfigMapping that uses `torch.ao.quantization.default_symmetric_qnnpack_qat_qconfig`
|
| 151 |
+
as the default QConfig.
|
| 152 |
+
"""
|
| 153 |
+
default_qconfig = default_symmetric_qnnpack_qat_qconfig
|
| 154 |
+
return _get_default_qconfig_mapping_with_default_qconfig(True, "qnnpack", default_qconfig)
|
| 155 |
+
|
| 156 |
+
def _get_default_qconfig_mapping_with_default_qconfig(
|
| 157 |
+
is_qat: bool,
|
| 158 |
+
backend: str,
|
| 159 |
+
default_qconfig: QConfig,
|
| 160 |
+
) -> QConfigMapping:
|
| 161 |
+
"""
|
| 162 |
+
Return a QConfigMapping that uses the provided qconfig as the default QConfig.
|
| 163 |
+
"""
|
| 164 |
+
if is_qat:
|
| 165 |
+
qconfig_mapping = get_default_qat_qconfig_mapping(backend)
|
| 166 |
+
else:
|
| 167 |
+
qconfig_mapping = get_default_qconfig_mapping(backend)
|
| 168 |
+
qconfig_mapping.set_global(default_qconfig)
|
| 169 |
+
for pattern in qconfig_mapping.object_type_qconfigs.keys():
|
| 170 |
+
if pattern not in _FIXED_QPARAMS_OP_TO_OBSERVER:
|
| 171 |
+
qconfig_mapping.set_object_type(pattern, default_qconfig)
|
| 172 |
+
return qconfig_mapping
|
| 173 |
+
|
| 174 |
+
# Attribute names of QConfigMapping, listed from lowest to highest match
# priority; QConfigMapping.__repr__ prints the styles in this order.
_QCONFIG_STYLE_ORDER: List[str] = [
    "global_qconfig",
    "object_type_qconfigs",
    "module_name_regex_qconfigs",
    "module_name_qconfigs",
    "module_name_object_type_order_qconfigs",
]
|
| 181 |
+
|
| 182 |
+
class QConfigMapping:
    """
    Mapping from model ops to :class:`torch.ao.quantization.QConfig` s.

    QConfigs can be registered through the following methods, listed in
    increasing match priority:

        ``set_global`` : sets the global (default) QConfig

        ``set_object_type`` : sets the QConfig for a given module type, function, or method name

        ``set_module_name_regex`` : sets the QConfig for modules matching the given regex string

        ``set_module_name`` : sets the QConfig for modules matching the given module name

        ``set_module_name_object_type_order`` : sets the QConfig for modules matching a combination
        of the given module name, object type, and the index at which the module appears

    Example usage::

        qconfig_mapping = QConfigMapping()
            .set_global(global_qconfig)
            .set_object_type(torch.nn.Linear, qconfig1)
            .set_object_type(torch.nn.ReLU, qconfig1)
            .set_module_name_regex("foo.*bar.*conv[0-9]+", qconfig1)
            .set_module_name_regex("foo.*", qconfig2)
            .set_module_name("module1", qconfig1)
            .set_module_name("module2", qconfig2)
            .set_module_name_object_type_order("foo.bar", torch.nn.functional.linear, 0, qconfig3)

    """

    def __init__(self):
        # Registries ordered from lowest to highest match priority.
        self.global_qconfig: QConfigAny = None
        self.object_type_qconfigs: OrderedDict[Union[Callable, str], QConfigAny] = OrderedDict()
        self.module_name_regex_qconfigs: OrderedDict[str, QConfigAny] = OrderedDict()
        self.module_name_qconfigs: OrderedDict[str, QConfigAny] = OrderedDict()
        self.module_name_object_type_order_qconfigs: OrderedDict[Tuple[str, Callable, int], QConfigAny] = OrderedDict()

    def set_global(self, global_qconfig: QConfigAny) -> QConfigMapping:
        """
        Register the global (default) QConfig and return ``self``.
        """
        self.global_qconfig = global_qconfig
        return self

    def set_object_type(self, object_type: Union[Callable, str], qconfig: QConfigAny) -> QConfigMapping:
        """
        Register a QConfig for a module type, function, or method name.
        Re-registering an existing object type overwrites its QConfig.
        """
        self.object_type_qconfigs[object_type] = qconfig
        return self

    def set_module_name_regex(self, module_name_regex: str, qconfig: QConfigAny) -> QConfigMapping:
        """
        Register a QConfig for modules whose names match the given regex.

        Regexes are tried in registration order, so register the more
        specific patterns first, e.g.::

            qconfig_mapping = QConfigMapping()
                .set_module_name_regex("foo.*bar.*conv[0-9]+", qconfig1)
                .set_module_name_regex("foo.*bar.*", qconfig2)
                .set_module_name_regex("foo.*", qconfig3)

        Here "foo.bar.conv0" matches qconfig1, "foo.bar.linear" matches
        qconfig2, and "foo.baz.relu" matches qconfig3.

        Re-registering an existing regex overwrites its QConfig while keeping
        the regex's original position in the match order.
        """
        self.module_name_regex_qconfigs[module_name_regex] = qconfig
        return self

    def set_module_name(self, module_name: str, qconfig: QConfigAny) -> QConfigMapping:
        """
        Register a QConfig for modules matching the exact module name.
        Re-registering an existing module name overwrites its QConfig.
        """
        self.module_name_qconfigs[module_name] = qconfig
        return self

    def set_module_name_object_type_order(
            self,
            module_name: str,
            object_type: Callable,
            index: int,
            qconfig: QConfigAny) -> QConfigMapping:
        """
        Register a QConfig keyed on the combination of module name, object
        type, and the index at which the module appears.

        Re-registering an existing (module name, object type, index) triple
        overwrites its QConfig.
        """
        self.module_name_object_type_order_qconfigs[(module_name, object_type, index)] = qconfig
        return self

    def __repr__(self) -> str:
        # Render each registry under its style name, in priority order.
        pieces = [self.__class__.__name__ + " ("]
        for style_name in _QCONFIG_STYLE_ORDER:
            pieces.append(f"\n {style_name}")
            entries = getattr(self, style_name)
            if isinstance(entries, OrderedDict) and entries:
                for key, qconfig in entries.items():
                    pieces.append(f"\n {key}: {qconfig}")
            else:
                pieces.append(f"\n {entries}")
        pieces.append("\n)")
        return "".join(pieces)

    # TODO: remove this
    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this ``QConfigMapping`` to a dictionary with the keys
        "" (global QConfig), "object_type", "module_name_regex",
        "module_name", and "module_name_object_type_order"; every value
        other than the global QConfig is a list of tuples.
        """
        flattened_order = [
            k + (v,) for k, v in self.module_name_object_type_order_qconfigs.items()
        ]
        return {
            _GLOBAL_DICT_KEY: self.global_qconfig,
            _OBJECT_TYPE_DICT_KEY: list(self.object_type_qconfigs.items()),
            _MODULE_NAME_REGEX_DICT_KEY: list(self.module_name_regex_qconfigs.items()),
            _MODULE_NAME_DICT_KEY: list(self.module_name_qconfigs.items()),
            _MODULE_NAME_OBJECT_TYPE_ORDER_DICT_KEY: flattened_order,
        }

    # TODO: remove this
    @classmethod
    def from_dict(cls, qconfig_dict: Dict[str, Any]) -> QConfigMapping:
        """
        Deserialize a ``QConfigMapping`` from a dictionary with the same
        (all optional) keys produced by ``to_dict``:
        "" (global QConfig), "object_type", "module_name_regex",
        "module_name", and "module_name_object_type_order", where every
        value other than the global QConfig is a list of tuples.
        """
        mapping = cls()
        if _GLOBAL_DICT_KEY in qconfig_dict:
            mapping.set_global(qconfig_dict[_GLOBAL_DICT_KEY])
        for obj_type, qconfig in qconfig_dict.get(_OBJECT_TYPE_DICT_KEY, []):
            mapping.set_object_type(obj_type, qconfig)
        for regex, qconfig in qconfig_dict.get(_MODULE_NAME_REGEX_DICT_KEY, []):
            mapping.set_module_name_regex(regex, qconfig)
        for name, qconfig in qconfig_dict.get(_MODULE_NAME_DICT_KEY, []):
            mapping.set_module_name(name, qconfig)
        for name, obj_type, index, qconfig in qconfig_dict.get(_MODULE_NAME_OBJECT_TYPE_ORDER_DICT_KEY, []):
            mapping.set_module_name_object_type_order(name, obj_type, index, qconfig)
        return mapping
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quantizer/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (663 Bytes). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/ao/quantization/quantizer/__pycache__/xnnpack_quantizer.cpython-311.pyc
ADDED
|
Binary file (19.7 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/export/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (13.6 kB). View file
|
|
|
tuning-competition-baseline/.venv/lib/python3.11/site-packages/torch/export/__pycache__/_remove_auto_functionalized_pass.cpython-311.pyc
ADDED
|
Binary file (4.41 kB). View file
|
|
|