koichi12 commited on
Commit
7e7bbc5
·
verified ·
1 Parent(s): e278978

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/__init__.cpython-311.pyc +0 -0
  2. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/_gpu_trace.cpython-311.pyc +0 -0
  3. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/_memory_viz.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/_sanitizer.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/_utils.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/comm.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/error.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/gds.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/graphs.cpython-311.pyc +0 -0
  10. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/jiterator.cpython-311.pyc +0 -0
  11. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/nccl.cpython-311.pyc +0 -0
  12. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/nvtx.cpython-311.pyc +0 -0
  13. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/profiler.cpython-311.pyc +0 -0
  14. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/random.cpython-311.pyc +0 -0
  15. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/sparse.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/streams.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/torch/cuda/__pycache__/tunable.cpython-311.pyc +0 -0
  18. .venv/lib/python3.11/site-packages/torch/cuda/amp/__init__.py +12 -0
  19. .venv/lib/python3.11/site-packages/torch/cuda/amp/__pycache__/__init__.cpython-311.pyc +0 -0
  20. .venv/lib/python3.11/site-packages/torch/cuda/amp/__pycache__/autocast_mode.cpython-311.pyc +0 -0
  21. .venv/lib/python3.11/site-packages/torch/cuda/amp/__pycache__/common.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/torch/cuda/amp/__pycache__/grad_scaler.cpython-311.pyc +0 -0
  23. .venv/lib/python3.11/site-packages/torch/cuda/amp/autocast_mode.py +90 -0
  24. .venv/lib/python3.11/site-packages/torch/cuda/amp/common.py +11 -0
  25. .venv/lib/python3.11/site-packages/torch/cuda/amp/grad_scaler.py +38 -0
  26. .venv/lib/python3.11/site-packages/torch/nn/quantizable/__init__.py +1 -0
  27. .venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/activation.py +10 -0
  28. .venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/rnn.py +11 -0
  29. .venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/__init__.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/conv_expanded_weights.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/conv_utils.cpython-311.pyc +0 -0
  32. .venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/expanded_weights_utils.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/group_norm_expanded_weights.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/instance_norm_expanded_weights.cpython-311.pyc +0 -0
  35. .venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/layer_norm_expanded_weights.cpython-311.pyc +0 -0
  36. .venv/lib/python3.11/site-packages/torch/quantization/__init__.py +86 -0
  37. .venv/lib/python3.11/site-packages/torch/quantization/_numeric_suite.py +28 -0
  38. .venv/lib/python3.11/site-packages/torch/quantization/_numeric_suite_fx.py +26 -0
  39. .venv/lib/python3.11/site-packages/torch/quantization/_quantized_conversions.py +133 -0
  40. .venv/lib/python3.11/site-packages/torch/quantization/fake_quantize.py +32 -0
  41. .venv/lib/python3.11/site-packages/torch/quantization/fuse_modules.py +22 -0
  42. .venv/lib/python3.11/site-packages/torch/quantization/fuser_method_mappings.py +15 -0
  43. .venv/lib/python3.11/site-packages/torch/quantization/fx/__init__.py +15 -0
  44. .venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/__init__.cpython-311.pyc +0 -0
  45. .venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/_equalize.cpython-311.pyc +0 -0
  46. .venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/convert.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/fuse.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/fusion_patterns.cpython-311.pyc +0 -0
  49. .venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/graph_module.cpython-311.pyc +0 -0
  50. .venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/match_utils.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (76.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/_gpu_trace.cpython-311.pyc ADDED
Binary file (4.57 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/_memory_viz.cpython-311.pyc ADDED
Binary file (37.4 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/_sanitizer.cpython-311.pyc ADDED
Binary file (36.8 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/_utils.cpython-311.pyc ADDED
Binary file (2.18 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/comm.cpython-311.pyc ADDED
Binary file (492 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/error.cpython-311.pyc ADDED
Binary file (180 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/gds.cpython-311.pyc ADDED
Binary file (6.83 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/graphs.cpython-311.pyc ADDED
Binary file (29.4 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/jiterator.cpython-311.pyc ADDED
Binary file (7.96 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/nccl.cpython-311.pyc ADDED
Binary file (6.81 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/nvtx.cpython-311.pyc ADDED
Binary file (3.83 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/profiler.cpython-311.pyc ADDED
Binary file (4.17 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/random.cpython-311.pyc ADDED
Binary file (8.59 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/sparse.cpython-311.pyc ADDED
Binary file (181 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/streams.cpython-311.pyc ADDED
Binary file (13.2 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/__pycache__/tunable.cpython-311.pyc ADDED
Binary file (12.3 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/amp/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .autocast_mode import autocast, custom_bwd, custom_fwd
2
+ from .common import amp_definitely_not_available
3
+ from .grad_scaler import GradScaler
4
+
5
+
6
+ __all__ = [
7
+ "amp_definitely_not_available",
8
+ "autocast",
9
+ "custom_bwd",
10
+ "custom_fwd",
11
+ "GradScaler",
12
+ ]
.venv/lib/python3.11/site-packages/torch/cuda/amp/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (497 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/amp/__pycache__/autocast_mode.cpython-311.pyc ADDED
Binary file (4.82 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/amp/__pycache__/common.cpython-311.pyc ADDED
Binary file (604 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/amp/__pycache__/grad_scaler.cpython-311.pyc ADDED
Binary file (1.74 kB). View file
 
.venv/lib/python3.11/site-packages/torch/cuda/amp/autocast_mode.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import functools
3
+ from typing import Any
4
+ from typing_extensions import deprecated
5
+
6
+ import torch
7
+
8
+
9
+ __all__ = ["autocast", "custom_fwd", "custom_bwd"]
10
+
11
+
12
+ class autocast(torch.amp.autocast_mode.autocast):
13
+ r"""See :class:`torch.autocast`.
14
+
15
+ ``torch.cuda.amp.autocast(args...)`` is deprecated. Please use ``torch.amp.autocast("cuda", args...)`` instead.
16
+ """
17
+
18
+ @deprecated(
19
+ "`torch.cuda.amp.autocast(args...)` is deprecated. "
20
+ "Please use `torch.amp.autocast('cuda', args...)` instead.",
21
+ category=FutureWarning,
22
+ )
23
+ def __init__(
24
+ self,
25
+ enabled: bool = True,
26
+ dtype: torch.dtype = torch.float16,
27
+ cache_enabled: bool = True,
28
+ ):
29
+ if torch._jit_internal.is_scripting():
30
+ self._enabled = enabled
31
+ self.device = "cuda"
32
+ self.fast_dtype = dtype
33
+ return
34
+ super().__init__(
35
+ "cuda", enabled=enabled, dtype=dtype, cache_enabled=cache_enabled
36
+ )
37
+
38
+ def __enter__(self):
39
+ if torch._jit_internal.is_scripting():
40
+ return self
41
+ return super().__enter__()
42
+
43
+ # TODO: discuss a unified TorchScript-friendly API for autocast
44
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any): # type: ignore[override]
45
+ if torch._jit_internal.is_scripting():
46
+ return
47
+ return super().__exit__(exc_type, exc_val, exc_tb)
48
+
49
+ def __call__(self, func):
50
+ if torch._jit_internal.is_scripting():
51
+ return func
52
+ return super().__call__(func)
53
+
54
+
55
+ # Preserved only for BC reasons
56
+ @deprecated(
57
+ "`torch.cuda.amp.autocast_mode._cast(value, dtype)` is deprecated. "
58
+ "Please use `torch.amp.autocast_mode._cast(value, 'cuda', dtype)` instead.",
59
+ category=FutureWarning,
60
+ )
61
+ def _cast(value, dtype):
62
+ return torch.amp.autocast_mode._cast(value, "cuda", dtype)
63
+
64
+
65
+ @deprecated(
66
+ "`torch.cuda.amp.custom_fwd(args...)` is deprecated. "
67
+ "Please use `torch.amp.custom_fwd(args..., device_type='cuda')` instead.",
68
+ category=FutureWarning,
69
+ )
70
+ def custom_fwd(fwd=None, *, cast_inputs=None):
71
+ """
72
+ ``torch.cuda.amp.custom_fwd(args...)`` is deprecated. Please use
73
+ ``torch.amp.custom_fwd(args..., device_type='cuda')`` instead.
74
+ """
75
+ return functools.partial(torch.amp.custom_fwd, device_type="cuda")(
76
+ fwd=fwd, cast_inputs=cast_inputs
77
+ )
78
+
79
+
80
+ @deprecated(
81
+ "`torch.cuda.amp.custom_bwd(args...)` is deprecated. "
82
+ "Please use `torch.amp.custom_bwd(args..., device_type='cuda')` instead.",
83
+ category=FutureWarning,
84
+ )
85
+ def custom_bwd(bwd):
86
+ """
87
+ ``torch.cuda.amp.custom_bwd(args...)`` is deprecated. Please use
88
+ ``torch.amp.custom_bwd(args..., device_type='cuda')`` instead.
89
+ """
90
+ return functools.partial(torch.amp.custom_bwd, device_type="cuda")(bwd)
.venv/lib/python3.11/site-packages/torch/cuda/amp/common.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ from importlib.util import find_spec
3
+
4
+ import torch
5
+
6
+
7
+ __all__ = ["amp_definitely_not_available"]
8
+
9
+
10
+ def amp_definitely_not_available():
11
+ return not (torch.cuda.is_available() or find_spec("torch_xla"))
.venv/lib/python3.11/site-packages/torch/cuda/amp/grad_scaler.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing_extensions import deprecated
2
+
3
+ import torch
4
+
5
+ # We need to keep this unused import for BC reasons
6
+ from torch.amp.grad_scaler import OptState # noqa: F401
7
+
8
+
9
+ __all__ = ["GradScaler"]
10
+
11
+
12
+ class GradScaler(torch.amp.GradScaler):
13
+ r"""
14
+ See :class:`torch.amp.GradScaler`.
15
+ ``torch.cuda.amp.GradScaler(args...)`` is deprecated. Please use ``torch.amp.GradScaler("cuda", args...)`` instead.
16
+ """
17
+
18
+ @deprecated(
19
+ "`torch.cuda.amp.GradScaler(args...)` is deprecated. "
20
+ "Please use `torch.amp.GradScaler('cuda', args...)` instead.",
21
+ category=FutureWarning,
22
+ )
23
+ def __init__(
24
+ self,
25
+ init_scale: float = 2.0**16,
26
+ growth_factor: float = 2.0,
27
+ backoff_factor: float = 0.5,
28
+ growth_interval: int = 2000,
29
+ enabled: bool = True,
30
+ ) -> None:
31
+ super().__init__(
32
+ "cuda",
33
+ init_scale=init_scale,
34
+ growth_factor=growth_factor,
35
+ backoff_factor=backoff_factor,
36
+ growth_interval=growth_interval,
37
+ enabled=enabled,
38
+ )
.venv/lib/python3.11/site-packages/torch/nn/quantizable/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from torch.nn.quantizable.modules import * # noqa: F403
.venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/activation.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ r"""Quantizable Modules.
3
+
4
+ This file is in the process of migration to `torch/ao/nn/quantizable`, and
5
+ is kept here for compatibility while the migration process is ongoing.
6
+ If you are adding a new entry/functionality, please, add it to the
7
+ appropriate file under the `torch/ao/nn/quantizable/modules`,
8
+ while adding an import statement here.
9
+ """
10
+ from torch.ao.nn.quantizable.modules.activation import MultiheadAttention
.venv/lib/python3.11/site-packages/torch/nn/quantizable/modules/rnn.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ r"""Quantizable Modules.
3
+
4
+ This file is in the process of migration to `torch/ao/nn/quantizable`, and
5
+ is kept here for compatibility while the migration process is ongoing.
6
+ If you are adding a new entry/functionality, please, add it to the
7
+ appropriate file under the `torch/ao/nn/quantizable/modules`,
8
+ while adding an import statement here.
9
+ """
10
+
11
+ from torch.ao.nn.quantizable.modules.rnn import LSTM, LSTMCell
.venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (834 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/conv_expanded_weights.cpython-311.pyc ADDED
Binary file (3.53 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/conv_utils.cpython-311.pyc ADDED
Binary file (14.1 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/expanded_weights_utils.cpython-311.pyc ADDED
Binary file (9.36 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/group_norm_expanded_weights.cpython-311.pyc ADDED
Binary file (5.03 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/instance_norm_expanded_weights.cpython-311.pyc ADDED
Binary file (5.34 kB). View file
 
.venv/lib/python3.11/site-packages/torch/nn/utils/_expanded_weights/__pycache__/layer_norm_expanded_weights.cpython-311.pyc ADDED
Binary file (4.63 kB). View file
 
.venv/lib/python3.11/site-packages/torch/quantization/__init__.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ from .fake_quantize import * # noqa: F403
3
+ from .fuse_modules import fuse_modules
4
+ from .fuser_method_mappings import * # noqa: F403
5
+ from .observer import * # noqa: F403
6
+ from .qconfig import * # noqa: F403
7
+ from .quant_type import * # noqa: F403
8
+ from .quantization_mappings import * # noqa: F403
9
+ from .quantize import * # noqa: F403
10
+ from .quantize_jit import * # noqa: F403
11
+ from .stubs import * # noqa: F403
12
+
13
+
14
+ def default_eval_fn(model, calib_data):
15
+ r"""
16
+ Default evaluation function takes a torch.utils.data.Dataset or a list of
17
+ input Tensors and runs the model on the dataset
18
+ """
19
+ for data, target in calib_data:
20
+ model(data)
21
+
22
+
23
+ __all__ = [
24
+ "QuantWrapper",
25
+ "QuantStub",
26
+ "DeQuantStub",
27
+ # Top level API for eager mode quantization
28
+ "quantize",
29
+ "quantize_dynamic",
30
+ "quantize_qat",
31
+ "prepare",
32
+ "convert",
33
+ "prepare_qat",
34
+ # Top level API for graph mode quantization on TorchScript
35
+ "quantize_jit",
36
+ "quantize_dynamic_jit",
37
+ "_prepare_ondevice_dynamic_jit",
38
+ "_convert_ondevice_dynamic_jit",
39
+ "_quantize_ondevice_dynamic_jit",
40
+ # Top level API for graph mode quantization on GraphModule(torch.fx)
41
+ # 'fuse_fx', 'quantize_fx', # TODO: add quantize_dynamic_fx
42
+ # 'prepare_fx', 'prepare_dynamic_fx', 'convert_fx',
43
+ "QuantType", # quantization type
44
+ # custom module APIs
45
+ "get_default_static_quant_module_mappings",
46
+ "get_static_quant_module_class",
47
+ "get_default_dynamic_quant_module_mappings",
48
+ "get_default_qat_module_mappings",
49
+ "get_default_qconfig_propagation_list",
50
+ "get_default_compare_output_module_list",
51
+ "get_quantized_operator",
52
+ "get_fuser_method",
53
+ # Sub functions for `prepare` and `swap_module`
54
+ "propagate_qconfig_",
55
+ "add_quant_dequant",
56
+ "swap_module",
57
+ "default_eval_fn",
58
+ # Observers
59
+ "ObserverBase",
60
+ "WeightObserver",
61
+ "HistogramObserver",
62
+ "observer",
63
+ "default_observer",
64
+ "default_weight_observer",
65
+ "default_placeholder_observer",
66
+ "default_per_channel_weight_observer",
67
+ # FakeQuantize (for qat)
68
+ "default_fake_quant",
69
+ "default_weight_fake_quant",
70
+ "default_fixed_qparams_range_neg1to1_fake_quant",
71
+ "default_fixed_qparams_range_0to1_fake_quant",
72
+ "default_per_channel_weight_fake_quant",
73
+ "default_histogram_fake_quant",
74
+ # QConfig
75
+ "QConfig",
76
+ "default_qconfig",
77
+ "default_dynamic_qconfig",
78
+ "float16_dynamic_qconfig",
79
+ "float_qparams_weight_only_qconfig",
80
+ # QAT utilities
81
+ "default_qat_qconfig",
82
+ "prepare_qat",
83
+ "quantize_qat",
84
+ # module transformations
85
+ "fuse_modules",
86
+ ]
.venv/lib/python3.11/site-packages/torch/quantization/_numeric_suite.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ r"""
3
+ This file is in the process of migration to `torch/ao/quantization`, and
4
+ is kept here for compatibility while the migration process is ongoing.
5
+ If you are adding a new entry/functionality, please, add it to the
6
+ `torch/ao/ns/_numeric_suite.py`, while adding an import statement
7
+ here.
8
+ """
9
+
10
+ from torch.ao.ns._numeric_suite import (
11
+ _convert_tuple_to_list,
12
+ _dequantize_tensor_list,
13
+ _find_match,
14
+ _get_logger_dict_helper,
15
+ _is_identical_module_type,
16
+ compare_model_outputs,
17
+ compare_model_stub,
18
+ compare_weights,
19
+ get_logger_dict,
20
+ get_matching_activations,
21
+ Logger,
22
+ NON_LEAF_MODULE_TO_ADD_OBSERVER_ALLOW_LIST,
23
+ OutputLogger,
24
+ prepare_model_outputs,
25
+ prepare_model_with_stubs,
26
+ Shadow,
27
+ ShadowLogger,
28
+ )
.venv/lib/python3.11/site-packages/torch/quantization/_numeric_suite_fx.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ r"""
3
+ This file is in the process of migration to `torch/ao/quantization`, and
4
+ is kept here for compatibility while the migration process is ongoing.
5
+ If you are adding a new entry/functionality, please, add it to the
6
+ `torch/ao/ns/_numeric_suite_fx.py`, while adding an import statement
7
+ here.
8
+ """
9
+
10
+ from torch.ao.ns._numeric_suite_fx import (
11
+ _add_loggers_impl,
12
+ _add_loggers_one_model,
13
+ _add_shadow_loggers_impl,
14
+ _extract_logger_info_one_model,
15
+ _extract_weights_impl,
16
+ _extract_weights_one_model,
17
+ add_loggers,
18
+ add_shadow_loggers,
19
+ extend_logger_results_with_comparison,
20
+ extract_logger_info,
21
+ extract_shadow_logger_info,
22
+ extract_weights,
23
+ NSTracer,
24
+ OutputLogger,
25
+ RNNReturnType,
26
+ )
.venv/lib/python3.11/site-packages/torch/quantization/_quantized_conversions.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: allow-untyped-defs
2
+ import torch
3
+
4
+
5
+ # Pack pairs of int4 values into int8, in row major order; first int4
6
+ # value goes into lower order bits, and second int4 value into higher
7
+ # order bits of resulting int8 value.
8
+ def pack_int4_to_int8(weight):
9
+ assert weight.dim() == 2
10
+ assert weight.shape[1] % 2 == 0
11
+ assert weight.dtype == torch.int8
12
+ return ((weight[:, 1::2] & 0xF) << 4) | (weight[:, 0::2] & 0xF)
13
+
14
+
15
+ # Unpack quadruples of bits in int8 values into int4 values, in row
16
+ # major order; lower 4 bits go into the first int4 value, and upper 4
17
+ # bits go into second int4 value.
18
+ def unpack_int8_to_int4(weight):
19
+ assert weight.dim() == 2
20
+ assert weight.dtype == torch.int8
21
+ return torch.stack((weight & 0xF, (weight >> 4) & 0xF), dim=2).view(
22
+ weight.shape[0], 2 * weight.shape[1]
23
+ )
24
+
25
+
26
+ # Transpose the weight matrix, and then reorder its elements according
27
+ # to underlying requirements of CUTLASS library, so that it could be
28
+ # used for CUTLASS-based mixed datatypes linear operation.
29
+ def quantized_weight_reorder_for_mixed_dtypes_linear_cutlass(
30
+ weight, dtypeq, transpose=False
31
+ ):
32
+ assert weight.dim() == 2
33
+ assert weight.dtype == torch.int8
34
+ assert dtypeq == torch.int8 or dtypeq == torch.quint4x2
35
+ assert weight.device.type == "cuda"
36
+
37
+ device = weight.device
38
+
39
+ # subbyte_transpose
40
+ if not transpose:
41
+ if dtypeq == torch.int8:
42
+ outp = weight.T
43
+ elif dtypeq == torch.quint4x2:
44
+ outp = pack_int4_to_int8(unpack_int8_to_int4(weight.view(torch.int8)).T)
45
+ else:
46
+ outp = weight
47
+
48
+ ncols, nrows = outp.shape # type: ignore[possibly-undefined]
49
+ assert nrows % (32 if dtypeq == torch.quint4x2 else 64) == 0
50
+ assert ncols % 64 == 0
51
+
52
+ # permute_B_rows_for_mixed_gemm
53
+ # (permute cols actually, as transpose is applied first here)
54
+ if dtypeq == torch.quint4x2:
55
+ cols_permuted = (
56
+ torch.tensor(
57
+ [0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15],
58
+ device=device,
59
+ )
60
+ + (torch.arange(0, nrows // 16, device=device).reshape(-1, 1) * 16).expand(
61
+ nrows // 16, 16
62
+ )
63
+ ).view(-1)
64
+ else:
65
+ cols_permuted = (
66
+ torch.tensor(
67
+ [0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15],
68
+ device=device,
69
+ )
70
+ + (torch.arange(0, nrows // 16, device=device).reshape(-1, 1) * 16).expand(
71
+ nrows // 16, 16
72
+ )
73
+ ).view(-1)
74
+ outp = outp.index_copy(1, cols_permuted, outp)
75
+
76
+ # interleave_column_major_tensor
77
+ magic0 = 4 if dtypeq == torch.quint4x2 else 2
78
+ magic1 = 32 // magic0
79
+
80
+ tmp0 = (
81
+ (torch.arange(0, ncols // magic0, device=device) * (nrows // 4 * magic0))
82
+ .view(-1, 1)
83
+ .repeat(1, nrows // 4 * magic0)
84
+ .view(-1)
85
+ )
86
+ tmp1 = (
87
+ (torch.arange(0, nrows // 4 // magic1, device=device) * (magic0 * magic1))
88
+ .view(-1, 1)
89
+ .repeat(1, magic1)
90
+ .view(-1)
91
+ .repeat(ncols)
92
+ )
93
+ tmp2 = (
94
+ (torch.arange(0, magic0, device=device) * magic1)
95
+ .view(-1, 1)
96
+ .repeat(1, nrows // 4)
97
+ .view(-1)
98
+ .repeat(ncols // magic0)
99
+ )
100
+ tmp3 = torch.arange(0, magic1, device=device).repeat(nrows // 4 * ncols // magic1)
101
+
102
+ outp_offsets = tmp0 + tmp1 + tmp2 + tmp3
103
+
104
+ tmp = outp.view(-1).view(torch.int32)
105
+ outp = torch.zeros_like(tmp)
106
+ outp.scatter_(0, outp_offsets, tmp)
107
+ outp = outp.view(weight.dtype)
108
+
109
+ # add_bias_and_interleave_quantized_tensor_inplace
110
+ tmp = outp.view(-1)
111
+
112
+ outp = torch.empty_like(tmp)
113
+ if dtypeq == torch.int8:
114
+ tmp = (tmp.to(torch.int) + 128).to(tmp.dtype)
115
+ outp[0::4] = tmp[0::4]
116
+ outp[1::4] = tmp[2::4]
117
+ outp[2::4] = tmp[1::4]
118
+ outp[3::4] = tmp[3::4]
119
+ elif dtypeq == torch.quint4x2:
120
+ tmp0 = ((tmp & 0xF) + 8) & 0xF
121
+ tmp0 = (tmp0[1::2] << 4) | tmp0[0::2]
122
+ tmp1 = (((tmp >> 4) & 0xF) + 8) & 0xF
123
+ tmp1 = (tmp1[1::2] << 4) | tmp1[0::2]
124
+ outp[0::4] = tmp0[0::2]
125
+ outp[1::4] = tmp0[1::2]
126
+ outp[2::4] = tmp1[0::2]
127
+ outp[3::4] = tmp1[1::2]
128
+
129
+ if dtypeq == torch.quint4x2:
130
+ nrows *= 2
131
+ ncols //= 2
132
+
133
+ return outp.view(nrows, ncols).view(torch.uint8)
.venv/lib/python3.11/site-packages/torch/quantization/fake_quantize.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ r"""
3
+ This file is in the process of migration to `torch/ao/quantization`, and
4
+ is kept here for compatibility while the migration process is ongoing.
5
+ If you are adding a new entry/functionality, please, add it to the
6
+ `torch/ao/quantization/fake_quantize.py`, while adding an import statement
7
+ here.
8
+ """
9
+
10
+ from torch.ao.quantization.fake_quantize import (
11
+ _is_fake_quant_script_module,
12
+ _is_per_channel,
13
+ _is_per_tensor,
14
+ _is_symmetric_quant,
15
+ default_fake_quant,
16
+ default_fixed_qparams_range_0to1_fake_quant,
17
+ default_fixed_qparams_range_neg1to1_fake_quant,
18
+ default_fused_act_fake_quant,
19
+ default_fused_per_channel_wt_fake_quant,
20
+ default_fused_wt_fake_quant,
21
+ default_histogram_fake_quant,
22
+ default_per_channel_weight_fake_quant,
23
+ default_weight_fake_quant,
24
+ disable_fake_quant,
25
+ disable_observer,
26
+ enable_fake_quant,
27
+ enable_observer,
28
+ FakeQuantize,
29
+ FakeQuantizeBase,
30
+ FixedQParamsFakeQuantize,
31
+ FusedMovingAvgObsFakeQuantize,
32
+ )
.venv/lib/python3.11/site-packages/torch/quantization/fuse_modules.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ r"""
3
+ This file is in the process of migration to `torch/ao/quantization`, and
4
+ is kept here for compatibility while the migration process is ongoing.
5
+ If you are adding a new entry/functionality, please, add it to the
6
+ `torch/ao/quantization/fuse_modules.py`, while adding an import statement
7
+ here.
8
+ """
9
+
10
+ # TODO: These functions are not used outside the `fuse_modules.py`
11
+ # Keeping here for now, need to remove them later.
12
+ from torch.ao.quantization.fuse_modules import (
13
+ _fuse_modules,
14
+ _get_module,
15
+ _set_module,
16
+ fuse_known_modules,
17
+ fuse_modules,
18
+ get_fuser_method,
19
+ )
20
+
21
+ # for backward compatibility
22
+ from torch.ao.quantization.fuser_method_mappings import fuse_conv_bn, fuse_conv_bn_relu
.venv/lib/python3.11/site-packages/torch/quantization/fuser_method_mappings.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ r"""
3
+ This file is in the process of migration to `torch/ao/quantization`, and
4
+ is kept here for compatibility while the migration process is ongoing.
5
+ If you are adding a new entry/functionality, please, add it to the
6
+ `torch/ao/quantization/fuser_method_mappings.py`, while adding an import statement
7
+ here.
8
+ """
9
+ from torch.ao.quantization.fuser_method_mappings import (
10
+ _DEFAULT_OP_LIST_TO_FUSER_METHOD,
11
+ fuse_conv_bn,
12
+ fuse_conv_bn_relu,
13
+ fuse_linear_bn,
14
+ get_fuser_method,
15
+ )
.venv/lib/python3.11/site-packages/torch/quantization/fx/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa: F401
2
+ r"""
3
+ This file is in the process of migration to `torch/ao/quantization`, and
4
+ is kept here for compatibility while the migration process is ongoing.
5
+ If you are adding a new entry/functionality, please, add it to the
6
+ appropriate files under `torch/ao/quantization/fx/`, while adding an import statement
7
+ here.
8
+ """
9
+
10
+ from torch.ao.quantization.fx.convert import convert
11
+ from torch.ao.quantization.fx.fuse import fuse
12
+
13
+ # omitting files that are unlikely to be used right now, for example
14
+ # the newly added lower_to_fbgemm etc.
15
+ from torch.ao.quantization.fx.prepare import prepare
.venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (762 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/_equalize.cpython-311.pyc ADDED
Binary file (1.88 kB). View file
 
.venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/convert.cpython-311.pyc ADDED
Binary file (605 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/fuse.cpython-311.pyc ADDED
Binary file (596 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/fusion_patterns.cpython-311.pyc ADDED
Binary file (658 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/graph_module.cpython-311.pyc ADDED
Binary file (892 Bytes). View file
 
.venv/lib/python3.11/site-packages/torch/quantization/fx/__pycache__/match_utils.cpython-311.pyc ADDED
Binary file (729 Bytes). View file