koichi12 commited on
Commit
f1e8896
·
verified ·
1 Parent(s): 4ac3d46

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50) hide show
  1. .venv/lib/python3.11/site-packages/compressed_tensors/__pycache__/__init__.cpython-311.pyc +0 -0
  2. .venv/lib/python3.11/site-packages/compressed_tensors/__pycache__/base.cpython-311.pyc +0 -0
  3. .venv/lib/python3.11/site-packages/compressed_tensors/__pycache__/version.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/__pycache__/__init__.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/__pycache__/base.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/__pycache__/helpers.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/model_compressors/__init__.py +17 -0
  8. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/model_compressors/__pycache__/__init__.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/model_compressors/__pycache__/model_compressor.cpython-311.pyc +0 -0
  10. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/model_compressors/model_compressor.py +466 -0
  11. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__pycache__/__init__.cpython-311.pyc +0 -0
  12. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__pycache__/marlin_24.cpython-311.pyc +0 -0
  13. .venv/lib/python3.11/site-packages/compressed_tensors/config/__init__.py +19 -0
  14. .venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/__init__.cpython-311.pyc +0 -0
  15. .venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/base.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/dense.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/sparse_24_bitmask.cpython-311.pyc +0 -0
  18. .venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/sparse_bitmask.cpython-311.pyc +0 -0
  19. .venv/lib/python3.11/site-packages/compressed_tensors/config/base.py +111 -0
  20. .venv/lib/python3.11/site-packages/compressed_tensors/config/dense.py +36 -0
  21. .venv/lib/python3.11/site-packages/compressed_tensors/config/sparse_24_bitmask.py +40 -0
  22. .venv/lib/python3.11/site-packages/compressed_tensors/config/sparse_bitmask.py +36 -0
  23. .venv/lib/python3.11/site-packages/compressed_tensors/linear/__init__.py +13 -0
  24. .venv/lib/python3.11/site-packages/compressed_tensors/linear/__pycache__/__init__.cpython-311.pyc +0 -0
  25. .venv/lib/python3.11/site-packages/compressed_tensors/linear/__pycache__/compressed_linear.cpython-311.pyc +0 -0
  26. .venv/lib/python3.11/site-packages/compressed_tensors/linear/compressed_linear.py +89 -0
  27. .venv/lib/python3.11/site-packages/compressed_tensors/registry/__init__.py +17 -0
  28. .venv/lib/python3.11/site-packages/compressed_tensors/registry/__pycache__/__init__.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/compressed_tensors/registry/__pycache__/registry.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/compressed_tensors/registry/registry.py +360 -0
  31. .venv/lib/python3.11/site-packages/compressed_tensors/utils/__init__.py +21 -0
  32. .venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/helpers.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/offload.cpython-311.pyc +0 -0
  35. .venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/permutations_24.cpython-311.pyc +0 -0
  36. .venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/permute.cpython-311.pyc +0 -0
  37. .venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/safetensors_load.cpython-311.pyc +0 -0
  38. .venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/semi_structured_conversions.cpython-311.pyc +0 -0
  39. .venv/lib/python3.11/site-packages/compressed_tensors/utils/helpers.py +326 -0
  40. .venv/lib/python3.11/site-packages/compressed_tensors/utils/offload.py +404 -0
  41. .venv/lib/python3.11/site-packages/compressed_tensors/utils/permutations_24.py +65 -0
  42. .venv/lib/python3.11/site-packages/compressed_tensors/utils/permute.py +70 -0
  43. .venv/lib/python3.11/site-packages/compressed_tensors/utils/safetensors_load.py +306 -0
  44. .venv/lib/python3.11/site-packages/compressed_tensors/utils/semi_structured_conversions.py +342 -0
  45. .venv/lib/python3.11/site-packages/dotenv/__init__.py +49 -0
  46. .venv/lib/python3.11/site-packages/dotenv/__main__.py +6 -0
  47. .venv/lib/python3.11/site-packages/dotenv/__pycache__/__main__.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/dotenv/__pycache__/cli.cpython-311.pyc +0 -0
  49. .venv/lib/python3.11/site-packages/dotenv/__pycache__/ipython.cpython-311.pyc +0 -0
  50. .venv/lib/python3.11/site-packages/dotenv/__pycache__/main.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/compressed_tensors/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (447 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/__pycache__/base.cpython-311.pyc ADDED
Binary file (487 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/__pycache__/version.cpython-311.pyc ADDED
Binary file (1.26 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (435 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/__pycache__/base.cpython-311.pyc ADDED
Binary file (7.69 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/__pycache__/helpers.cpython-311.pyc ADDED
Binary file (6.34 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/model_compressors/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # flake8: noqa
15
+
16
+
17
+ from .model_compressor import *
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/model_compressors/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (264 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/model_compressors/__pycache__/model_compressor.cpython-311.pyc ADDED
Binary file (19.5 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/model_compressors/model_compressor.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import logging
17
+ import operator
18
+ import os
19
+ import re
20
+ from contextlib import contextmanager
21
+ from copy import deepcopy
22
+ from typing import TYPE_CHECKING, Any, Dict, Optional, Set, TypeVar, Union
23
+
24
+ import compressed_tensors
25
+ import torch
26
+ import transformers
27
+ from compressed_tensors.base import (
28
+ COMPRESSION_VERSION_NAME,
29
+ QUANTIZATION_CONFIG_NAME,
30
+ QUANTIZATION_METHOD_NAME,
31
+ SPARSITY_CONFIG_NAME,
32
+ )
33
+ from compressed_tensors.compressors.base import BaseCompressor
34
+ from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
35
+ from compressed_tensors.quantization import (
36
+ DEFAULT_QUANTIZATION_METHOD,
37
+ QuantizationConfig,
38
+ QuantizationStatus,
39
+ apply_quantization_config,
40
+ load_pretrained_quantization,
41
+ )
42
+ from compressed_tensors.quantization.lifecycle import expand_sparse_target_names
43
+ from compressed_tensors.quantization.quant_args import QuantizationArgs
44
+ from compressed_tensors.quantization.utils import (
45
+ is_module_quantized,
46
+ iter_named_leaf_modules,
47
+ )
48
+ from compressed_tensors.utils import get_safetensors_folder, update_parameter_data
49
+ from compressed_tensors.utils.helpers import (
50
+ fix_fsdp_module_name,
51
+ is_compressed_tensors_config,
52
+ )
53
+ from torch import Tensor
54
+ from torch.nn import Module
55
+ from tqdm import tqdm
56
+ from transformers import AutoConfig
57
+ from transformers.file_utils import CONFIG_NAME
58
+
59
+
60
__all__ = ["ModelCompressor", "map_modules_to_quant_args"]

# Module-level logger used for non-fatal diagnostics (e.g. a missing config file
# in update_config).
_LOGGER: logging.Logger = logging.getLogger(__name__)


if TYPE_CHECKING:
    # dummy type if not available from transformers
    # NOTE(review): bound only during static type checking; at runtime this name
    # appears exclusively inside string annotations, so it is never evaluated.
    CompressedTensorsConfig = TypeVar("CompressedTensorsConfig")
68
+
69
+
70
class ModelCompressor:
    """
    Handles compression and decompression of a model with a sparsity config and/or
    quantization config.

    Compression LifeCycle
        - compressor = ModelCompressor.from_pretrained_model(model)
        - compressed_state_dict = compressor.compress(model, state_dict)
            - compressor.quantization_compressor.compress(model, state_dict)
            - compressor.sparsity_compressor.compress(model, state_dict)
        - model.save_pretrained(output_dir, state_dict=compressed_state_dict)
        - compressor.update_config(output_dir)

    Decompression LifeCycle
        - compressor = ModelCompressor.from_pretrained(comp_model_path)
        - model = AutoModel.from_pretrained(comp_model_path)
        - compressor.decompress(comp_model_path, model)
            - compressor.sparsity_compressor.decompress(comp_model_path, model)
            - compressor.quantization_compressor.decompress(comp_model_path, model)

    :param sparsity_config: config specifying sparsity compression parameters
    :param quantization_config: config specifying quantization compression parameters
    """

    @classmethod
    def from_pretrained(
        cls,
        pretrained_model_name_or_path: str,
        **kwargs,
    ) -> Optional["ModelCompressor"]:
        """
        Given a path to a model config, extract the sparsity and/or quantization
        configs and load a ModelCompressor

        :param pretrained_model_name_or_path: path to model config on disk or HF hub
        :return: compressor for the configs, or None if model is not compressed
        """
        config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
        # compression metadata lives under the "quantization_config" key of the
        # HF model config (QUANTIZATION_CONFIG_NAME)
        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
        return cls.from_compression_config(compression_config)

    @classmethod
    def from_compression_config(
        cls,
        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
    ):
        """
        :param compression_config:
            A compression or quantization config

            The type is one of the following:
            1. A Dict found under either "quantization_config" or "compression_config"
                keys in the config.json
            2. A CompressedTensorsConfig found under key "quantization_config" in HF
                model config
        :return: compressor for the configs, or None if model is not compressed
        """
        if compression_config is None:
            return None

        sparsity_config = cls.parse_sparsity_config(compression_config)
        quantization_config = cls.parse_quantization_config(compression_config)
        if sparsity_config is None and quantization_config is None:
            return None

        if sparsity_config is not None:
            # the "format" entry selects which registered subclass to instantiate
            format = sparsity_config.get("format")
            sparsity_config = SparsityCompressionConfig.load_from_registry(
                format, **sparsity_config
            )
        if quantization_config is not None:
            quantization_config = QuantizationConfig.model_validate(quantization_config)

        return cls(
            sparsity_config=sparsity_config, quantization_config=quantization_config
        )

    @classmethod
    def from_pretrained_model(
        cls,
        model: Module,
        sparsity_config: Union[SparsityCompressionConfig, str, None] = None,
        quantization_format: Optional[str] = None,
    ) -> Optional["ModelCompressor"]:
        """
        Given a pytorch model and optional sparsity and/or quantization configs,
        load the appropriate compressors

        :param model: pytorch model to target for compression
        :param sparsity_config: a filled in sparsity config or string corresponding
            to a sparsity compression algorithm
        :param quantization_format: string corresponding to a quantization compression
            algorithm
        :return: compressor for the configs, or None if model is not compressed
        """
        quantization_config = QuantizationConfig.from_pretrained(
            model, format=quantization_format
        )

        if isinstance(sparsity_config, str):  # we passed in a sparsity format
            sparsity_config = SparsityCompressionConfig.load_from_registry(
                sparsity_config
            )

        if sparsity_config is None and quantization_config is None:
            return None

        return cls(
            sparsity_config=sparsity_config, quantization_config=quantization_config
        )

    @staticmethod
    def parse_sparsity_config(
        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
    ) -> Union[Dict[str, Any], None]:
        """
        Parse sparsity config from quantization/compression config. Sparsity
        config is nested inside q/c config

        :param compression_config: quantization/compression config
        :return: sparsity config
        """
        if compression_config is None:
            return None

        if is_compressed_tensors_config(compression_config):
            # transformers CompressedTensorsConfig object: pull the nested
            # pydantic model and convert it back to a plain dict
            s_config = compression_config.sparsity_config
            return s_config.model_dump() if s_config is not None else None

        return compression_config.get(SPARSITY_CONFIG_NAME, None)

    @staticmethod
    def parse_quantization_config(
        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
    ) -> Union[Dict[str, Any], None]:
        """
        Parse quantization config from quantization/compression config. The
        quantization are all the fields that are not the sparsity config or
        metadata fields

        :param compression_config: quantization/compression config
        :return: quantization config without sparsity config or metadata fields
        """
        if compression_config is None:
            return None

        if is_compressed_tensors_config(compression_config):
            q_config = compression_config.quantization_config
            return q_config.model_dump() if q_config is not None else None

        # work on a copy so the caller's dict is not mutated by the pops below
        quantization_config = deepcopy(compression_config)
        quantization_config.pop(SPARSITY_CONFIG_NAME, None)

        # some fields are required, even if a qconfig is not present
        # pop them off and if nothing remains, then there is no qconfig
        quant_method = quantization_config.pop(QUANTIZATION_METHOD_NAME, None)
        _ = quantization_config.pop(COMPRESSION_VERSION_NAME, None)

        if len(quantization_config) == 0:
            return None

        # replace popped off values
        # note that version is discarded for now
        if quant_method is not None:
            quantization_config[QUANTIZATION_METHOD_NAME] = quant_method

        return quantization_config

    def __init__(
        self,
        sparsity_config: Optional[SparsityCompressionConfig] = None,
        quantization_config: Optional[QuantizationConfig] = None,
    ):
        # configs are kept alongside the compressors they instantiate so that
        # update_config/decompress can serialize them later
        self.sparsity_config = sparsity_config
        self.quantization_config = quantization_config
        self.sparsity_compressor = None
        self.quantization_compressor = None

        if sparsity_config is not None:
            self.sparsity_compressor = BaseCompressor.load_from_registry(
                sparsity_config.format, config=sparsity_config
            )
        if quantization_config is not None:
            self.quantization_compressor = BaseCompressor.load_from_registry(
                quantization_config.format, config=quantization_config
            )

    def compress(
        self, model: Module, state_dict: Optional[Dict[str, Tensor]] = None
    ) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict or model with sparsity and/or quantization

        :param model: uncompressed model to compress
        :param state_dict: optional uncompressed state_dict to insert into model
        :return: compressed state dict
        """
        if state_dict is None:
            state_dict = model.state_dict()

        compressed_state_dict = state_dict

        quantized_modules_to_args: Dict[
            str, QuantizationArgs
        ] = map_modules_to_quant_args(model)

        # quantization compression runs first; sparsity compression (below)
        # then operates on the already-quantized state dict
        if self.quantization_compressor is not None:
            compressed_state_dict = self.quantization_compressor.compress(
                state_dict, names_to_scheme=quantized_modules_to_args
            )
            if self.quantization_config.format != CompressionFormat.dense.value:
                self.quantization_config.quantization_status = (
                    QuantizationStatus.COMPRESSED
                )

        if self.sparsity_compressor is not None:
            sparse_compression_targets: Set[str] = expand_sparse_target_names(
                model=model,
                targets=self.sparsity_config.targets,
                ignore=self.sparsity_config.ignore,
            )
            compressed_state_dict = self.sparsity_compressor.compress(
                compressed_state_dict,
                compression_targets=sparse_compression_targets,
            )

        # HACK: Override the dtype_byte_size function in transformers to
        # support float8 types. Fix is posted upstream
        # https://github.com/huggingface/transformers/pull/30488
        transformers.modeling_utils.dtype_byte_size = new_dtype_byte_size

        return compressed_state_dict

    def decompress(self, model_path: str, model: Module):
        """
        Overwrites the weights in model with weights decompressed from model_path

        :param model_path: path to compressed weights
        :param model: pytorch model to load decompressed weights into
        """
        model_path = get_safetensors_folder(model_path)
        sparse_decompressed = False

        if (
            self.sparsity_compressor is not None
            and self.sparsity_config.format != CompressionFormat.dense.value
        ):
            # Sparse decompression is applied on the model_path
            dense_gen = self.sparsity_compressor.decompress(model_path)
            self._replace_weights(dense_gen, model)
            setattr(model, SPARSITY_CONFIG_NAME, self.sparsity_compressor.config)
            sparse_decompressed = True

        if self.quantization_compressor is not None:
            # Temporarily set quantization status to FROZEN to prevent
            # quantization during apply_quantization_config. This ensures
            # that the dtypes of the weights are not unintentionally updated.
            # The status is restored after quantization params are loaded.
            with override_quantization_status(
                self.quantization_config, QuantizationStatus.FROZEN
            ):
                names_to_scheme = apply_quantization_config(
                    model, self.quantization_config
                )
                load_pretrained_quantization(model, model_path)

            # if sparsity was already decompressed above, the dense weights now
            # live in the model, so decompress from its state dict instead of disk
            model_path_or_state_dict = (
                model.state_dict() if sparse_decompressed else model_path
            )

            dense_gen = self.quantization_compressor.decompress(
                model_path_or_state_dict, names_to_scheme=names_to_scheme
            )
            self._replace_weights(dense_gen, model)

            def freeze_quantization_status(module):
                module.quantization_status = QuantizationStatus.FROZEN

            model.apply(freeze_quantization_status)
            setattr(model, QUANTIZATION_CONFIG_NAME, self.quantization_config)

    def update_config(self, save_directory: str):
        """
        Update the model config located at save_directory with compression configs
        for sparsity and/or quantization

        :param save_directory: path to a folder containing a HF model config
        """
        if self.quantization_config is None and self.sparsity_config is None:
            return

        config_file_path = os.path.join(save_directory, CONFIG_NAME)
        if not os.path.exists(config_file_path):
            _LOGGER.warning(
                f"Could not find a valid model config file in "
                f"{save_directory}. Compression config will not be saved."
            )
            return

        with open(config_file_path, "r") as config_file:
            config_data = json.load(config_file)

        # required metadata whenever a quantization or sparsity config is present
        # overwrite previous config and version if already existing
        config_data[QUANTIZATION_CONFIG_NAME] = {}
        config_data[QUANTIZATION_CONFIG_NAME][
            COMPRESSION_VERSION_NAME
        ] = compressed_tensors.__version__
        if self.quantization_config is not None:
            self.quantization_config.quant_method = DEFAULT_QUANTIZATION_METHOD
        else:
            config_data[QUANTIZATION_CONFIG_NAME][
                QUANTIZATION_METHOD_NAME
            ] = DEFAULT_QUANTIZATION_METHOD

        # quantization and sparsity configs
        # NOTE(review): when a quantization config is present, the assignment
        # below replaces the dict written above, apparently dropping the
        # COMPRESSION_VERSION_NAME entry unless QuantizationConfig.model_dump()
        # re-emits it — confirm against QuantizationConfig's fields.
        if self.quantization_config is not None:
            quant_config_data = self.quantization_config.model_dump()
            config_data[QUANTIZATION_CONFIG_NAME] = quant_config_data
        if self.sparsity_config is not None:
            sparsity_config_data = self.sparsity_config.model_dump()
            config_data[QUANTIZATION_CONFIG_NAME][
                SPARSITY_CONFIG_NAME
            ] = sparsity_config_data

        with open(config_file_path, "w") as config_file:
            json.dump(config_data, config_file, indent=2, sort_keys=True)

    def _replace_weights(self, dense_weight_generator, model: Module):
        """
        Replace the weights of the model with the
        provided dense weights.

        This method iterates over the dense_weight_generator and
        updates the corresponding weights in the model. If a parameter
        name does not exist in the model, it will be skipped.

        :param dense_weight_generator (generator): A generator that yields
            tuples of (name, data), where 'name' is the parameter name and
            'data' is the updated param data
        :param model: The model whose weights are to be updated.
        """
        for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
            # split "prefix.param" into the owning module path and the leaf name
            split_name = name.split(".")
            prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
            module = operator.attrgetter(prefix)(model)
            if hasattr(module, param_name):
                update_parameter_data(module, data, param_name)
419
+
420
def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
    """
    Given a pytorch model, map out the submodule name (usually linear layers)
    to the QuantizationArgs

    :param model: pytorch model
    """
    mapping: Dict[str, QuantizationArgs] = {}
    for module_name, module in iter_named_leaf_modules(model):
        # skip modules with no quantization scheme attached
        if not is_module_quantized(module):
            continue
        weight_args = module.quantization_scheme.weights
        # only weight-quantized modules participate in compression
        if weight_args is None:
            continue
        mapping[fix_fsdp_module_name(module_name)] = weight_args

    return mapping
435
+
436
+
437
# HACK: Override the dtype_byte_size function in transformers to support float8 types
# Fix is posted upstream https://github.com/huggingface/transformers/pull/30488
def new_dtype_byte_size(dtype):
    """Return the storage size, in bytes, of one element of ``dtype``.

    Mirrors ``transformers.modeling_utils.dtype_byte_size`` but also handles
    float8 dtypes, whose names (e.g. ``torch.float8_e4m3fn``) embed the bit
    width before an underscore.

    :param dtype: a torch dtype
    :raises ValueError: if no bit width can be parsed from ``str(dtype)``
    """
    if dtype == torch.bool:
        # bools are accounted for as bit-packed (1/8 byte each)
        return 1 / 8
    match = re.search(r"[^\d](\d+)_?", str(dtype))
    if match is None:
        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
    return int(match.group(1)) // 8
447
+
448
+
449
@contextmanager
def override_quantization_status(
    config: QuantizationConfig, status: QuantizationStatus
):
    """
    Temporarily force ``config.quantization_status`` to ``status``.

    On exit — whether the body completes or raises — the status that was in
    place when the context was entered is restored.

    :param config: the quantization config to override
    :param status: the status to temporarily set
    """
    saved_status = config.quantization_status
    config.quantization_status = status
    try:
        yield
    finally:
        # always restore, even if the body raised
        config.quantization_status = saved_status
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (300 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__pycache__/marlin_24.cpython-311.pyc ADDED
Binary file (11.7 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/config/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # flake8: noqa
16
+ from .base import *
17
+ from .dense import *
18
+ from .sparse_24_bitmask import *
19
+ from .sparse_bitmask import *
.venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (331 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/base.cpython-311.pyc ADDED
Binary file (4.34 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/dense.cpython-311.pyc ADDED
Binary file (1.36 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/sparse_24_bitmask.cpython-311.pyc ADDED
Binary file (1.48 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/config/__pycache__/sparse_bitmask.cpython-311.pyc ADDED
Binary file (1.35 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/config/base.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from enum import Enum, unique
16
+ from typing import List, Optional
17
+
18
+ from compressed_tensors.registry import RegistryMixin
19
+ from pydantic import BaseModel
20
+
21
+
22
+ __all__ = ["SparsityCompressionConfig", "CompressionFormat", "SparsityStructure"]
23
+
24
+
25
@unique
class CompressionFormat(Enum):
    # Identifiers for the serialized (on-disk) representation of model weights.
    # The string values are what appear in the "format" field of compression
    # configs and are used as registry keys for compressor lookup.
    dense = "dense"  # uncompressed passthrough
    sparse_bitmask = "sparse-bitmask"
    sparse_24_bitmask = "sparse-24-bitmask"
    int_quantized = "int-quantized"
    float_quantized = "float-quantized"
    naive_quantized = "naive-quantized"
    pack_quantized = "pack-quantized"
    marlin_24 = "marlin-24"
35
+
36
+
37
@unique
class SparsityStructure(Enum):
    """
    An enumeration to represent different sparsity structures.

    Lookup is case-insensitive, and ``None`` maps to ``UNSTRUCTURED``.
    Any other unrecognized value — including non-string values — raises
    ``ValueError``.

    Attributes
    ----------
    TWO_FOUR : str
        Represents a 2:4 sparsity structure.
    ZERO_ZERO : str
        Represents a 0:0 sparsity structure.
    UNSTRUCTURED : str
        Represents an unstructured sparsity structure.

    Examples
    --------
    >>> SparsityStructure('2:4')
    <SparsityStructure.TWO_FOUR: '2:4'>

    >>> SparsityStructure('unstructured')
    <SparsityStructure.UNSTRUCTURED: 'unstructured'>

    >>> SparsityStructure('2:4') == SparsityStructure.TWO_FOUR
    True

    >>> SparsityStructure('UNSTRUCTURED') == SparsityStructure.UNSTRUCTURED
    True

    >>> SparsityStructure(None) == SparsityStructure.UNSTRUCTURED
    True

    >>> SparsityStructure('invalid')
    Traceback (most recent call last):
        ...
    ValueError: invalid is not a valid SparsityStructure
    """

    TWO_FOUR = "2:4"
    UNSTRUCTURED = "unstructured"
    ZERO_ZERO = "0:0"

    def __new__(cls, value):
        # normalize member values to lowercase so case-insensitive lookup
        # in _missing_ can compare directly
        obj = object.__new__(cls)
        obj._value_ = value.lower() if value is not None else value
        return obj

    @classmethod
    def _missing_(cls, value):
        """Handle ``None`` and case-insensitive string lookups.

        :raises ValueError: for any value that is not ``None`` and does not
            case-insensitively match a member value
        """
        if value is None:
            return cls.UNSTRUCTURED
        # FIX: guard with isinstance so non-string values (e.g. ints) raise
        # the documented ValueError instead of AttributeError on .lower()
        if isinstance(value, str):
            for member in cls:
                if member.value == value.lower():
                    return member
        raise ValueError(f"{value} is not a valid {cls.__name__}")
92
+
93
+
94
class SparsityCompressionConfig(RegistryMixin, BaseModel):
    """
    Base data class for storing sparsity compression parameters

    :param format: name of compression format
    :param targets: List of layer names or layer types that aren't sparse and should
        be ignored during compression. By default, assume all layers are targeted
    :param ignore: List of layer names (unique) to ignore from targets. Defaults to None
    :param global_sparsity: average sparsity of the entire model
    :param sparsity_structure: structure of the sparsity, such as
        "unstructured", "2:4", "8:16" etc
    """

    # pydantic model fields; "format" doubles as the registry key used by
    # load_from_registry to pick a concrete config subclass
    format: str
    targets: Optional[List[str]] = None
    ignore: Optional[List[str]] = None
    global_sparsity: Optional[float] = 0.0
    sparsity_structure: Optional[str] = "unstructured"
.venv/lib/python3.11/site-packages/compressed_tensors/config/dense.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional
16
+
17
+ from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
18
+
19
+
20
+ __all__ = ["DenseSparsityConfig"]
21
+
22
+
23
@SparsityCompressionConfig.register(name=CompressionFormat.dense.value)
class DenseSparsityConfig(SparsityCompressionConfig):
    """
    Identity configuration for storing a sparse model in
    an uncompressed dense format (i.e. no compression is applied)

    :param global_sparsity: average sparsity of the entire model
    :param sparsity_structure: structure of the sparsity, such as
        "unstructured", "2:4", "8:16" etc
    """

    # format is pinned to "dense" so registry round-trips resolve this class
    format: str = CompressionFormat.dense.value
    global_sparsity: Optional[float] = 0.0
    sparsity_structure: Optional[str] = "unstructured"
.venv/lib/python3.11/site-packages/compressed_tensors/config/sparse_24_bitmask.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional
16
+
17
+ from compressed_tensors.config import (
18
+ CompressionFormat,
19
+ SparsityCompressionConfig,
20
+ SparsityStructure,
21
+ )
22
+
23
+
24
+ __all__ = ["Sparse24BitMaskConfig"]
25
+
26
+
27
@SparsityCompressionConfig.register(name=CompressionFormat.sparse_24_bitmask.value)
class Sparse24BitMaskConfig(SparsityCompressionConfig):
    """
    Configuration for storing a 2:4 sparse model using
    bitmask compression

    :param global_sparsity: average sparsity of the entire model
    :param sparsity_structure: structure of the sparsity, should always be
        "2:4" for this compression format
    """

    # format is pinned so registry round-trips resolve this class
    format: str = CompressionFormat.sparse_24_bitmask.value
    global_sparsity: Optional[float] = 0.0
    sparsity_structure: Optional[str] = SparsityStructure.TWO_FOUR.value
.venv/lib/python3.11/site-packages/compressed_tensors/config/sparse_bitmask.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional
16
+
17
+ from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
18
+
19
+
20
+ __all__ = ["BitmaskConfig"]
21
+
22
+
23
@SparsityCompressionConfig.register(name=CompressionFormat.sparse_bitmask.value)
class BitmaskConfig(SparsityCompressionConfig):
    """
    Configuration for storing a sparse model using
    bitmask compression

    :param global_sparsity: average sparsity of the entire model
    :param sparsity_structure: structure of the sparsity, such as
        "unstructured", "2:4", "8:16" etc
    """

    # format is pinned so registry round-trips resolve this class
    format: str = CompressionFormat.sparse_bitmask.value
    global_sparsity: Optional[float] = 0.0
    sparsity_structure: Optional[str] = "unstructured"
.venv/lib/python3.11/site-packages/compressed_tensors/linear/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
.venv/lib/python3.11/site-packages/compressed_tensors/linear/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (198 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/linear/__pycache__/compressed_linear.cpython-311.pyc ADDED
Binary file (3.63 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/linear/compressed_linear.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, Tuple
16
+
17
+ import torch
18
+ from compressed_tensors.compressors.base import BaseCompressor
19
+ from compressed_tensors.quantization import (
20
+ QuantizationScheme,
21
+ QuantizationStatus,
22
+ initialize_module_for_quantization,
23
+ )
24
+ from torch import Tensor
25
+ from torch.nn import Parameter
26
+ from torch.nn.functional import linear
27
+ from torch.nn.modules import Linear
28
+
29
+
30
class CompressedLinear(Linear):
    """
    Wrapper module for running a compressed forward pass of a quantized Linear
    module. The wrapped layer is decompressed on each forward call.

    :param module: dense linear module to replace
    :param quantization_scheme: quantization config for the module to wrap
    :param quantization_format: compression format module is stored as
    """

    @classmethod
    @torch.no_grad()
    def from_linear(
        cls,
        module: Linear,
        quantization_scheme: QuantizationScheme,
        quantization_format: str,
    ):
        """
        Convert a dense Linear module into a CompressedLinear in place.

        The conversion reassigns ``__class__`` rather than constructing a new
        object, so existing references to the module (e.g. from the parent
        model) keep pointing at the converted instance.
        """
        module.__class__ = CompressedLinear
        module.compressor = BaseCompressor.load_from_registry(quantization_format)
        device = next(module.parameters()).device

        # this will initialize all the scales and zero points
        initialize_module_for_quantization(
            module, quantization_scheme, force_zero_point=False
        )

        # get the shape and dtype of compressed parameters
        compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
            module.weight.shape, quantization_scheme.weights
        )

        # no need for this once quantization is initialized, will be replaced
        # with the compressed parameter
        delattr(module, "weight")

        # populate compressed weights and quantization parameters
        # (torch.empty leaves values uninitialized; they are expected to be
        # loaded from a compressed checkpoint afterwards)
        for name, (shape, dtype) in compression_params.items():
            param = Parameter(
                torch.empty(shape, device=device, dtype=dtype), requires_grad=False
            )
            module.register_parameter(name, param)

        # mark module as compressed
        module.quantization_status = QuantizationStatus.COMPRESSED

        # handles case where forward is wrapped in new_forward by accelerate hooks;
        # rebind the hook's saved forward to the new CompressedLinear.forward
        if hasattr(module, "_old_forward"):
            module._old_forward = CompressedLinear.forward.__get__(
                module, CompressedLinear
            )

        return module

    def forward(self, input: Tensor) -> Tensor:
        """
        Decompresses the weight, then runs the wrapped forward pass
        """
        uncompressed_weight = self.compressor.decompress_module(self)
        return linear(input, uncompressed_weight, self.bias)
.venv/lib/python3.11/site-packages/compressed_tensors/registry/__init__.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa
2
+
3
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ from .registry import *
.venv/lib/python3.11/site-packages/compressed_tensors/registry/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (235 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/registry/__pycache__/registry.cpython-311.pyc ADDED
Binary file (14.4 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/registry/registry.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """
16
+ Universal registry to support registration and loading of child classes and plugins
17
+ of neuralmagic utilities
18
+ """
19
+
20
+ import importlib
21
+ from collections import defaultdict
22
+ from typing import Any, Dict, List, Optional, Type, Union
23
+
24
+
25
+ __all__ = [
26
+ "RegistryMixin",
27
+ "register",
28
+ "get_from_registry",
29
+ "registered_names",
30
+ "registered_aliases",
31
+ "standardize_lookup_name",
32
+ ]
33
+
34
+
35
+ _ALIAS_REGISTRY: Dict[Type, Dict[str, str]] = defaultdict(dict)
36
+ _REGISTRY: Dict[Type, Dict[str, Any]] = defaultdict(dict)
37
+
38
+
39
def standardize_lookup_name(name: str) -> str:
    """
    Normalize ``name`` for lookup in the registry.

    Underscores and spaces become hyphens and the result is lowercased, so
    "Foo_bar baz", "foo bar-baz" and "foo-bar-baz" all resolve identically.

    example:
    ```
    standardize_lookup_name("Foo_bar baz") == "foo-bar-baz"
    ```

    :param name: name to standardize
    :return: standardized name
    """
    for separator in ("_", " "):
        name = name.replace(separator, "-")
    return name.lower()
54
+
55
+
56
def standardize_alias_name(
    name: Union[None, str, List[str]]
) -> Union[None, str, List[str]]:
    """Standardize a single alias or a list of aliases; None passes through."""
    if name is None:
        return None
    if isinstance(name, str):
        return standardize_lookup_name(name)
    # otherwise a list of aliases: standardize each entry
    return [standardize_lookup_name(entry) for entry in name]
65
+
66
+
67
class RegistryMixin:
    """
    Universal registry to support registration and loading of child classes and
    plugins of neuralmagic utilities.

    Classes that need a registry or plugin mechanism inherit from this mixin,
    use :meth:`register` to add implementations, and
    :meth:`load_from_registry` / :meth:`get_value_from_registry` to retrieve
    them.

    Set the static attribute ``registry_requires_subclass = True`` on a class
    if only its own subclasses should be allowed in its registry.

    example
    ```python
    class Dataset(RegistryMixin):
        pass


    # register with default name
    @Dataset.register()
    class ImageNetDataset(Dataset):
        pass

    # load as "ImageNetDataset"
    imagenet = Dataset.load("ImageNetDataset")

    # register with custom name
    @Dataset.register(name="cifar-dataset")
    class Cifar(Dataset):
        pass

    Note: the name will be standardized for lookup in the registry.
    For example, if a class is registered as "cifar_dataset" or
    "cifar dataset", it will be stored as "cifar-dataset". The user
    will be able to load the class with any of the three name variants.

    # register with multiple aliases
    @Dataset.register(alias=["cifar-10-dataset", "cifar_100_dataset"])
    class Cifar(Dataset):
        pass

    # load as "cifar-dataset"
    cifar = Dataset.load_from_registry("cifar-dataset")

    # load from custom file that implements a dataset
    mnist = Dataset.load_from_registry("/path/to/mnnist_dataset.py:MnistDataset")
    ```
    """

    # when True, registration/retrieval validates that the value is a
    # subclass of the class it is registered to
    registry_requires_subclass: bool = False

    @classmethod
    def register(
        cls, name: Optional[str] = None, alias: Union[List[str], str, None] = None
    ):
        """
        Decorator that registers the wrapped value (class or function) under
        the registry of the class ``.register`` is called on.

        :param name: name to register the wrapped value as,
            defaults to value.__name__
        :param alias: alias or list of aliases to also register the wrapped
            value as, defaults to None
        :return: decorator that registers and returns its argument unchanged
        """

        def wrapper(value: Any):
            cls.register_value(value, name=name, alias=alias)
            return value

        return wrapper

    @classmethod
    def register_value(
        cls, value: Any, name: str, alias: Union[str, List[str], None] = None
    ):
        """
        Registers ``value`` under the registry of this class.

        :param value: value to register
        :param name: name to register the value as, defaults to value.__name__
        :param alias: alias or list of aliases to also register the value as,
            defaults to None
        """
        register(
            parent_class=cls,
            value=value,
            name=name,
            alias=alias,
            require_subclass=cls.registry_requires_subclass,
        )

    @classmethod
    def load_from_registry(cls, name: str, **constructor_kwargs) -> object:
        """
        Look up ``name`` in the registry and construct the registered value.

        :param name: name of registered class to load
        :param constructor_kwargs: arguments forwarded to the constructor
            retrieved from the registry
        :return: object constructed from the registered value under the given
            name; raises if the name is not found in the registry
        """
        constructor = cls.get_value_from_registry(name=name)
        return constructor(**constructor_kwargs)

    @classmethod
    def get_value_from_registry(cls, name: str):
        """
        :param name: name to retrieve from the registry
        :return: value registered under the given name; raises if not found
        """
        return get_from_registry(
            parent_class=cls,
            name=name,
            require_subclass=cls.registry_requires_subclass,
        )

    @classmethod
    def registered_names(cls) -> List[str]:
        """
        :return: list of all names registered to this class
        """
        return registered_names(cls)

    @classmethod
    def registered_aliases(cls) -> List[str]:
        """
        :return: list of all aliases registered to this class
        """
        return registered_aliases(cls)
200
+
201
+
202
def register(
    parent_class: Type,
    value: Any,
    name: Optional[str] = None,
    alias: Union[List[str], str, None] = None,
    require_subclass: bool = False,
):
    """
    Register ``value`` in the registry of ``parent_class`` under ``name``
    (standardized) plus any aliases.

    :param parent_class: class to register the name under
    :param value: the value to register
    :param name: name to register the wrapped value as, defaults to
        value.__name__
    :param alias: alias or list of aliases to register the wrapped value as,
        defaults to None
    :param require_subclass: require that value is a subclass of the class this
        method is called from
    """
    if name is None:
        # fall back to the value's own name
        name = value.__name__

    name = standardize_lookup_name(name)
    alias = standardize_alias_name(alias)
    register_alias(name=name, alias=alias, parent_class=parent_class)

    if require_subclass:
        _validate_subclass(parent_class, value)

    if name not in _REGISTRY[parent_class]:
        _REGISTRY[parent_class][name] = value
        return

    # name already present: re-registering the identical value is a no-op,
    # but two different values may not share one name
    registered_value = _REGISTRY[parent_class][name]
    if registered_value is not value:
        raise RuntimeError(
            f"Attempting to register name {name} as {value} "
            f"however {name} has already been registered as {registered_value}"
        )
240
+
241
+
242
def get_from_registry(
    parent_class: Type, name: str, require_subclass: bool = False
) -> Any:
    """
    :param parent_class: class that the name is registered under
    :param name: name to retrieve from the registry of the class; may also be
        a "path/to/module.py:AttributeName" string to import a value from an
        arbitrary file instead of the registry
    :param require_subclass: require that value is a subclass of the class this
        method is called from
    :return: value from retrieved the registry for the given name, raises
        error if not found
    """
    # NOTE(review): standardization lowercases and hyphenates the full string
    # BEFORE the ":" split below, so file-based lookups also get their module
    # path and attribute name rewritten — confirm this is intended for
    # case-sensitive filesystems and attribute names.
    name = standardize_lookup_name(name)

    if ":" in name:
        # user specifying specific module to load and value to import
        module_path, value_name = name.split(":")
        retrieved_value = _import_and_get_value_from_module(module_path, value_name)
    else:
        # look up name in alias registry (maps alias -> canonical name)
        name = _ALIAS_REGISTRY[parent_class].get(name, name)
        # look up name in registry
        retrieved_value = _REGISTRY[parent_class].get(name)
        if retrieved_value is None:
            raise KeyError(
                f"Unable to find {name} registered under type {parent_class}.\n"
                f"Registered values for {parent_class}: "
                f"{registered_names(parent_class)}\n"
                f"Registered aliases for {parent_class}: "
                f"{registered_aliases(parent_class)}"
            )

    if require_subclass:
        _validate_subclass(parent_class, retrieved_value)

    return retrieved_value
277
+
278
+
279
def registered_names(parent_class: Type) -> List[str]:
    """
    :param parent_class: class whose registry to inspect
    :return: all names registered to the given class
    """
    return [registered for registered in _REGISTRY[parent_class]]
285
+
286
+
287
def registered_aliases(parent_class: Type) -> List[str]:
    """
    :param parent_class: class whose registry to inspect
    :return: all aliases registered to the given class (canonical names are
        excluded, even though they are stored in the alias map as well)
    """
    names = set(registered_names(parent_class))
    aliases_and_names = set(_ALIAS_REGISTRY[parent_class].keys())
    return list(aliases_and_names - names)
297
+
298
+
299
def register_alias(
    name: str, parent_class: Type, alias: Union[str, List[str], None] = None
):
    """
    Updates the mapping from the alias(es) to the given name.
    If the alias is None, the name is used as the alias.

    :param name: name that the alias refers to
    :param parent_class: class that the name is registered under
    :param alias: single alias or list of aliases that
        refer to the name, defaults to None
    """
    if alias is not None:
        alias = alias if isinstance(alias, list) else [alias]
    else:
        alias = []

    if name in alias:
        raise KeyError(
            f"Attempting to register alias {name}, "
            f"that is identical to the standardized name: {name}."
        )
    # the canonical name is stored as an alias of itself so lookups resolve
    # uniformly through the alias map
    alias.append(name)

    for alias_name in alias:
        if alias_name in _ALIAS_REGISTRY[parent_class]:
            # BUG FIX: previously indexed _ALIAS_REGISTRY[alias_name] (keyed by
            # the alias string instead of the parent class), which produced a
            # wrong message and, since _ALIAS_REGISTRY is a defaultdict,
            # silently inserted a bogus entry keyed by the alias string
            raise KeyError(
                f"Attempting to register alias {alias_name} as {name} "
                f"however {alias_name} has already been registered as "
                f"{_ALIAS_REGISTRY[parent_class][alias_name]}"
            )
        _ALIAS_REGISTRY[parent_class][alias_name] = name
332
+
333
+
334
+ def _import_and_get_value_from_module(module_path: str, value_name: str) -> Any:
335
+ # import the given module path and try to get the value_name if it is included
336
+ # in the module
337
+
338
+ # load module
339
+ spec = importlib.util.spec_from_file_location(
340
+ f"plugin_module_for_{value_name}", module_path
341
+ )
342
+ module = importlib.util.module_from_spec(spec)
343
+ spec.loader.exec_module(module)
344
+
345
+ # get value from module
346
+ value = getattr(module, value_name, None)
347
+
348
+ if not value:
349
+ raise RuntimeError(
350
+ f"Unable to find attribute {value_name} in module {module_path}"
351
+ )
352
+ return value
353
+
354
+
355
+ def _validate_subclass(parent_class: Type, child_class: Type):
356
+ if not issubclass(child_class, parent_class):
357
+ raise ValueError(
358
+ f"class {child_class} is not a subclass of the class it is "
359
+ f"registered for: {parent_class}."
360
+ )
.venv/lib/python3.11/site-packages/compressed_tensors/utils/__init__.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # flake8: noqa
15
+
16
+ from .helpers import *
17
+ from .offload import *
18
+ from .permutations_24 import *
19
+ from .permute import *
20
+ from .safetensors_load import *
21
+ from .semi_structured_conversions import *
.venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (413 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/helpers.cpython-311.pyc ADDED
Binary file (13.8 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/offload.cpython-311.pyc ADDED
Binary file (17.5 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/permutations_24.cpython-311.pyc ADDED
Binary file (3.01 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/permute.cpython-311.pyc ADDED
Binary file (2.58 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/safetensors_load.cpython-311.pyc ADDED
Binary file (12.5 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/utils/__pycache__/semi_structured_conversions.cpython-311.pyc ADDED
Binary file (13.1 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/utils/helpers.py ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import warnings
16
+ from functools import wraps
17
+ from typing import Any, Callable, Dict, List, Optional
18
+
19
+ import numpy
20
+ import torch
21
+ from transformers import AutoConfig
22
+
23
+
24
+ __all__ = [
25
+ "infer_compressor_from_model_config",
26
+ "fix_fsdp_module_name",
27
+ "tensor_follows_mask_structure",
28
+ "replace_module",
29
+ "is_compressed_tensors_config",
30
+ "getattr_chain",
31
+ "deprecated",
32
+ "Aliasable",
33
+ "combine_shards",
34
+ "shard_tensor",
35
+ "pack_bitmasks",
36
+ "unpack_bitmasks",
37
+ ]
38
+
39
+ FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
40
+
41
+
42
def infer_compressor_from_model_config(
    pretrained_model_name_or_path: str,
) -> Optional["ModelCompressor"]:  # noqa: F821
    """
    Given a path to a model config, extract a sparsity config if it exists and return
    the associated ModelCompressor

    :param pretrained_model_name_or_path: path to model config on disk or HF hub
    :return: matching compressor if config contains a sparsity config,
        otherwise None
    """
    # imported lazily to avoid a circular import at module load time
    from compressed_tensors.compressors import ModelCompressor
    from compressed_tensors.config import CompressionConfig

    config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
    sparsity_config = ModelCompressor.parse_sparsity_config(config)
    if sparsity_config is None:
        return None

    # "format" selects which registered config and compressor classes to build
    # NOTE(review): this path relies on `CompressionConfig` existing in
    # compressed_tensors.config — confirm the name is still exported there
    format = sparsity_config.get("format")
    sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
    compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
    return compressor
64
+
65
+
66
+ # TODO: There is already the same function in
67
+ # SparseML, should be moved to a shared location
68
+ # in the future
69
def fix_fsdp_module_name(name: str) -> str:
    """
    Remove FSDP wrapper prefixes from a module name.

    Handles the wrapper token appearing mid-name
    ("a._fsdp_wrapped_module.b" -> "a.b") as well as at the end
    ("a._fsdp_wrapped_module" -> "a").

    :param name: name to strip
    :return: stripped name
    """
    # strip "<token>." occurrences first, then any remaining ".<token>"
    stripped = name.replace(FSDP_WRAPPER_NAME + ".", "")
    return stripped.replace("." + FSDP_WRAPPER_NAME, "")
80
+
81
+
82
def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
    """
    Check that every chunk of ``m`` consecutive elements contains at least
    ``n`` zeros, where ``mask`` is the string "n:m".

    Note, some weights can incidentally be zero, so we check for
    at least n zeros in each chunk of size m rather than exactly n.

    :param tensor: tensor to check
    :param mask: mask structure to check for, in the format "n:m"
    :return: True if the tensor follows the mask structure
    :raises ValueError: if the tensor does not follow the mask structure
        (callers rely on the exception rather than a False return)
    """
    n, m = tuple(map(int, mask.split(":")))
    # Reshape the tensor into chunks of size m
    tensor = tensor.view(-1, m)

    # Count the number of zeros in each chunk
    zero_counts = (tensor == 0).sum(dim=1)

    # Check if the number of zeros in each chunk is at least n; >= is needed
    # since some weights can incidentally be zero
    if not torch.all(zero_counts >= n).item():
        # previously raised a bare ValueError() with no message
        raise ValueError(
            f"Tensor does not follow the {mask} sparsity mask structure"
        )

    return True
105
+
106
+
107
def replace_module(model: torch.nn.Module, name: str, new_module: torch.nn.Module):
    """
    Replace the submodule at dotted path ``name`` inside ``model`` with
    ``new_module``.

    :param model: root module to modify
    :param name: dotted path of the submodule to replace (e.g. "layers.0.mlp")
    :param new_module: module to install at that path
    """
    if "." in name:
        # resolve the parent module, then assign under the final path segment
        parent_name, child_name = name.rsplit(".", 1)
        parent = model.get_submodule(parent_name)
    else:
        # top-level attribute of the root module itself
        parent, child_name = model, name
    setattr(parent, child_name, new_module)
117
+
118
+
119
def is_compressed_tensors_config(compression_config: Any) -> bool:
    """
    Returns True if CompressedTensorsConfig is available from transformers and
    compression_config is an instance of CompressedTensorsConfig

    See: https://github.com/huggingface/transformers/pull/31704
    """
    try:
        from transformers.utils.quantization_config import CompressedTensorsConfig
    except ImportError:
        # transformers absent or too old: the config type cannot match
        return False
    return isinstance(compression_config, CompressedTensorsConfig)
132
+
133
+
134
def getattr_chain(obj: Any, chain_str: str, *args, **kwargs) -> Any:
    """
    Chain multiple getattr calls, separated by `.`

    :param obj: base object whose attributes are being retrieved
    :param chain_str: attribute names separated by `.`
    :param default: optional default (positional or keyword) returned when any
        attribute in the chain is missing; without it an AttributeError is
        raised
    """
    # figure out whether a default was supplied, positionally or by keyword
    if len(args) >= 1:
        has_default, default = True, args[0]
    elif "default" in kwargs:
        has_default, default = True, kwargs["default"]
    else:
        has_default, default = False, None

    current = obj
    for attr_name in chain_str.split("."):
        if not hasattr(current, attr_name):
            if not has_default:
                raise AttributeError(f"{current} object has no attribute {attr_name}")
            return default
        current = getattr(current, attr_name)

    return current
163
+
164
+
165
def deprecated(future_name: Optional[str] = None, message: Optional[str] = None):
    """
    Decorator to mark functions as deprecated

    :param future_name: name of the function to use instead; mentioned in the
        default warning message
    :param message: deprecation message, replaces default deprecation message
    """

    def decorator(func: Callable[[Any], Any]):
        # BUG FIX: the previous implementation mutated the enclosing `message`
        # via `nonlocal`, so reusing one `deprecated(...)` decorator instance
        # on a second function re-emitted the first function's message.
        # Build the text per decorated function instead.
        warning_text = message
        if warning_text is None:
            warning_text = (
                f"{func.__name__} is deprecated and will be removed in a future release"
            )
            if future_name is not None:
                warning_text += f". Please use {future_name} instead."

        @wraps(func)
        def wrapped(*args, **kwargs):
            warnings.warn(warning_text, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)

        return wrapped

    return decorator
191
+
192
+
193
class Aliasable:
    """
    A mixin for enums to allow aliasing of enum members

    Subclasses provide :meth:`get_aliases`, a mapping from alias value to
    canonical value; equality and hashing then treat aliased members as equal.

    Example:
        >>> class MyClass(Aliasable, int, Enum):
        >>>     ...
    """

    @staticmethod
    def get_aliases() -> Dict[str, str]:
        # must be overridden by the subclass
        raise NotImplementedError()

    def __eq__(self, other):
        aliases = self.get_aliases()
        if isinstance(other, self.__class__):
            # members are equal when values match directly or map to the
            # same canonical value through the alias table
            return self.value == other.value or (
                aliases.get(self.value, self.value)
                == aliases.get(other.value, other.value)
            )
        # comparing against a raw value: canonicalize both sides
        self_value = aliases.get(self.value, self.value)
        other_value = aliases.get(other, other)
        return self_value == other_value

    def __hash__(self):
        # BUG FIX: original read ``self.aliases``, which is never defined on
        # this class (only ``get_aliases()`` exists), so hashing any member
        # raised AttributeError. Hash the canonical value so that aliased
        # members (which compare equal) also hash equal.
        canonical_value = self.get_aliases().get(self.value, self.value)
        return hash(canonical_value)
222
+
223
+
224
def shard_tensor(
    tensor: torch.Tensor, shard_sizes: List[int], dim: int = 0
) -> List[torch.Tensor]:
    """
    Shards a tensor into a list of tensors along a given dimension.

    raises: ValueError: If the sum of shard_sizes does not match the
        size of the tensor along the given dimension.

    :param tensor: The input tensor to shard.
    :param shard_sizes : List of sizes for each shard along the specified dimension.
    :param dim : The dimension along which to shard the tensor.
    :returns: A list of tensors sharded along the specified dimension.
    """
    if sum(shard_sizes) != tensor.size(dim):
        raise ValueError(
            "Sum of shard_sizes must equal the size of the tensor "
            "along the specified dimension."
        )

    # compute the starting offset of each shard, then slice with narrow
    # (narrow returns views; no data is copied)
    offsets = [0]
    for size in shard_sizes:
        offsets.append(offsets[-1] + size)

    return [
        tensor.narrow(dim, start, size)
        for start, size in zip(offsets, shard_sizes)
    ]
254
+
255
+
256
def combine_shards(shards, dim=0):
    """
    Combine decompressed shards along a given dimension using `narrow`.

    :param shards: List of decompressed shard tensors.
    :param dim: Dimension to combine along (default: 0).
    :return: Combined decompressed tensor.
    """
    if not shards:
        raise ValueError("The list of shards is empty.")

    # all shards must agree on dtype before they can share a buffer
    if len({shard.dtype for shard in shards}) > 1:
        raise ValueError("All shards must have the same dtype.")

    # output shape matches the first shard except along the combine dim
    out_shape = list(shards[0].shape)
    out_shape[dim] = sum(shard.shape[dim] for shard in shards)

    result = torch.zeros(out_shape, dtype=shards[0].dtype, device=shards[0].device)

    # copy each shard into its slice of the output
    offset = 0
    for shard in shards:
        width = shard.shape[dim]
        result.narrow(dim, offset, width).copy_(shard)
        offset += width

    return result
287
+
288
+
289
def pack_bitmasks(bytemasks: torch.Tensor) -> torch.Tensor:
    """
    Converts a bytemask tensor to a bitmask tensor to reduce memory. Shape RxC will be
    compressed to R x ceil(C/8)

    :param bytemasks: mask tensor where each byte corresponds to a weight
    :return: mask tensor where each bit corresponds to a weight
    """
    # numpy.packbits with little bitorder puts element i into bit i of each byte
    packed = numpy.packbits(bytemasks.numpy(), axis=-1, bitorder="little")
    return torch.from_numpy(packed)
301
+
302
+
303
def unpack_bitmasks(
    packed_bitmasks: torch.Tensor, original_shape: List[int]
) -> torch.Tensor:
    """
    Converts a bitmask tensor back to a bytemask tensor for use during decompression

    :param packed_bitmasks: mask tensor where each bit corresponds to a weight
    :param original_shape: dense shape to decompress to
    :return: boolean mask of weights in the original dense shape
    """
    # expand bits back to bytes; count trims the padding added by packbits
    bits = numpy.unpackbits(
        packed_bitmasks.cpu().numpy(),
        axis=-1,
        count=original_shape[-1],
        bitorder="little",
    )

    # restore the dense shape as a boolean tensor
    return torch.from_numpy(bits.reshape(original_shape).astype(bool))
.venv/lib/python3.11/site-packages/compressed_tensors/utils/offload.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """
15
+ Utilities associated with offloading functionality provided by `accelerate`.
16
+
17
+ | ----------------------------------------------------------------------------------------------------- | # noqa: E501
18
+ | Operation | Without offloading support | With offloading support | # noqa: E501
19
+ | --------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
20
+ | Add | module.register_parameter(name, param) | register_offload_parameter(module, name, param) | # noqa: E501
21
+ | Check | N/A | has_offloaded_params(module) | # noqa: E501
22
+ | Onload | N/A | with align_module_device(module) | # noqa: E501
23
+ | Update | module.name.data.copy_(new_data) | update_offload_parameter(module, name, new_data) | # noqa: E501
24
+ | Delete | del module.name | delete_offload_parameter(module, name) | # noqa: E501
25
+ | ----------------------------------------------------------------------------------------------------- | # noqa: E501
26
+ """
27
+
28
+ import contextlib
29
+ from functools import wraps
30
+ from typing import Any, Callable, Dict, Literal, Optional, Union
31
+
32
+ import torch
33
+
34
+
35
+ try:
36
+ from accelerate.hooks import (
37
+ AlignDevicesHook,
38
+ add_hook_to_module,
39
+ remove_hook_from_module,
40
+ )
41
+ from accelerate.utils import (
42
+ OffloadedWeightsLoader,
43
+ PrefixedDataset,
44
+ set_module_tensor_to_device,
45
+ )
46
+
47
+ _has_accelerate = True
48
+ except ImportError:
49
+ _has_accelerate = False
50
+ AlignDevicesHook = None
51
+ add_hook_to_module = None
52
+ remove_hook_from_module = None
53
+ OffloadedWeightsLoader = None
54
+ PrefixedDataset = None
55
+ set_module_tensor_to_device = None
56
+
57
+
58
+ __all__ = [
59
+ "is_module_offloaded",
60
+ "get_execution_device",
61
+ "get_offloaded_device",
62
+ "update_prefix_dict",
63
+ "update_parameter_data",
64
+ "register_offload_parameter",
65
+ "update_offload_parameter",
66
+ "delete_offload_parameter",
67
+ "has_offloaded_params",
68
+ "disable_hf_hook",
69
+ "align_module_device",
70
+ ]
71
+
72
+
73
def check_accelerate(fallback: Any):
    """
    Decorator factory which guards a function behind the `accelerate` import.

    :param fallback: value returned by the decorated function when `accelerate`
        is not installed
    """

    def decorator(func: Callable[[Any], Any]):
        # accelerate is available: leave the function untouched
        if _has_accelerate:
            return func

        # otherwise replace it with a stub that returns the fallback value
        @wraps(func)
        def fallback_fn(*args, **kwargs):
            return fallback

        return fallback_fn

    return decorator
86
+
87
+
88
+ """ Candidates for Depreciation """
89
+
90
+
91
+ @check_accelerate(fallback=False)
92
+ def is_module_offloaded(module: torch.nn.Module) -> bool:
93
+ return has_offloaded_params(module)
94
+
95
+
96
+ def get_execution_device(module: torch.nn.Module) -> torch.device:
97
+ """
98
+ :param module: module to check
99
+ :return: device module is loaded onto during forward pass
100
+ """
101
+ if has_offloaded_params(module):
102
+ return module._hf_hook.execution_device
103
+ device = next(module.parameters()).device
104
+
105
+ # offload only gets set for leaf modules, fallback to checking for device type
106
+ if device.type == "meta":
107
+ return module._hf_hook.execution_device
108
+
109
+ return device
110
+
111
+
112
+ def get_offloaded_device(module: torch.nn.Module) -> torch.device:
113
+ """
114
+ :param module: module to check
115
+ :return: device module is offloaded to onto after forward pass
116
+ """
117
+ if has_offloaded_params(module):
118
+ first_key = list(module._hf_hook.weights_map.keys())[0]
119
+ prefix_dataset = module._hf_hook.weights_map.dataset
120
+ return prefix_dataset[first_key].device
121
+ return next(module.parameters()).device
122
+
123
+
124
+ @check_accelerate(fallback=None)
125
+ def update_prefix_dict(module: torch.nn.Module, key: str, data: torch.Tensor):
126
+ """
127
+ Updates the offloaded state dict for a given module. Parameter named key is replaced
128
+ by data. This is neccesary because parameter updates for offloaded modules do not
129
+ persist automatically between loads. This function only affects the offloaded
130
+ state dict and not the current state of the loaded module.
131
+
132
+ :param module: module containing the parameter to update
133
+ :param key: name of parameter to update
134
+ :param data: tensor to update parameter with in the offloaded state dict
135
+ """
136
+ if not has_offloaded_params(module):
137
+ raise ValueError("Prefix dict is only applicable to offloaded modules")
138
+
139
+ weights_map = module._hf_hook.weights_map
140
+ offload_to_weights_map(weights_map, key, data)
141
+
142
+
143
+ def update_parameter_data(
144
+ module: torch.nn.Module, new_param_data: torch.Tensor, param_name: str
145
+ ):
146
+ """
147
+ Update the data of an existing parameter and its offload dict. Supports both
148
+ parameters of offloaded modules and non-offloaded modules
149
+
150
+ :param module: module containing the parameter to update
151
+ :param new_param_data: tensor to update parameter with
152
+ :param param_name: name of module parameter to update
153
+ """
154
+ update_offload_parameter(module, param_name, new_param_data)
155
+
156
+
157
+ """ Candidates for Upstreaming """
158
+
159
+
160
+ def register_offload_parameter(
161
+ module: torch.nn.Module,
162
+ name: str,
163
+ parameter: torch.nn.Parameter,
164
+ offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
165
+ ):
166
+ """
167
+ Register a parameter to the given module which may be offloaded
168
+
169
+ :param module: maybe offloaded module
170
+ :param name: name of newly registered parameter
171
+ :param parameter: parameter being registered
172
+ :param offload_device: device on which weight will be offloaded to. If None is
173
+ provided, then infer device from parameters on module
174
+ """
175
+ has_onload = any(p.device != torch.device("meta") for p in module.parameters())
176
+ module.register_parameter(name, parameter)
177
+
178
+ if has_offloaded_params(module):
179
+ weights_map = module._hf_hook.weights_map
180
+ offload_to_weights_map(weights_map, name, parameter.data, offload_device)
181
+ if not has_onload:
182
+ set_module_tensor_to_device(module, name, "meta")
183
+
184
+
185
def update_offload_parameter(
    module: torch.nn.Module,
    name: str,
    data: Optional[torch.Tensor],
    offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
):
    """
    Update the data of an existing parameter and its offload dict. Supports both
    parameters of offloaded modules and non-offloaded modules

    :param module: module containing the parameter to update
    :param name: name of module parameter to update
    :param data: tensor to update parameter with; if None, the update is a no-op
    :param offload_device: device on which weight will be offloaded to. If None is
        provided, then infer device from parameters on module
    """
    param = getattr(module, name)

    # BUG FIX: `data` is annotated Optional but the original unconditionally
    # called `data.to(...)`, crashing on None; treat None as "nothing to update"
    if data is None:
        return

    data = data.to(param.dtype)

    # copy data into onloaded parameter if applicable
    # (compare device *type*; the original compared a torch.device directly
    # against the string "meta")
    if param.device.type != "meta":
        param.data.copy_(data)

    # update offload dict so the new data persists across on/offload cycles
    if has_offloaded_params(module):
        weights_map = module._hf_hook.weights_map
        offload_to_weights_map(weights_map, name, data, offload_device)
212
+
213
+
214
+ def delete_offload_parameter(module: torch.nn.Module, name: str):
215
+ """
216
+ Delete a parameter from a module which may be offloaded
217
+
218
+ :param module: maybe offloaded module
219
+ :param name: name of parameter being deleted
220
+ """
221
+ delattr(module, name)
222
+
223
+ if has_offloaded_params(module):
224
+ weights_map = module._hf_hook.weights_map
225
+ delete_from_weights_map(weights_map, name)
226
+
227
+
228
+ @check_accelerate(fallback=contextlib.nullcontext())
229
+ @contextlib.contextmanager
230
+ def disable_hf_hook(module: torch.nn.Module):
231
+ hooks = {}
232
+
233
+ def collect_hooks(module):
234
+ nonlocal hooks
235
+ if hasattr(module, "_hf_hook"):
236
+ hooks[module] = module._hf_hook
237
+ remove_hook_from_module(module)
238
+
239
+ module.apply(collect_hooks)
240
+
241
+ yield
242
+
243
+ for submodule, hook in hooks.items():
244
+ add_hook_to_module(submodule, hook)
245
+
246
+
247
+ @check_accelerate(fallback=None)
248
+ def offload_to_weights_map(
249
+ weights_map: Union[PrefixedDataset, Dict, OffloadedWeightsLoader],
250
+ key: str,
251
+ value: torch.Tensor,
252
+ offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
253
+ ):
254
+ """
255
+ Helper function which implements offloaded item assignment for PrefixedDataset,
256
+ OffloadedWeightsLoader, and Dict types.
257
+
258
+ :param weights_map: weight map to be updated with offload information
259
+ :param key: key used to identify weight location
260
+ :param value: weight being offloaded
261
+ :param offload_device: device on which weight will be offloaded to. If None is
262
+ provided, then infer device from parameters in weights_map
263
+ """
264
+ if isinstance(weights_map, PrefixedDataset):
265
+ if offload_device == "disk":
266
+ raise ValueError(f"Cannot offload to disk with type {type(weights_map)}")
267
+
268
+ dataset = weights_map.dataset
269
+ key = f"{weights_map.prefix}{key}"
270
+ offload_to_weights_map(dataset, key, value, offload_device)
271
+
272
+ elif isinstance(weights_map, OffloadedWeightsLoader):
273
+ if key not in weights_map.all_keys:
274
+ weights_map.all_keys.append(key)
275
+
276
+ if len(weights_map.index) <= 0 and offload_device != "disk":
277
+ offload_to_weights_map(weights_map.state_dict, key, value, offload_device)
278
+
279
+ else:
280
+ raise NotImplementedError(
281
+ "Updating weights_map with disk offloading is not implemented yet"
282
+ )
283
+
284
+ elif isinstance(weights_map, dict):
285
+ if offload_device == "disk":
286
+ raise ValueError(f"Cannot offload to disk with type {type(weights_map)}")
287
+
288
+ # infer offload device
289
+ if offload_device is None:
290
+ if key in weights_map:
291
+ offload_device = weights_map[key].device
292
+ else:
293
+ tens = next(iter(weights_map.values()), None)
294
+ if tens is None:
295
+ raise ValueError(
296
+ "Cannot infer offload device from empty weights_map"
297
+ )
298
+ offload_device = tens.device
299
+
300
+ weights_map[key] = value.to(device=offload_device)
301
+
302
+ else:
303
+ raise NotImplementedError(
304
+ "Updating offload data not implemented for weights_map of type "
305
+ f"{type(weights_map)}"
306
+ )
307
+
308
+
309
+ @check_accelerate(fallback=None)
310
+ def delete_from_weights_map(
311
+ weights_map: Union[PrefixedDataset, Dict, OffloadedWeightsLoader],
312
+ key: str,
313
+ ):
314
+ if isinstance(weights_map, PrefixedDataset):
315
+ dataset = weights_map.dataset
316
+ key = f"{weights_map.prefix}{key}"
317
+ delete_from_weights_map(dataset, key)
318
+
319
+ elif isinstance(weights_map, OffloadedWeightsLoader):
320
+ if len(weights_map.index) <= 0:
321
+ delete_from_weights_map(weights_map.state_dict, key)
322
+
323
+ else:
324
+ raise NotImplementedError(
325
+ "Delete from weights_map with disk offloading is not implemented yet"
326
+ )
327
+
328
+ elif isinstance(weights_map, dict):
329
+ del weights_map[key]
330
+
331
+ else:
332
+ raise NotImplementedError(
333
+ "Updating offload data not implemented for weights_map of type "
334
+ f"{type(weights_map)}"
335
+ )
336
+
337
+
338
+ """ Upstreamed Functions """
339
+
340
+
341
+ # introduced in accelerate v1.1.0
342
+ @check_accelerate(fallback=False)
343
+ def has_offloaded_params(module: torch.nn.Module) -> bool:
344
+ """
345
+ Checks if a module has offloaded parameters by checking if the given module has a
346
+ AlignDevicesHook attached with offloading enabled
347
+
348
+ Args:
349
+ module (`torch.nn.Module`): The module to check for an offload hook.
350
+
351
+ Returns:
352
+ bool: `True` if the module has an offload hook and offloading is enabled,
353
+ `False` otherwise.
354
+ """
355
+ return (
356
+ hasattr(module, "_hf_hook")
357
+ and isinstance(module._hf_hook, AlignDevicesHook)
358
+ and module._hf_hook.offload
359
+ )
360
+
361
+
362
+ # introduced in accelerate v1.1.0
363
+ @check_accelerate(fallback=contextlib.nullcontext())
364
+ @contextlib.contextmanager
365
+ def align_module_device(
366
+ module: torch.nn.Module, execution_device: Optional[torch.device] = None
367
+ ):
368
+ """
369
+ Context manager that moves a module's parameters to the specified execution device.
370
+
371
+ Args:
372
+ module (`torch.nn.Module`):
373
+ Module with parameters to align.
374
+ execution_device (`torch.device`, *optional*):
375
+ If provided, overrides the module's execution device within the context.
376
+ Otherwise, use hook execution device or pass
377
+ """
378
+ if has_offloaded_params(module):
379
+ if execution_device is not None:
380
+ original_device = module._hf_hook.execution_device
381
+ module._hf_hook.execution_device = execution_device
382
+
383
+ try:
384
+ module._hf_hook.pre_forward(module)
385
+ yield
386
+ finally:
387
+ module._hf_hook.post_forward(module, None)
388
+ if execution_device is not None:
389
+ module._hf_hook.execution_device = original_device
390
+
391
+ elif execution_device is not None:
392
+ devices = {
393
+ name: param.device for name, param in module.named_parameters(recurse=False)
394
+ }
395
+ try:
396
+ for name in devices:
397
+ set_module_tensor_to_device(module, name, execution_device)
398
+ yield
399
+ finally:
400
+ for name, device in devices.items():
401
+ set_module_tensor_to_device(module, name, device)
402
+
403
+ else:
404
+ yield
.venv/lib/python3.11/site-packages/compressed_tensors/utils/permutations_24.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy
17
+ import torch
18
+
19
+
20
+ __all__ = ["get_permutations_24"]
21
+
22
+
23
+ # Precompute permutations for Marlin24 weight and scale shuffling
24
+ # Originally implemented in nm-vllm/vllm/model_executor/layers/quantization/utils/marlin_24_perms.py # noqa: E501
25
+ #
26
+ # Marlin works on [16*2,64] tiles. The goal of the permutations is to reorder the weight
27
+ # data so that it is compatible with the tensor-core format that is described here:
28
+ # https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#matrix-fragments-for-mma-m16n8k16-with-floating-point-type # noqa: E501
29
+ #
30
+ # As a result of this reordering, the vector loads inside the kernel will get the data
31
+ # as it is needed for tensor-core (without the need to use ldmatrix instructions)
32
def get_permutations_24(num_bits):
    """
    Build the Marlin24 reorder permutations for the given weight bit-width.

    :param num_bits: quantization bit-width, must be 4 or 8
    :return: tuple of (weight permutation tensor, grouped scale permutation,
        per-channel scale permutation)
    """
    # per-thread tile offsets, replicated 4 times per thread
    weight_perm = []
    for tid in range(32):
        base = []
        col = tid // 4
        col_o = col // 2
        for block in (0, 1):
            for row in (
                2 * (tid % 4),
                2 * (tid % 4) + 1,
                2 * (tid % 4 + 4),
                2 * (tid % 4 + 4) + 1,
            ):
                base.append(16 * row + col_o * 256 + 8 * (col % 2) + 4 * block)
        for rep in range(4):
            weight_perm.extend(p + rep for p in base)
    perm = numpy.array(weight_perm)

    # interleave pattern depends on how many values pack into 32 bits
    if num_bits == 4:
        interleave = numpy.array([0, 2, 4, 6, 1, 3, 5, 7])
    elif num_bits == 8:
        interleave = numpy.array([0, 2, 1, 3])
    else:
        raise ValueError("num_bits must be 4 or 8, got {}".format(num_bits))

    perm = perm.reshape((-1, len(interleave)))[:, interleave].ravel()
    perm = torch.from_numpy(perm)

    # permutation for group-wise scales
    scale_perm = []
    for i in range(8):
        scale_perm.extend(i * 8 + j for j in (0, 4, 1, 5, 2, 6, 3, 7))

    # permutation for per-channel (single-group) scales is the identity
    scale_perm_single = []
    for i in range(8):
        scale_perm_single.extend(8 * i + j for j in range(8))

    return perm, scale_perm, scale_perm_single
.venv/lib/python3.11/site-packages/compressed_tensors/utils/permute.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Set, Tuple
16
+
17
+ import torch
18
+
19
+
20
+ __all__ = ["safe_permute"]
21
+
22
+
23
+ # these datatypes are missing implementations required for standard permutation
24
+ _EXPERIMENTAL_DTYPES: Set[Tuple[torch.dtype, torch.device]] = set()
25
+
26
+
27
+ def safe_permute(value: torch.Tensor, perm: torch.Tensor, dim: int = 0) -> torch.Tensor:
28
+ """
29
+ Perform out-of-place permutation without using torch.Tensor.index_put_,
30
+ whose implementation is missing for datatypes such as `torch.float8_e4m3fn`
31
+
32
+ :param value: tensor to permute
33
+ :param perm: permutation map
34
+ :param dim: dimension along which to apply permutation
35
+ :return: permuted value
36
+ """
37
+ dtype_tuple = (value.dtype, value.device)
38
+
39
+ if dtype_tuple in _EXPERIMENTAL_DTYPES:
40
+ return _fallback_permute(value, perm, dim)
41
+
42
+ try:
43
+ return value[tuple([slice(None)] * dim + [perm])]
44
+ except RuntimeError:
45
+ # Mark dtype as experimental if advanced indexing fails
46
+ _EXPERIMENTAL_DTYPES.add(dtype_tuple)
47
+ return _fallback_permute(value, perm, dim)
48
+
49
+
50
+ def _fallback_permute(
51
+ value: torch.Tensor, perm: torch.Tensor, dim: int
52
+ ) -> torch.Tensor:
53
+ """
54
+ Fallback permutation method for experimental dtypes.
55
+
56
+ :param value: tensor to permute
57
+ :param perm: permutation map
58
+ :param dim: dimension along which to apply permutation
59
+ :return: permuted value
60
+ """
61
+ value_ret = value.clone() # cannot use zeros_like b/c of missing impl.
62
+ orig_slices = [slice(None)] * (dim + 1)
63
+ perm_slices = [slice(None)] * (dim + 1)
64
+
65
+ for index, perm_index in enumerate(perm):
66
+ orig_slices[dim] = index
67
+ perm_slices[dim] = perm_index
68
+ value_ret[tuple(orig_slices)] = value[tuple(perm_slices)]
69
+
70
+ return value_ret
.venv/lib/python3.11/site-packages/compressed_tensors/utils/safetensors_load.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import os
17
+ import re
18
+ import struct
19
+ from typing import Dict, List, Optional, Tuple, Union
20
+
21
+ from safetensors import safe_open
22
+ from torch import Tensor
23
+ from transformers.utils import SAFE_WEIGHTS_INDEX_NAME, SAFE_WEIGHTS_NAME, cached_file
24
+
25
+
26
+ __all__ = [
27
+ "get_safetensors_folder",
28
+ "get_safetensors_header",
29
+ "match_param_name",
30
+ "merge_names",
31
+ "get_weight_mappings",
32
+ "get_nested_weight_mappings",
33
+ "get_nested_mappings_from_state_dict",
34
+ "get_quantization_state_dict",
35
+ "is_quantization_param",
36
+ ]
37
+
38
+ WeightMappingType = Dict[str, str]
39
+ NestedWeightMappingType = Dict[str, WeightMappingType]
40
+
41
+
42
+ def get_safetensors_folder(
43
+ pretrained_model_name_or_path: str, cache_dir: Optional[str] = None
44
+ ) -> str:
45
+ """
46
+ Given a Hugging Face stub or a local path, return the folder containing the
47
+ safetensors weight files
48
+
49
+ :param pretrained_model_name_or_path: local path to model or HF stub
50
+ :param cache_dir: optional cache dir to search through, if none is specified the
51
+ model will be searched for in the default TRANSFORMERS_CACHE
52
+ :return: local folder containing model data
53
+ """
54
+ if os.path.exists(pretrained_model_name_or_path):
55
+ # argument is a path to a local folder
56
+ return os.path.abspath(pretrained_model_name_or_path)
57
+
58
+ safetensors_path = cached_file(
59
+ pretrained_model_name_or_path,
60
+ SAFE_WEIGHTS_NAME,
61
+ cache_dir=cache_dir,
62
+ _raise_exceptions_for_missing_entries=False,
63
+ )
64
+ index_path = cached_file(
65
+ pretrained_model_name_or_path,
66
+ SAFE_WEIGHTS_INDEX_NAME,
67
+ cache_dir=cache_dir,
68
+ _raise_exceptions_for_missing_entries=False,
69
+ )
70
+ if safetensors_path is not None:
71
+ # found a single cached safetensors file
72
+ return os.path.split(safetensors_path)[0]
73
+ if index_path is not None:
74
+ # found a cached safetensors weight index file
75
+ return os.path.split(index_path)[0]
76
+
77
+ # model weights could not be found locally or cached from HF Hub
78
+ raise ValueError(
79
+ "Could not locate safetensors weight or index file from "
80
+ f"{pretrained_model_name_or_path}."
81
+ )
82
+
83
+
84
+ def get_safetensors_header(safetensors_path: str) -> Dict[str, str]:
85
+ """
86
+ Extracts the metadata from a safetensors file as JSON
87
+
88
+ :param safetensors_path: path to a safetensors file
89
+ :return: dictionary of metadata extracted from the safetensors file
90
+ """
91
+ with open(safetensors_path, "rb") as f:
92
+ length_of_header = struct.unpack("<Q", f.read(8))[0]
93
+ header_data = f.read(length_of_header)
94
+ header = json.loads(header_data)
95
+
96
+ return header
97
+
98
+
99
def match_param_name(full_name: str, param_name: str) -> Optional[str]:
    """
    Helper function extracting the uncompressed parameterized layer name from a
    compressed name. Assumes the compressed name was merged using merge_names.

    :param full_name: full name of parameter in compressed model
    :param param_name: compression parameter name
    :return: uncompressed name of the uncompressed parameterized layer, or None
        if full_name does not end with ``.param_name``
    """
    match = re.match(r"^(.*)\." + param_name + r"$", full_name)
    return match.group(1) if match else None
113
+
114
+
115
def merge_names(parent_name: str, child_name: str) -> str:
    """
    Helper function for merging an uncompressed parameterized layer name with a
    compression parameter. Names merged with this function can then be parsed by
    match_param_name.

    :param parent_name: uncompressed parameterized layer name
    :param child_name: compression parameter name
    :return: merged compressed name
    """
    return f"{parent_name}.{child_name}"
126
+
127
+
128
def get_weight_mappings(path_to_model_or_tensors: str) -> Dict[str, str]:
    """
    Takes a path to a state dict saved in safetensors format and returns a mapping
    from parameterized layer name to file location.

    {
        layer.weight.bitmask: file_location,
        layer.weight.row_offsets: file_location,
        layer.weight.shape: file_location,
        layer.weight.compressed: file_location
    }

    This generalizes to cases where the model is split into multiple safetensors files

    :param path_to_model_or_tensors: path to directory that contains
        safetensors (must contain either a single file or multiple files with an index),
        or a path to a single safetensors file
    :return: mapping of parameterized layer name to file location
    """
    if os.path.isfile(path_to_model_or_tensors):
        # we have a single safetensors file to read; every parameter maps to it
        header = get_safetensors_header(path_to_model_or_tensors)
        for key in header.keys():
            header[key] = path_to_model_or_tensors
        header.pop("__metadata__", None)
        # the values are already complete paths; running them through the
        # os.path.join loop below would join the file path with itself and
        # corrupt relative paths, so return early
        return header

    # we have a directory with one safetensors file or a sharded index
    safetensors_path = os.path.join(path_to_model_or_tensors, SAFE_WEIGHTS_NAME)
    index_path = os.path.join(path_to_model_or_tensors, SAFE_WEIGHTS_INDEX_NAME)
    if os.path.exists(safetensors_path):
        # we have a single safetensors file to read
        header = get_safetensors_header(safetensors_path)
        for key in header.keys():
            header[key] = SAFE_WEIGHTS_NAME
        header.pop("__metadata__", None)
    elif os.path.exists(index_path):
        # we have multiple safetensors files, read locations from the index
        with open(index_path, "r", encoding="utf-8") as f:
            index = json.load(f)
        header = index["weight_map"]
    else:
        raise ValueError(
            "Could not find a safetensors weight "
            f"or index file at {path_to_model_or_tensors}"
        )

    # convert file names (relative to the model directory) to full paths
    for key, value in header.items():
        header[key] = os.path.join(path_to_model_or_tensors, value)

    return header
180
+
181
+
182
def get_nested_weight_mappings(
    model_path: str, params_to_nest: List[str], return_unmatched_params: bool = False
) -> Union[NestedWeightMappingType, Tuple[NestedWeightMappingType, WeightMappingType]]:
    """
    Build a nested mapping from uncompressed parameterized layer names to the
    file locations of each layer's compression parameters, given a path to a
    safetensors state dict (single file or sharded with an index).

    Example of the nested mapping:
        layer: {
            bitmask: file_location,
            row_offsets: file_location,
            shape: file_location,
            compressed: file_location
        }

    Parameters that do not match any entry in params_to_nest are returned in a
    second dictionary when return_unmatched_params is True. This is needed when
    compressors are stacked (e.g., quantization compression followed by sparse
    compression).

    Example of the unmatched params mapping:
        {
            layer.weight_scale: file_location,
            layer.input_scale: file_location
        }

    :param model_path: Path to the safetensors state dict, must contain either a
        single safetensors file or multiple files with an index.
    :param params_to_nest: List of parameter names to nest.
    :param return_unmatched_params: If True, also return a dictionary of the
        parameters that were not matched to the params_to_nest.
    :return: the nested mapping, or a (nested mapping, unmatched params) tuple
        when return_unmatched_params is True.
    """
    flat_mappings = get_weight_mappings(model_path)
    nested = {}
    leftovers = {}

    for full_name, location in flat_mappings.items():
        found_match = False
        for param_name in params_to_nest:
            layer_name = match_param_name(full_name, param_name)
            if layer_name:
                nested.setdefault(layer_name, {})[param_name] = location
                found_match = True
        if return_unmatched_params and not found_match:
            leftovers[full_name] = location

    if return_unmatched_params:
        return nested, leftovers
    return nested
247
+
248
+
249
def get_nested_mappings_from_state_dict(
    state_dict, params_to_nest
) -> NestedWeightMappingType:
    """
    Build a nested mapping from uncompressed parameterized layer names to the
    values of each layer's compression parameters, given a state dict.

    Example of the nested mapping:
        layer: {
            weight_scale: ...,
            weight: ...,
            zero_point: ...,
        }

    :param state_dict: state dict of the model
    :param params_to_nest: List of parameter names to nest.
    :return: Nested mapping of parameterized layer names to the value of
        each layer's compression parameters.
    """
    nested = {}
    for full_name, value in state_dict.items():
        for param_name in params_to_nest:
            layer_name = match_param_name(full_name, param_name)
            if layer_name:
                nested.setdefault(layer_name, {})[param_name] = value
    return nested
278
+
279
+
280
def get_quantization_state_dict(model_path: str) -> Dict[str, Tensor]:
    """
    Load only the quantization parameters (scales, zero points, g_idx) from a
    safetensors state dict on disk.

    :param model_path: path to the safetensors state dict (single file or
        sharded with an index)
    :return: mapping of quantization parameter names to their tensors
    """
    state_dict = {}
    for weight_name, safe_path in get_weight_mappings(model_path).items():
        # skip everything that is not a quantization parameter
        if is_quantization_param(weight_name):
            with safe_open(safe_path, framework="pt", device="cpu") as f:
                state_dict[weight_name] = f.get_tensor(weight_name)
    return state_dict
290
+
291
+
292
def is_quantization_param(name: str) -> bool:
    """
    Check whether a parameter name refers to a quantization parameter.

    :param name: parameter name to check
    :return: True if parameter name is a quantization parameter, else False
    """
    # str.endswith accepts a tuple of suffixes, checked in a single call
    return name.endswith(("_scale", "zero_point", "g_idx"))
.venv/lib/python3.11/site-packages/compressed_tensors/utils/semi_structured_conversions.py ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Modified by Roberto Lopez Castro (roberto.lopez.castro@udc.es).
3
+ # Pulled from nm-vllm/vllm/model_executor/layers/quantization/utils/format_24.py
4
+ #
5
+ # flake8: noqa
6
+ # isort: skip_file
7
+
8
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
9
+ #
10
+ # Licensed under the Apache License, Version 2.0 (the "License");
11
+ # you may not use this file except in compliance with the License.
12
+ # You may obtain a copy of the License at
13
+ #
14
+ # http://www.apache.org/licenses/LICENSE-2.0
15
+ #
16
+ # Unless required by applicable law or agreed to in writing,
17
+ # software distributed under the License is distributed on an "AS IS" BASIS,
18
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ # See the License for the specific language governing permissions and
20
+ # limitations under the License.
21
+
22
+ import torch
23
+
24
+
25
# Public API: CUTLASS 2:4 semi-structured sparse conversions and mask creation
__all__ = [
    "sparse_semi_structured_from_dense_cutlass",
    "sparse_semi_structured_to_dense_cutlass",
    "mask_creator",
]
30
+
31
+
32
+ # This is PyTorch implementation of main part of reorder_meta()
33
+ # function, from tools/util/include/cutlass/util/host_reorder.h file
34
+ # of CUTLASS source tree. Furthermore, CUTLASS template for sparse
35
+ # GEMM decides upon layout of this matrix, and at the moment for the
36
+ # sparse GEMM executed on tensor cores, this is layout described by
37
+ # ColumnMajorInterleaved<2> data structure, in
38
+ # include/cutlass/layout/matrix.h of CUTLASS source tree. The
39
+ # reordering of meta matrix into meta_reordered matrix calculated
40
+ # according to these segments of CUTLASS code is re-implemented here.
41
+ # Note that this calculation produces offsets for scattering metadata
42
+ # matrix elements into reordered metadata matrix elements (or,
43
+ # equivalently, for gathering reordered metadata matrix element back
44
+ # into metadata matrix elements).
45
+ def _calculate_meta_reordering_scatter_offsets(m, meta_ncols, meta_dtype, device):
46
+ dst_rows = torch.arange(0, m, device=device)[:, None].repeat(1, meta_ncols)
47
+ dst_cols = torch.arange(0, meta_ncols, device=device).repeat(m, 1)
48
+
49
+ # Reorder the rows, then swizzle the 2x2 blocks.
50
+ group_x = 64
51
+ group_y = 32 if meta_dtype.itemsize == 2 else 16
52
+
53
+ dst_rows = (
54
+ dst_rows // group_x * group_x
55
+ + (dst_rows % 2) * 2
56
+ + (dst_rows % 8) // 4
57
+ + ((dst_rows % group_y) % 4) // 2 * 32
58
+ + ((dst_rows % group_x) // 8) * 4
59
+ )
60
+
61
+ topright = ((dst_rows % 2 == 0) & (dst_cols % 2 == 1)).to(torch.int8)
62
+ bottomleft = ((dst_rows % 2 == 1) & (dst_cols % 2 == 0)).to(torch.int8)
63
+ dst_rows += topright - bottomleft
64
+ dst_cols -= topright - bottomleft
65
+
66
+ # Assumed that meta tensor is to be stored in CUTLASS
67
+ # InterleavedColumnMajor layout, and reverse engineered
68
+ # corresponding code to store values into this tensor.
69
+ interleave = 2
70
+ cols_maj = dst_cols // interleave
71
+ cols_min = dst_cols % interleave
72
+ return (cols_maj * m * interleave + dst_rows * interleave + cols_min).view(-1)
73
+
74
+
75
+ # This function converts dense matrix into sparse semi-structured
76
+ # representation, producing "compressed" matrix, in the layout used by
77
+ # CUTLASS backend, and corresponding metadata matrix.
78
def sparse_semi_structured_from_dense_cutlass(dense):
    """
    Compress a dense 2-D tensor into CUTLASS 2:4 semi-structured form.

    :param dense: 2-D tensor; supported dtypes are int8, half, bfloat16,
        float, and int32. Row/column counts must satisfy the divisibility
        checks below.
    :return: tuple (sparse, meta_reordered) where sparse has shape
        (m, k // 2) holding the kept values and meta_reordered has shape
        (m, meta_ncols) holding the reordered metadata encoding which
        positions were kept
    """
    if dense.dim() != 2:
        raise RuntimeError(
            f"Expected 2-dimensional dense tensor, got {dense.dim()}-dimensional tensor"  # noqa: E501
        )

    m, k = dense.shape
    device = dense.device

    # Metadata element width is determined by the dense dtype:
    # int8 data -> int32 meta, 16-bit-or-wider data -> int16 meta.
    meta_dtype = torch.int8
    if dense.dtype == torch.int8:
        meta_dtype = torch.int32
    elif dense.dtype in [torch.half, torch.bfloat16, torch.float, torch.int32]:
        meta_dtype = torch.int16
    else:
        raise RuntimeError(f"Invalid datatype {dense.dtype} of dense matrix")
    quadbits_per_meta_elem = meta_dtype.itemsize * 8 // 4
    if quadbits_per_meta_elem not in (4, 8):
        raise RuntimeError("Invalid number of elements per meta element calculated")

    if meta_dtype == torch.int32:
        if m % 16 != 0:
            raise RuntimeError(
                f"Number of rows of dense matrix {m} must be divisible by 16"
            )
    else:
        if m % 32 != 0:
            raise RuntimeError(
                f"Number of rows of dense matrix {m} must be divisible by 32"
            )
    if k % (4 * quadbits_per_meta_elem) != 0:
        raise RuntimeError(
            f"Number of columns of dense matrix {k} must be divisible by {4 * quadbits_per_meta_elem}"  # noqa: E501
        )

    # torch.float pairs are treated as two half-width lanes, so groups of 2
    # elements are used instead of groups of 4.
    if dense.dtype != torch.float:
        ksparse = 4
        dense_4 = dense.view(-1, k // ksparse, ksparse)
        m0, m1, m2, m3 = (dense_4 != 0).unbind(-1)
    else:
        ksparse = 2
        dense_2 = dense.view(-1, k // ksparse, ksparse)
        m0, m2 = m1, m3 = (dense_2 != 0).unbind(-1)
    meta_ncols = k // (ksparse * quadbits_per_meta_elem)

    # Encoding quadruples of True/False values as follows:
    # [True, True, False, False] -> 0b0100
    # [True, False, True, False] -> 0b1000
    # [False, True, True, False] -> 0b1001
    # [True, False, False, True ] -> 0b1100
    # [False, True, False, True ] -> 0b1101
    # [False, False, True, True ] -> 0b1110
    # Thus, lower two bits in the encoding are index of the True value
    # at the lowest index in the quadruple, and the higher two bits in
    # the encoding are index of the other True value in the quadruple.
    # In case there are less than two True values, than False value or
    # values at some index or indices are considered True for the
    # encoding. In case there are more than two True values, then the
    # excess True value(s) at some indices are considered False for
    # the encoding. The exact encodings used for these cases are as
    # follows:
    # [False, False, False, False] -> 0b1110
    # [False, False, False, True ] -> 0b1110
    # [False, False, True, False] -> 0b1110
    # [False, True, False, False] -> 0b1001
    # [False, True, True, True ] -> 0b1101
    # [True, False, False, False] -> 0b1000
    # [True, False, True, True ] -> 0b1100
    # [True, True, False, True ] -> 0b0100
    # [True, True, True, False] -> 0b0100
    # [True, True, True, True ] -> 0b0100
    # These particular encodings are chosen, with the help of Espresso
    # logic minimizer software, for the purpose of minimization of
    # corresponding Boolean functions, that translate non-zero flags
    # into encoding bits. Note also possible choices for the first
    # and last of these encodings were limited only to (0b0100,
    # 0b1110), in order to produce valid encodings for 1:2 sparsity
    # case.

    expr0 = m0 & m1
    expr1 = ~m0 & m1
    expr2 = ~m0 & ~m1
    bit0 = expr1
    bit1 = expr2
    bit2 = expr0 | expr2 | m3
    bit3 = expr1 | ~m1
    # idxs0/idxs1 are the in-group indices of the two values being kept
    idxs0 = bit0 | (bit1.to(torch.int64) << 1)
    idxs1 = bit2 | (bit3.to(torch.int64) << 1)

    # Gather the two kept values from each group into the compressed matrix.
    if dense.dtype != torch.float:
        sparse0 = dense_4.gather(
            -1, idxs0.unsqueeze(-1)
        )  # type: ignore[possibly-undefined]
        sparse1 = dense_4.gather(-1, idxs1.unsqueeze(-1))
        sparse = torch.stack((sparse0, sparse1), dim=-1).view(m, k // 2)
    else:
        sparse = dense_2.gather(-1, idxs0.unsqueeze(-1) // 2).view(
            m, k // 2
        )  # type: ignore[possibly-undefined]

    # Pack the per-group 4-bit codes into full metadata elements.
    meta_4 = idxs0 | (idxs1 << 2)
    meta_n = meta_4.view((-1, meta_ncols, quadbits_per_meta_elem)).to(meta_dtype)

    if quadbits_per_meta_elem == 4:
        meta = (
            meta_n[:, :, 0]
            | (meta_n[:, :, 1] << 4)
            | (meta_n[:, :, 2] << 8)
            | (meta_n[:, :, 3] << 12)
        )
    elif quadbits_per_meta_elem == 8:
        meta = (
            meta_n[:, :, 0]
            | (meta_n[:, :, 1] << 4)
            | (meta_n[:, :, 2] << 8)
            | (meta_n[:, :, 3] << 12)
            | (meta_n[:, :, 4] << 16)
            | (meta_n[:, :, 5] << 20)
            | (meta_n[:, :, 6] << 24)
            | (meta_n[:, :, 7] << 28)
        )

    # Reorder meta tensor elements.
    meta_reordered = meta.new_empty(
        (m * meta_ncols,)
    )  # type: ignore[possibly-undefined]
    meta_offsets = _calculate_meta_reordering_scatter_offsets(
        m, meta_ncols, meta_dtype, device
    )
    meta_reordered.scatter_(0, meta_offsets, meta.view(-1))

    return (sparse, meta_reordered.view(m, meta_ncols))
210
+
211
+
212
+ # This function performs reverse of the function above - it
213
+ # reconstructs dense matrix from a pair of "compressed" matrix, given
214
+ # in the layout used by CUTLASS backend, and accompanying metadata
215
+ # matrix.
216
def sparse_semi_structured_to_dense_cutlass(sparse, meta_reordered):
    """
    Reconstruct a dense tensor from CUTLASS 2:4 semi-structured form.

    Inverse of sparse_semi_structured_from_dense_cutlass: positions not
    selected by the metadata are filled with zeros.

    :param sparse: 2-D compressed values tensor of shape (m, k)
    :param meta_reordered: 2-D reordered metadata tensor of shape
        (m, meta_ncols), int16 or int32, on the same device as ``sparse``
    :return: dense tensor of shape (m, 2 * k)
    """
    if sparse.dim() != 2:
        raise RuntimeError(
            f"Expected 2-dimensional sparse tensor, got {sparse.dim()}-dimensional tensor"  # noqa: E501
        )

    m, k = sparse.shape
    device = sparse.device

    if meta_reordered.dim() != 2:
        raise RuntimeError(
            f"Expected 2-dimensional meta tensor, got {meta_reordered.dim()}-dimensional tensor"  # noqa: E501
        )
    if meta_reordered.device != device:
        raise RuntimeError(
            f"Expected meta matrix to be on {device} device, got matrix on {meta_reordered.device} device"  # noqa: E501
        )

    meta_dtype = meta_reordered.dtype
    if meta_dtype not in (torch.int16, torch.int32):
        raise RuntimeError(f"Invalid datatype {meta_dtype} of meta matrix")
    quadbits_per_meta_elem = meta_dtype.itemsize * 8 // 4

    # torch.float data is handled as pairs of half-width lanes (group of 2).
    ksparse = 4 if sparse.dtype != torch.float else 2

    meta_nrows, meta_ncols = meta_reordered.shape
    if meta_nrows != m:
        raise RuntimeError(
            f"Number of rows of meta matrix {meta_nrows} must be equal to number of columns of spase matrix {m}"  # noqa: E501
        )
    if meta_ncols * ksparse * quadbits_per_meta_elem != 2 * k:
        raise RuntimeError(
            f"Number of columns of sparse matrix {k} different from the {meta_ncols * ksparse * quadbits_per_meta_elem // 2}, "  # noqa: E501
            "expected according to the number of columns of meta matrix"
        )

    # Undo meta tensor elements reordering.
    meta_offsets = _calculate_meta_reordering_scatter_offsets(
        m, meta_ncols, meta_dtype, device
    )
    meta = torch.gather(meta_reordered.view(-1), 0, meta_offsets).view(m, meta_ncols)

    # Unpack sparse tensor back to original dense tensor, using
    # information provided by meta tensor. Note that torch.float
    # datatype is handled pretty much the same as
    # torch.half/torch.bfloat16, as metadata for a pair of torch.float
    # value is encoded as if underlying 8 bytes contain four
    # torch.half/torch.bfloat16 values, where either first two or last
    # two are zeros.
    meta_2 = torch.empty(
        (m, meta_ncols, 2 * quadbits_per_meta_elem),
        dtype=meta_dtype,
        device=device,
    )
    # Split each packed metadata element back into its 2-bit position codes.
    if quadbits_per_meta_elem == 4:
        meta_2[:, :, 0] = meta & 0b11
        meta_2[:, :, 1] = (meta >> 2) & 0b11
        meta_2[:, :, 2] = (meta >> 4) & 0b11
        meta_2[:, :, 3] = (meta >> 6) & 0b11
        meta_2[:, :, 4] = (meta >> 8) & 0b11
        meta_2[:, :, 5] = (meta >> 10) & 0b11
        meta_2[:, :, 6] = (meta >> 12) & 0b11
        meta_2[:, :, 7] = (meta >> 14) & 0b11
    elif quadbits_per_meta_elem == 8:
        meta_2[:, :, 0] = meta & 0b11
        meta_2[:, :, 1] = (meta >> 2) & 0b11
        meta_2[:, :, 2] = (meta >> 4) & 0b11
        meta_2[:, :, 3] = (meta >> 6) & 0b11
        meta_2[:, :, 4] = (meta >> 8) & 0b11
        meta_2[:, :, 5] = (meta >> 10) & 0b11
        meta_2[:, :, 6] = (meta >> 12) & 0b11
        meta_2[:, :, 7] = (meta >> 14) & 0b11
        meta_2[:, :, 8] = (meta >> 16) & 0b11
        meta_2[:, :, 9] = (meta >> 18) & 0b11
        meta_2[:, :, 10] = (meta >> 20) & 0b11
        meta_2[:, :, 11] = (meta >> 22) & 0b11
        meta_2[:, :, 12] = (meta >> 24) & 0b11
        meta_2[:, :, 13] = (meta >> 26) & 0b11
        meta_2[:, :, 14] = (meta >> 28) & 0b11
        meta_2[:, :, 15] = (meta >> 30) & 0b11

    # Translate per-group position codes into flat indices into the dense
    # output: each group of 4 output slots receives two scattered values.
    dense_offsets = meta_2.view(-1) + (
        torch.arange(0, 2 * m * k // ksparse, device=device) * 4
    ).view(-1, 1).repeat(1, 2).view(-1)

    dense = torch.zeros((m * 2 * k,), dtype=sparse.dtype, device=device)
    if sparse.dtype != torch.float:
        # dense.scatter_(0, dense_offsets, sparse.view(-1))
        dense.scatter_(0, dense_offsets, sparse.reshape(-1))
    else:
        # scatter through a half-width view so the offsets (computed in
        # half-sized lanes) address the float storage correctly
        dense.view(torch.half).scatter_(
            0, dense_offsets, sparse.view(torch.half).view(-1)
        )

    return dense.view(m, 2 * k)
311
+
312
+
313
def mask_creator(tensor):
    """
    Create a 2:4 (N:M) sparsity mask for the given tensor.

    The tensor is partitioned into contiguous groups of M = 4 weights; in
    each group the M - N = 2 weights with the smallest absolute value are
    masked out (0) and the N = 2 largest-magnitude weights are kept (1).

    :param tensor: tensor to build the mask for; its number of elements
        must be divisible by 4
    :return: float tensor of ones and zeros with the same shape as ``tensor``
    :raises ValueError: if tensor.numel() is not divisible by 4
    """
    N = 2  # number of weights kept per group
    M = 4  # group size

    if tensor.numel() % M != 0:
        raise ValueError(
            f"Tensor of size {tensor.shape} can't be evenly divided into " f"{M} groups"
        )

    num_groups = tensor.numel() // M

    # Rank weights within each group by magnitude; the M - N smallest
    # indices in each group are the ones to prune
    tensor_temp = tensor.detach().abs().reshape(num_groups, M)
    index = torch.argsort(tensor_temp, dim=1)[:, : int(M - N)]

    # Start from all-ones and zero out the pruned positions
    w_b = torch.ones(tensor_temp.shape, device=tensor_temp.device)
    mask = w_b.scatter_(dim=1, index=index, value=0).reshape(tensor.shape)

    return mask
.venv/lib/python3.11/site-packages/dotenv/__init__.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+
3
+ from .main import (dotenv_values, find_dotenv, get_key, load_dotenv, set_key,
4
+ unset_key)
5
+
6
+
7
def load_ipython_extension(ipython: Any) -> None:
    """Entry point invoked by IPython's ``%load_ext dotenv``; delegates to
    the implementation in ``dotenv.ipython``."""
    # Imported lazily so IPython is only required when the extension is used
    from .ipython import load_ipython_extension
    load_ipython_extension(ipython)
10
+
11
+
12
def get_cli_string(
    path: Optional[str] = None,
    action: Optional[str] = None,
    key: Optional[str] = None,
    value: Optional[str] = None,
    quote: Optional[str] = None,
):
    """Return a string suitable for running as a shell script.

    Useful for converting arguments passed to a fabric task into a string
    that can be passed to a `local` or `run` command.
    """
    parts = ['dotenv']
    if quote:
        parts.append(f'-q {quote}')
    if path:
        parts.append(f'-f {path}')
    if action:
        parts.append(action)
    if key:
        parts.append(key)
    if value:
        # Quote values containing spaces so they survive shell word splitting
        parts.append(f'"{value}"' if ' ' in value else value)

    return ' '.join(parts).strip()
40
+
41
+
42
# Public names exported by `from dotenv import *`
__all__ = ['get_cli_string',
           'load_dotenv',
           'dotenv_values',
           'get_key',
           'set_key',
           'unset_key',
           'find_dotenv',
           'load_ipython_extension']
.venv/lib/python3.11/site-packages/dotenv/__main__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ """Entry point for cli, enables execution with `python -m dotenv`"""
2
+
3
+ from .cli import cli
4
+
5
# Run the dotenv CLI when executed as a module (`python -m dotenv`)
if __name__ == "__main__":
    cli()
.venv/lib/python3.11/site-packages/dotenv/__pycache__/__main__.cpython-311.pyc ADDED
Binary file (386 Bytes). View file
 
.venv/lib/python3.11/site-packages/dotenv/__pycache__/cli.cpython-311.pyc ADDED
Binary file (10.9 kB). View file
 
.venv/lib/python3.11/site-packages/dotenv/__pycache__/ipython.cpython-311.pyc ADDED
Binary file (2.3 kB). View file
 
.venv/lib/python3.11/site-packages/dotenv/__pycache__/main.cpython-311.pyc ADDED
Binary file (18.1 kB). View file