Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .venv/lib/python3.11/site-packages/OpenSSL/SSL.py +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__init__.py +31 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/_util.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/debug.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/rand.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/version.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/_util.py +124 -0
- .venv/lib/python3.11/site-packages/OpenSSL/crypto.py +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/debug.py +40 -0
- .venv/lib/python3.11/site-packages/OpenSSL/py.typed +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/rand.py +40 -0
- .venv/lib/python3.11/site-packages/OpenSSL/version.py +28 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/__init__.py +22 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/base.py +20 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/__init__.py +22 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/base.py +188 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/helpers.py +137 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__init__.py +18 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/base.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/naive_quantized.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/pack_quantized.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/base.py +176 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +142 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +213 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__init__.py +19 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/base.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/dense.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_24_bitmask.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_bitmask.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/base.py +148 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/dense.py +34 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +240 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +163 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +16 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +251 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__init__.py +21 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_args.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_config.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_scheme.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__init__.py +22 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/apply.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/compressed.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/forward.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/helpers.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/initialize.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/OpenSSL/SSL.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (C) AB Strakt
|
| 2 |
+
# See LICENSE for details.
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
pyOpenSSL - A simple wrapper around the OpenSSL library
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from OpenSSL import SSL, crypto
|
| 9 |
+
from OpenSSL.version import (
|
| 10 |
+
__author__,
|
| 11 |
+
__copyright__,
|
| 12 |
+
__email__,
|
| 13 |
+
__license__,
|
| 14 |
+
__summary__,
|
| 15 |
+
__title__,
|
| 16 |
+
__uri__,
|
| 17 |
+
__version__,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
__all__ = [
|
| 21 |
+
"SSL",
|
| 22 |
+
"crypto",
|
| 23 |
+
"__author__",
|
| 24 |
+
"__copyright__",
|
| 25 |
+
"__email__",
|
| 26 |
+
"__license__",
|
| 27 |
+
"__summary__",
|
| 28 |
+
"__title__",
|
| 29 |
+
"__uri__",
|
| 30 |
+
"__version__",
|
| 31 |
+
]
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (716 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/_util.cpython-311.pyc
ADDED
|
Binary file (5.28 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/debug.cpython-311.pyc
ADDED
|
Binary file (1.67 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/rand.cpython-311.pyc
ADDED
|
Binary file (1.75 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/version.cpython-311.pyc
ADDED
|
Binary file (712 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/_util.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import warnings
|
| 4 |
+
from typing import Any, Callable, NoReturn, Type, Union
|
| 5 |
+
|
| 6 |
+
from cryptography.hazmat.bindings.openssl.binding import Binding
|
| 7 |
+
|
| 8 |
+
StrOrBytesPath = Union[str, bytes, os.PathLike]
|
| 9 |
+
|
| 10 |
+
binding = Binding()
|
| 11 |
+
ffi = binding.ffi
|
| 12 |
+
lib = binding.lib
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# This is a special CFFI allocator that does not bother to zero its memory
|
| 16 |
+
# after allocation. This has vastly better performance on large allocations and
|
| 17 |
+
# so should be used whenever we don't need the memory zeroed out.
|
| 18 |
+
no_zero_allocator = ffi.new_allocator(should_clear_after_alloc=False)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def text(charp: Any) -> str:
|
| 22 |
+
"""
|
| 23 |
+
Get a native string type representing of the given CFFI ``char*`` object.
|
| 24 |
+
|
| 25 |
+
:param charp: A C-style string represented using CFFI.
|
| 26 |
+
|
| 27 |
+
:return: :class:`str`
|
| 28 |
+
"""
|
| 29 |
+
if not charp:
|
| 30 |
+
return ""
|
| 31 |
+
return ffi.string(charp).decode("utf-8")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def exception_from_error_queue(exception_type: Type[Exception]) -> NoReturn:
|
| 35 |
+
"""
|
| 36 |
+
Convert an OpenSSL library failure into a Python exception.
|
| 37 |
+
|
| 38 |
+
When a call to the native OpenSSL library fails, this is usually signalled
|
| 39 |
+
by the return value, and an error code is stored in an error queue
|
| 40 |
+
associated with the current thread. The err library provides functions to
|
| 41 |
+
obtain these error codes and textual error messages.
|
| 42 |
+
"""
|
| 43 |
+
errors = []
|
| 44 |
+
|
| 45 |
+
while True:
|
| 46 |
+
error = lib.ERR_get_error()
|
| 47 |
+
if error == 0:
|
| 48 |
+
break
|
| 49 |
+
errors.append(
|
| 50 |
+
(
|
| 51 |
+
text(lib.ERR_lib_error_string(error)),
|
| 52 |
+
text(lib.ERR_func_error_string(error)),
|
| 53 |
+
text(lib.ERR_reason_error_string(error)),
|
| 54 |
+
)
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
raise exception_type(errors)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def make_assert(error: Type[Exception]) -> Callable[[bool], Any]:
|
| 61 |
+
"""
|
| 62 |
+
Create an assert function that uses :func:`exception_from_error_queue` to
|
| 63 |
+
raise an exception wrapped by *error*.
|
| 64 |
+
"""
|
| 65 |
+
|
| 66 |
+
def openssl_assert(ok: bool) -> None:
|
| 67 |
+
"""
|
| 68 |
+
If *ok* is not True, retrieve the error from OpenSSL and raise it.
|
| 69 |
+
"""
|
| 70 |
+
if ok is not True:
|
| 71 |
+
exception_from_error_queue(error)
|
| 72 |
+
|
| 73 |
+
return openssl_assert
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def path_bytes(s: StrOrBytesPath) -> bytes:
|
| 77 |
+
"""
|
| 78 |
+
Convert a Python path to a :py:class:`bytes` for the path which can be
|
| 79 |
+
passed into an OpenSSL API accepting a filename.
|
| 80 |
+
|
| 81 |
+
:param s: A path (valid for os.fspath).
|
| 82 |
+
|
| 83 |
+
:return: An instance of :py:class:`bytes`.
|
| 84 |
+
"""
|
| 85 |
+
b = os.fspath(s)
|
| 86 |
+
|
| 87 |
+
if isinstance(b, str):
|
| 88 |
+
return b.encode(sys.getfilesystemencoding())
|
| 89 |
+
else:
|
| 90 |
+
return b
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def byte_string(s: str) -> bytes:
|
| 94 |
+
return s.encode("charmap")
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# A marker object to observe whether some optional arguments are passed any
|
| 98 |
+
# value or not.
|
| 99 |
+
UNSPECIFIED = object()
|
| 100 |
+
|
| 101 |
+
_TEXT_WARNING = "str for {0} is no longer accepted, use bytes"
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def text_to_bytes_and_warn(label: str, obj: Any) -> Any:
|
| 105 |
+
"""
|
| 106 |
+
If ``obj`` is text, emit a warning that it should be bytes instead and try
|
| 107 |
+
to convert it to bytes automatically.
|
| 108 |
+
|
| 109 |
+
:param str label: The name of the parameter from which ``obj`` was taken
|
| 110 |
+
(so a developer can easily find the source of the problem and correct
|
| 111 |
+
it).
|
| 112 |
+
|
| 113 |
+
:return: If ``obj`` is the text string type, a ``bytes`` object giving the
|
| 114 |
+
UTF-8 encoding of that text is returned. Otherwise, ``obj`` itself is
|
| 115 |
+
returned.
|
| 116 |
+
"""
|
| 117 |
+
if isinstance(obj, str):
|
| 118 |
+
warnings.warn(
|
| 119 |
+
_TEXT_WARNING.format(label),
|
| 120 |
+
category=DeprecationWarning,
|
| 121 |
+
stacklevel=3,
|
| 122 |
+
)
|
| 123 |
+
return obj.encode("utf-8")
|
| 124 |
+
return obj
|
.venv/lib/python3.11/site-packages/OpenSSL/crypto.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/debug.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ssl
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import cffi
|
| 5 |
+
import cryptography
|
| 6 |
+
|
| 7 |
+
import OpenSSL.SSL
|
| 8 |
+
|
| 9 |
+
from . import version
|
| 10 |
+
|
| 11 |
+
_env_info = """\
|
| 12 |
+
pyOpenSSL: {pyopenssl}
|
| 13 |
+
cryptography: {cryptography}
|
| 14 |
+
cffi: {cffi}
|
| 15 |
+
cryptography's compiled against OpenSSL: {crypto_openssl_compile}
|
| 16 |
+
cryptography's linked OpenSSL: {crypto_openssl_link}
|
| 17 |
+
Python's OpenSSL: {python_openssl}
|
| 18 |
+
Python executable: {python}
|
| 19 |
+
Python version: {python_version}
|
| 20 |
+
Platform: {platform}
|
| 21 |
+
sys.path: {sys_path}""".format(
|
| 22 |
+
pyopenssl=version.__version__,
|
| 23 |
+
crypto_openssl_compile=OpenSSL._util.ffi.string(
|
| 24 |
+
OpenSSL._util.lib.OPENSSL_VERSION_TEXT,
|
| 25 |
+
).decode("ascii"),
|
| 26 |
+
crypto_openssl_link=OpenSSL.SSL.SSLeay_version(
|
| 27 |
+
OpenSSL.SSL.SSLEAY_VERSION
|
| 28 |
+
).decode("ascii"),
|
| 29 |
+
python_openssl=getattr(ssl, "OPENSSL_VERSION", "n/a"),
|
| 30 |
+
cryptography=cryptography.__version__,
|
| 31 |
+
cffi=cffi.__version__,
|
| 32 |
+
python=sys.executable,
|
| 33 |
+
python_version=sys.version,
|
| 34 |
+
platform=sys.platform,
|
| 35 |
+
sys_path=sys.path,
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
if __name__ == "__main__":
|
| 40 |
+
print(_env_info)
|
.venv/lib/python3.11/site-packages/OpenSSL/py.typed
ADDED
|
File without changes
|
.venv/lib/python3.11/site-packages/OpenSSL/rand.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PRNG management routines, thin wrappers.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from OpenSSL._util import lib as _lib
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def add(buffer: bytes, entropy: int) -> None:
|
| 9 |
+
"""
|
| 10 |
+
Mix bytes from *string* into the PRNG state.
|
| 11 |
+
|
| 12 |
+
The *entropy* argument is (the lower bound of) an estimate of how much
|
| 13 |
+
randomness is contained in *string*, measured in bytes.
|
| 14 |
+
|
| 15 |
+
For more information, see e.g. :rfc:`1750`.
|
| 16 |
+
|
| 17 |
+
This function is only relevant if you are forking Python processes and
|
| 18 |
+
need to reseed the CSPRNG after fork.
|
| 19 |
+
|
| 20 |
+
:param buffer: Buffer with random data.
|
| 21 |
+
:param entropy: The entropy (in bytes) measurement of the buffer.
|
| 22 |
+
|
| 23 |
+
:return: :obj:`None`
|
| 24 |
+
"""
|
| 25 |
+
if not isinstance(buffer, bytes):
|
| 26 |
+
raise TypeError("buffer must be a byte string")
|
| 27 |
+
|
| 28 |
+
if not isinstance(entropy, int):
|
| 29 |
+
raise TypeError("entropy must be an integer")
|
| 30 |
+
|
| 31 |
+
_lib.RAND_add(buffer, len(buffer), entropy)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def status() -> int:
|
| 35 |
+
"""
|
| 36 |
+
Check whether the PRNG has been seeded with enough data.
|
| 37 |
+
|
| 38 |
+
:return: 1 if the PRNG is seeded enough, 0 otherwise.
|
| 39 |
+
"""
|
| 40 |
+
return _lib.RAND_status()
|
.venv/lib/python3.11/site-packages/OpenSSL/version.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (C) AB Strakt
|
| 2 |
+
# Copyright (C) Jean-Paul Calderone
|
| 3 |
+
# See LICENSE for details.
|
| 4 |
+
|
| 5 |
+
"""
|
| 6 |
+
pyOpenSSL - A simple wrapper around the OpenSSL library
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
__all__ = [
|
| 10 |
+
"__author__",
|
| 11 |
+
"__copyright__",
|
| 12 |
+
"__email__",
|
| 13 |
+
"__license__",
|
| 14 |
+
"__summary__",
|
| 15 |
+
"__title__",
|
| 16 |
+
"__uri__",
|
| 17 |
+
"__version__",
|
| 18 |
+
]
|
| 19 |
+
|
| 20 |
+
__version__ = "24.2.1"
|
| 21 |
+
|
| 22 |
+
__title__ = "pyOpenSSL"
|
| 23 |
+
__uri__ = "https://pyopenssl.org/"
|
| 24 |
+
__summary__ = "Python wrapper module around the OpenSSL library"
|
| 25 |
+
__author__ = "The pyOpenSSL developers"
|
| 26 |
+
__email__ = "cryptography-dev@python.org"
|
| 27 |
+
__license__ = "Apache License, Version 2.0"
|
| 28 |
+
__copyright__ = f"Copyright 2001-2024 {__author__}"
|
.venv/lib/python3.11/site-packages/compressed_tensors/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from .base import *
|
| 16 |
+
|
| 17 |
+
# flake8: noqa
|
| 18 |
+
from .compressors import *
|
| 19 |
+
from .config import *
|
| 20 |
+
from .quantization import QuantizationConfig, QuantizationStatus
|
| 21 |
+
from .utils import *
|
| 22 |
+
from .version import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/base.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
SPARSITY_CONFIG_NAME = "sparsity_config"
|
| 16 |
+
QUANTIZATION_CONFIG_NAME = "quantization_config"
|
| 17 |
+
COMPRESSION_CONFIG_NAME = "compression_config"
|
| 18 |
+
KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
|
| 19 |
+
COMPRESSION_VERSION_NAME = "version"
|
| 20 |
+
QUANTIZATION_METHOD_NAME = "quant_method"
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# flake8: noqa
|
| 16 |
+
|
| 17 |
+
from .base import *
|
| 18 |
+
from .helpers import *
|
| 19 |
+
from .model_compressors import *
|
| 20 |
+
from .quantized_compressors import *
|
| 21 |
+
from .sparse_compressors import *
|
| 22 |
+
from .sparse_quantized_compressors import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/base.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from abc import ABC, abstractmethod
|
| 16 |
+
from typing import Dict, Generator, Optional, Tuple, Union
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from compressed_tensors.config import SparsityCompressionConfig
|
| 20 |
+
from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
|
| 21 |
+
from compressed_tensors.registry import RegistryMixin
|
| 22 |
+
from torch import Tensor
|
| 23 |
+
from torch.nn import Module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
__all__ = ["BaseCompressor"]
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class BaseCompressor(RegistryMixin, ABC):
|
| 30 |
+
"""
|
| 31 |
+
Base class representing a model compression algorithm. Each child class should
|
| 32 |
+
implement compression_param_info, compress_weight and decompress_weight.
|
| 33 |
+
|
| 34 |
+
Compressors support compressing/decompressing a full module state dict or a single
|
| 35 |
+
quantized PyTorch leaf module.
|
| 36 |
+
|
| 37 |
+
Model Load Lifecycle (run_compressed=False):
|
| 38 |
+
- ModelCompressor.decompress()
|
| 39 |
+
- apply_quantization_config()
|
| 40 |
+
- BaseCompressor.decompress()
|
| 41 |
+
|
| 42 |
+
Model Save Lifecycle:
|
| 43 |
+
- ModelCompressor.compress()
|
| 44 |
+
- BaseCompressor.compress()
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
Module Lifecycle (run_compressed=True):
|
| 48 |
+
- apply_quantization_config()
|
| 49 |
+
- compressed_module = CompressedLinear(module)
|
| 50 |
+
- initialize_module_for_quantization()
|
| 51 |
+
- BaseCompressor.compression_param_info()
|
| 52 |
+
- register_parameters()
|
| 53 |
+
- compressed_module.forward()
|
| 54 |
+
-compressed_module.decompress()
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
:param config: config specifying compression parameters
|
| 58 |
+
"""
|
| 59 |
+
|
| 60 |
+
def __init__(
|
| 61 |
+
self, config: Union[SparsityCompressionConfig, QuantizationConfig, None] = None
|
| 62 |
+
):
|
| 63 |
+
self.config = config
|
| 64 |
+
|
| 65 |
+
def compression_param_info(
|
| 66 |
+
self,
|
| 67 |
+
weight_shape: torch.Size,
|
| 68 |
+
quantization_args: Optional[QuantizationArgs] = None,
|
| 69 |
+
) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
|
| 70 |
+
"""
|
| 71 |
+
Creates a dictionary of expected shapes and dtypes for each compression
|
| 72 |
+
parameter used by the compressor
|
| 73 |
+
|
| 74 |
+
:param weight_shape: uncompressed weight shape
|
| 75 |
+
:param quantization_args: quantization parameters for the weight
|
| 76 |
+
:return: dictionary mapping compressed parameter names to shape and dtype
|
| 77 |
+
"""
|
| 78 |
+
raise NotImplementedError()
|
| 79 |
+
|
| 80 |
+
@abstractmethod
|
| 81 |
+
def compress(
|
| 82 |
+
self,
|
| 83 |
+
model_state: Dict[str, Tensor],
|
| 84 |
+
**kwargs,
|
| 85 |
+
) -> Dict[str, Tensor]:
|
| 86 |
+
"""
|
| 87 |
+
Compresses a dense state dict
|
| 88 |
+
|
| 89 |
+
:param model_state: state dict of uncompressed model
|
| 90 |
+
:param kwargs: additional arguments for compression
|
| 91 |
+
:return: compressed state dict
|
| 92 |
+
"""
|
| 93 |
+
raise NotImplementedError()
|
| 94 |
+
|
| 95 |
+
@abstractmethod
|
| 96 |
+
def decompress(
|
| 97 |
+
self,
|
| 98 |
+
path_to_model_or_tensors: str,
|
| 99 |
+
device: str = "cpu",
|
| 100 |
+
**kwargs,
|
| 101 |
+
) -> Generator[Tuple[str, Tensor], None, None]:
|
| 102 |
+
"""
|
| 103 |
+
Reads a compressed state dict located at path_to_model_or_tensors
|
| 104 |
+
and returns a generator for sequentially decompressing back to a
|
| 105 |
+
dense state dict
|
| 106 |
+
|
| 107 |
+
:param path_to_model_or_tensors: path to compressed safetensors model (directory
|
| 108 |
+
with one or more safetensors files) or compressed tensors file
|
| 109 |
+
:param names_to_scheme: quantization args for each quantized weight
|
| 110 |
+
:param device: optional device to load intermediate weights into
|
| 111 |
+
:return: compressed state dict
|
| 112 |
+
"""
|
| 113 |
+
raise NotImplementedError()
|
| 114 |
+
|
| 115 |
+
def compress_module(self, module: Module) -> Optional[Dict[str, torch.Tensor]]:
|
| 116 |
+
"""
|
| 117 |
+
Compresses a single quantized leaf PyTorch module. If the module is not
|
| 118 |
+
quantized, this function has no effect.
|
| 119 |
+
|
| 120 |
+
:param module: PyTorch module to compress
|
| 121 |
+
:return: dictionary of compressed weight data, or None if module is not
|
| 122 |
+
quantized
|
| 123 |
+
"""
|
| 124 |
+
if not hasattr(module, "quantization_scheme"):
|
| 125 |
+
return None # module is not quantized
|
| 126 |
+
quantization_scheme = module.quantization_scheme
|
| 127 |
+
if not hasattr(quantization_scheme, "weights"):
|
| 128 |
+
return None # weights are not quantized
|
| 129 |
+
|
| 130 |
+
quantization_args = quantization_scheme.weights
|
| 131 |
+
weight = getattr(module, "weight", None)
|
| 132 |
+
weight_scale = getattr(module, "weight_scale", None)
|
| 133 |
+
weight_zero_point = getattr(module, "weight_zero_point", None)
|
| 134 |
+
|
| 135 |
+
return self.compress_weight(
|
| 136 |
+
weight=weight,
|
| 137 |
+
scale=weight_scale,
|
| 138 |
+
zero_point=weight_zero_point,
|
| 139 |
+
quantization_args=quantization_args,
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
def compress_weight(
|
| 143 |
+
self,
|
| 144 |
+
weight: Tensor,
|
| 145 |
+
**kwargs,
|
| 146 |
+
) -> Dict[str, torch.Tensor]:
|
| 147 |
+
"""
|
| 148 |
+
Compresses a single uncompressed weight
|
| 149 |
+
|
| 150 |
+
:param weight: uncompressed weight tensor
|
| 151 |
+
:param kwargs: additional arguments for compression
|
| 152 |
+
"""
|
| 153 |
+
raise NotImplementedError()
|
| 154 |
+
|
| 155 |
+
def decompress_module(self, module: Module):
|
| 156 |
+
"""
|
| 157 |
+
Decompresses a single compressed leaf PyTorch module. If the module is not
|
| 158 |
+
quantized, this function has no effect.
|
| 159 |
+
|
| 160 |
+
:param module: PyTorch module to decompress
|
| 161 |
+
:return: tensor of the decompressed weight, or None if module is not quantized
|
| 162 |
+
"""
|
| 163 |
+
if not hasattr(module, "quantization_scheme"):
|
| 164 |
+
return None # module is not quantized
|
| 165 |
+
quantization_scheme = module.quantization_scheme
|
| 166 |
+
if not hasattr(quantization_scheme, "weights"):
|
| 167 |
+
return None # weights are not quantized
|
| 168 |
+
|
| 169 |
+
quantization_args = quantization_scheme.weights
|
| 170 |
+
compressed_data = {}
|
| 171 |
+
for name, parameter in module.named_parameters():
|
| 172 |
+
compressed_data[name] = parameter
|
| 173 |
+
|
| 174 |
+
return self.decompress_weight(
|
| 175 |
+
compressed_data=compressed_data, quantization_args=quantization_args
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
def decompress_weight(
|
| 179 |
+
self, compressed_data: Dict[str, Tensor], **kwargs
|
| 180 |
+
) -> torch.Tensor:
|
| 181 |
+
"""
|
| 182 |
+
Decompresses a single compressed weight
|
| 183 |
+
|
| 184 |
+
:param compressed_data: dictionary of data needed for decompression
|
| 185 |
+
:param kwargs: additional arguments for decompression
|
| 186 |
+
:return: tensor of the decompressed weight
|
| 187 |
+
"""
|
| 188 |
+
raise NotImplementedError()
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/helpers.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import Dict, Generator, Optional, Tuple, Union
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from compressed_tensors.compressors import BaseCompressor
|
| 20 |
+
from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
|
| 21 |
+
from compressed_tensors.utils.safetensors_load import get_weight_mappings
|
| 22 |
+
from safetensors import safe_open
|
| 23 |
+
from safetensors.torch import save_file
|
| 24 |
+
from torch import Tensor
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
__all__ = [
|
| 28 |
+
"load_compressed",
|
| 29 |
+
"save_compressed",
|
| 30 |
+
"save_compressed_model",
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def save_compressed(
    tensors: Dict[str, Tensor],
    save_path: Union[str, Path],
    compression_format: Optional[CompressionFormat] = None,
):
    """
    Compress tensors with the given format and save them to disk.

    If no ``compression_format`` is specified the tensors are saved
    uncompressed, using the ``dense`` format.

    :param tensors: dictionary of tensors to compress
    :param save_path: path to save compressed tensors
    :param compression_format: compression format used for the tensors;
        defaults to ``CompressionFormat.dense``
    :raises ValueError: if no tensors are provided, or if the requested
        format is not registered with ``BaseCompressor``
    """
    if not tensors:
        raise ValueError("No tensors or empty tensors provided to compress")

    # if no compression_format specified, default to `dense` (no compression)
    compression_format = compression_format or CompressionFormat.dense.value

    # build the valid-format set once instead of scanning both lists twice
    valid_formats = set(
        BaseCompressor.registered_names() + BaseCompressor.registered_aliases()
    )
    if compression_format not in valid_formats:
        raise ValueError(
            f"Unknown compression format: {compression_format}. "
            f"Must be one of {valid_formats}"  # noqa E501
        )

    # compress with the registered compressor, then save the result
    compressor = BaseCompressor.load_from_registry(compression_format)
    compressed_tensors = compressor.compress(tensors)
    save_file(compressed_tensors, save_path)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def load_compressed(
    compressed_tensors: Union[str, Path],
    compression_config: SparsityCompressionConfig = None,
    device: Optional[str] = "cpu",
) -> Generator[Tuple[str, Tensor], None, None]:
    """
    Lazily load (and decompress, if needed) tensors from disk.

    If the tensors were not compressed on disk (no config given, or the
    ``dense`` format), they are yielded as-is.

    :param compressed_tensors: path to compressed tensors.
        This can be a path to a file or a directory containing
        one or multiple safetensor files (if multiple - in the format
        assumed by huggingface)
    :param compression_config: compression config to use for decompressing tensors.
    :param device: device to move tensors to. If None, tensors are loaded on CPU.
    :return: a generator that yields the name and tensor of each decompressed tensor
    :raises ValueError: if the path is missing or does not exist
    """
    if compressed_tensors is None or not Path(compressed_tensors).exists():
        raise ValueError("No compressed tensors provided to load")

    if (
        compression_config is None
        or compression_config.format == CompressionFormat.dense.value
    ):
        # no compression_config (or explicit `dense` format): the tensors
        # are not compressed on disk, so stream them straight through
        weight_mappings = get_weight_mappings(compressed_tensors)
        for weight_name, file_with_weight_name in weight_mappings.items():
            with safe_open(file_with_weight_name, framework="pt", device=device) as f:
                yield weight_name, f.get_tensor(weight_name)
    else:
        # decompress via the compressor registered for this format
        compressor = BaseCompressor.load_from_registry(
            compression_config.format, config=compression_config
        )
        yield from compressor.decompress(compressed_tensors, device=device)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def save_compressed_model(
    model: torch.nn.Module,
    filename: str,
    compression_format: Optional[CompressionFormat] = None,
    force_contiguous: bool = True,
):
    """
    Wrapper around safetensors `save_model` helper function, which allows for
    saving compressed model to disk.

    Note: The model is assumed to have a
    state_dict with unique entries

    :param model: model to save on disk
    :param filename: filename location to save the file
    :param compression_format: compression format used for the model
    :param force_contiguous: forcing the state_dict to be saved as contiguous tensors
    """
    state_dict = model.state_dict()
    if force_contiguous:
        # safetensors requires contiguous tensors; copy any that are not
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}
    try:
        save_compressed(state_dict, filename, compression_format=compression_format)
    except ValueError as e:
        msg = str(e)
        msg += " Or use save_compressed_model(..., force_contiguous=True), read the docs for potential caveats."  # noqa E501
        # chain explicitly so the original failure is preserved as the cause
        raise ValueError(msg) from e
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# flake8: noqa
|
| 15 |
+
|
| 16 |
+
from .base import *
|
| 17 |
+
from .naive_quantized import *
|
| 18 |
+
from .pack_quantized import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (329 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/base.cpython-311.pyc
ADDED
|
Binary file (8.51 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/naive_quantized.cpython-311.pyc
ADDED
|
Binary file (5.77 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/pack_quantized.cpython-311.pyc
ADDED
|
Binary file (9.49 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/base.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import Any, Dict, Generator, Tuple, Union
|
| 18 |
+
|
| 19 |
+
import torch
|
| 20 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 21 |
+
from compressed_tensors.quantization import QuantizationArgs
|
| 22 |
+
from compressed_tensors.utils import (
|
| 23 |
+
get_nested_mappings_from_state_dict,
|
| 24 |
+
get_nested_weight_mappings,
|
| 25 |
+
merge_names,
|
| 26 |
+
)
|
| 27 |
+
from safetensors import safe_open
|
| 28 |
+
from torch import Tensor
|
| 29 |
+
from tqdm import tqdm
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
| 33 |
+
|
| 34 |
+
__all__ = ["BaseQuantizationCompressor"]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class BaseQuantizationCompressor(BaseCompressor):
    """
    Base class representing a quant compression algorithm. Each child class should
    implement compression_param_info, compress_weight and decompress_weight.

    Compressors support compressing/decompressing a full module state dict or a single
    quantized PyTorch leaf module.

    Model Load Lifecycle (run_compressed=False):
        - ModelCompressor.decompress()
            - apply_quantization_config()
            - BaseQuantizationCompressor.decompress()
                - BaseQuantizationCompressor.decompress_weight()

    Model Save Lifecycle:
        - ModelCompressor.compress()
            - BaseQuantizationCompressor.compress()
                - BaseQuantizationCompressor.compress_weight()

    Module Lifecycle (run_compressed=True):
        - apply_quantization_config()
        - compressed_module = CompressedLinear(module)
            - initialize_module_for_quantization()
            - BaseQuantizationCompressor.compression_param_info()
            - register_parameters()
        - compressed_module.forward()
            - compressed_module.decompress()

    :param config: config specifying compression parameters
    """

    def compress(
        self,
        model_state: Dict[str, Tensor],
        names_to_scheme: Dict[str, QuantizationArgs],
        **kwargs,
    ) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict.

        For every ``*.weight`` entry that has a matching ``*_scale`` tensor, the
        weight is compressed via :meth:`compress_weight` and the compressed
        parameters are stored under the same layer prefix. All-zero
        ``*zero_point`` tensors and ``*g_idx`` tensors containing values <= -1
        are omitted from the output; every other entry is passed through on CPU.

        :param model_state: state dict of uncompressed model
        :param names_to_scheme: quantization args for each quantized weight, needed for
            quantize function to calculate bit depth
        :return: compressed state dict
        """
        compressed_dict = {}
        weight_suffix = ".weight"
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )

        for name, value in tqdm(model_state.items(), desc="Quantized Compression"):
            if name.endswith(weight_suffix):
                # layer prefix, e.g. "model.layers.0.self_attn.q_proj"
                prefix = name[: -(len(weight_suffix))]
                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
                if scale is not None:
                    # weight is quantized, compress it
                    quant_args = names_to_scheme[prefix]
                    compressed_data = self.compress_weight(
                        weight=value,
                        scale=scale,
                        zero_point=zp,
                        g_idx=g_idx,
                        quantization_args=quant_args,
                        device="cpu",
                    )
                    # NOTE(review): the inner `value` shadows the outer loop
                    # variable; harmless here because the outer `value` is not
                    # read again within this iteration
                    for key, value in compressed_data.items():
                        compressed_dict[merge_names(prefix, key)] = value
                else:
                    # unquantized weight (no scale found): pass through on CPU
                    compressed_dict[name] = value.to("cpu")
            elif name.endswith("zero_point") and torch.all(value == 0):
                # all-zero zero points carry no information; drop them
                continue
            elif name.endswith("g_idx") and torch.any(value <= -1):
                # g_idx containing values <= -1 is dropped rather than saved
                continue
            else:
                compressed_dict[name] = value.to("cpu")

        return compressed_dict

    def decompress(
        self,
        path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
        names_to_scheme: Dict[str, QuantizationArgs],
        device: str = "cpu",
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a compressed state dict located at path_to_model_or_tensors
        and returns a generator for sequentially decompressing back to a
        dense state dict.

        Dispatches on the input type: str/Path inputs are read from disk,
        anything else is treated as an in-memory state dict.

        :param path_to_model_or_tensors: path to compressed safetensors model (directory
            with one or more safetensors files) or compressed tensors file
        :param names_to_scheme: quantization args for each quantized weight
        :param device: optional device to load intermediate weights into
        :return: generator of (name, decompressed weight) pairs
        """
        if isinstance(path_to_model_or_tensors, (str, Path)):
            yield from self._decompress_from_path(
                path_to_model_or_tensors, names_to_scheme, device
            )

        else:
            yield from self._decompress_from_state_dict(
                path_to_model_or_tensors, names_to_scheme
            )

    def _decompress_from_path(self, path_to_model, names_to_scheme, device):
        # Group on-disk parameter files by layer, then decompress each layer
        # that carries a "weight_scale" (i.e. was actually quantized).
        weight_mappings = get_nested_weight_mappings(
            path_to_model, self.COMPRESSION_PARAM_NAMES
        )
        for weight_name in weight_mappings.keys():
            weight_data = {}
            for param_name, safe_path in weight_mappings[weight_name].items():
                full_name = merge_names(weight_name, param_name)
                with safe_open(safe_path, framework="pt", device=device) as f:
                    weight_data[param_name] = f.get_tensor(full_name)
            if "weight_scale" in weight_data:
                quant_args = names_to_scheme[weight_name]
                decompressed = self.decompress_weight(
                    compressed_data=weight_data, quantization_args=quant_args
                )
                yield merge_names(weight_name, "weight"), decompressed

    def _decompress_from_state_dict(self, state_dict, names_to_scheme):
        # Same as _decompress_from_path, but the parameters are already
        # in memory — no safetensors file reads needed.
        weight_mappings = get_nested_mappings_from_state_dict(
            state_dict, self.COMPRESSION_PARAM_NAMES
        )
        for weight_name in weight_mappings.keys():
            weight_data = {}
            for param_name, param_value in weight_mappings[weight_name].items():
                weight_data[param_name] = param_value

            if "weight_scale" in weight_data:
                quant_args = names_to_scheme[weight_name]
                decompressed = self.decompress_weight(
                    compressed_data=weight_data, quantization_args=quant_args
                )
                yield merge_names(weight_name, "weight"), decompressed
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from typing import Dict, Optional, Tuple
|
| 16 |
+
|
| 17 |
+
import torch
|
| 18 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 19 |
+
from compressed_tensors.compressors.quantized_compressors.base import (
|
| 20 |
+
BaseQuantizationCompressor,
|
| 21 |
+
)
|
| 22 |
+
from compressed_tensors.config import CompressionFormat
|
| 23 |
+
from compressed_tensors.quantization import QuantizationArgs
|
| 24 |
+
from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
|
| 25 |
+
from compressed_tensors.quantization.utils import can_quantize
|
| 26 |
+
from torch import Tensor
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
__all__ = [
|
| 30 |
+
"NaiveQuantizationCompressor",
|
| 31 |
+
"IntQuantizationCompressor",
|
| 32 |
+
"FloatQuantizationCompressor",
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@BaseCompressor.register(name=CompressionFormat.naive_quantized.value)
class NaiveQuantizationCompressor(BaseQuantizationCompressor):
    """
    Naive compression for quantized models: each quantized layer's weight is
    stored directly in the closest PyTorch dtype matching the bit width given
    by the layer's QuantizationArgs, with no bit packing.
    """

    COMPRESSION_PARAM_NAMES = [
        "weight",
        "weight_scale",
        "weight_zero_point",
        "weight_g_idx",
    ]

    def compression_param_info(
        self,
        weight_shape: torch.Size,
        quantization_args: Optional[QuantizationArgs] = None,
    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
        """
        Describe the compressed parameters produced by this compressor.

        :param weight_shape: uncompressed weight shape
        :param quantization_args: quantization parameters for the weight
        :return: dictionary mapping compressed parameter names to shape and dtype
        """
        # shape is unchanged; only the storage dtype narrows
        return {"weight": (weight_shape, quantization_args.pytorch_dtype())}

    def compress_weight(
        self,
        weight: Tensor,
        scale: Tensor,
        quantization_args: QuantizationArgs,
        zero_point: Optional[Tensor] = None,
        g_idx: Optional[torch.Tensor] = None,
        device: Optional[torch.device] = None,
    ) -> Dict[str, torch.Tensor]:
        """
        Compress a single uncompressed weight by quantizing it in place.

        :param weight: uncompressed weight tensor
        :param scale: quantization scale for weight
        :param quantization_args: quantization parameters for weight
        :param zero_point: quantization zero point for weight
        :param g_idx: optional mapping from column index to group index
        :param device: optional device to move compressed output to
        :return: dictionary of compressed weight data
        """
        if can_quantize(weight, quantization_args):
            result = quantize(
                x=weight,
                scale=scale,
                zero_point=zero_point,
                g_idx=g_idx,
                args=quantization_args,
                dtype=quantization_args.pytorch_dtype(),
            )
        else:
            # already at (or below) the target bit width — store unchanged
            result = weight

        return {"weight": result if device is None else result.to(device)}

    def decompress_weight(
        self,
        compressed_data: Dict[str, Tensor],
        quantization_args: Optional[QuantizationArgs] = None,
    ) -> torch.Tensor:
        """
        Decompress a single compressed weight back to a dense tensor.

        :param compressed_data: dictionary of data needed for decompression
        :param quantization_args: quantization parameters for the weight
        :return: tensor of the decompressed weight
        """
        return dequantize(
            x_q=compressed_data["weight"],
            scale=compressed_data["weight_scale"],
            zero_point=compressed_data.get("weight_zero_point", None),
            g_idx=compressed_data.get("weight_g_idx", None),
        )
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
@BaseCompressor.register(name=CompressionFormat.int_quantized.value)
class IntQuantizationCompressor(NaiveQuantizationCompressor):
    """Integer-quantized alias of :class:`NaiveQuantizationCompressor`."""
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
@BaseCompressor.register(name=CompressionFormat.float_quantized.value)
class FloatQuantizationCompressor(NaiveQuantizationCompressor):
    """Floating-point-quantized alias of :class:`NaiveQuantizationCompressor`."""
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
import math
|
| 15 |
+
from typing import Dict, Optional, Tuple
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
import torch
|
| 19 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 20 |
+
from compressed_tensors.compressors.quantized_compressors.base import (
|
| 21 |
+
BaseQuantizationCompressor,
|
| 22 |
+
)
|
| 23 |
+
from compressed_tensors.config import CompressionFormat
|
| 24 |
+
from compressed_tensors.quantization import QuantizationArgs
|
| 25 |
+
from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
|
| 26 |
+
from compressed_tensors.quantization.utils import can_quantize
|
| 27 |
+
from torch import Tensor
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
__all__ = ["PackedQuantizationCompressor", "pack_to_int32", "unpack_from_int32"]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@BaseCompressor.register(name=CompressionFormat.pack_quantized.value)
class PackedQuantizationCompressor(BaseQuantizationCompressor):
    """
    Compresses a quantized model by packing every eight 4-bit weights into an int32
    (more generally, 32 // num_bits values per int32 word). The original
    (unpadded) weight shape is stored alongside the packed data so it can be
    recovered on decompression.
    """

    COMPRESSION_PARAM_NAMES = [
        "weight_packed",
        "weight_scale",
        "weight_zero_point",
        "weight_g_idx",
        "weight_shape",
    ]

    def compression_param_info(
        self,
        weight_shape: torch.Size,
        quantization_args: Optional[QuantizationArgs] = None,
    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
        """
        Creates a dictionary of expected shapes and dtypes for each compression
        parameter used by the compressor

        :param weight_shape: uncompressed weight shape (assumed 2-D)
        :param quantization_args: quantization parameters for the weight
            (must be provided; num_bits determines the packing density)
        :return: dictionary mapping compressed parameter names to shape and dtype
        """
        # number of quantized values that fit into one 32-bit word
        pack_factor = 32 // quantization_args.num_bits
        # columns are padded up to a whole number of words
        packed_size = math.ceil(weight_shape[1] / pack_factor)
        return {
            "weight_packed": (torch.Size((weight_shape[0], packed_size)), torch.int32),
            "weight_shape": (torch.Size((2,)), torch.int32),
        }

    def compress_weight(
        self,
        weight: Tensor,
        scale: Tensor,
        quantization_args: QuantizationArgs,
        zero_point: Optional[Tensor] = None,
        g_idx: Optional[torch.Tensor] = None,
        device: Optional[torch.device] = None,
    ) -> Dict[str, torch.Tensor]:
        """
        Compresses a single uncompressed weight: quantize to int8 (when
        possible), then bit-pack into int32 words.

        :param weight: uncompressed weight tensor
        :param scale: quantization scale for weight
        :param quantization_args: quantization parameters for weight
        :param zero_point: quantization zero point for weight
        :param g_idx: optional mapping from column index to group index
        :param device: optional device to move compressed output to
        :return: dictionary of compressed weight data
        """
        compressed_dict = {}
        if can_quantize(weight, quantization_args):
            quantized_weight = quantize(
                x=weight,
                scale=scale,
                zero_point=zero_point,
                g_idx=g_idx,
                args=quantization_args,
                dtype=torch.int8,
            )
        else:
            # weight is already quantized; pack it as-is
            quantized_weight = weight

        packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
        # record the pre-packing shape so padding can be stripped on decompress
        weight_shape = torch.tensor(weight.shape)
        if device is not None:
            packed_weight = packed_weight.to(device)
            weight_shape = weight_shape.to(device)

        compressed_dict["weight_shape"] = weight_shape
        compressed_dict["weight_packed"] = packed_weight

        return compressed_dict

    def decompress_weight(
        self,
        compressed_data: Dict[str, Tensor],
        quantization_args: Optional[QuantizationArgs] = None,
    ) -> torch.Tensor:
        """
        Decompresses a single compressed weight: unpack the int32 words back
        to int8 values, then dequantize to a dense tensor.

        :param compressed_data: dictionary of data needed for decompression
            (expects "weight_packed", "weight_scale", "weight_shape", and
            optionally "weight_zero_point" / "weight_g_idx")
        :param quantization_args: quantization parameters for the weight
            (must be provided; num_bits drives the unpacking)
        :return: tensor of the decompressed weight
        """
        weight = compressed_data["weight_packed"]
        scale = compressed_data["weight_scale"]
        zero_point = compressed_data.get("weight_zero_point", None)
        g_idx = compressed_data.get("weight_g_idx", None)
        original_shape = torch.Size(compressed_data["weight_shape"])
        num_bits = quantization_args.num_bits
        unpacked = unpack_from_int32(weight, num_bits, original_shape)
        decompressed_weight = dequantize(
            x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx
        )

        return decompressed_weight
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
    """
    Packs a 2-D tensor of int8 quantized weights into int32 words, padding
    the columns so every word is fully populated.

    Word ``k`` of a row holds the ``32 // num_bits`` consecutive input
    columns starting at ``k * (32 // num_bits)``, with the ``i``-th of those
    stored at bit offset ``num_bits * i``.

    :param value: tensor to pack
    :param num_bits: number of bits used to store underlying data
    :returns: packed int32 tensor
    """
    if value.dtype is not torch.int8:
        raise ValueError("Tensor must be quantized to torch.int8 before packing")
    if num_bits > 8:
        raise ValueError("Packing is only supported for less than 8 bits")

    # shift signed values into the unsigned range [0, 2**num_bits)
    bias = 2 ** (num_bits - 1)
    unsigned = (value + bias).to(torch.uint8).cpu().numpy().astype(np.uint32)

    per_word = 32 // num_bits  # values per 32-bit word
    rows, cols = unsigned.shape
    n_words = math.ceil(cols / per_word)

    # right-pad columns to a whole number of words
    pad_cols = n_words * per_word - cols
    unsigned = np.pad(unsigned, pad_width=[(0, 0), (0, pad_cols)], constant_values=0)

    # OR each strided column slice into its bit slot
    packed = np.zeros((rows, n_words), dtype=np.uint32)
    for slot in range(per_word):
        packed |= unsigned[:, slot::per_word] << (num_bits * slot)

    # reinterpret the words as signed int32 for storage
    return torch.from_numpy(np.ascontiguousarray(packed).view(np.int32))
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def unpack_from_int32(
    value: torch.Tensor, num_bits: int, shape: torch.Size
) -> torch.Tensor:
    """
    Unpacks a tensor of packed int32 weights into individual int8s, maintaining
    their original bit range.

    Inverse of ``pack_to_int32``: each int32 holds ``32 // num_bits`` values;
    padding introduced during packing is stripped using ``shape``.

    :param value: tensor to unpack
    :param num_bits: number of bits to unpack each data point into, at most 8
    :param shape: shape to unpack into, used to remove padding
    :returns: unpacked int8 tensor
    :raises ValueError: if value is not int32 or num_bits exceeds 8
    """
    if value.dtype is not torch.int32:
        raise ValueError(
            f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
        )

    if num_bits > 8:
        # message previously said "less than 8 bits" although exactly 8 is allowed
        raise ValueError("Unpacking is only supported for 8 bits or fewer")

    pack_factor = 32 // num_bits

    # unpack: slice each num_bits-wide field out of every packed word
    mask = pow(2, num_bits) - 1
    unpacked = torch.zeros(
        (value.shape[0], value.shape[1] * pack_factor),
        device=value.device,
        dtype=torch.int32,
    )
    for i in range(pack_factor):
        unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask

    # remove padding added when the original row length was not a multiple
    # of the pack factor
    original_row_size = int(shape[1])
    unpacked = unpacked[:, :original_row_size]

    # bits are packed in unsigned format, reformat to signed
    # update the value range from unsigned to signed
    offset = pow(2, num_bits) // 2
    unpacked = (unpacked - offset).to(torch.int8)

    return unpacked
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# flake8: noqa
|
| 15 |
+
|
| 16 |
+
from .base import *
|
| 17 |
+
from .dense import *
|
| 18 |
+
from .sparse_24_bitmask import *
|
| 19 |
+
from .sparse_bitmask import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (355 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/base.cpython-311.pyc
ADDED
|
Binary file (7.39 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/dense.cpython-311.pyc
ADDED
|
Binary file (1.67 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_24_bitmask.cpython-311.pyc
ADDED
|
Binary file (11.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_bitmask.cpython-311.pyc
ADDED
|
Binary file (7.87 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/base.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
from typing import Dict, Generator, Optional, Set, Tuple
|
| 17 |
+
|
| 18 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 19 |
+
from compressed_tensors.utils import get_nested_weight_mappings, merge_names
|
| 20 |
+
from safetensors import safe_open
|
| 21 |
+
from torch import Tensor
|
| 22 |
+
from tqdm import tqdm
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
__all__ = ["BaseSparseCompressor"]
|
| 26 |
+
|
| 27 |
+
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class BaseSparseCompressor(BaseCompressor):
    """
    Base class representing a sparse compression algorithm. Each child class should
    implement compression_param_info, compress_weight and decompress_weight; child
    classes should also define COMPRESSION_PARAM_NAMES.

    Compressors support compressing/decompressing a full module state dict or a single
    quantized PyTorch leaf module.

    Model Load Lifecycle (run_compressed=False):
        - ModelCompressor.decompress()
            - apply_quantization_config()
            - BaseSparseCompressor.decompress()
                - BaseSparseCompressor.decompress_weight()

    Model Save Lifecycle:
        - ModelCompressor.compress()
            - BaseSparseCompressor.compress()
                - BaseSparseCompressor.compress_weight()

    Module Lifecycle (run_compressed=True):
        - apply_quantization_config()
        - compressed_module = CompressedLinear(module)
            - initialize_module_for_quantization()
            - BaseSparseCompressor.compression_param_info()
            - register_parameters()
        - compressed_module.forward()
            - compressed_module.decompress()


    :param config: config specifying compression parameters
    """

    def compress(
        self,
        model_state: Dict[str, Tensor],
        compression_targets: Optional[Set[str]] = None,
    ) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict using bitmask compression

        :param model_state: state dict of uncompressed model
        :param compression_targets: optional set of layer prefixes to compress,
            otherwise compress all layers (for backwards compatibility)
        :return: compressed state dict
        """
        compressed_dict = {}
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )
        for name, value in tqdm(model_state.items(), desc="Compressing model"):
            if not self.should_compress(name, compression_targets):
                # non-targeted parameters pass through uncompressed
                compressed_dict[name] = value
                continue
            prefix = name
            if prefix.endswith(".weight"):
                prefix = prefix[: -(len(".weight"))]

            compression_data = self.compress_weight(prefix, value)
            for key in compression_data:
                if key in compressed_dict:
                    # Logger.warn() is a deprecated alias; use warning()
                    _LOGGER.warning(
                        f"Expected all compressed state_dict keys to be unique, but "
                        f"found an existing entry for {key}. The existing entry will "
                        "be replaced."
                    )

            compressed_dict.update(compression_data)

        return compressed_dict

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a bitmask compressed state dict located
        at path_to_model_or_tensors and returns a generator
        for sequentially decompressing back to a dense state dict

        :param path_to_model_or_tensors: path to compressed safetensors model
            (directory with one or more safetensors files) or compressed
            tensors file
        :param device: device to load decompressed weights onto
        :return: iterator for generating decompressed weights
        """
        weight_mappings, ignored_params = get_nested_weight_mappings(
            path_to_model_or_tensors,
            self.COMPRESSION_PARAM_NAMES,
            return_unmatched_params=True,
        )
        for weight_name in weight_mappings:
            # collect all compression parameters for this weight, then decompress
            weight_data = {}
            for param_name, safe_path in weight_mappings[weight_name].items():
                full_name = merge_names(weight_name, param_name)
                with safe_open(safe_path, framework="pt", device=device) as f:
                    weight_data[param_name] = f.get_tensor(full_name)
            decompressed = self.decompress_weight(weight_data)
            yield merge_names(weight_name, "weight"), decompressed

        # parameters that matched no compression param are yielded unchanged
        for ignored_param_name, safe_path in ignored_params.items():
            with safe_open(safe_path, framework="pt", device=device) as f:
                value = f.get_tensor(ignored_param_name)
            yield ignored_param_name, value

    @staticmethod
    def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
        """
        Check if a parameter should be compressed.
        Currently, this only returns True for weight parameters.

        :param name: name of the parameter
        :param expanded_targets: set of layer prefixes to compress
        :return: whether or not the parameter should be compressed
        """
        if expanded_targets is None:
            return name.endswith(".weight")

        return (
            name.endswith(".weight") and name[: -(len(".weight"))] in expanded_targets
        )
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/dense.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from typing import Dict, Generator, Tuple
|
| 16 |
+
|
| 17 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 18 |
+
from compressed_tensors.config import CompressionFormat
|
| 19 |
+
from torch import Tensor
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@BaseCompressor.register(name=CompressionFormat.dense.value)
class DenseCompressor(BaseCompressor):
    """
    Identity compressor for dense models, returns the original state_dict
    """

    def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
        # dense format stores weights as-is; nothing to transform
        return model_state

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        # nothing was compressed, so there is nothing to reconstruct
        return iter(())
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from dataclasses import dataclass
|
| 16 |
+
from typing import Dict, List, Tuple, Union
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 20 |
+
from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
|
| 21 |
+
from compressed_tensors.config import CompressionFormat, SparsityStructure
|
| 22 |
+
from compressed_tensors.quantization import FP8_DTYPE
|
| 23 |
+
from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
|
| 24 |
+
from torch import Tensor
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
__all__ = [
|
| 28 |
+
"Sparse24BitMaskCompressor",
|
| 29 |
+
"Sparse24BitMaskTensor",
|
| 30 |
+
"sparse24_bitmask_compress",
|
| 31 |
+
"sparse24_bitmask_decompress",
|
| 32 |
+
"get_24_bytemasks",
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@BaseCompressor.register(name=CompressionFormat.sparse_24_bitmask.value)
class Sparse24BitMaskCompressor(BaseSparseCompressor):
    """
    Compression for sparse models using bitmasks. Non-zero weights are stored in a 2d
    values tensor, with their locations stored in a 2d bitmask
    """

    COMPRESSION_PARAM_NAMES = [
        "shape",
        "compressed",
        "bitmask",
    ]

    def compress_weight(self, name, value):
        # delegate the 2:4 bitmask packing to the tensor wrapper, then
        # serialize its parts on CPU under the weight's prefix
        compressed = Sparse24BitMaskTensor.from_dense(
            value, self.config.sparsity_structure
        )
        return compressed.dict(name_prefix=name, device="cpu")

    def decompress_weight(self, weight_data):
        # rebuild the wrapper from its stored params and expand back to dense
        return Sparse24BitMaskTensor.from_compressed_data(**weight_data).decompress()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
@dataclass
class Sparse24BitMaskTensor:
    """
    Owns compression and decompression for a single 2:4 sparse
    bitmask compressed tensor.

    :param shape: shape of dense tensor
    :param compressed: 2d tensor of non-zero values
    :param bitmask: 2d bitmask of non-zero values
    """

    shape: List[int]
    compressed: Tensor
    bitmask: Tensor

    @staticmethod
    def from_dense(
        tensor: Tensor,
        sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
    ) -> "Sparse24BitMaskTensor":
        """
        :param tensor: dense tensor to compress
        :param sparsity_structure: structure of sparsity to apply; only 2:4 is
            supported by the underlying compression routine
        :return: instantiated compressed tensor
        """
        shape = list(tensor.shape)
        compressed, bitmask = sparse24_bitmask_compress(
            tensor.cpu(), sparsity_structure=sparsity_structure
        )
        return Sparse24BitMaskTensor(
            shape=shape,
            compressed=compressed,
            bitmask=bitmask,
        )

    @staticmethod
    def from_compressed_data(
        shape: Union[List[int], Tensor], compressed: Tensor, bitmask: Tensor
    ) -> "Sparse24BitMaskTensor":
        """
        :param shape: shape of the dense tensor (can be a list or a tensor)
        :param compressed: 2d tensor of non-zero values
        :param bitmask: 2d bitmask of non-zero values
        :return: instantiated Sparse24BitMaskTensor
        """
        # normalize shape to a flat python list, whether given as list or tensor
        if isinstance(shape, list):
            shape = torch.tensor(shape)
        if isinstance(shape, torch.Tensor):
            shape = shape.flatten().tolist()
        return Sparse24BitMaskTensor(
            shape=shape, compressed=compressed, bitmask=bitmask
        )

    def decompress(self) -> Tensor:
        """
        :return: reconstructed dense tensor
        """
        return sparse24_bitmask_decompress(self.compressed, self.bitmask, self.shape)

    def curr_memory_size_bytes(self) -> int:
        """
        :return: size in bytes required to store compressed tensor on disk
        """

        def sizeof_tensor(a: Tensor) -> int:
            return a.element_size() * a.nelement()

        return sizeof_tensor(self.compressed) + sizeof_tensor(self.bitmask)

    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
        """
        :param name_prefix: name of original tensor to store compressed weight as
        :param device: device to place the serialized tensors on
        :return: dict of compressed data for the stored weight
        """
        if name_prefix.endswith(".weight"):
            name_prefix = name_prefix[: -len(".weight")]
        return {
            merge_names(name_prefix, "shape"): torch.tensor(
                self.shape, device=device
            ).reshape(-1, 1),
            merge_names(name_prefix, "compressed"): self.compressed.to(device),
            merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
        }

    def __repr__(self) -> str:
        # fixed: previously reported "BitMaskTensor", a copy-paste from the
        # unstructured bitmask class; report this class's actual name
        return f"{type(self).__name__}(shape={self.shape}, compressed=True)"
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def sparse24_bitmask_compress(
    tensor: Tensor,
    sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
) -> Tuple[Tensor, Tensor]:
    """
    Compresses a dense tensor using bitmask compression.

    Note: the return annotation previously claimed three tensors; this
    function returns exactly two.

    :param tensor: dense 2D tensor to compress
    :param sparsity_structure: structure of sparsity in the tensor; only `2:4`
        is supported
    :return: tuple of (values reshaped to (rows, cols // 2), packed bitmask)
    """
    assert len(tensor.shape) == 2, "Only 2D tensors are supported"
    assert (
        SparsityStructure(sparsity_structure) == SparsityStructure.TWO_FOUR
    ), "Only 2:4 sparsity is supported"

    bytemasks = get_24_bytemasks(tensor=tensor)

    if tensor.dtype == FP8_DTYPE:
        # access raw bytes of the tensor: boolean indexing is routed through
        # an int8 view, and the gathered bytes are reinterpreted as FP8 after
        tensor_view = tensor.view(torch.int8)
        values = tensor_view[bytemasks]
        values = values.view(FP8_DTYPE)
    else:
        values = tensor[bytemasks]

    num_rows, num_cols = tensor.shape
    # exactly half the elements survive 2:4 pruning, so the kept values fill
    # a (rows, cols // 2) matrix
    compressed_values = values.reshape(num_rows, num_cols // 2)
    bitmasks_packed = pack_bitmasks(bytemasks)
    return compressed_values, bitmasks_packed
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def sparse24_bitmask_decompress(
    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
) -> Tensor:
    """
    Reconstructs a dense tensor from a compressed one

    :param values: 1d tensor of non-zero values
    :param bitmasks: 2d int8 tensor flagging locations of non-zero values in the
        tensors original shape
    :param original_shape: shape of the dense tensor
    :return: decompressed dense tensor
    """
    # expand the packed bitmask back into a boolean mask of the dense shape
    bytemasks_unpacked = unpack_bitmasks(bitmasks, original_shape)

    decompressed_tensor = torch.zeros(original_shape, dtype=values.dtype)
    decompressed_tensor = decompressed_tensor.to(values.device)
    # values was stored as (rows, cols // 2); flatten to scatter by mask
    values = values.flatten()
    if decompressed_tensor.dtype == FP8_DTYPE:
        decompressed_tensor[bytemasks_unpacked] = values
        # NOTE(review): unconditionally moves FP8 results to CUDA; this will
        # raise on CPU-only hosts and ignores values.device -- confirm intent
        decompressed_tensor = decompressed_tensor.cuda()
    else:
        decompressed_tensor[bytemasks_unpacked] = values
    return decompressed_tensor
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def get_24_bytemasks(tensor):
    """
    Generate a 2:4 sparsity mask for the given tensor.

    This function creates a mask where exactly 2 out of every 4 elements are
    preserved based on their magnitudes. The preserved elements are the ones
    with the highest absolute values in each group of 4 elements.

    :param tensor: The input tensor for which the 2:4 sparsity mask is to be created.
        The tensor can be of any shape but its total number of elements
        must be a multiple of 4.
    :return: A boolean tensor of the same shape as the input tensor, where `True`
        indicates the preserved elements and `False` indicates the pruned elements.
    :raises ValueError: If the total number of elements in the tensor is not a
        multiple of 4.
    """
    if tensor.dtype == FP8_DTYPE:
        # abs/topk are routed through an int8 view of the raw FP8 bytes.
        # NOTE(review): abs() of the int8 reinterpretation does not order
        # negative FP8 values by true magnitude -- confirm FP8 selection intent
        tensor = tensor.view(torch.int8)
    original_shape = tensor.shape
    num_elements = tensor.numel()

    if num_elements % 4 != 0:
        raise ValueError("Tensor size must be a multiple of 4 for TWO_FOUR sparsity")

    # group elements in fours and keep the two with the largest magnitude
    reshaped_tensor = tensor.view(-1, 4)
    abs_tensor = reshaped_tensor.abs()
    topk_indices = abs_tensor.topk(2, dim=1).indices
    mask = torch.zeros_like(reshaped_tensor, dtype=torch.bool)
    mask.scatter_(1, topk_indices, True)
    mask = mask.view(original_shape)
    # removed dead code: the original re-viewed the local `tensor` back to its
    # input dtype here, but that rebinding was never read afterwards

    return mask
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from typing import Dict, List, Tuple, Union
|
| 16 |
+
|
| 17 |
+
import torch
|
| 18 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 19 |
+
from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
|
| 20 |
+
from compressed_tensors.config import CompressionFormat
|
| 21 |
+
from compressed_tensors.quantization import FP8_DTYPE
|
| 22 |
+
from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
|
| 23 |
+
from torch import Tensor
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
__all__ = [
|
| 27 |
+
"BitmaskCompressor",
|
| 28 |
+
"BitmaskTensor",
|
| 29 |
+
"bitmask_compress",
|
| 30 |
+
"bitmask_decompress",
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@BaseCompressor.register(name=CompressionFormat.sparse_bitmask.value)
class BitmaskCompressor(BaseSparseCompressor):
    """
    Compression for sparse models using bitmasks. Non-zero weights are stored in a 1d
    values tensor, with their locations stored in a 2d bitmask
    """

    COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]

    def compress_weight(self, name, value):
        # wrap the dense tensor, then serialize its parts under the weight's prefix
        return BitmaskTensor.from_dense(value).dict(name_prefix=name, device="cpu")

    def decompress_weight(self, weight_data):
        # reconstruct the wrapper from its stored params and expand to dense
        return BitmaskTensor(**weight_data).decompress()
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class BitmaskTensor:
    """
    Owns compression and decompression for a single bitmask compressed tensor.
    Adapted from: https://github.com/mgoin/torch_bitmask/tree/main

    :param shape: shape of dense tensor
    :compressed: flat tensor of non-zero values
    :bitmask: 2d bitmask of non-zero values
    :row_offsets: flat tensor indicating what index in values each dense row starts at
    """

    def __init__(
        self,
        shape: Union[torch.Size, List],
        compressed: Tensor,
        bitmask: Tensor,
        row_offsets: Tensor,
    ):
        # normalize shape to a plain list so it serializes consistently
        self.shape = list(shape)
        self.compressed = compressed
        self.bitmask = bitmask
        self.row_offsets = row_offsets

    @staticmethod
    def from_dense(tensor: Tensor) -> "BitmaskTensor":
        """
        :param tensor: dense tensor to compress
        :return: instantiated compressed tensor
        """
        values, packed_mask, offsets = bitmask_compress(tensor.cpu())
        return BitmaskTensor(
            shape=tensor.shape,
            compressed=values,
            bitmask=packed_mask,
            row_offsets=offsets,
        )

    def decompress(self) -> Tensor:
        """
        :return: reconstructed dense tensor
        """
        return bitmask_decompress(self.compressed, self.bitmask, self.shape)

    def curr_memory_size_bytes(self):
        """
        :return: size in bytes required to store compressed tensor on disk
        """
        parts = (self.compressed, self.bitmask, self.row_offsets)
        return sum(t.element_size() * t.nelement() for t in parts)

    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
        """
        :name_prefix: name of original tensor to store compressed weight as
        :return: dict of compressed data for the stored weight
        """
        shape_tensor = torch.tensor(self.shape, device=device)
        return {
            merge_names(name_prefix, "shape"): shape_tensor,
            merge_names(name_prefix, "compressed"): self.compressed.to(device),
            merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
            merge_names(name_prefix, "row_offsets"): self.row_offsets.to(device),
        }

    def __repr__(self):
        return f"BitmaskTensor(shape={self.shape}, compressed=True)"
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def bitmask_compress(tensor: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Compresses a dense tensor using bitmask compression

    :param tensor: dense tensor to compress
    :return: tuple of compressed data representing tensor
    """
    bytemasks = tensor != 0
    # starting index of each dense row's values within the flat value tensor
    row_counts = bytemasks.sum(dim=-1)
    row_offsets = torch.cumsum(row_counts, 0) - row_counts

    if tensor.dtype == FP8_DTYPE:
        # access the raw bytes of the tensor: gather through an int8 view,
        # then reinterpret the selected bytes as FP8
        values = tensor.view(torch.int8)[bytemasks].view(FP8_DTYPE)
    else:
        values = tensor[bytemasks]

    return values, pack_bitmasks(bytemasks), row_offsets
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def bitmask_decompress(
    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
) -> Tensor:
    """
    Reconstructs a dense tensor from a compressed one

    :param values: 1d tensor of non-zero values
    :param bitmasks: 2d int8 tensor flagging locations of non-zero values in the
        tensors original shape
    :param original_shape: shape of the dense tensor
    :return: decompressed dense tensor
    """
    mask = unpack_bitmasks(bitmasks, original_shape)

    # NOTE(review): the dense tensor is allocated on the default (CPU) device
    # regardless of where `values` lives — confirm callers expect this
    dense = torch.zeros(original_shape, dtype=values.dtype)
    dense[mask] = values

    return dense
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# flake8: noqa
|
| 15 |
+
|
| 16 |
+
from .marlin_24 import Marlin24Compressor
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
from typing import Dict, Generator, Tuple
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
import torch
|
| 20 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 21 |
+
from compressed_tensors.config import CompressionFormat
|
| 22 |
+
from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
|
| 23 |
+
from compressed_tensors.quantization.lifecycle.forward import quantize
|
| 24 |
+
from compressed_tensors.utils import (
|
| 25 |
+
get_permutations_24,
|
| 26 |
+
is_quantization_param,
|
| 27 |
+
merge_names,
|
| 28 |
+
sparse_semi_structured_from_dense_cutlass,
|
| 29 |
+
tensor_follows_mask_structure,
|
| 30 |
+
)
|
| 31 |
+
from torch import Tensor
|
| 32 |
+
from tqdm import tqdm
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@BaseCompressor.register(name=CompressionFormat.marlin_24.value)
class Marlin24Compressor(BaseCompressor):
    """
    Compresses a quantized model with 2:4 sparsity structure for inference with the
    Marlin24 kernel. Decompression is not implemented for this compressor.
    """

    # parameter-name suffixes this compressor writes for each compressed weight
    COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]

    @staticmethod
    def validate_quant_compatability(
        model_quant_args: Dict[str, QuantizationArgs]
    ) -> bool:
        """
        Checks if every quantized module in the model is compatible with Marlin24
        compression. Quantization must be channel or group strategy with group_size
        of 128. Only symmetric quantization is supported

        :param model_quant_args: dictionary of mapping module names to their
            quantization configuration
        :return: True if all modules are compatible with Marlin24 compression, raises
            a ValueError otherwise
        """
        for name, quant_args in model_quant_args.items():
            strategy = quant_args.strategy
            group_size = quant_args.group_size
            symmetric = quant_args.symmetric
            # NOTE(review): `is not` identity-compares strategy against the
            # enum's .value (a string); this relies on string interning —
            # confirm `!=` comparison would not be safer here
            if (
                strategy is not QuantizationStrategy.GROUP.value
                and strategy is not QuantizationStrategy.CHANNEL.value
            ):
                raise ValueError(
                    f"Marlin24 Compressor is only valid for group and channel "
                    f"quantization strategies, got {strategy} in {name}"
                )

            if group_size is not None and group_size != 128:
                raise ValueError(
                    f"Marlin24 Compressor is only valid for group size 128, "
                    f"got {group_size} in {name}"
                )

            if not symmetric:
                raise ValueError(
                    f"Marlin24 Compressor is only valid for symmetric quantzation, "
                    f"got symmetric={symmetric} in {name}"
                )

        return True

    @staticmethod
    def validate_sparsity_structure(name: str, weight: Tensor) -> bool:
        """
        Checks if a tensor fits the required 2:4 sparsity structure

        :param name: name of the tensor to check
        :param weight: tensor to check for sparsity structure
        :return: True if all rows match the 2:4 sparsity structure, raises
            ValueError otherwise
        """

        if not tensor_follows_mask_structure(weight):
            raise ValueError(
                "Marlin24 Compressor is only compatible with weights that have "
                f"a 2:4 sparsity structure. Found segments in {name} "
                "that do not match the expected structure."
            )

        return True

    def compress(
        self,
        model_state: Dict[str, Tensor],
        names_to_scheme: Dict[str, QuantizationArgs],
        **kwargs,
    ) -> Dict[str, Tensor]:
        """
        Compresses a quantized state_dict with 2:4 sparsity structure for inference
        with the Marlin24 kernel

        :param model_state: state dict of uncompressed model
        :param names_to_scheme: quantization args for each quantized weight, needed for
            quantize function to calculate bit depth
        :return: compressed state dict
        """
        # fail fast if any layer's quantization config is unsupported
        self.validate_quant_compatability(names_to_scheme)

        compressed_dict = {}
        weight_suffix = ".weight"
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )

        for name, value in tqdm(model_state.items(), desc="Compressing model"):
            if name.endswith(weight_suffix):
                prefix = name[: -(len(weight_suffix))]
                # the presence of a weight_scale entry marks a quantized weight
                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                if scale is not None:  # weight is quantized, compress it

                    # Marlin24 kernel requires float16 inputs
                    scale = scale.to(torch.float16)
                    value = value.to(torch.float16)

                    # quantize weight, keeping it as a float16 for now
                    quant_args = names_to_scheme[prefix]
                    value = quantize(
                        x=value, scale=scale, zero_point=zp, args=quant_args
                    )

                    # compress based on sparsity structure
                    self.validate_sparsity_structure(prefix, value)
                    value, meta = compress_weight_24(value)
                    meta = meta.cpu()

                    # Marlin24 kernel expects input dim first
                    value = value.t().contiguous().cpu()
                    scale = scale.t().contiguous().cpu()
                    og_weight_shape = value.shape

                    # Marlin24 kernel expects unsigned values, shift zero-point
                    value += (1 << quant_args.num_bits) // 2

                    # pack quantized weight and scale
                    value = pack_weight_24(value, quant_args)
                    packed_scale = pack_scales_24(scale, quant_args, og_weight_shape)
                    # NOTE(review): resize_ reinterprets meta as
                    # (cols // 2, rows * 2) in place — presumably the meta layout
                    # the Marlin24 kernel expects; confirm against the kernel
                    meta = meta.resize_(meta.shape[1] // 2, meta.shape[0] * 2)

                    # save compressed values
                    compressed_dict[merge_names(prefix, "scale_packed")] = packed_scale
                    compressed_dict[merge_names(prefix, "weight_packed")] = value
                    compressed_dict[merge_names(prefix, "meta")] = meta
                    continue

            if not is_quantization_param(name):
                # export unquantized parameters without modifying
                compressed_dict[name] = value.to("cpu")

        return compressed_dict

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """Not supported — Marlin24 compression is one-way; always raises."""
        raise NotImplementedError(
            "Decompression is not implemented for the Marlin24 Compressor."
        )
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def compress_weight_24(weight: Tensor):
    """
    Compress a 2:4-sparse weight into its dense-values / metadata pair.

    :param weight: dense tensor following a 2:4 sparsity structure
    :return: tuple of (compressed values, sparsity metadata)
    """
    compressed, meta = sparse_semi_structured_from_dense_cutlass(weight.contiguous())
    return compressed.contiguous(), meta
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def marlin_permute_weights(q_w, size_k, size_n, perm, tile):
    """
    Permute a quantized weight matrix into the tiled Marlin layout.

    :param q_w: quantized weight tensor of shape (size_k, size_n)
    :param size_k: input dimension of the weight
    :param size_n: output dimension of the weight
    :param perm: 1d permutation applied to each chunk of ``perm.numel()``
        consecutive elements of the tiled weight
    :param tile: marlin tile size; both dims must be divisible by it
    :return: permuted weight tensor of shape (size_k // tile, size_n * tile)
    """
    assert q_w.shape == (size_k, size_n)
    assert size_k % tile == 0, f"size_k = {size_k}, tile = {tile}"
    # fix: the original failure message mislabeled this value as size_k
    assert size_n % tile == 0, f"size_n = {size_n}, tile = {tile}"

    # Permute weights to 16x64 marlin tiles
    q_w = q_w.reshape((size_k // tile, tile, size_n // tile, tile))
    q_w = q_w.permute((0, 2, 1, 3))
    q_w = q_w.reshape((size_k // tile, size_n * tile))

    # apply the marlin permutation within each perm-sized chunk
    q_w = q_w.reshape((-1, perm.numel()))[:, perm].reshape(q_w.shape)

    return q_w
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def pack_weight_24(
    weight: Tensor,
    quantization_args: QuantizationArgs,
    tile: int = 16,
):
    """
    Pack a quantized 2:4 weight into 32-bit words for the Marlin24 kernel.

    :param weight: quantized weight tensor, input dim first
    :param quantization_args: quantization configuration (supplies num_bits)
    :param tile: marlin tile size used for the layout shuffle
    :return: int32 tensor of packed weights
    """
    size_k, size_n = weight.shape[0], weight.shape[1]
    num_bits = quantization_args.num_bits
    pack_factor = 32 // num_bits  # how many values fit in one 32-bit word

    # Reshuffle to marlin_24 format before packing
    perm, _, _ = get_permutations_24(num_bits)
    shuffled = marlin_permute_weights(weight, size_k, size_n, perm, tile)
    shuffled = shuffled.cpu().numpy().astype(np.uint32)

    # interleave pack_factor strided values into each uint32 word
    packed = np.zeros(
        (shuffled.shape[0], shuffled.shape[1] // pack_factor), dtype=np.uint32
    )
    for slot in range(pack_factor):
        packed |= shuffled[:, slot::pack_factor] << num_bits * slot

    return torch.from_numpy(packed.astype(np.int32))
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def pack_scales_24(scales, quantization_args, w_shape):
    """
    Permute quantization scales into the Marlin24 layout.

    :param scales: scale tensor, input dim first
    :param quantization_args: quantization configuration for the weight
    :param w_shape: shape of the (transposed) weight the scales belong to
    :return: contiguous tensor of permuted scales with size_n columns
    """
    size_k, size_n = w_shape[0], w_shape[1]

    _, scale_perm_2_4, scale_perm_single_2_4 = get_permutations_24(
        quantization_args.num_bits
    )

    # NOTE(review): strategy is compared against the enum member here while
    # validate_quant_compatability compares against .value — confirm both work
    # for the type actually stored on QuantizationArgs
    grouped = (
        quantization_args.strategy == QuantizationStrategy.GROUP
        and quantization_args.group_size < size_k
    )
    perm = scale_perm_2_4 if grouped else scale_perm_single_2_4  # else: channelwise
    scales = scales.reshape((-1, len(perm)))[:, perm]

    return scales.reshape((-1, size_n)).contiguous()
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# flake8: noqa
|
| 16 |
+
# isort: skip_file
|
| 17 |
+
|
| 18 |
+
from .quant_args import *
|
| 19 |
+
from .quant_config import *
|
| 20 |
+
from .quant_scheme import *
|
| 21 |
+
from .lifecycle import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (340 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_args.cpython-311.pyc
ADDED
|
Binary file (10.6 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_config.cpython-311.pyc
ADDED
|
Binary file (11.2 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_scheme.cpython-311.pyc
ADDED
|
Binary file (5.46 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# flake8: noqa
|
| 16 |
+
# isort: skip_file
|
| 17 |
+
|
| 18 |
+
from .forward import *
|
| 19 |
+
from .initialize import *
|
| 20 |
+
from .compressed import *
|
| 21 |
+
from .apply import *
|
| 22 |
+
from .helpers import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (368 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/apply.cpython-311.pyc
ADDED
|
Binary file (17.9 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/compressed.cpython-311.pyc
ADDED
|
Binary file (1.92 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/forward.cpython-311.pyc
ADDED
|
Binary file (13.3 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/helpers.cpython-311.pyc
ADDED
|
Binary file (764 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/initialize.cpython-311.pyc
ADDED
|
Binary file (7.93 kB). View file
|
|
|