Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .venv/lib/python3.11/site-packages/OpenSSL/SSL.py +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__init__.py +31 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/_util.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/debug.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/rand.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/version.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/_util.py +124 -0
- .venv/lib/python3.11/site-packages/OpenSSL/crypto.py +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/debug.py +40 -0
- .venv/lib/python3.11/site-packages/OpenSSL/py.typed +0 -0
- .venv/lib/python3.11/site-packages/OpenSSL/rand.py +40 -0
- .venv/lib/python3.11/site-packages/OpenSSL/version.py +28 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/__init__.py +22 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/base.py +20 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/__init__.py +22 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/base.py +188 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/helpers.py +137 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__init__.py +18 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/base.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/naive_quantized.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/pack_quantized.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/base.py +176 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +142 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +213 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__init__.py +19 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/base.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/dense.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_24_bitmask.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_bitmask.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/base.py +148 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/dense.py +34 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +240 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +163 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +16 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +251 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__init__.py +21 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_args.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_config.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_scheme.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__init__.py +22 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/apply.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/compressed.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/forward.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/helpers.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/initialize.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/OpenSSL/SSL.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (C) AB Strakt
|
| 2 |
+
# See LICENSE for details.
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
pyOpenSSL - A simple wrapper around the OpenSSL library
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from OpenSSL import SSL, crypto
|
| 9 |
+
from OpenSSL.version import (
|
| 10 |
+
__author__,
|
| 11 |
+
__copyright__,
|
| 12 |
+
__email__,
|
| 13 |
+
__license__,
|
| 14 |
+
__summary__,
|
| 15 |
+
__title__,
|
| 16 |
+
__uri__,
|
| 17 |
+
__version__,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
__all__ = [
|
| 21 |
+
"SSL",
|
| 22 |
+
"crypto",
|
| 23 |
+
"__author__",
|
| 24 |
+
"__copyright__",
|
| 25 |
+
"__email__",
|
| 26 |
+
"__license__",
|
| 27 |
+
"__summary__",
|
| 28 |
+
"__title__",
|
| 29 |
+
"__uri__",
|
| 30 |
+
"__version__",
|
| 31 |
+
]
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (716 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/_util.cpython-311.pyc
ADDED
|
Binary file (5.28 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/debug.cpython-311.pyc
ADDED
|
Binary file (1.67 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/rand.cpython-311.pyc
ADDED
|
Binary file (1.75 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/version.cpython-311.pyc
ADDED
|
Binary file (712 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/_util.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
import warnings
|
| 4 |
+
from typing import Any, Callable, NoReturn, Type, Union
|
| 5 |
+
|
| 6 |
+
from cryptography.hazmat.bindings.openssl.binding import Binding
|
| 7 |
+
|
| 8 |
+
StrOrBytesPath = Union[str, bytes, os.PathLike]
|
| 9 |
+
|
| 10 |
+
binding = Binding()
|
| 11 |
+
ffi = binding.ffi
|
| 12 |
+
lib = binding.lib
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# This is a special CFFI allocator that does not bother to zero its memory
|
| 16 |
+
# after allocation. This has vastly better performance on large allocations and
|
| 17 |
+
# so should be used whenever we don't need the memory zeroed out.
|
| 18 |
+
no_zero_allocator = ffi.new_allocator(should_clear_after_alloc=False)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def text(charp: Any) -> str:
|
| 22 |
+
"""
|
| 23 |
+
Get a native string type representing of the given CFFI ``char*`` object.
|
| 24 |
+
|
| 25 |
+
:param charp: A C-style string represented using CFFI.
|
| 26 |
+
|
| 27 |
+
:return: :class:`str`
|
| 28 |
+
"""
|
| 29 |
+
if not charp:
|
| 30 |
+
return ""
|
| 31 |
+
return ffi.string(charp).decode("utf-8")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def exception_from_error_queue(exception_type: Type[Exception]) -> NoReturn:
|
| 35 |
+
"""
|
| 36 |
+
Convert an OpenSSL library failure into a Python exception.
|
| 37 |
+
|
| 38 |
+
When a call to the native OpenSSL library fails, this is usually signalled
|
| 39 |
+
by the return value, and an error code is stored in an error queue
|
| 40 |
+
associated with the current thread. The err library provides functions to
|
| 41 |
+
obtain these error codes and textual error messages.
|
| 42 |
+
"""
|
| 43 |
+
errors = []
|
| 44 |
+
|
| 45 |
+
while True:
|
| 46 |
+
error = lib.ERR_get_error()
|
| 47 |
+
if error == 0:
|
| 48 |
+
break
|
| 49 |
+
errors.append(
|
| 50 |
+
(
|
| 51 |
+
text(lib.ERR_lib_error_string(error)),
|
| 52 |
+
text(lib.ERR_func_error_string(error)),
|
| 53 |
+
text(lib.ERR_reason_error_string(error)),
|
| 54 |
+
)
|
| 55 |
+
)
|
| 56 |
+
|
| 57 |
+
raise exception_type(errors)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def make_assert(error: Type[Exception]) -> Callable[[bool], Any]:
|
| 61 |
+
"""
|
| 62 |
+
Create an assert function that uses :func:`exception_from_error_queue` to
|
| 63 |
+
raise an exception wrapped by *error*.
|
| 64 |
+
"""
|
| 65 |
+
|
| 66 |
+
def openssl_assert(ok: bool) -> None:
|
| 67 |
+
"""
|
| 68 |
+
If *ok* is not True, retrieve the error from OpenSSL and raise it.
|
| 69 |
+
"""
|
| 70 |
+
if ok is not True:
|
| 71 |
+
exception_from_error_queue(error)
|
| 72 |
+
|
| 73 |
+
return openssl_assert
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def path_bytes(s: StrOrBytesPath) -> bytes:
|
| 77 |
+
"""
|
| 78 |
+
Convert a Python path to a :py:class:`bytes` for the path which can be
|
| 79 |
+
passed into an OpenSSL API accepting a filename.
|
| 80 |
+
|
| 81 |
+
:param s: A path (valid for os.fspath).
|
| 82 |
+
|
| 83 |
+
:return: An instance of :py:class:`bytes`.
|
| 84 |
+
"""
|
| 85 |
+
b = os.fspath(s)
|
| 86 |
+
|
| 87 |
+
if isinstance(b, str):
|
| 88 |
+
return b.encode(sys.getfilesystemencoding())
|
| 89 |
+
else:
|
| 90 |
+
return b
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def byte_string(s: str) -> bytes:
|
| 94 |
+
return s.encode("charmap")
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# A marker object to observe whether some optional arguments are passed any
|
| 98 |
+
# value or not.
|
| 99 |
+
UNSPECIFIED = object()
|
| 100 |
+
|
| 101 |
+
_TEXT_WARNING = "str for {0} is no longer accepted, use bytes"
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def text_to_bytes_and_warn(label: str, obj: Any) -> Any:
|
| 105 |
+
"""
|
| 106 |
+
If ``obj`` is text, emit a warning that it should be bytes instead and try
|
| 107 |
+
to convert it to bytes automatically.
|
| 108 |
+
|
| 109 |
+
:param str label: The name of the parameter from which ``obj`` was taken
|
| 110 |
+
(so a developer can easily find the source of the problem and correct
|
| 111 |
+
it).
|
| 112 |
+
|
| 113 |
+
:return: If ``obj`` is the text string type, a ``bytes`` object giving the
|
| 114 |
+
UTF-8 encoding of that text is returned. Otherwise, ``obj`` itself is
|
| 115 |
+
returned.
|
| 116 |
+
"""
|
| 117 |
+
if isinstance(obj, str):
|
| 118 |
+
warnings.warn(
|
| 119 |
+
_TEXT_WARNING.format(label),
|
| 120 |
+
category=DeprecationWarning,
|
| 121 |
+
stacklevel=3,
|
| 122 |
+
)
|
| 123 |
+
return obj.encode("utf-8")
|
| 124 |
+
return obj
|
.venv/lib/python3.11/site-packages/OpenSSL/crypto.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
.venv/lib/python3.11/site-packages/OpenSSL/debug.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ssl
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import cffi
|
| 5 |
+
import cryptography
|
| 6 |
+
|
| 7 |
+
import OpenSSL.SSL
|
| 8 |
+
|
| 9 |
+
from . import version
|
| 10 |
+
|
| 11 |
+
_env_info = """\
|
| 12 |
+
pyOpenSSL: {pyopenssl}
|
| 13 |
+
cryptography: {cryptography}
|
| 14 |
+
cffi: {cffi}
|
| 15 |
+
cryptography's compiled against OpenSSL: {crypto_openssl_compile}
|
| 16 |
+
cryptography's linked OpenSSL: {crypto_openssl_link}
|
| 17 |
+
Python's OpenSSL: {python_openssl}
|
| 18 |
+
Python executable: {python}
|
| 19 |
+
Python version: {python_version}
|
| 20 |
+
Platform: {platform}
|
| 21 |
+
sys.path: {sys_path}""".format(
|
| 22 |
+
pyopenssl=version.__version__,
|
| 23 |
+
crypto_openssl_compile=OpenSSL._util.ffi.string(
|
| 24 |
+
OpenSSL._util.lib.OPENSSL_VERSION_TEXT,
|
| 25 |
+
).decode("ascii"),
|
| 26 |
+
crypto_openssl_link=OpenSSL.SSL.SSLeay_version(
|
| 27 |
+
OpenSSL.SSL.SSLEAY_VERSION
|
| 28 |
+
).decode("ascii"),
|
| 29 |
+
python_openssl=getattr(ssl, "OPENSSL_VERSION", "n/a"),
|
| 30 |
+
cryptography=cryptography.__version__,
|
| 31 |
+
cffi=cffi.__version__,
|
| 32 |
+
python=sys.executable,
|
| 33 |
+
python_version=sys.version,
|
| 34 |
+
platform=sys.platform,
|
| 35 |
+
sys_path=sys.path,
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
if __name__ == "__main__":
|
| 40 |
+
print(_env_info)
|
.venv/lib/python3.11/site-packages/OpenSSL/py.typed
ADDED
|
File without changes
|
.venv/lib/python3.11/site-packages/OpenSSL/rand.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PRNG management routines, thin wrappers.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from OpenSSL._util import lib as _lib
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def add(buffer: bytes, entropy: int) -> None:
|
| 9 |
+
"""
|
| 10 |
+
Mix bytes from *string* into the PRNG state.
|
| 11 |
+
|
| 12 |
+
The *entropy* argument is (the lower bound of) an estimate of how much
|
| 13 |
+
randomness is contained in *string*, measured in bytes.
|
| 14 |
+
|
| 15 |
+
For more information, see e.g. :rfc:`1750`.
|
| 16 |
+
|
| 17 |
+
This function is only relevant if you are forking Python processes and
|
| 18 |
+
need to reseed the CSPRNG after fork.
|
| 19 |
+
|
| 20 |
+
:param buffer: Buffer with random data.
|
| 21 |
+
:param entropy: The entropy (in bytes) measurement of the buffer.
|
| 22 |
+
|
| 23 |
+
:return: :obj:`None`
|
| 24 |
+
"""
|
| 25 |
+
if not isinstance(buffer, bytes):
|
| 26 |
+
raise TypeError("buffer must be a byte string")
|
| 27 |
+
|
| 28 |
+
if not isinstance(entropy, int):
|
| 29 |
+
raise TypeError("entropy must be an integer")
|
| 30 |
+
|
| 31 |
+
_lib.RAND_add(buffer, len(buffer), entropy)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def status() -> int:
|
| 35 |
+
"""
|
| 36 |
+
Check whether the PRNG has been seeded with enough data.
|
| 37 |
+
|
| 38 |
+
:return: 1 if the PRNG is seeded enough, 0 otherwise.
|
| 39 |
+
"""
|
| 40 |
+
return _lib.RAND_status()
|
.venv/lib/python3.11/site-packages/OpenSSL/version.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (C) AB Strakt
|
| 2 |
+
# Copyright (C) Jean-Paul Calderone
|
| 3 |
+
# See LICENSE for details.
|
| 4 |
+
|
| 5 |
+
"""
|
| 6 |
+
pyOpenSSL - A simple wrapper around the OpenSSL library
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
__all__ = [
|
| 10 |
+
"__author__",
|
| 11 |
+
"__copyright__",
|
| 12 |
+
"__email__",
|
| 13 |
+
"__license__",
|
| 14 |
+
"__summary__",
|
| 15 |
+
"__title__",
|
| 16 |
+
"__uri__",
|
| 17 |
+
"__version__",
|
| 18 |
+
]
|
| 19 |
+
|
| 20 |
+
__version__ = "24.2.1"
|
| 21 |
+
|
| 22 |
+
__title__ = "pyOpenSSL"
|
| 23 |
+
__uri__ = "https://pyopenssl.org/"
|
| 24 |
+
__summary__ = "Python wrapper module around the OpenSSL library"
|
| 25 |
+
__author__ = "The pyOpenSSL developers"
|
| 26 |
+
__email__ = "cryptography-dev@python.org"
|
| 27 |
+
__license__ = "Apache License, Version 2.0"
|
| 28 |
+
__copyright__ = f"Copyright 2001-2024 {__author__}"
|
.venv/lib/python3.11/site-packages/compressed_tensors/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from .base import *
|
| 16 |
+
|
| 17 |
+
# flake8: noqa
|
| 18 |
+
from .compressors import *
|
| 19 |
+
from .config import *
|
| 20 |
+
from .quantization import QuantizationConfig, QuantizationStatus
|
| 21 |
+
from .utils import *
|
| 22 |
+
from .version import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/base.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
SPARSITY_CONFIG_NAME = "sparsity_config"
|
| 16 |
+
QUANTIZATION_CONFIG_NAME = "quantization_config"
|
| 17 |
+
COMPRESSION_CONFIG_NAME = "compression_config"
|
| 18 |
+
KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
|
| 19 |
+
COMPRESSION_VERSION_NAME = "version"
|
| 20 |
+
QUANTIZATION_METHOD_NAME = "quant_method"
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# flake8: noqa
|
| 16 |
+
|
| 17 |
+
from .base import *
|
| 18 |
+
from .helpers import *
|
| 19 |
+
from .model_compressors import *
|
| 20 |
+
from .quantized_compressors import *
|
| 21 |
+
from .sparse_compressors import *
|
| 22 |
+
from .sparse_quantized_compressors import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/base.py
ADDED
|
@@ -0,0 +1,188 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from abc import ABC, abstractmethod
|
| 16 |
+
from typing import Dict, Generator, Optional, Tuple, Union
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from compressed_tensors.config import SparsityCompressionConfig
|
| 20 |
+
from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
|
| 21 |
+
from compressed_tensors.registry import RegistryMixin
|
| 22 |
+
from torch import Tensor
|
| 23 |
+
from torch.nn import Module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
__all__ = ["BaseCompressor"]
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class BaseCompressor(RegistryMixin, ABC):
|
| 30 |
+
"""
|
| 31 |
+
Base class representing a model compression algorithm. Each child class should
|
| 32 |
+
implement compression_param_info, compress_weight and decompress_weight.
|
| 33 |
+
|
| 34 |
+
Compressors support compressing/decompressing a full module state dict or a single
|
| 35 |
+
quantized PyTorch leaf module.
|
| 36 |
+
|
| 37 |
+
Model Load Lifecycle (run_compressed=False):
|
| 38 |
+
- ModelCompressor.decompress()
|
| 39 |
+
- apply_quantization_config()
|
| 40 |
+
- BaseCompressor.decompress()
|
| 41 |
+
|
| 42 |
+
Model Save Lifecycle:
|
| 43 |
+
- ModelCompressor.compress()
|
| 44 |
+
- BaseCompressor.compress()
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
Module Lifecycle (run_compressed=True):
|
| 48 |
+
- apply_quantization_config()
|
| 49 |
+
- compressed_module = CompressedLinear(module)
|
| 50 |
+
- initialize_module_for_quantization()
|
| 51 |
+
- BaseCompressor.compression_param_info()
|
| 52 |
+
- register_parameters()
|
| 53 |
+
- compressed_module.forward()
|
| 54 |
+
-compressed_module.decompress()
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
:param config: config specifying compression parameters
|
| 58 |
+
"""
|
| 59 |
+
|
| 60 |
+
def __init__(
|
| 61 |
+
self, config: Union[SparsityCompressionConfig, QuantizationConfig, None] = None
|
| 62 |
+
):
|
| 63 |
+
self.config = config
|
| 64 |
+
|
| 65 |
+
def compression_param_info(
|
| 66 |
+
self,
|
| 67 |
+
weight_shape: torch.Size,
|
| 68 |
+
quantization_args: Optional[QuantizationArgs] = None,
|
| 69 |
+
) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
|
| 70 |
+
"""
|
| 71 |
+
Creates a dictionary of expected shapes and dtypes for each compression
|
| 72 |
+
parameter used by the compressor
|
| 73 |
+
|
| 74 |
+
:param weight_shape: uncompressed weight shape
|
| 75 |
+
:param quantization_args: quantization parameters for the weight
|
| 76 |
+
:return: dictionary mapping compressed parameter names to shape and dtype
|
| 77 |
+
"""
|
| 78 |
+
raise NotImplementedError()
|
| 79 |
+
|
| 80 |
+
@abstractmethod
|
| 81 |
+
def compress(
|
| 82 |
+
self,
|
| 83 |
+
model_state: Dict[str, Tensor],
|
| 84 |
+
**kwargs,
|
| 85 |
+
) -> Dict[str, Tensor]:
|
| 86 |
+
"""
|
| 87 |
+
Compresses a dense state dict
|
| 88 |
+
|
| 89 |
+
:param model_state: state dict of uncompressed model
|
| 90 |
+
:param kwargs: additional arguments for compression
|
| 91 |
+
:return: compressed state dict
|
| 92 |
+
"""
|
| 93 |
+
raise NotImplementedError()
|
| 94 |
+
|
| 95 |
+
@abstractmethod
|
| 96 |
+
def decompress(
|
| 97 |
+
self,
|
| 98 |
+
path_to_model_or_tensors: str,
|
| 99 |
+
device: str = "cpu",
|
| 100 |
+
**kwargs,
|
| 101 |
+
) -> Generator[Tuple[str, Tensor], None, None]:
|
| 102 |
+
"""
|
| 103 |
+
Reads a compressed state dict located at path_to_model_or_tensors
|
| 104 |
+
and returns a generator for sequentially decompressing back to a
|
| 105 |
+
dense state dict
|
| 106 |
+
|
| 107 |
+
:param path_to_model_or_tensors: path to compressed safetensors model (directory
|
| 108 |
+
with one or more safetensors files) or compressed tensors file
|
| 109 |
+
:param names_to_scheme: quantization args for each quantized weight
|
| 110 |
+
:param device: optional device to load intermediate weights into
|
| 111 |
+
:return: compressed state dict
|
| 112 |
+
"""
|
| 113 |
+
raise NotImplementedError()
|
| 114 |
+
|
| 115 |
+
def compress_module(self, module: Module) -> Optional[Dict[str, torch.Tensor]]:
|
| 116 |
+
"""
|
| 117 |
+
Compresses a single quantized leaf PyTorch module. If the module is not
|
| 118 |
+
quantized, this function has no effect.
|
| 119 |
+
|
| 120 |
+
:param module: PyTorch module to compress
|
| 121 |
+
:return: dictionary of compressed weight data, or None if module is not
|
| 122 |
+
quantized
|
| 123 |
+
"""
|
| 124 |
+
if not hasattr(module, "quantization_scheme"):
|
| 125 |
+
return None # module is not quantized
|
| 126 |
+
quantization_scheme = module.quantization_scheme
|
| 127 |
+
if not hasattr(quantization_scheme, "weights"):
|
| 128 |
+
return None # weights are not quantized
|
| 129 |
+
|
| 130 |
+
quantization_args = quantization_scheme.weights
|
| 131 |
+
weight = getattr(module, "weight", None)
|
| 132 |
+
weight_scale = getattr(module, "weight_scale", None)
|
| 133 |
+
weight_zero_point = getattr(module, "weight_zero_point", None)
|
| 134 |
+
|
| 135 |
+
return self.compress_weight(
|
| 136 |
+
weight=weight,
|
| 137 |
+
scale=weight_scale,
|
| 138 |
+
zero_point=weight_zero_point,
|
| 139 |
+
quantization_args=quantization_args,
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
def compress_weight(
|
| 143 |
+
self,
|
| 144 |
+
weight: Tensor,
|
| 145 |
+
**kwargs,
|
| 146 |
+
) -> Dict[str, torch.Tensor]:
|
| 147 |
+
"""
|
| 148 |
+
Compresses a single uncompressed weight
|
| 149 |
+
|
| 150 |
+
:param weight: uncompressed weight tensor
|
| 151 |
+
:param kwargs: additional arguments for compression
|
| 152 |
+
"""
|
| 153 |
+
raise NotImplementedError()
|
| 154 |
+
|
| 155 |
+
def decompress_module(self, module: Module):
|
| 156 |
+
"""
|
| 157 |
+
Decompresses a single compressed leaf PyTorch module. If the module is not
|
| 158 |
+
quantized, this function has no effect.
|
| 159 |
+
|
| 160 |
+
:param module: PyTorch module to decompress
|
| 161 |
+
:return: tensor of the decompressed weight, or None if module is not quantized
|
| 162 |
+
"""
|
| 163 |
+
if not hasattr(module, "quantization_scheme"):
|
| 164 |
+
return None # module is not quantized
|
| 165 |
+
quantization_scheme = module.quantization_scheme
|
| 166 |
+
if not hasattr(quantization_scheme, "weights"):
|
| 167 |
+
return None # weights are not quantized
|
| 168 |
+
|
| 169 |
+
quantization_args = quantization_scheme.weights
|
| 170 |
+
compressed_data = {}
|
| 171 |
+
for name, parameter in module.named_parameters():
|
| 172 |
+
compressed_data[name] = parameter
|
| 173 |
+
|
| 174 |
+
return self.decompress_weight(
|
| 175 |
+
compressed_data=compressed_data, quantization_args=quantization_args
|
| 176 |
+
)
|
| 177 |
+
|
| 178 |
+
def decompress_weight(
|
| 179 |
+
self, compressed_data: Dict[str, Tensor], **kwargs
|
| 180 |
+
) -> torch.Tensor:
|
| 181 |
+
"""
|
| 182 |
+
Decompresses a single compressed weight
|
| 183 |
+
|
| 184 |
+
:param compressed_data: dictionary of data needed for decompression
|
| 185 |
+
:param kwargs: additional arguments for decompression
|
| 186 |
+
:return: tensor of the decompressed weight
|
| 187 |
+
"""
|
| 188 |
+
raise NotImplementedError()
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/helpers.py
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import Dict, Generator, Optional, Tuple, Union
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from compressed_tensors.compressors import BaseCompressor
|
| 20 |
+
from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
|
| 21 |
+
from compressed_tensors.utils.safetensors_load import get_weight_mappings
|
| 22 |
+
from safetensors import safe_open
|
| 23 |
+
from safetensors.torch import save_file
|
| 24 |
+
from torch import Tensor
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
__all__ = [
|
| 28 |
+
"load_compressed",
|
| 29 |
+
"save_compressed",
|
| 30 |
+
"save_compressed_model",
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def save_compressed(
    tensors: Dict[str, Tensor],
    save_path: Union[str, Path],
    compression_format: Optional[CompressionFormat] = None,
):
    """
    Compress tensors with the given format and save them to disk.

    If no ``compression_format`` is specified the tensors are saved
    uncompressed, using the ``dense`` format.

    :param tensors: dictionary of tensors to compress
    :param save_path: path to save compressed tensors
    :param compression_format: compression format used for the tensors;
        defaults to ``CompressionFormat.dense``
    :raises ValueError: if no tensors are provided, or if the requested
        format is not registered with ``BaseCompressor``
    """
    if not tensors:
        raise ValueError("No tensors or empty tensors provided to compress")

    # if no compression_format specified, default to `dense` (no compression)
    compression_format = compression_format or CompressionFormat.dense.value

    # build the valid-format set once instead of scanning both lists twice
    valid_formats = set(
        BaseCompressor.registered_names() + BaseCompressor.registered_aliases()
    )
    if compression_format not in valid_formats:
        raise ValueError(
            f"Unknown compression format: {compression_format}. "
            f"Must be one of {valid_formats}"  # noqa E501
        )

    # compress with the registered compressor, then save the result
    compressor = BaseCompressor.load_from_registry(compression_format)
    compressed_tensors = compressor.compress(tensors)
    save_file(compressed_tensors, save_path)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def load_compressed(
    compressed_tensors: Union[str, Path],
    compression_config: SparsityCompressionConfig = None,
    device: Optional[str] = "cpu",
) -> Generator[Tuple[str, Tensor], None, None]:
    """
    Lazily load (and decompress, if needed) tensors from disk.

    If the tensors were not compressed on disk (no config given, or the
    ``dense`` format), they are yielded as-is.

    :param compressed_tensors: path to compressed tensors.
        This can be a path to a file or a directory containing
        one or multiple safetensor files (if multiple - in the format
        assumed by huggingface)
    :param compression_config: compression config to use for decompressing tensors.
    :param device: device to move tensors to. If None, tensors are loaded on CPU.
    :return: a generator that yields the name and tensor of each decompressed tensor
    :raises ValueError: if the path is missing or does not exist
    """
    if compressed_tensors is None or not Path(compressed_tensors).exists():
        raise ValueError("No compressed tensors provided to load")

    if (
        compression_config is None
        or compression_config.format == CompressionFormat.dense.value
    ):
        # no compression_config (or explicit `dense` format): the tensors
        # are not compressed on disk, so stream them straight through
        weight_mappings = get_weight_mappings(compressed_tensors)
        for weight_name, file_with_weight_name in weight_mappings.items():
            with safe_open(file_with_weight_name, framework="pt", device=device) as f:
                yield weight_name, f.get_tensor(weight_name)
    else:
        # decompress via the compressor registered for this format
        compressor = BaseCompressor.load_from_registry(
            compression_config.format, config=compression_config
        )
        yield from compressor.decompress(compressed_tensors, device=device)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def save_compressed_model(
    model: torch.nn.Module,
    filename: str,
    compression_format: Optional[CompressionFormat] = None,
    force_contiguous: bool = True,
):
    """
    Wrapper around safetensors `save_model` helper function, which allows for
    saving compressed model to disk.

    Note: The model is assumed to have a
    state_dict with unique entries

    :param model: model to save on disk
    :param filename: filename location to save the file
    :param compression_format: compression format used for the model
    :param force_contiguous: forcing the state_dict to be saved as contiguous tensors
    """
    state_dict = model.state_dict()
    if force_contiguous:
        # safetensors requires contiguous tensors; copy any that are not
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}
    try:
        save_compressed(state_dict, filename, compression_format=compression_format)
    except ValueError as e:
        msg = str(e)
        msg += " Or use save_compressed_model(..., force_contiguous=True), read the docs for potential caveats."  # noqa E501
        # chain explicitly so the original failure is preserved as the cause
        raise ValueError(msg) from e
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# flake8: noqa
|
| 15 |
+
|
| 16 |
+
from .base import *
|
| 17 |
+
from .naive_quantized import *
|
| 18 |
+
from .pack_quantized import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (329 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/base.cpython-311.pyc
ADDED
|
Binary file (8.51 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/naive_quantized.cpython-311.pyc
ADDED
|
Binary file (5.77 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/pack_quantized.cpython-311.pyc
ADDED
|
Binary file (9.49 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/base.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import Any, Dict, Generator, Tuple, Union
|
| 18 |
+
|
| 19 |
+
import torch
|
| 20 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 21 |
+
from compressed_tensors.quantization import QuantizationArgs
|
| 22 |
+
from compressed_tensors.utils import (
|
| 23 |
+
get_nested_mappings_from_state_dict,
|
| 24 |
+
get_nested_weight_mappings,
|
| 25 |
+
merge_names,
|
| 26 |
+
)
|
| 27 |
+
from safetensors import safe_open
|
| 28 |
+
from torch import Tensor
|
| 29 |
+
from tqdm import tqdm
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
| 33 |
+
|
| 34 |
+
__all__ = ["BaseQuantizationCompressor"]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class BaseQuantizationCompressor(BaseCompressor):
    """
    Base class representing a quant compression algorithm. Each child class should
    implement compression_param_info, compress_weight and decompress_weight.

    Compressors support compressing/decompressing a full module state dict or a single
    quantized PyTorch leaf module.

    Model Load Lifecycle (run_compressed=False):
        - ModelCompressor.decompress()
            - apply_quantization_config()
            - BaseQuantizationCompressor.decompress()
                - BaseQuantizationCompressor.decompress_weight()

    Model Save Lifecycle:
        - ModelCompressor.compress()
            - BaseQuantizationCompressor.compress()
                - BaseQuantizationCompressor.compress_weight()

    Module Lifecycle (run_compressed=True):
        - apply_quantization_config()
        - compressed_module = CompressedLinear(module)
            - initialize_module_for_quantization()
            - BaseQuantizationCompressor.compression_param_info()
            - register_parameters()
        - compressed_module.forward()
            - compressed_module.decompress()

    :param config: config specifying compression parameters
    """

    def compress(
        self,
        model_state: Dict[str, Tensor],
        names_to_scheme: Dict[str, QuantizationArgs],
        **kwargs,
    ) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict.

        For every ``*.weight`` entry that has a matching ``*_scale`` tensor, the
        weight is compressed via :meth:`compress_weight` and the compressed
        parameters are stored under the same layer prefix. All-zero
        ``*zero_point`` tensors and ``*g_idx`` tensors containing values <= -1
        are omitted from the output; every other entry is passed through on CPU.

        :param model_state: state dict of uncompressed model
        :param names_to_scheme: quantization args for each quantized weight, needed for
            quantize function to calculate bit depth
        :return: compressed state dict
        """
        compressed_dict = {}
        weight_suffix = ".weight"
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )

        for name, value in tqdm(model_state.items(), desc="Quantized Compression"):
            if name.endswith(weight_suffix):
                # layer prefix, e.g. "model.layers.0.self_attn.q_proj"
                prefix = name[: -(len(weight_suffix))]
                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
                if scale is not None:
                    # weight is quantized, compress it
                    quant_args = names_to_scheme[prefix]
                    compressed_data = self.compress_weight(
                        weight=value,
                        scale=scale,
                        zero_point=zp,
                        g_idx=g_idx,
                        quantization_args=quant_args,
                        device="cpu",
                    )
                    # NOTE(review): the inner `value` shadows the outer loop
                    # variable; harmless here because the outer `value` is not
                    # read again within this iteration
                    for key, value in compressed_data.items():
                        compressed_dict[merge_names(prefix, key)] = value
                else:
                    # unquantized weight (no scale found): pass through on CPU
                    compressed_dict[name] = value.to("cpu")
            elif name.endswith("zero_point") and torch.all(value == 0):
                # all-zero zero points carry no information; drop them
                continue
            elif name.endswith("g_idx") and torch.any(value <= -1):
                # g_idx containing values <= -1 is dropped rather than saved
                continue
            else:
                compressed_dict[name] = value.to("cpu")

        return compressed_dict

    def decompress(
        self,
        path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
        names_to_scheme: Dict[str, QuantizationArgs],
        device: str = "cpu",
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a compressed state dict located at path_to_model_or_tensors
        and returns a generator for sequentially decompressing back to a
        dense state dict.

        Dispatches on the input type: str/Path inputs are read from disk,
        anything else is treated as an in-memory state dict.

        :param path_to_model_or_tensors: path to compressed safetensors model (directory
            with one or more safetensors files) or compressed tensors file
        :param names_to_scheme: quantization args for each quantized weight
        :param device: optional device to load intermediate weights into
        :return: generator of (name, decompressed weight) pairs
        """
        if isinstance(path_to_model_or_tensors, (str, Path)):
            yield from self._decompress_from_path(
                path_to_model_or_tensors, names_to_scheme, device
            )

        else:
            yield from self._decompress_from_state_dict(
                path_to_model_or_tensors, names_to_scheme
            )

    def _decompress_from_path(self, path_to_model, names_to_scheme, device):
        # Group on-disk parameter files by layer, then decompress each layer
        # that carries a "weight_scale" (i.e. was actually quantized).
        weight_mappings = get_nested_weight_mappings(
            path_to_model, self.COMPRESSION_PARAM_NAMES
        )
        for weight_name in weight_mappings.keys():
            weight_data = {}
            for param_name, safe_path in weight_mappings[weight_name].items():
                full_name = merge_names(weight_name, param_name)
                with safe_open(safe_path, framework="pt", device=device) as f:
                    weight_data[param_name] = f.get_tensor(full_name)
            if "weight_scale" in weight_data:
                quant_args = names_to_scheme[weight_name]
                decompressed = self.decompress_weight(
                    compressed_data=weight_data, quantization_args=quant_args
                )
                yield merge_names(weight_name, "weight"), decompressed

    def _decompress_from_state_dict(self, state_dict, names_to_scheme):
        # Same as _decompress_from_path, but the parameters are already
        # in memory — no safetensors file reads needed.
        weight_mappings = get_nested_mappings_from_state_dict(
            state_dict, self.COMPRESSION_PARAM_NAMES
        )
        for weight_name in weight_mappings.keys():
            weight_data = {}
            for param_name, param_value in weight_mappings[weight_name].items():
                weight_data[param_name] = param_value

            if "weight_scale" in weight_data:
                quant_args = names_to_scheme[weight_name]
                decompressed = self.decompress_weight(
                    compressed_data=weight_data, quantization_args=quant_args
                )
                yield merge_names(weight_name, "weight"), decompressed
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from typing import Dict, Optional, Tuple
|
| 16 |
+
|
| 17 |
+
import torch
|
| 18 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 19 |
+
from compressed_tensors.compressors.quantized_compressors.base import (
|
| 20 |
+
BaseQuantizationCompressor,
|
| 21 |
+
)
|
| 22 |
+
from compressed_tensors.config import CompressionFormat
|
| 23 |
+
from compressed_tensors.quantization import QuantizationArgs
|
| 24 |
+
from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
|
| 25 |
+
from compressed_tensors.quantization.utils import can_quantize
|
| 26 |
+
from torch import Tensor
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
__all__ = [
|
| 30 |
+
"NaiveQuantizationCompressor",
|
| 31 |
+
"IntQuantizationCompressor",
|
| 32 |
+
"FloatQuantizationCompressor",
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@BaseCompressor.register(name=CompressionFormat.naive_quantized.value)
class NaiveQuantizationCompressor(BaseQuantizationCompressor):
    """
    Naive compression for quantized models: each quantized layer's weight is
    stored directly in the closest PyTorch dtype matching the bit width given
    by the layer's QuantizationArgs, with no bit packing.
    """

    COMPRESSION_PARAM_NAMES = [
        "weight",
        "weight_scale",
        "weight_zero_point",
        "weight_g_idx",
    ]

    def compression_param_info(
        self,
        weight_shape: torch.Size,
        quantization_args: Optional[QuantizationArgs] = None,
    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
        """
        Describe the compressed parameters produced by this compressor.

        :param weight_shape: uncompressed weight shape
        :param quantization_args: quantization parameters for the weight
        :return: dictionary mapping compressed parameter names to shape and dtype
        """
        # shape is unchanged; only the storage dtype narrows
        return {"weight": (weight_shape, quantization_args.pytorch_dtype())}

    def compress_weight(
        self,
        weight: Tensor,
        scale: Tensor,
        quantization_args: QuantizationArgs,
        zero_point: Optional[Tensor] = None,
        g_idx: Optional[torch.Tensor] = None,
        device: Optional[torch.device] = None,
    ) -> Dict[str, torch.Tensor]:
        """
        Compress a single uncompressed weight by quantizing it in place.

        :param weight: uncompressed weight tensor
        :param scale: quantization scale for weight
        :param quantization_args: quantization parameters for weight
        :param zero_point: quantization zero point for weight
        :param g_idx: optional mapping from column index to group index
        :param device: optional device to move compressed output to
        :return: dictionary of compressed weight data
        """
        if can_quantize(weight, quantization_args):
            result = quantize(
                x=weight,
                scale=scale,
                zero_point=zero_point,
                g_idx=g_idx,
                args=quantization_args,
                dtype=quantization_args.pytorch_dtype(),
            )
        else:
            # already at (or below) the target bit width — store unchanged
            result = weight

        return {"weight": result if device is None else result.to(device)}

    def decompress_weight(
        self,
        compressed_data: Dict[str, Tensor],
        quantization_args: Optional[QuantizationArgs] = None,
    ) -> torch.Tensor:
        """
        Decompress a single compressed weight back to a dense tensor.

        :param compressed_data: dictionary of data needed for decompression
        :param quantization_args: quantization parameters for the weight
        :return: tensor of the decompressed weight
        """
        return dequantize(
            x_q=compressed_data["weight"],
            scale=compressed_data["weight_scale"],
            zero_point=compressed_data.get("weight_zero_point", None),
            g_idx=compressed_data.get("weight_g_idx", None),
        )
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
@BaseCompressor.register(name=CompressionFormat.int_quantized.value)
class IntQuantizationCompressor(NaiveQuantizationCompressor):
    """Integer-quantized alias of :class:`NaiveQuantizationCompressor`."""
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
@BaseCompressor.register(name=CompressionFormat.float_quantized.value)
class FloatQuantizationCompressor(NaiveQuantizationCompressor):
    """Floating-point-quantized alias of :class:`NaiveQuantizationCompressor`."""
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
import math
|
| 15 |
+
from typing import Dict, Optional, Tuple
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
import torch
|
| 19 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 20 |
+
from compressed_tensors.compressors.quantized_compressors.base import (
|
| 21 |
+
BaseQuantizationCompressor,
|
| 22 |
+
)
|
| 23 |
+
from compressed_tensors.config import CompressionFormat
|
| 24 |
+
from compressed_tensors.quantization import QuantizationArgs
|
| 25 |
+
from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
|
| 26 |
+
from compressed_tensors.quantization.utils import can_quantize
|
| 27 |
+
from torch import Tensor
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
__all__ = ["PackedQuantizationCompressor", "pack_to_int32", "unpack_from_int32"]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@BaseCompressor.register(name=CompressionFormat.pack_quantized.value)
class PackedQuantizationCompressor(BaseQuantizationCompressor):
    """
    Compresses a quantized model by packing every eight 4-bit weights into an int32
    (more generally, 32 // num_bits values per int32 word). The original
    (unpadded) weight shape is stored alongside the packed data so it can be
    recovered on decompression.
    """

    COMPRESSION_PARAM_NAMES = [
        "weight_packed",
        "weight_scale",
        "weight_zero_point",
        "weight_g_idx",
        "weight_shape",
    ]

    def compression_param_info(
        self,
        weight_shape: torch.Size,
        quantization_args: Optional[QuantizationArgs] = None,
    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
        """
        Creates a dictionary of expected shapes and dtypes for each compression
        parameter used by the compressor

        :param weight_shape: uncompressed weight shape (assumed 2-D)
        :param quantization_args: quantization parameters for the weight
            (must be provided; num_bits determines the packing density)
        :return: dictionary mapping compressed parameter names to shape and dtype
        """
        # number of quantized values that fit into one 32-bit word
        pack_factor = 32 // quantization_args.num_bits
        # columns are padded up to a whole number of words
        packed_size = math.ceil(weight_shape[1] / pack_factor)
        return {
            "weight_packed": (torch.Size((weight_shape[0], packed_size)), torch.int32),
            "weight_shape": (torch.Size((2,)), torch.int32),
        }

    def compress_weight(
        self,
        weight: Tensor,
        scale: Tensor,
        quantization_args: QuantizationArgs,
        zero_point: Optional[Tensor] = None,
        g_idx: Optional[torch.Tensor] = None,
        device: Optional[torch.device] = None,
    ) -> Dict[str, torch.Tensor]:
        """
        Compresses a single uncompressed weight: quantize to int8 (when
        possible), then bit-pack into int32 words.

        :param weight: uncompressed weight tensor
        :param scale: quantization scale for weight
        :param quantization_args: quantization parameters for weight
        :param zero_point: quantization zero point for weight
        :param g_idx: optional mapping from column index to group index
        :param device: optional device to move compressed output to
        :return: dictionary of compressed weight data
        """
        compressed_dict = {}
        if can_quantize(weight, quantization_args):
            quantized_weight = quantize(
                x=weight,
                scale=scale,
                zero_point=zero_point,
                g_idx=g_idx,
                args=quantization_args,
                dtype=torch.int8,
            )
        else:
            # weight is already quantized; pack it as-is
            quantized_weight = weight

        packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
        # record the pre-packing shape so padding can be stripped on decompress
        weight_shape = torch.tensor(weight.shape)
        if device is not None:
            packed_weight = packed_weight.to(device)
            weight_shape = weight_shape.to(device)

        compressed_dict["weight_shape"] = weight_shape
        compressed_dict["weight_packed"] = packed_weight

        return compressed_dict

    def decompress_weight(
        self,
        compressed_data: Dict[str, Tensor],
        quantization_args: Optional[QuantizationArgs] = None,
    ) -> torch.Tensor:
        """
        Decompresses a single compressed weight: unpack the int32 words back
        to int8 values, then dequantize to a dense tensor.

        :param compressed_data: dictionary of data needed for decompression
            (expects "weight_packed", "weight_scale", "weight_shape", and
            optionally "weight_zero_point" / "weight_g_idx")
        :param quantization_args: quantization parameters for the weight
            (must be provided; num_bits drives the unpacking)
        :return: tensor of the decompressed weight
        """
        weight = compressed_data["weight_packed"]
        scale = compressed_data["weight_scale"]
        zero_point = compressed_data.get("weight_zero_point", None)
        g_idx = compressed_data.get("weight_g_idx", None)
        original_shape = torch.Size(compressed_data["weight_shape"])
        num_bits = quantization_args.num_bits
        unpacked = unpack_from_int32(weight, num_bits, original_shape)
        decompressed_weight = dequantize(
            x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx
        )

        return decompressed_weight
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
    """
    Packs a 2-D tensor of int8 quantized weights into int32 words, padding
    the columns so every word is fully populated.

    Word ``k`` of a row holds the ``32 // num_bits`` consecutive input
    columns starting at ``k * (32 // num_bits)``, with the ``i``-th of those
    stored at bit offset ``num_bits * i``.

    :param value: tensor to pack
    :param num_bits: number of bits used to store underlying data
    :returns: packed int32 tensor
    """
    if value.dtype is not torch.int8:
        raise ValueError("Tensor must be quantized to torch.int8 before packing")
    if num_bits > 8:
        raise ValueError("Packing is only supported for less than 8 bits")

    # shift signed values into the unsigned range [0, 2**num_bits)
    bias = 2 ** (num_bits - 1)
    unsigned = (value + bias).to(torch.uint8).cpu().numpy().astype(np.uint32)

    per_word = 32 // num_bits  # values per 32-bit word
    rows, cols = unsigned.shape
    n_words = math.ceil(cols / per_word)

    # right-pad columns to a whole number of words
    pad_cols = n_words * per_word - cols
    unsigned = np.pad(unsigned, pad_width=[(0, 0), (0, pad_cols)], constant_values=0)

    # OR each strided column slice into its bit slot
    packed = np.zeros((rows, n_words), dtype=np.uint32)
    for slot in range(per_word):
        packed |= unsigned[:, slot::per_word] << (num_bits * slot)

    # reinterpret the words as signed int32 for storage
    return torch.from_numpy(np.ascontiguousarray(packed).view(np.int32))
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def unpack_from_int32(
    value: torch.Tensor, num_bits: int, shape: torch.Size
) -> torch.Tensor:
    """
    Unpacks a tensor of packed int32 weights into individual int8s, maintaining
    their original bit range.

    Inverse of ``pack_to_int32``: each int32 holds ``32 // num_bits`` values;
    padding introduced during packing is stripped using ``shape``.

    :param value: tensor to unpack
    :param num_bits: number of bits to unpack each data point into, at most 8
    :param shape: shape to unpack into, used to remove padding
    :returns: unpacked int8 tensor
    :raises ValueError: if value is not int32 or num_bits exceeds 8
    """
    if value.dtype is not torch.int32:
        raise ValueError(
            f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
        )

    if num_bits > 8:
        # message previously said "less than 8 bits" although exactly 8 is allowed
        raise ValueError("Unpacking is only supported for 8 bits or fewer")

    pack_factor = 32 // num_bits

    # unpack: slice each num_bits-wide field out of every packed word
    mask = pow(2, num_bits) - 1
    unpacked = torch.zeros(
        (value.shape[0], value.shape[1] * pack_factor),
        device=value.device,
        dtype=torch.int32,
    )
    for i in range(pack_factor):
        unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask

    # remove padding added when the original row length was not a multiple
    # of the pack factor
    original_row_size = int(shape[1])
    unpacked = unpacked[:, :original_row_size]

    # bits are packed in unsigned format, reformat to signed
    # update the value range from unsigned to signed
    offset = pow(2, num_bits) // 2
    unpacked = (unpacked - offset).to(torch.int8)

    return unpacked
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# flake8: noqa
|
| 15 |
+
|
| 16 |
+
from .base import *
|
| 17 |
+
from .dense import *
|
| 18 |
+
from .sparse_24_bitmask import *
|
| 19 |
+
from .sparse_bitmask import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (355 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/base.cpython-311.pyc
ADDED
|
Binary file (7.39 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/dense.cpython-311.pyc
ADDED
|
Binary file (1.67 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_24_bitmask.cpython-311.pyc
ADDED
|
Binary file (11.8 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_bitmask.cpython-311.pyc
ADDED
|
Binary file (7.87 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/base.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
from typing import Dict, Generator, Optional, Set, Tuple
|
| 17 |
+
|
| 18 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 19 |
+
from compressed_tensors.utils import get_nested_weight_mappings, merge_names
|
| 20 |
+
from safetensors import safe_open
|
| 21 |
+
from torch import Tensor
|
| 22 |
+
from tqdm import tqdm
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
__all__ = ["BaseSparseCompressor"]
|
| 26 |
+
|
| 27 |
+
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class BaseSparseCompressor(BaseCompressor):
    """
    Base class representing a sparse compression algorithm. Each child class should
    implement compression_param_info, compress_weight and decompress_weight; child
    classes should also define COMPRESSION_PARAM_NAMES.

    Compressors support compressing/decompressing a full module state dict or a single
    quantized PyTorch leaf module.

    Model Load Lifecycle (run_compressed=False):
        - ModelCompressor.decompress()
            - apply_quantization_config()
            - BaseSparseCompressor.decompress()
                - BaseSparseCompressor.decompress_weight()

    Model Save Lifecycle:
        - ModelCompressor.compress()
            - BaseSparseCompressor.compress()
                - BaseSparseCompressor.compress_weight()

    Module Lifecycle (run_compressed=True):
        - apply_quantization_config()
        - compressed_module = CompressedLinear(module)
            - initialize_module_for_quantization()
            - BaseSparseCompressor.compression_param_info()
            - register_parameters()
        - compressed_module.forward()
            - compressed_module.decompress()


    :param config: config specifying compression parameters
    """

    def compress(
        self,
        model_state: Dict[str, Tensor],
        compression_targets: Optional[Set[str]] = None,
    ) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict using bitmask compression

        :param model_state: state dict of uncompressed model
        :param compression_targets: optional set of layer prefixes to compress,
            otherwise compress all layers (for backwards compatibility)
        :return: compressed state dict
        """
        compressed_dict = {}
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )
        for name, value in tqdm(model_state.items(), desc="Compressing model"):
            if not self.should_compress(name, compression_targets):
                # non-targeted parameters pass through uncompressed
                compressed_dict[name] = value
                continue
            prefix = name
            if prefix.endswith(".weight"):
                prefix = prefix[: -(len(".weight"))]

            compression_data = self.compress_weight(prefix, value)
            for key in compression_data:
                if key in compressed_dict:
                    # Logger.warn() is a deprecated alias; use warning()
                    _LOGGER.warning(
                        f"Expected all compressed state_dict keys to be unique, but "
                        f"found an existing entry for {key}. The existing entry will "
                        "be replaced."
                    )

            compressed_dict.update(compression_data)

        return compressed_dict

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a bitmask compressed state dict located
        at path_to_model_or_tensors and returns a generator
        for sequentially decompressing back to a dense state dict

        :param path_to_model_or_tensors: path to compressed safetensors model
            (directory with one or more safetensors files) or compressed
            tensors file
        :param device: device to load decompressed weights onto
        :return: iterator for generating decompressed weights
        """
        weight_mappings, ignored_params = get_nested_weight_mappings(
            path_to_model_or_tensors,
            self.COMPRESSION_PARAM_NAMES,
            return_unmatched_params=True,
        )
        for weight_name in weight_mappings:
            # collect all compression parameters for this weight, then decompress
            weight_data = {}
            for param_name, safe_path in weight_mappings[weight_name].items():
                full_name = merge_names(weight_name, param_name)
                with safe_open(safe_path, framework="pt", device=device) as f:
                    weight_data[param_name] = f.get_tensor(full_name)
            decompressed = self.decompress_weight(weight_data)
            yield merge_names(weight_name, "weight"), decompressed

        # parameters that matched no compression param are yielded unchanged
        for ignored_param_name, safe_path in ignored_params.items():
            with safe_open(safe_path, framework="pt", device=device) as f:
                value = f.get_tensor(ignored_param_name)
            yield ignored_param_name, value

    @staticmethod
    def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
        """
        Check if a parameter should be compressed.
        Currently, this only returns True for weight parameters.

        :param name: name of the parameter
        :param expanded_targets: set of layer prefixes to compress
        :return: whether or not the parameter should be compressed
        """
        if expanded_targets is None:
            return name.endswith(".weight")

        return (
            name.endswith(".weight") and name[: -(len(".weight"))] in expanded_targets
        )
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/dense.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from typing import Dict, Generator, Tuple
|
| 16 |
+
|
| 17 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 18 |
+
from compressed_tensors.config import CompressionFormat
|
| 19 |
+
from torch import Tensor
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@BaseCompressor.register(name=CompressionFormat.dense.value)
class DenseCompressor(BaseCompressor):
    """
    Identity compressor for dense models, returns the original state_dict
    """

    def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
        # dense format stores weights as-is; nothing to transform
        return model_state

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        # nothing was compressed, so there is nothing to reconstruct
        return iter(())
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from dataclasses import dataclass
|
| 16 |
+
from typing import Dict, List, Tuple, Union
|
| 17 |
+
|
| 18 |
+
import torch
|
| 19 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 20 |
+
from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
|
| 21 |
+
from compressed_tensors.config import CompressionFormat, SparsityStructure
|
| 22 |
+
from compressed_tensors.quantization import FP8_DTYPE
|
| 23 |
+
from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
|
| 24 |
+
from torch import Tensor
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
__all__ = [
|
| 28 |
+
"Sparse24BitMaskCompressor",
|
| 29 |
+
"Sparse24BitMaskTensor",
|
| 30 |
+
"sparse24_bitmask_compress",
|
| 31 |
+
"sparse24_bitmask_decompress",
|
| 32 |
+
"get_24_bytemasks",
|
| 33 |
+
]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@BaseCompressor.register(name=CompressionFormat.sparse_24_bitmask.value)
class Sparse24BitMaskCompressor(BaseSparseCompressor):
    """
    Compression for sparse models using bitmasks. Non-zero weights are stored in a 2d
    values tensor, with their locations stored in a 2d bitmask
    """

    COMPRESSION_PARAM_NAMES = [
        "shape",
        "compressed",
        "bitmask",
    ]

    def compress_weight(self, name, value):
        # delegate the 2:4 bitmask packing to the tensor wrapper, then
        # serialize its parts on CPU under the weight's prefix
        compressed = Sparse24BitMaskTensor.from_dense(
            value, self.config.sparsity_structure
        )
        return compressed.dict(name_prefix=name, device="cpu")

    def decompress_weight(self, weight_data):
        # rebuild the wrapper from its stored params and expand back to dense
        return Sparse24BitMaskTensor.from_compressed_data(**weight_data).decompress()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
@dataclass
class Sparse24BitMaskTensor:
    """
    Owns compression and decompression for a single 2:4 sparse
    bitmask compressed tensor.

    :param shape: shape of dense tensor
    :param compressed: 2d tensor of non-zero values
    :param bitmask: 2d bitmask of non-zero values
    """

    shape: List[int]
    compressed: Tensor
    bitmask: Tensor

    @staticmethod
    def from_dense(
        tensor: Tensor,
        sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
    ) -> "Sparse24BitMaskTensor":
        """
        :param tensor: dense tensor to compress
        :param sparsity_structure: structure of sparsity to apply; only 2:4 is
            supported by the underlying compression routine
        :return: instantiated compressed tensor
        """
        shape = list(tensor.shape)
        compressed, bitmask = sparse24_bitmask_compress(
            tensor.cpu(), sparsity_structure=sparsity_structure
        )
        return Sparse24BitMaskTensor(
            shape=shape,
            compressed=compressed,
            bitmask=bitmask,
        )

    @staticmethod
    def from_compressed_data(
        shape: Union[List[int], Tensor], compressed: Tensor, bitmask: Tensor
    ) -> "Sparse24BitMaskTensor":
        """
        :param shape: shape of the dense tensor (can be a list or a tensor)
        :param compressed: 2d tensor of non-zero values
        :param bitmask: 2d bitmask of non-zero values
        :return: instantiated Sparse24BitMaskTensor
        """
        # normalize shape to a flat python list, whether given as list or tensor
        if isinstance(shape, list):
            shape = torch.tensor(shape)
        if isinstance(shape, torch.Tensor):
            shape = shape.flatten().tolist()
        return Sparse24BitMaskTensor(
            shape=shape, compressed=compressed, bitmask=bitmask
        )

    def decompress(self) -> Tensor:
        """
        :return: reconstructed dense tensor
        """
        return sparse24_bitmask_decompress(self.compressed, self.bitmask, self.shape)

    def curr_memory_size_bytes(self) -> int:
        """
        :return: size in bytes required to store compressed tensor on disk
        """

        def sizeof_tensor(a: Tensor) -> int:
            return a.element_size() * a.nelement()

        return sizeof_tensor(self.compressed) + sizeof_tensor(self.bitmask)

    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
        """
        :param name_prefix: name of original tensor to store compressed weight as
        :param device: device to place the serialized tensors on
        :return: dict of compressed data for the stored weight
        """
        if name_prefix.endswith(".weight"):
            name_prefix = name_prefix[: -len(".weight")]
        return {
            merge_names(name_prefix, "shape"): torch.tensor(
                self.shape, device=device
            ).reshape(-1, 1),
            merge_names(name_prefix, "compressed"): self.compressed.to(device),
            merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
        }

    def __repr__(self) -> str:
        # fixed: previously reported "BitMaskTensor", a copy-paste from the
        # unstructured bitmask class; report this class's actual name
        return f"{type(self).__name__}(shape={self.shape}, compressed=True)"
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def sparse24_bitmask_compress(
    tensor: Tensor,
    sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
) -> Tuple[Tensor, Tensor]:
    """
    Compresses a dense tensor using bitmask compression.

    Note: the return annotation previously claimed three tensors; this
    function returns exactly two.

    :param tensor: dense 2D tensor to compress
    :param sparsity_structure: structure of sparsity in the tensor; only `2:4`
        is supported
    :return: tuple of (values reshaped to (rows, cols // 2), packed bitmask)
    """
    assert len(tensor.shape) == 2, "Only 2D tensors are supported"
    assert (
        SparsityStructure(sparsity_structure) == SparsityStructure.TWO_FOUR
    ), "Only 2:4 sparsity is supported"

    bytemasks = get_24_bytemasks(tensor=tensor)

    if tensor.dtype == FP8_DTYPE:
        # access raw bytes of the tensor: boolean indexing is routed through
        # an int8 view, and the gathered bytes are reinterpreted as FP8 after
        tensor_view = tensor.view(torch.int8)
        values = tensor_view[bytemasks]
        values = values.view(FP8_DTYPE)
    else:
        values = tensor[bytemasks]

    num_rows, num_cols = tensor.shape
    # exactly half the elements survive 2:4 pruning, so the kept values fill
    # a (rows, cols // 2) matrix
    compressed_values = values.reshape(num_rows, num_cols // 2)
    bitmasks_packed = pack_bitmasks(bytemasks)
    return compressed_values, bitmasks_packed
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
def sparse24_bitmask_decompress(
    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
) -> Tensor:
    """
    Reconstructs a dense tensor from a compressed one

    :param values: 1d tensor of non-zero values
    :param bitmasks: 2d int8 tensor flagging locations of non-zero values in the
        tensors original shape
    :param original_shape: shape of the dense tensor
    :return: decompressed dense tensor
    """
    # expand the packed bitmask back into a boolean mask of the dense shape
    bytemasks_unpacked = unpack_bitmasks(bitmasks, original_shape)

    decompressed_tensor = torch.zeros(original_shape, dtype=values.dtype)
    decompressed_tensor = decompressed_tensor.to(values.device)
    # values was stored as (rows, cols // 2); flatten to scatter by mask
    values = values.flatten()
    if decompressed_tensor.dtype == FP8_DTYPE:
        decompressed_tensor[bytemasks_unpacked] = values
        # NOTE(review): unconditionally moves FP8 results to CUDA; this will
        # raise on CPU-only hosts and ignores values.device -- confirm intent
        decompressed_tensor = decompressed_tensor.cuda()
    else:
        decompressed_tensor[bytemasks_unpacked] = values
    return decompressed_tensor
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def get_24_bytemasks(tensor):
    """
    Generate a 2:4 sparsity mask for the given tensor.

    This function creates a mask where exactly 2 out of every 4 elements are
    preserved based on their magnitudes. The preserved elements are the ones
    with the highest absolute values in each group of 4 elements.

    :param tensor: The input tensor for which the 2:4 sparsity mask is to be created.
        The tensor can be of any shape but its total number of elements
        must be a multiple of 4.
    :return: A boolean tensor of the same shape as the input tensor, where `True`
        indicates the preserved elements and `False` indicates the pruned elements.
    :raises ValueError: If the total number of elements in the tensor is not a
        multiple of 4.
    """
    if tensor.dtype == FP8_DTYPE:
        # abs/topk are routed through an int8 view of the raw FP8 bytes.
        # NOTE(review): abs() of the int8 reinterpretation does not order
        # negative FP8 values by true magnitude -- confirm FP8 selection intent
        tensor = tensor.view(torch.int8)
    original_shape = tensor.shape
    num_elements = tensor.numel()

    if num_elements % 4 != 0:
        raise ValueError("Tensor size must be a multiple of 4 for TWO_FOUR sparsity")

    # group elements in fours and keep the two with the largest magnitude
    reshaped_tensor = tensor.view(-1, 4)
    abs_tensor = reshaped_tensor.abs()
    topk_indices = abs_tensor.topk(2, dim=1).indices
    mask = torch.zeros_like(reshaped_tensor, dtype=torch.bool)
    mask.scatter_(1, topk_indices, True)
    mask = mask.view(original_shape)
    # removed dead code: the original re-viewed the local `tensor` back to its
    # input dtype here, but that rebinding was never read afterwards

    return mask
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
from typing import Dict, List, Tuple, Union
|
| 16 |
+
|
| 17 |
+
import torch
|
| 18 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 19 |
+
from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
|
| 20 |
+
from compressed_tensors.config import CompressionFormat
|
| 21 |
+
from compressed_tensors.quantization import FP8_DTYPE
|
| 22 |
+
from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
|
| 23 |
+
from torch import Tensor
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
__all__ = [
|
| 27 |
+
"BitmaskCompressor",
|
| 28 |
+
"BitmaskTensor",
|
| 29 |
+
"bitmask_compress",
|
| 30 |
+
"bitmask_decompress",
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@BaseCompressor.register(name=CompressionFormat.sparse_bitmask.value)
class BitmaskCompressor(BaseSparseCompressor):
    """
    Compression for sparse models using bitmasks. Non-zero weights are stored in a 1d
    values tensor, with their locations stored in a 2d bitmask
    """

    COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]

    def compress_weight(self, name, value):
        # wrap the dense tensor, then serialize its parts under the weight's prefix
        return BitmaskTensor.from_dense(value).dict(name_prefix=name, device="cpu")

    def decompress_weight(self, weight_data):
        # reconstruct the wrapper from its stored params and expand to dense
        return BitmaskTensor(**weight_data).decompress()
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class BitmaskTensor:
    """
    Owns compression and decompression for a single bitmask compressed tensor.
    Adapted from: https://github.com/mgoin/torch_bitmask/tree/main

    :param shape: shape of dense tensor
    :compressed: flat tensor of non-zero values
    :bitmask: 2d bitmask of non-zero values
    :row_offsets: flat tensor indicating what index in values each dense row starts at
    """

    def __init__(
        self,
        shape: Union[torch.Size, List],
        compressed: Tensor,
        bitmask: Tensor,
        row_offsets: Tensor,
    ):
        # normalize shape to a plain list so it serializes consistently
        self.shape = list(shape)
        self.compressed = compressed
        self.bitmask = bitmask
        self.row_offsets = row_offsets

    @staticmethod
    def from_dense(tensor: Tensor) -> "BitmaskTensor":
        """
        :param tensor: dense tensor to compress
        :return: instantiated compressed tensor
        """
        values, packed_mask, offsets = bitmask_compress(tensor.cpu())
        return BitmaskTensor(
            shape=tensor.shape,
            compressed=values,
            bitmask=packed_mask,
            row_offsets=offsets,
        )

    def decompress(self) -> Tensor:
        """
        :return: reconstructed dense tensor
        """
        return bitmask_decompress(self.compressed, self.bitmask, self.shape)

    def curr_memory_size_bytes(self):
        """
        :return: size in bytes required to store compressed tensor on disk
        """
        parts = (self.compressed, self.bitmask, self.row_offsets)
        return sum(t.element_size() * t.nelement() for t in parts)

    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
        """
        :name_prefix: name of original tensor to store compressed weight as
        :return: dict of compressed data for the stored weight
        """
        shape_tensor = torch.tensor(self.shape, device=device)
        return {
            merge_names(name_prefix, "shape"): shape_tensor,
            merge_names(name_prefix, "compressed"): self.compressed.to(device),
            merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
            merge_names(name_prefix, "row_offsets"): self.row_offsets.to(device),
        }

    def __repr__(self):
        return f"BitmaskTensor(shape={self.shape}, compressed=True)"
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def bitmask_compress(tensor: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Compresses a dense tensor using bitmask compression

    :param tensor: dense tensor to compress
    :return: tuple of compressed data representing tensor
    """
    bytemasks = tensor != 0
    # starting index of each dense row's values within the flat value tensor
    row_counts = bytemasks.sum(dim=-1)
    row_offsets = torch.cumsum(row_counts, 0) - row_counts

    if tensor.dtype == FP8_DTYPE:
        # access the raw bytes of the tensor: gather through an int8 view,
        # then reinterpret the selected bytes as FP8
        values = tensor.view(torch.int8)[bytemasks].view(FP8_DTYPE)
    else:
        values = tensor[bytemasks]

    return values, pack_bitmasks(bytemasks), row_offsets
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def bitmask_decompress(
    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
) -> Tensor:
    """
    Reconstructs a dense tensor from a compressed one

    :param values: 1d tensor of non-zero values
    :param bitmasks: 2d int8 tensor flagging locations of non-zero values in the
        tensors original shape
    :param original_shape: shape of the dense tensor
    :return: decompressed dense tensor
    """
    mask = unpack_bitmasks(bitmasks, original_shape)

    # NOTE(review): the dense tensor is allocated on the default (CPU) device
    # regardless of where `values` lives — confirm callers expect this
    dense = torch.zeros(original_shape, dtype=values.dtype)
    dense[mask] = values

    return dense
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
# flake8: noqa
|
| 15 |
+
|
| 16 |
+
from .marlin_24 import Marlin24Compressor
|
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
import logging
|
| 16 |
+
from typing import Dict, Generator, Tuple
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
import torch
|
| 20 |
+
from compressed_tensors.compressors.base import BaseCompressor
|
| 21 |
+
from compressed_tensors.config import CompressionFormat
|
| 22 |
+
from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
|
| 23 |
+
from compressed_tensors.quantization.lifecycle.forward import quantize
|
| 24 |
+
from compressed_tensors.utils import (
|
| 25 |
+
get_permutations_24,
|
| 26 |
+
is_quantization_param,
|
| 27 |
+
merge_names,
|
| 28 |
+
sparse_semi_structured_from_dense_cutlass,
|
| 29 |
+
tensor_follows_mask_structure,
|
| 30 |
+
)
|
| 31 |
+
from torch import Tensor
|
| 32 |
+
from tqdm import tqdm
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@BaseCompressor.register(name=CompressionFormat.marlin_24.value)
class Marlin24Compressor(BaseCompressor):
    """
    Compresses a quantized model with 2:4 sparsity structure for inference with the
    Marlin24 kernel. Decompression is not implemented for this compressor.
    """

    # parameter-name suffixes this compressor writes for each compressed weight
    COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]

    @staticmethod
    def validate_quant_compatability(
        model_quant_args: Dict[str, QuantizationArgs]
    ) -> bool:
        """
        Checks if every quantized module in the model is compatible with Marlin24
        compression. Quantization must be channel or group strategy with group_size
        of 128. Only symmetric quantization is supported

        :param model_quant_args: dictionary of mapping module names to their
            quantization configuration
        :return: True if all modules are compatible with Marlin24 compression, raises
            a ValueError otherwise
        """
        for name, quant_args in model_quant_args.items():
            strategy = quant_args.strategy
            group_size = quant_args.group_size
            symmetric = quant_args.symmetric
            # NOTE(review): `is not` identity-compares strategy against the
            # enum's .value (a string); this relies on string interning —
            # confirm `!=` comparison would not be safer here
            if (
                strategy is not QuantizationStrategy.GROUP.value
                and strategy is not QuantizationStrategy.CHANNEL.value
            ):
                raise ValueError(
                    f"Marlin24 Compressor is only valid for group and channel "
                    f"quantization strategies, got {strategy} in {name}"
                )

            if group_size is not None and group_size != 128:
                raise ValueError(
                    f"Marlin24 Compressor is only valid for group size 128, "
                    f"got {group_size} in {name}"
                )

            if not symmetric:
                raise ValueError(
                    f"Marlin24 Compressor is only valid for symmetric quantzation, "
                    f"got symmetric={symmetric} in {name}"
                )

        return True

    @staticmethod
    def validate_sparsity_structure(name: str, weight: Tensor) -> bool:
        """
        Checks if a tensor fits the required 2:4 sparsity structure

        :param name: name of the tensor to check
        :param weight: tensor to check for sparsity structure
        :return: True if all rows match the 2:4 sparsity structure, raises
            ValueError otherwise
        """

        if not tensor_follows_mask_structure(weight):
            raise ValueError(
                "Marlin24 Compressor is only compatible with weights that have "
                f"a 2:4 sparsity structure. Found segments in {name} "
                "that do not match the expected structure."
            )

        return True

    def compress(
        self,
        model_state: Dict[str, Tensor],
        names_to_scheme: Dict[str, QuantizationArgs],
        **kwargs,
    ) -> Dict[str, Tensor]:
        """
        Compresses a quantized state_dict with 2:4 sparsity structure for inference
        with the Marlin24 kernel

        :param model_state: state dict of uncompressed model
        :param names_to_scheme: quantization args for each quantized weight, needed for
            quantize function to calculate bit depth
        :return: compressed state dict
        """
        # fail fast if any layer's quantization config is unsupported
        self.validate_quant_compatability(names_to_scheme)

        compressed_dict = {}
        weight_suffix = ".weight"
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )

        for name, value in tqdm(model_state.items(), desc="Compressing model"):
            if name.endswith(weight_suffix):
                prefix = name[: -(len(weight_suffix))]
                # the presence of a weight_scale entry marks a quantized weight
                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                if scale is not None:  # weight is quantized, compress it

                    # Marlin24 kernel requires float16 inputs
                    scale = scale.to(torch.float16)
                    value = value.to(torch.float16)

                    # quantize weight, keeping it as a float16 for now
                    quant_args = names_to_scheme[prefix]
                    value = quantize(
                        x=value, scale=scale, zero_point=zp, args=quant_args
                    )

                    # compress based on sparsity structure
                    self.validate_sparsity_structure(prefix, value)
                    value, meta = compress_weight_24(value)
                    meta = meta.cpu()

                    # Marlin24 kernel expects input dim first
                    value = value.t().contiguous().cpu()
                    scale = scale.t().contiguous().cpu()
                    og_weight_shape = value.shape

                    # Marlin24 kernel expects unsigned values, shift zero-point
                    value += (1 << quant_args.num_bits) // 2

                    # pack quantized weight and scale
                    value = pack_weight_24(value, quant_args)
                    packed_scale = pack_scales_24(scale, quant_args, og_weight_shape)
                    # NOTE(review): resize_ reinterprets meta as
                    # (cols // 2, rows * 2) in place — presumably the meta layout
                    # the Marlin24 kernel expects; confirm against the kernel
                    meta = meta.resize_(meta.shape[1] // 2, meta.shape[0] * 2)

                    # save compressed values
                    compressed_dict[merge_names(prefix, "scale_packed")] = packed_scale
                    compressed_dict[merge_names(prefix, "weight_packed")] = value
                    compressed_dict[merge_names(prefix, "meta")] = meta
                    continue

            if not is_quantization_param(name):
                # export unquantized parameters without modifying
                compressed_dict[name] = value.to("cpu")

        return compressed_dict

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """Not supported — Marlin24 compression is one-way; always raises."""
        raise NotImplementedError(
            "Decompression is not implemented for the Marlin24 Compressor."
        )
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def compress_weight_24(weight: Tensor):
    """
    Compress a 2:4-sparse weight into its dense-values / metadata pair.

    :param weight: dense tensor following a 2:4 sparsity structure
    :return: tuple of (compressed values, sparsity metadata)
    """
    compressed, meta = sparse_semi_structured_from_dense_cutlass(weight.contiguous())
    return compressed.contiguous(), meta
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def marlin_permute_weights(q_w, size_k, size_n, perm, tile):
    """
    Permute a quantized weight matrix into the tiled Marlin layout.

    :param q_w: quantized weight tensor of shape (size_k, size_n)
    :param size_k: input dimension of the weight
    :param size_n: output dimension of the weight
    :param perm: 1d permutation applied to each chunk of ``perm.numel()``
        consecutive elements of the tiled weight
    :param tile: marlin tile size; both dims must be divisible by it
    :return: permuted weight tensor of shape (size_k // tile, size_n * tile)
    """
    assert q_w.shape == (size_k, size_n)
    assert size_k % tile == 0, f"size_k = {size_k}, tile = {tile}"
    # fix: the original failure message mislabeled this value as size_k
    assert size_n % tile == 0, f"size_n = {size_n}, tile = {tile}"

    # Permute weights to 16x64 marlin tiles
    q_w = q_w.reshape((size_k // tile, tile, size_n // tile, tile))
    q_w = q_w.permute((0, 2, 1, 3))
    q_w = q_w.reshape((size_k // tile, size_n * tile))

    # apply the marlin permutation within each perm-sized chunk
    q_w = q_w.reshape((-1, perm.numel()))[:, perm].reshape(q_w.shape)

    return q_w
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def pack_weight_24(
    weight: Tensor,
    quantization_args: QuantizationArgs,
    tile: int = 16,
):
    """
    Pack a quantized 2:4 weight into 32-bit words for the Marlin24 kernel.

    :param weight: quantized weight tensor, input dim first
    :param quantization_args: quantization configuration (supplies num_bits)
    :param tile: marlin tile size used for the layout shuffle
    :return: int32 tensor of packed weights
    """
    size_k, size_n = weight.shape[0], weight.shape[1]
    num_bits = quantization_args.num_bits
    pack_factor = 32 // num_bits  # how many values fit in one 32-bit word

    # Reshuffle to marlin_24 format before packing
    perm, _, _ = get_permutations_24(num_bits)
    shuffled = marlin_permute_weights(weight, size_k, size_n, perm, tile)
    shuffled = shuffled.cpu().numpy().astype(np.uint32)

    # interleave pack_factor strided values into each uint32 word
    packed = np.zeros(
        (shuffled.shape[0], shuffled.shape[1] // pack_factor), dtype=np.uint32
    )
    for slot in range(pack_factor):
        packed |= shuffled[:, slot::pack_factor] << num_bits * slot

    return torch.from_numpy(packed.astype(np.int32))
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
def pack_scales_24(scales, quantization_args, w_shape):
    """
    Permute quantization scales into the Marlin24 layout.

    :param scales: scale tensor, input dim first
    :param quantization_args: quantization configuration for the weight
    :param w_shape: shape of the (transposed) weight the scales belong to
    :return: contiguous tensor of permuted scales with size_n columns
    """
    size_k, size_n = w_shape[0], w_shape[1]

    _, scale_perm_2_4, scale_perm_single_2_4 = get_permutations_24(
        quantization_args.num_bits
    )

    # NOTE(review): strategy is compared against the enum member here while
    # validate_quant_compatability compares against .value — confirm both work
    # for the type actually stored on QuantizationArgs
    grouped = (
        quantization_args.strategy == QuantizationStrategy.GROUP
        and quantization_args.group_size < size_k
    )
    perm = scale_perm_2_4 if grouped else scale_perm_single_2_4  # else: channelwise
    scales = scales.reshape((-1, len(perm)))[:, perm]

    return scales.reshape((-1, size_n)).contiguous()
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# flake8: noqa
|
| 16 |
+
# isort: skip_file
|
| 17 |
+
|
| 18 |
+
from .quant_args import *
|
| 19 |
+
from .quant_config import *
|
| 20 |
+
from .quant_scheme import *
|
| 21 |
+
from .lifecycle import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (340 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_args.cpython-311.pyc
ADDED
|
Binary file (10.6 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_config.cpython-311.pyc
ADDED
|
Binary file (11.2 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_scheme.cpython-311.pyc
ADDED
|
Binary file (5.46 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing,
|
| 10 |
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
# flake8: noqa
|
| 16 |
+
# isort: skip_file
|
| 17 |
+
|
| 18 |
+
from .forward import *
|
| 19 |
+
from .initialize import *
|
| 20 |
+
from .compressed import *
|
| 21 |
+
from .apply import *
|
| 22 |
+
from .helpers import *
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (368 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/apply.cpython-311.pyc
ADDED
|
Binary file (17.9 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/compressed.cpython-311.pyc
ADDED
|
Binary file (1.92 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/forward.cpython-311.pyc
ADDED
|
Binary file (13.3 kB). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/helpers.cpython-311.pyc
ADDED
|
Binary file (764 Bytes). View file
|
|
|
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/initialize.cpython-311.pyc
ADDED
|
Binary file (7.93 kB). View file
|
|
|