koichi12 commited on
Commit
4ac3d46
·
verified ·
1 Parent(s): d33aea4

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .venv/lib/python3.11/site-packages/OpenSSL/SSL.py +0 -0
  2. .venv/lib/python3.11/site-packages/OpenSSL/__init__.py +31 -0
  3. .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/__init__.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/_util.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/debug.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/rand.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/OpenSSL/__pycache__/version.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/OpenSSL/_util.py +124 -0
  9. .venv/lib/python3.11/site-packages/OpenSSL/crypto.py +0 -0
  10. .venv/lib/python3.11/site-packages/OpenSSL/debug.py +40 -0
  11. .venv/lib/python3.11/site-packages/OpenSSL/py.typed +0 -0
  12. .venv/lib/python3.11/site-packages/OpenSSL/rand.py +40 -0
  13. .venv/lib/python3.11/site-packages/OpenSSL/version.py +28 -0
  14. .venv/lib/python3.11/site-packages/compressed_tensors/__init__.py +22 -0
  15. .venv/lib/python3.11/site-packages/compressed_tensors/base.py +20 -0
  16. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/__init__.py +22 -0
  17. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/base.py +188 -0
  18. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/helpers.py +137 -0
  19. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__init__.py +18 -0
  20. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/__init__.cpython-311.pyc +0 -0
  21. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/base.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/naive_quantized.cpython-311.pyc +0 -0
  23. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/pack_quantized.cpython-311.pyc +0 -0
  24. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/base.py +176 -0
  25. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +142 -0
  26. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +213 -0
  27. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__init__.py +19 -0
  28. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/__init__.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/base.cpython-311.pyc +0 -0
  30. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/dense.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_24_bitmask.cpython-311.pyc +0 -0
  32. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_bitmask.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/base.py +148 -0
  34. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/dense.py +34 -0
  35. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +240 -0
  36. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +163 -0
  37. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +16 -0
  38. .venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +251 -0
  39. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__init__.py +21 -0
  40. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/__init__.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_args.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_config.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_scheme.cpython-311.pyc +0 -0
  44. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__init__.py +22 -0
  45. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/__init__.cpython-311.pyc +0 -0
  46. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/apply.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/compressed.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/forward.cpython-311.pyc +0 -0
  49. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/helpers.cpython-311.pyc +0 -0
  50. .venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/initialize.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/OpenSSL/SSL.py ADDED
The diff for this file is too large to render. See raw diff
 
.venv/lib/python3.11/site-packages/OpenSSL/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) AB Strakt
2
+ # See LICENSE for details.
3
+
4
+ """
5
+ pyOpenSSL - A simple wrapper around the OpenSSL library
6
+ """
7
+
8
+ from OpenSSL import SSL, crypto
9
+ from OpenSSL.version import (
10
+ __author__,
11
+ __copyright__,
12
+ __email__,
13
+ __license__,
14
+ __summary__,
15
+ __title__,
16
+ __uri__,
17
+ __version__,
18
+ )
19
+
20
+ __all__ = [
21
+ "SSL",
22
+ "crypto",
23
+ "__author__",
24
+ "__copyright__",
25
+ "__email__",
26
+ "__license__",
27
+ "__summary__",
28
+ "__title__",
29
+ "__uri__",
30
+ "__version__",
31
+ ]
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (716 Bytes). View file
 
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/_util.cpython-311.pyc ADDED
Binary file (5.28 kB). View file
 
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/debug.cpython-311.pyc ADDED
Binary file (1.67 kB). View file
 
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/rand.cpython-311.pyc ADDED
Binary file (1.75 kB). View file
 
.venv/lib/python3.11/site-packages/OpenSSL/__pycache__/version.cpython-311.pyc ADDED
Binary file (712 Bytes). View file
 
.venv/lib/python3.11/site-packages/OpenSSL/_util.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import warnings
4
+ from typing import Any, Callable, NoReturn, Type, Union
5
+
6
+ from cryptography.hazmat.bindings.openssl.binding import Binding
7
+
8
+ StrOrBytesPath = Union[str, bytes, os.PathLike]
9
+
10
+ binding = Binding()
11
+ ffi = binding.ffi
12
+ lib = binding.lib
13
+
14
+
15
+ # This is a special CFFI allocator that does not bother to zero its memory
16
+ # after allocation. This has vastly better performance on large allocations and
17
+ # so should be used whenever we don't need the memory zeroed out.
18
+ no_zero_allocator = ffi.new_allocator(should_clear_after_alloc=False)
19
+
20
+
21
+ def text(charp: Any) -> str:
22
+ """
23
+ Get a native string type representing of the given CFFI ``char*`` object.
24
+
25
+ :param charp: A C-style string represented using CFFI.
26
+
27
+ :return: :class:`str`
28
+ """
29
+ if not charp:
30
+ return ""
31
+ return ffi.string(charp).decode("utf-8")
32
+
33
+
34
+ def exception_from_error_queue(exception_type: Type[Exception]) -> NoReturn:
35
+ """
36
+ Convert an OpenSSL library failure into a Python exception.
37
+
38
+ When a call to the native OpenSSL library fails, this is usually signalled
39
+ by the return value, and an error code is stored in an error queue
40
+ associated with the current thread. The err library provides functions to
41
+ obtain these error codes and textual error messages.
42
+ """
43
+ errors = []
44
+
45
+ while True:
46
+ error = lib.ERR_get_error()
47
+ if error == 0:
48
+ break
49
+ errors.append(
50
+ (
51
+ text(lib.ERR_lib_error_string(error)),
52
+ text(lib.ERR_func_error_string(error)),
53
+ text(lib.ERR_reason_error_string(error)),
54
+ )
55
+ )
56
+
57
+ raise exception_type(errors)
58
+
59
+
60
+ def make_assert(error: Type[Exception]) -> Callable[[bool], Any]:
61
+ """
62
+ Create an assert function that uses :func:`exception_from_error_queue` to
63
+ raise an exception wrapped by *error*.
64
+ """
65
+
66
+ def openssl_assert(ok: bool) -> None:
67
+ """
68
+ If *ok* is not True, retrieve the error from OpenSSL and raise it.
69
+ """
70
+ if ok is not True:
71
+ exception_from_error_queue(error)
72
+
73
+ return openssl_assert
74
+
75
+
76
+ def path_bytes(s: StrOrBytesPath) -> bytes:
77
+ """
78
+ Convert a Python path to a :py:class:`bytes` for the path which can be
79
+ passed into an OpenSSL API accepting a filename.
80
+
81
+ :param s: A path (valid for os.fspath).
82
+
83
+ :return: An instance of :py:class:`bytes`.
84
+ """
85
+ b = os.fspath(s)
86
+
87
+ if isinstance(b, str):
88
+ return b.encode(sys.getfilesystemencoding())
89
+ else:
90
+ return b
91
+
92
+
93
+ def byte_string(s: str) -> bytes:
94
+ return s.encode("charmap")
95
+
96
+
97
+ # A marker object to observe whether some optional arguments are passed any
98
+ # value or not.
99
+ UNSPECIFIED = object()
100
+
101
+ _TEXT_WARNING = "str for {0} is no longer accepted, use bytes"
102
+
103
+
104
+ def text_to_bytes_and_warn(label: str, obj: Any) -> Any:
105
+ """
106
+ If ``obj`` is text, emit a warning that it should be bytes instead and try
107
+ to convert it to bytes automatically.
108
+
109
+ :param str label: The name of the parameter from which ``obj`` was taken
110
+ (so a developer can easily find the source of the problem and correct
111
+ it).
112
+
113
+ :return: If ``obj`` is the text string type, a ``bytes`` object giving the
114
+ UTF-8 encoding of that text is returned. Otherwise, ``obj`` itself is
115
+ returned.
116
+ """
117
+ if isinstance(obj, str):
118
+ warnings.warn(
119
+ _TEXT_WARNING.format(label),
120
+ category=DeprecationWarning,
121
+ stacklevel=3,
122
+ )
123
+ return obj.encode("utf-8")
124
+ return obj
.venv/lib/python3.11/site-packages/OpenSSL/crypto.py ADDED
The diff for this file is too large to render. See raw diff
 
.venv/lib/python3.11/site-packages/OpenSSL/debug.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ssl
2
+ import sys
3
+
4
+ import cffi
5
+ import cryptography
6
+
7
+ import OpenSSL.SSL
8
+
9
+ from . import version
10
+
11
+ _env_info = """\
12
+ pyOpenSSL: {pyopenssl}
13
+ cryptography: {cryptography}
14
+ cffi: {cffi}
15
+ cryptography's compiled against OpenSSL: {crypto_openssl_compile}
16
+ cryptography's linked OpenSSL: {crypto_openssl_link}
17
+ Python's OpenSSL: {python_openssl}
18
+ Python executable: {python}
19
+ Python version: {python_version}
20
+ Platform: {platform}
21
+ sys.path: {sys_path}""".format(
22
+ pyopenssl=version.__version__,
23
+ crypto_openssl_compile=OpenSSL._util.ffi.string(
24
+ OpenSSL._util.lib.OPENSSL_VERSION_TEXT,
25
+ ).decode("ascii"),
26
+ crypto_openssl_link=OpenSSL.SSL.SSLeay_version(
27
+ OpenSSL.SSL.SSLEAY_VERSION
28
+ ).decode("ascii"),
29
+ python_openssl=getattr(ssl, "OPENSSL_VERSION", "n/a"),
30
+ cryptography=cryptography.__version__,
31
+ cffi=cffi.__version__,
32
+ python=sys.executable,
33
+ python_version=sys.version,
34
+ platform=sys.platform,
35
+ sys_path=sys.path,
36
+ )
37
+
38
+
39
+ if __name__ == "__main__":
40
+ print(_env_info)
.venv/lib/python3.11/site-packages/OpenSSL/py.typed ADDED
File without changes
.venv/lib/python3.11/site-packages/OpenSSL/rand.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PRNG management routines, thin wrappers.
3
+ """
4
+
5
+ from OpenSSL._util import lib as _lib
6
+
7
+
8
+ def add(buffer: bytes, entropy: int) -> None:
9
+ """
10
+ Mix bytes from *string* into the PRNG state.
11
+
12
+ The *entropy* argument is (the lower bound of) an estimate of how much
13
+ randomness is contained in *string*, measured in bytes.
14
+
15
+ For more information, see e.g. :rfc:`1750`.
16
+
17
+ This function is only relevant if you are forking Python processes and
18
+ need to reseed the CSPRNG after fork.
19
+
20
+ :param buffer: Buffer with random data.
21
+ :param entropy: The entropy (in bytes) measurement of the buffer.
22
+
23
+ :return: :obj:`None`
24
+ """
25
+ if not isinstance(buffer, bytes):
26
+ raise TypeError("buffer must be a byte string")
27
+
28
+ if not isinstance(entropy, int):
29
+ raise TypeError("entropy must be an integer")
30
+
31
+ _lib.RAND_add(buffer, len(buffer), entropy)
32
+
33
+
34
+ def status() -> int:
35
+ """
36
+ Check whether the PRNG has been seeded with enough data.
37
+
38
+ :return: 1 if the PRNG is seeded enough, 0 otherwise.
39
+ """
40
+ return _lib.RAND_status()
.venv/lib/python3.11/site-packages/OpenSSL/version.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (C) AB Strakt
2
+ # Copyright (C) Jean-Paul Calderone
3
+ # See LICENSE for details.
4
+
5
+ """
6
+ pyOpenSSL - A simple wrapper around the OpenSSL library
7
+ """
8
+
9
+ __all__ = [
10
+ "__author__",
11
+ "__copyright__",
12
+ "__email__",
13
+ "__license__",
14
+ "__summary__",
15
+ "__title__",
16
+ "__uri__",
17
+ "__version__",
18
+ ]
19
+
20
+ __version__ = "24.2.1"
21
+
22
+ __title__ = "pyOpenSSL"
23
+ __uri__ = "https://pyopenssl.org/"
24
+ __summary__ = "Python wrapper module around the OpenSSL library"
25
+ __author__ = "The pyOpenSSL developers"
26
+ __email__ = "cryptography-dev@python.org"
27
+ __license__ = "Apache License, Version 2.0"
28
+ __copyright__ = f"Copyright 2001-2024 {__author__}"
.venv/lib/python3.11/site-packages/compressed_tensors/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .base import *
16
+
17
+ # flake8: noqa
18
+ from .compressors import *
19
+ from .config import *
20
+ from .quantization import QuantizationConfig, QuantizationStatus
21
+ from .utils import *
22
+ from .version import *
.venv/lib/python3.11/site-packages/compressed_tensors/base.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ SPARSITY_CONFIG_NAME = "sparsity_config"
16
+ QUANTIZATION_CONFIG_NAME = "quantization_config"
17
+ COMPRESSION_CONFIG_NAME = "compression_config"
18
+ KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
19
+ COMPRESSION_VERSION_NAME = "version"
20
+ QUANTIZATION_METHOD_NAME = "quant_method"
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # flake8: noqa
16
+
17
+ from .base import *
18
+ from .helpers import *
19
+ from .model_compressors import *
20
+ from .quantized_compressors import *
21
+ from .sparse_compressors import *
22
+ from .sparse_quantized_compressors import *
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/base.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC, abstractmethod
16
+ from typing import Dict, Generator, Optional, Tuple, Union
17
+
18
+ import torch
19
+ from compressed_tensors.config import SparsityCompressionConfig
20
+ from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
21
+ from compressed_tensors.registry import RegistryMixin
22
+ from torch import Tensor
23
+ from torch.nn import Module
24
+
25
+
26
+ __all__ = ["BaseCompressor"]
27
+
28
+
29
+ class BaseCompressor(RegistryMixin, ABC):
30
+ """
31
+ Base class representing a model compression algorithm. Each child class should
32
+ implement compression_param_info, compress_weight and decompress_weight.
33
+
34
+ Compressors support compressing/decompressing a full module state dict or a single
35
+ quantized PyTorch leaf module.
36
+
37
+ Model Load Lifecycle (run_compressed=False):
38
+ - ModelCompressor.decompress()
39
+ - apply_quantization_config()
40
+ - BaseCompressor.decompress()
41
+
42
+ Model Save Lifecycle:
43
+ - ModelCompressor.compress()
44
+ - BaseCompressor.compress()
45
+
46
+
47
+ Module Lifecycle (run_compressed=True):
48
+ - apply_quantization_config()
49
+ - compressed_module = CompressedLinear(module)
50
+ - initialize_module_for_quantization()
51
+ - BaseCompressor.compression_param_info()
52
+ - register_parameters()
53
+ - compressed_module.forward()
54
+ -compressed_module.decompress()
55
+
56
+
57
+ :param config: config specifying compression parameters
58
+ """
59
+
60
+ def __init__(
61
+ self, config: Union[SparsityCompressionConfig, QuantizationConfig, None] = None
62
+ ):
63
+ self.config = config
64
+
65
+ def compression_param_info(
66
+ self,
67
+ weight_shape: torch.Size,
68
+ quantization_args: Optional[QuantizationArgs] = None,
69
+ ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
70
+ """
71
+ Creates a dictionary of expected shapes and dtypes for each compression
72
+ parameter used by the compressor
73
+
74
+ :param weight_shape: uncompressed weight shape
75
+ :param quantization_args: quantization parameters for the weight
76
+ :return: dictionary mapping compressed parameter names to shape and dtype
77
+ """
78
+ raise NotImplementedError()
79
+
80
+ @abstractmethod
81
+ def compress(
82
+ self,
83
+ model_state: Dict[str, Tensor],
84
+ **kwargs,
85
+ ) -> Dict[str, Tensor]:
86
+ """
87
+ Compresses a dense state dict
88
+
89
+ :param model_state: state dict of uncompressed model
90
+ :param kwargs: additional arguments for compression
91
+ :return: compressed state dict
92
+ """
93
+ raise NotImplementedError()
94
+
95
+ @abstractmethod
96
+ def decompress(
97
+ self,
98
+ path_to_model_or_tensors: str,
99
+ device: str = "cpu",
100
+ **kwargs,
101
+ ) -> Generator[Tuple[str, Tensor], None, None]:
102
+ """
103
+ Reads a compressed state dict located at path_to_model_or_tensors
104
+ and returns a generator for sequentially decompressing back to a
105
+ dense state dict
106
+
107
+ :param path_to_model_or_tensors: path to compressed safetensors model (directory
108
+ with one or more safetensors files) or compressed tensors file
109
+ :param names_to_scheme: quantization args for each quantized weight
110
+ :param device: optional device to load intermediate weights into
111
+ :return: compressed state dict
112
+ """
113
+ raise NotImplementedError()
114
+
115
+ def compress_module(self, module: Module) -> Optional[Dict[str, torch.Tensor]]:
116
+ """
117
+ Compresses a single quantized leaf PyTorch module. If the module is not
118
+ quantized, this function has no effect.
119
+
120
+ :param module: PyTorch module to compress
121
+ :return: dictionary of compressed weight data, or None if module is not
122
+ quantized
123
+ """
124
+ if not hasattr(module, "quantization_scheme"):
125
+ return None # module is not quantized
126
+ quantization_scheme = module.quantization_scheme
127
+ if not hasattr(quantization_scheme, "weights"):
128
+ return None # weights are not quantized
129
+
130
+ quantization_args = quantization_scheme.weights
131
+ weight = getattr(module, "weight", None)
132
+ weight_scale = getattr(module, "weight_scale", None)
133
+ weight_zero_point = getattr(module, "weight_zero_point", None)
134
+
135
+ return self.compress_weight(
136
+ weight=weight,
137
+ scale=weight_scale,
138
+ zero_point=weight_zero_point,
139
+ quantization_args=quantization_args,
140
+ )
141
+
142
+ def compress_weight(
143
+ self,
144
+ weight: Tensor,
145
+ **kwargs,
146
+ ) -> Dict[str, torch.Tensor]:
147
+ """
148
+ Compresses a single uncompressed weight
149
+
150
+ :param weight: uncompressed weight tensor
151
+ :param kwargs: additional arguments for compression
152
+ """
153
+ raise NotImplementedError()
154
+
155
+ def decompress_module(self, module: Module):
156
+ """
157
+ Decompresses a single compressed leaf PyTorch module. If the module is not
158
+ quantized, this function has no effect.
159
+
160
+ :param module: PyTorch module to decompress
161
+ :return: tensor of the decompressed weight, or None if module is not quantized
162
+ """
163
+ if not hasattr(module, "quantization_scheme"):
164
+ return None # module is not quantized
165
+ quantization_scheme = module.quantization_scheme
166
+ if not hasattr(quantization_scheme, "weights"):
167
+ return None # weights are not quantized
168
+
169
+ quantization_args = quantization_scheme.weights
170
+ compressed_data = {}
171
+ for name, parameter in module.named_parameters():
172
+ compressed_data[name] = parameter
173
+
174
+ return self.decompress_weight(
175
+ compressed_data=compressed_data, quantization_args=quantization_args
176
+ )
177
+
178
+ def decompress_weight(
179
+ self, compressed_data: Dict[str, Tensor], **kwargs
180
+ ) -> torch.Tensor:
181
+ """
182
+ Decompresses a single compressed weight
183
+
184
+ :param compressed_data: dictionary of data needed for decompression
185
+ :param kwargs: additional arguments for decompression
186
+ :return: tensor of the decompressed weight
187
+ """
188
+ raise NotImplementedError()
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/helpers.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from pathlib import Path
16
+ from typing import Dict, Generator, Optional, Tuple, Union
17
+
18
+ import torch
19
+ from compressed_tensors.compressors import BaseCompressor
20
+ from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
21
+ from compressed_tensors.utils.safetensors_load import get_weight_mappings
22
+ from safetensors import safe_open
23
+ from safetensors.torch import save_file
24
+ from torch import Tensor
25
+
26
+
27
+ __all__ = [
28
+ "load_compressed",
29
+ "save_compressed",
30
+ "save_compressed_model",
31
+ ]
32
+
33
+
34
+ def save_compressed(
35
+ tensors: Dict[str, Tensor],
36
+ save_path: Union[str, Path],
37
+ compression_format: Optional[CompressionFormat] = None,
38
+ ):
39
+ """
40
+ Save compressed tensors to disk. If tensors are not compressed,
41
+ save them as is.
42
+
43
+ :param tensors: dictionary of tensors to compress
44
+ :param save_path: path to save compressed tensors
45
+ :param compression_format: compression format used for the tensors
46
+ :return: compression config, if tensors were compressed - None otherwise
47
+ """
48
+ if tensors is None or len(tensors) == 0:
49
+ raise ValueError("No tensors or empty tensors provided to compress")
50
+
51
+ # if no compression_format specified, default to `dense`
52
+ compression_format = compression_format or CompressionFormat.dense.value
53
+
54
+ if not (
55
+ compression_format in BaseCompressor.registered_names()
56
+ or compression_format in BaseCompressor.registered_aliases()
57
+ ):
58
+ raise ValueError(
59
+ f"Unknown compression format: {compression_format}. "
60
+ f"Must be one of {set(BaseCompressor.registered_names() + BaseCompressor.registered_aliases())}" # noqa E501
61
+ )
62
+
63
+ # compress
64
+ compressor = BaseCompressor.load_from_registry(compression_format)
65
+ # save compressed tensors
66
+ compressed_tensors = compressor.compress(tensors)
67
+ save_file(compressed_tensors, save_path)
68
+
69
+
70
+ def load_compressed(
71
+ compressed_tensors: Union[str, Path],
72
+ compression_config: SparsityCompressionConfig = None,
73
+ device: Optional[str] = "cpu",
74
+ ) -> Generator[Tuple[str, Tensor], None, None]:
75
+ """
76
+ Load compressed tensors from disk.
77
+ If tensors are not compressed, load them as is.
78
+
79
+ :param compressed_tensors: path to compressed tensors.
80
+ This can be a path to a file or a directory containing
81
+ one or multiple safetensor files (if multiple - in the format
82
+ assumed by huggingface)
83
+ :param compression_config: compression config to use for decompressing tensors.
84
+ :param device: device to move tensors to. If None, tensors are loaded on CPU.
85
+ :param return_dict: if True, return a dictionary of decompressed tensors
86
+ :return a generator that yields the name and tensor of the decompressed tensor
87
+ """
88
+ if compressed_tensors is None or not Path(compressed_tensors).exists():
89
+ raise ValueError("No compressed tensors provided to load")
90
+
91
+ if (
92
+ compression_config is None
93
+ or compression_config.format == CompressionFormat.dense.value
94
+ ):
95
+ # if no compression_config specified, or `dense` format specified,
96
+ # assume tensors are not compressed on disk
97
+ weight_mappings = get_weight_mappings(compressed_tensors)
98
+ for weight_name, file_with_weight_name in weight_mappings.items():
99
+ with safe_open(file_with_weight_name, framework="pt", device=device) as f:
100
+ weight = f.get_tensor(weight_name)
101
+ yield weight_name, weight
102
+ else:
103
+ # decompress tensors
104
+ compression_format = compression_config.format
105
+ compressor = BaseCompressor.load_from_registry(
106
+ compression_format, config=compression_config
107
+ )
108
+ yield from compressor.decompress(compressed_tensors, device=device)
109
+
110
+
111
+ def save_compressed_model(
112
+ model: torch.nn.Module,
113
+ filename: str,
114
+ compression_format: Optional[CompressionFormat] = None,
115
+ force_contiguous: bool = True,
116
+ ):
117
+ """
118
+ Wrapper around safetensors `save_model` helper function, which allows for
119
+ saving compressed model to disk.
120
+
121
+ Note: The model is assumed to have a
122
+ state_dict with unique entries
123
+
124
+ :param model: model to save on disk
125
+ :param filename: filename location to save the file
126
+ :param compression_format: compression format used for the model
127
+ :param force_contiguous: forcing the state_dict to be saved as contiguous tensors
128
+ """
129
+ state_dict = model.state_dict()
130
+ if force_contiguous:
131
+ state_dict = {k: v.contiguous() for k, v in state_dict.items()}
132
+ try:
133
+ save_compressed(state_dict, filename, compression_format=compression_format)
134
+ except ValueError as e:
135
+ msg = str(e)
136
+ msg += " Or use save_compressed_model(..., force_contiguous=True), read the docs for potential caveats." # noqa E501
137
+ raise ValueError(msg)
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # flake8: noqa
15
+
16
+ from .base import *
17
+ from .naive_quantized import *
18
+ from .pack_quantized import *
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (329 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/base.cpython-311.pyc ADDED
Binary file (8.51 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/naive_quantized.cpython-311.pyc ADDED
Binary file (5.77 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/__pycache__/pack_quantized.cpython-311.pyc ADDED
Binary file (9.49 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/base.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from pathlib import Path
17
+ from typing import Any, Dict, Generator, Tuple, Union
18
+
19
+ import torch
20
+ from compressed_tensors.compressors.base import BaseCompressor
21
+ from compressed_tensors.quantization import QuantizationArgs
22
+ from compressed_tensors.utils import (
23
+ get_nested_mappings_from_state_dict,
24
+ get_nested_weight_mappings,
25
+ merge_names,
26
+ )
27
+ from safetensors import safe_open
28
+ from torch import Tensor
29
+ from tqdm import tqdm
30
+
31
+
32
+ _LOGGER: logging.Logger = logging.getLogger(__name__)
33
+
34
+ __all__ = ["BaseQuantizationCompressor"]
35
+
36
+
37
+ class BaseQuantizationCompressor(BaseCompressor):
38
+ """
39
+ Base class representing a quant compression algorithm. Each child class should
40
+ implement compression_param_info, compress_weight and decompress_weight.
41
+
42
+ Compressors support compressing/decompressing a full module state dict or a single
43
+ quantized PyTorch leaf module.
44
+
45
+ Model Load Lifecycle (run_compressed=False):
46
+ - ModelCompressor.decompress()
47
+ - apply_quantization_config()
48
+ - BaseQuantizationCompressor.decompress()
49
+ - BaseQuantizationCompressor.decompress_weight()
50
+
51
+ Model Save Lifecycle:
52
+ - ModelCompressor.compress()
53
+ - BaseQuantizationCompressor.compress()
54
+ - BaseQuantizationCompressor.compress_weight()
55
+
56
+ Module Lifecycle (run_compressed=True):
57
+ - apply_quantization_config()
58
+ - compressed_module = CompressedLinear(module)
59
+ - initialize_module_for_quantization()
60
+ - BaseQuantizationCompressor.compression_param_info()
61
+ - register_parameters()
62
+ - compressed_module.forward()
63
+ - compressed_module.decompress()
64
+
65
+
66
+ :param config: config specifying compression parameters
67
+ """
68
+
69
+ def compress(
70
+ self,
71
+ model_state: Dict[str, Tensor],
72
+ names_to_scheme: Dict[str, QuantizationArgs],
73
+ **kwargs,
74
+ ) -> Dict[str, Tensor]:
75
+ """
76
+ Compresses a dense state dict
77
+
78
+ :param model_state: state dict of uncompressed model
79
+ :param names_to_scheme: quantization args for each quantized weight, needed for
80
+ quantize function to calculate bit depth
81
+ :return: compressed state dict
82
+ """
83
+ compressed_dict = {}
84
+ weight_suffix = ".weight"
85
+ _LOGGER.debug(
86
+ f"Compressing model with {len(model_state)} parameterized layers..."
87
+ )
88
+
89
+ for name, value in tqdm(model_state.items(), desc="Quantized Compression"):
90
+ if name.endswith(weight_suffix):
91
+ prefix = name[: -(len(weight_suffix))]
92
+ scale = model_state.get(merge_names(prefix, "weight_scale"), None)
93
+ zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
94
+ g_idx = model_state.get(merge_names(prefix, "weight_g_idx"), None)
95
+ if scale is not None:
96
+ # weight is quantized, compress it
97
+ quant_args = names_to_scheme[prefix]
98
+ compressed_data = self.compress_weight(
99
+ weight=value,
100
+ scale=scale,
101
+ zero_point=zp,
102
+ g_idx=g_idx,
103
+ quantization_args=quant_args,
104
+ device="cpu",
105
+ )
106
+ for key, value in compressed_data.items():
107
+ compressed_dict[merge_names(prefix, key)] = value
108
+ else:
109
+ compressed_dict[name] = value.to("cpu")
110
+ elif name.endswith("zero_point") and torch.all(value == 0):
111
+ continue
112
+ elif name.endswith("g_idx") and torch.any(value <= -1):
113
+ continue
114
+ else:
115
+ compressed_dict[name] = value.to("cpu")
116
+
117
+ return compressed_dict
118
+
119
+ def decompress(
120
+ self,
121
+ path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
122
+ names_to_scheme: Dict[str, QuantizationArgs],
123
+ device: str = "cpu",
124
+ ) -> Generator[Tuple[str, Tensor], None, None]:
125
+ """
126
+ Reads a compressed state dict located at path_to_model_or_tensors
127
+ and returns a generator for sequentially decompressing back to a
128
+ dense state dict
129
+ :param path_to_model_or_tensors: path to compressed safetensors model (directory
130
+ with one or more safetensors files) or compressed tensors file
131
+ :param names_to_scheme: quantization args for each quantized weight
132
+ :param device: optional device to load intermediate weights into
133
+ :return: compressed state dict
134
+ """
135
+ if isinstance(path_to_model_or_tensors, (str, Path)):
136
+ yield from self._decompress_from_path(
137
+ path_to_model_or_tensors, names_to_scheme, device
138
+ )
139
+
140
+ else:
141
+ yield from self._decompress_from_state_dict(
142
+ path_to_model_or_tensors, names_to_scheme
143
+ )
144
+
145
+ def _decompress_from_path(self, path_to_model, names_to_scheme, device):
146
+ weight_mappings = get_nested_weight_mappings(
147
+ path_to_model, self.COMPRESSION_PARAM_NAMES
148
+ )
149
+ for weight_name in weight_mappings.keys():
150
+ weight_data = {}
151
+ for param_name, safe_path in weight_mappings[weight_name].items():
152
+ full_name = merge_names(weight_name, param_name)
153
+ with safe_open(safe_path, framework="pt", device=device) as f:
154
+ weight_data[param_name] = f.get_tensor(full_name)
155
+ if "weight_scale" in weight_data:
156
+ quant_args = names_to_scheme[weight_name]
157
+ decompressed = self.decompress_weight(
158
+ compressed_data=weight_data, quantization_args=quant_args
159
+ )
160
+ yield merge_names(weight_name, "weight"), decompressed
161
+
162
+ def _decompress_from_state_dict(self, state_dict, names_to_scheme):
163
+ weight_mappings = get_nested_mappings_from_state_dict(
164
+ state_dict, self.COMPRESSION_PARAM_NAMES
165
+ )
166
+ for weight_name in weight_mappings.keys():
167
+ weight_data = {}
168
+ for param_name, param_value in weight_mappings[weight_name].items():
169
+ weight_data[param_name] = param_value
170
+
171
+ if "weight_scale" in weight_data:
172
+ quant_args = names_to_scheme[weight_name]
173
+ decompressed = self.decompress_weight(
174
+ compressed_data=weight_data, quantization_args=quant_args
175
+ )
176
+ yield merge_names(weight_name, "weight"), decompressed
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/naive_quantized.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, Optional, Tuple
16
+
17
+ import torch
18
+ from compressed_tensors.compressors.base import BaseCompressor
19
+ from compressed_tensors.compressors.quantized_compressors.base import (
20
+ BaseQuantizationCompressor,
21
+ )
22
+ from compressed_tensors.config import CompressionFormat
23
+ from compressed_tensors.quantization import QuantizationArgs
24
+ from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
25
+ from compressed_tensors.quantization.utils import can_quantize
26
+ from torch import Tensor
27
+
28
+
29
+ __all__ = [
30
+ "NaiveQuantizationCompressor",
31
+ "IntQuantizationCompressor",
32
+ "FloatQuantizationCompressor",
33
+ ]
34
+
35
+
36
+ @BaseCompressor.register(name=CompressionFormat.naive_quantized.value)
37
+ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
38
+ """
39
+ Implements naive compression for quantized models. Weight of each
40
+ quantized layer is converted from its original float type to the closest Pytorch
41
+ type to the type specified by the layer's QuantizationArgs.
42
+ """
43
+
44
+ COMPRESSION_PARAM_NAMES = [
45
+ "weight",
46
+ "weight_scale",
47
+ "weight_zero_point",
48
+ "weight_g_idx",
49
+ ]
50
+
51
+ def compression_param_info(
52
+ self,
53
+ weight_shape: torch.Size,
54
+ quantization_args: Optional[QuantizationArgs] = None,
55
+ ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
56
+ """
57
+ Creates a dictionary of expected shapes and dtypes for each compression
58
+ parameter used by the compressor
59
+
60
+ :param weight_shape: uncompressed weight shape
61
+ :param quantization_args: quantization parameters for the weight
62
+ :return: dictionary mapping compressed parameter names to shape and dtype
63
+ """
64
+ dtype = quantization_args.pytorch_dtype()
65
+ return {"weight": (weight_shape, dtype)}
66
+
67
+ def compress_weight(
68
+ self,
69
+ weight: Tensor,
70
+ scale: Tensor,
71
+ quantization_args: QuantizationArgs,
72
+ zero_point: Optional[Tensor] = None,
73
+ g_idx: Optional[torch.Tensor] = None,
74
+ device: Optional[torch.device] = None,
75
+ ) -> Dict[str, torch.Tensor]:
76
+ """
77
+ Compresses a single uncompressed weight
78
+
79
+ :param weight: uncompressed weight tensor
80
+ :param scale: quantization scale for weight
81
+ :param quantization_args: quantization parameters for weight
82
+ :param zero_point: quantization zero point for weight
83
+ :param g_idx: optional mapping from column index to group index
84
+ :param device: optional device to move compressed output to
85
+ :return: dictionary of compressed weight data
86
+ """
87
+ if can_quantize(weight, quantization_args):
88
+ quantized_weight = quantize(
89
+ x=weight,
90
+ scale=scale,
91
+ zero_point=zero_point,
92
+ g_idx=g_idx,
93
+ args=quantization_args,
94
+ dtype=quantization_args.pytorch_dtype(),
95
+ )
96
+ else:
97
+ quantized_weight = weight
98
+
99
+ if device is not None:
100
+ quantized_weight = quantized_weight.to(device)
101
+
102
+ return {"weight": quantized_weight}
103
+
104
+ def decompress_weight(
105
+ self,
106
+ compressed_data: Dict[str, Tensor],
107
+ quantization_args: Optional[QuantizationArgs] = None,
108
+ ) -> torch.Tensor:
109
+ """
110
+ Decompresses a single compressed weight
111
+
112
+ :param compressed_data: dictionary of data needed for decompression
113
+ :param quantization_args: quantization parameters for the weight
114
+ :return: tensor of the decompressed weight
115
+ """
116
+ weight = compressed_data["weight"]
117
+ scale = compressed_data["weight_scale"]
118
+ zero_point = compressed_data.get("weight_zero_point", None)
119
+ g_idx = compressed_data.get("weight_g_idx", None)
120
+ decompressed_weight = dequantize(
121
+ x_q=weight, scale=scale, zero_point=zero_point, g_idx=g_idx
122
+ )
123
+
124
+ return decompressed_weight
125
+
126
+
127
+ @BaseCompressor.register(name=CompressionFormat.int_quantized.value)
128
+ class IntQuantizationCompressor(NaiveQuantizationCompressor):
129
+ """
130
+ Alias for integer quantized models
131
+ """
132
+
133
+ pass
134
+
135
+
136
+ @BaseCompressor.register(name=CompressionFormat.float_quantized.value)
137
+ class FloatQuantizationCompressor(NaiveQuantizationCompressor):
138
+ """
139
+ Alias for fp quantized models
140
+ """
141
+
142
+ pass
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/quantized_compressors/pack_quantized.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import math
15
+ from typing import Dict, Optional, Tuple
16
+
17
+ import numpy as np
18
+ import torch
19
+ from compressed_tensors.compressors.base import BaseCompressor
20
+ from compressed_tensors.compressors.quantized_compressors.base import (
21
+ BaseQuantizationCompressor,
22
+ )
23
+ from compressed_tensors.config import CompressionFormat
24
+ from compressed_tensors.quantization import QuantizationArgs
25
+ from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
26
+ from compressed_tensors.quantization.utils import can_quantize
27
+ from torch import Tensor
28
+
29
+
30
+ __all__ = ["PackedQuantizationCompressor", "pack_to_int32", "unpack_from_int32"]
31
+
32
+
33
+ @BaseCompressor.register(name=CompressionFormat.pack_quantized.value)
34
+ class PackedQuantizationCompressor(BaseQuantizationCompressor):
35
+ """
36
+ Compresses a quantized model by packing every eight 4-bit weights into an int32
37
+ """
38
+
39
+ COMPRESSION_PARAM_NAMES = [
40
+ "weight_packed",
41
+ "weight_scale",
42
+ "weight_zero_point",
43
+ "weight_g_idx",
44
+ "weight_shape",
45
+ ]
46
+
47
+ def compression_param_info(
48
+ self,
49
+ weight_shape: torch.Size,
50
+ quantization_args: Optional[QuantizationArgs] = None,
51
+ ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
52
+ """
53
+ Creates a dictionary of expected shapes and dtypes for each compression
54
+ parameter used by the compressor
55
+
56
+ :param weight_shape: uncompressed weight shape
57
+ :param quantization_args: quantization parameters for the weight
58
+ :return: dictionary mapping compressed parameter names to shape and dtype
59
+ """
60
+ pack_factor = 32 // quantization_args.num_bits
61
+ packed_size = math.ceil(weight_shape[1] / pack_factor)
62
+ return {
63
+ "weight_packed": (torch.Size((weight_shape[0], packed_size)), torch.int32),
64
+ "weight_shape": (torch.Size((2,)), torch.int32),
65
+ }
66
+
67
+ def compress_weight(
68
+ self,
69
+ weight: Tensor,
70
+ scale: Tensor,
71
+ quantization_args: QuantizationArgs,
72
+ zero_point: Optional[Tensor] = None,
73
+ g_idx: Optional[torch.Tensor] = None,
74
+ device: Optional[torch.device] = None,
75
+ ) -> Dict[str, torch.Tensor]:
76
+ """
77
+ Compresses a single uncompressed weight
78
+
79
+ :param weight: uncompressed weight tensor
80
+ :param scale: quantization scale for weight
81
+ :param quantization_args: quantization parameters for weight
82
+ :param zero_point: quantization zero point for weight
83
+ :param g_idx: optional mapping from column index to group index
84
+ :param device: optional device to move compressed output to
85
+ :return: dictionary of compressed weight data
86
+ """
87
+ compressed_dict = {}
88
+ if can_quantize(weight, quantization_args):
89
+ quantized_weight = quantize(
90
+ x=weight,
91
+ scale=scale,
92
+ zero_point=zero_point,
93
+ g_idx=g_idx,
94
+ args=quantization_args,
95
+ dtype=torch.int8,
96
+ )
97
+ else:
98
+ quantized_weight = weight
99
+
100
+ packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
101
+ weight_shape = torch.tensor(weight.shape)
102
+ if device is not None:
103
+ packed_weight = packed_weight.to(device)
104
+ weight_shape = weight_shape.to(device)
105
+
106
+ compressed_dict["weight_shape"] = weight_shape
107
+ compressed_dict["weight_packed"] = packed_weight
108
+
109
+ return compressed_dict
110
+
111
+ def decompress_weight(
112
+ self,
113
+ compressed_data: Dict[str, Tensor],
114
+ quantization_args: Optional[QuantizationArgs] = None,
115
+ ) -> torch.Tensor:
116
+ """
117
+ Decompresses a single compressed weight
118
+
119
+ :param compressed_data: dictionary of data needed for decompression
120
+ :param quantization_args: quantization parameters for the weight
121
+ :return: tensor of the decompressed weight
122
+ """
123
+ weight = compressed_data["weight_packed"]
124
+ scale = compressed_data["weight_scale"]
125
+ zero_point = compressed_data.get("weight_zero_point", None)
126
+ g_idx = compressed_data.get("weight_g_idx", None)
127
+ original_shape = torch.Size(compressed_data["weight_shape"])
128
+ num_bits = quantization_args.num_bits
129
+ unpacked = unpack_from_int32(weight, num_bits, original_shape)
130
+ decompressed_weight = dequantize(
131
+ x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx
132
+ )
133
+
134
+ return decompressed_weight
135
+
136
+
137
+ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
138
+ """
139
+ Packs a tensor of quantized weights stored in int8 into int32s with padding
140
+
141
+ :param value: tensor to pack
142
+ :param num_bits: number of bits used to store underlying data
143
+ :returns: packed int32 tensor
144
+ """
145
+ if value.dtype is not torch.int8:
146
+ raise ValueError("Tensor must be quantized to torch.int8 before packing")
147
+
148
+ if num_bits > 8:
149
+ raise ValueError("Packing is only supported for less than 8 bits")
150
+
151
+ # convert to unsigned for packing
152
+ offset = pow(2, num_bits) // 2
153
+ value = (value + offset).to(torch.uint8)
154
+ value = value.cpu().numpy().astype(np.uint32)
155
+ pack_factor = 32 // num_bits
156
+
157
+ # pad input tensor and initialize packed output
158
+ packed_size = math.ceil(value.shape[1] / pack_factor)
159
+ packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
160
+ padding = packed.shape[1] * pack_factor - value.shape[1]
161
+ value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
162
+
163
+ # pack values
164
+ for i in range(pack_factor):
165
+ packed |= value[:, i::pack_factor] << num_bits * i
166
+
167
+ # convert back to signed and torch
168
+ packed = np.ascontiguousarray(packed).view(np.int32)
169
+ return torch.from_numpy(packed)
170
+
171
+
172
+ def unpack_from_int32(
173
+ value: torch.Tensor, num_bits: int, shape: torch.Size
174
+ ) -> torch.Tensor:
175
+ """
176
+ Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
177
+ original their bit range
178
+
179
+ :param value: tensor to upack
180
+ :param num_bits: number of bits to unpack each data point into
181
+ :param shape: shape to unpack into, used to remove padding
182
+ :returns: unpacked int8 tensor
183
+ """
184
+ if value.dtype is not torch.int32:
185
+ raise ValueError(
186
+ f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
187
+ )
188
+
189
+ if num_bits > 8:
190
+ raise ValueError("Unpacking is only supported for less than 8 bits")
191
+
192
+ pack_factor = 32 // num_bits
193
+
194
+ # unpack
195
+ mask = pow(2, num_bits) - 1
196
+ unpacked = torch.zeros(
197
+ (value.shape[0], value.shape[1] * pack_factor),
198
+ device=value.device,
199
+ dtype=torch.int32,
200
+ )
201
+ for i in range(pack_factor):
202
+ unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask
203
+
204
+ # remove padding
205
+ original_row_size = int(shape[1])
206
+ unpacked = unpacked[:, :original_row_size]
207
+
208
+ # bits are packed in unsigned format, reformat to signed
209
+ # update the value range from unsigned to signed
210
+ offset = pow(2, num_bits) // 2
211
+ unpacked = (unpacked - offset).to(torch.int8)
212
+
213
+ return unpacked
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # flake8: noqa
15
+
16
+ from .base import *
17
+ from .dense import *
18
+ from .sparse_24_bitmask import *
19
+ from .sparse_bitmask import *
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (355 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/base.cpython-311.pyc ADDED
Binary file (7.39 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/dense.cpython-311.pyc ADDED
Binary file (1.67 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_24_bitmask.cpython-311.pyc ADDED
Binary file (11.8 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/__pycache__/sparse_bitmask.cpython-311.pyc ADDED
Binary file (7.87 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/base.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from typing import Dict, Generator, Optional, Set, Tuple
17
+
18
+ from compressed_tensors.compressors.base import BaseCompressor
19
+ from compressed_tensors.utils import get_nested_weight_mappings, merge_names
20
+ from safetensors import safe_open
21
+ from torch import Tensor
22
+ from tqdm import tqdm
23
+
24
+
25
+ __all__ = ["BaseSparseCompressor"]
26
+
27
+ _LOGGER: logging.Logger = logging.getLogger(__name__)
28
+
29
+
30
+ class BaseSparseCompressor(BaseCompressor):
31
+ """
32
+ Base class representing a sparse compression algorithm. Each child class should
33
+ implement compression_param_info, compress_weight and decompress_weight; child
34
+ classes should also define COMPRESSION_PARAM_NAMES.
35
+
36
+ Compressors support compressing/decompressing a full module state dict or a single
37
+ quantized PyTorch leaf module.
38
+
39
+ Model Load Lifecycle (run_compressed=False):
40
+ - ModelCompressor.decompress()
41
+ - apply_quantization_config()
42
+ - BaseSparseCompressor.decompress()
43
+ - BaseSparseCompressor.decompress_weight()
44
+
45
+ Model Save Lifecycle:
46
+ - ModelCompressor.compress()
47
+ - BaseSparseCompressor.compress()
48
+ - BaseSparseCompressor.compress_weight()
49
+
50
+ Module Lifecycle (run_compressed=True):
51
+ - apply_quantization_config()
52
+ - compressed_module = CompressedLinear(module)
53
+ - initialize_module_for_quantization()
54
+ - BaseSparseCompressor.compression_param_info()
55
+ - register_parameters()
56
+ - compressed_module.forward()
57
+ - compressed_module.decompress()
58
+
59
+
60
+ :param config: config specifying compression parameters
61
+ """
62
+
63
+ def compress(
64
+ self,
65
+ model_state: Dict[str, Tensor],
66
+ compression_targets: Optional[Set[str]] = None,
67
+ ) -> Dict[str, Tensor]:
68
+ """
69
+ Compresses a dense state dict using bitmask compression
70
+
71
+ :param model_state: state dict of uncompressed model
72
+ :param compression_targets: optional set of layer prefixes to compress,
73
+ otherwise compress all layers (for backwards compatibility)
74
+ :return: compressed state dict
75
+ """
76
+ compressed_dict = {}
77
+ _LOGGER.debug(
78
+ f"Compressing model with {len(model_state)} parameterized layers..."
79
+ )
80
+ for name, value in tqdm(model_state.items(), desc="Compressing model"):
81
+ if not self.should_compress(name, compression_targets):
82
+ compressed_dict[name] = value
83
+ continue
84
+ prefix = name
85
+ if prefix.endswith(".weight"):
86
+ prefix = prefix[: -(len(".weight"))]
87
+
88
+ compression_data = self.compress_weight(prefix, value)
89
+ for key in compression_data.keys():
90
+ if key in compressed_dict:
91
+ _LOGGER.warn(
92
+ f"Expected all compressed state_dict keys to be unique, but "
93
+ f"found an existing entry for {key}. The existing entry will "
94
+ "be replaced."
95
+ )
96
+
97
+ compressed_dict.update(compression_data)
98
+
99
+ return compressed_dict
100
+
101
+ def decompress(
102
+ self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
103
+ ) -> Generator[Tuple[str, Tensor], None, None]:
104
+ """
105
+ Reads a bitmask compressed state dict located
106
+ at path_to_model_or_tensors and returns a generator
107
+ for sequentially decompressing back to a dense state dict
108
+
109
+ :param model_path: path to compressed safetensors model (directory with
110
+ one or more safetensors files) or compressed tensors file
111
+ :param device: device to load decompressed weights onto
112
+ :return: iterator for generating decompressed weights
113
+ """
114
+ weight_mappings, ignored_params = get_nested_weight_mappings(
115
+ path_to_model_or_tensors,
116
+ self.COMPRESSION_PARAM_NAMES,
117
+ return_unmatched_params=True,
118
+ )
119
+ for weight_name in weight_mappings.keys():
120
+ weight_data = {}
121
+ for param_name, safe_path in weight_mappings[weight_name].items():
122
+ full_name = merge_names(weight_name, param_name)
123
+ with safe_open(safe_path, framework="pt", device=device) as f:
124
+ weight_data[param_name] = f.get_tensor(full_name)
125
+ decompressed = self.decompress_weight(weight_data)
126
+ yield merge_names(weight_name, "weight"), decompressed
127
+
128
+ for ignored_param_name, safe_path in ignored_params.items():
129
+ with safe_open(safe_path, framework="pt", device=device) as f:
130
+ value = f.get_tensor(ignored_param_name)
131
+ yield ignored_param_name, value
132
+
133
+ @staticmethod
134
+ def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
135
+ """
136
+ Check if a parameter should be compressed.
137
+ Currently, this only returns True for weight parameters.
138
+
139
+ :param name: name of the parameter
140
+ :param expanded_targets: set of layer prefixes to compress
141
+ :return: whether or not the parameter should be compressed
142
+ """
143
+ if expanded_targets is None:
144
+ return name.endswith(".weight")
145
+
146
+ return (
147
+ name.endswith(".weight") and name[: -(len(".weight"))] in expanded_targets
148
+ )
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/dense.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, Generator, Tuple
16
+
17
+ from compressed_tensors.compressors.base import BaseCompressor
18
+ from compressed_tensors.config import CompressionFormat
19
+ from torch import Tensor
20
+
21
+
22
+ @BaseCompressor.register(name=CompressionFormat.dense.value)
23
+ class DenseCompressor(BaseCompressor):
24
+ """
25
+ Identity compressor for dense models, returns the original state_dict
26
+ """
27
+
28
+ def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
29
+ return model_state
30
+
31
+ def decompress(
32
+ self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
33
+ ) -> Generator[Tuple[str, Tensor], None, None]:
34
+ return iter([])
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass
16
+ from typing import Dict, List, Tuple, Union
17
+
18
+ import torch
19
+ from compressed_tensors.compressors.base import BaseCompressor
20
+ from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
21
+ from compressed_tensors.config import CompressionFormat, SparsityStructure
22
+ from compressed_tensors.quantization import FP8_DTYPE
23
+ from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
24
+ from torch import Tensor
25
+
26
+
27
+ __all__ = [
28
+ "Sparse24BitMaskCompressor",
29
+ "Sparse24BitMaskTensor",
30
+ "sparse24_bitmask_compress",
31
+ "sparse24_bitmask_decompress",
32
+ "get_24_bytemasks",
33
+ ]
34
+
35
+
36
@BaseCompressor.register(name=CompressionFormat.sparse_24_bitmask.value)
class Sparse24BitMaskCompressor(BaseSparseCompressor):
    """
    Bitmask-based compressor for 2:4 sparse models. The kept (non-zero)
    values are stored in a 2d values tensor, and their positions are
    recorded in a packed 2d bitmask.
    """

    COMPRESSION_PARAM_NAMES = [
        "shape",
        "compressed",
        "bitmask",
    ]

    def compress_weight(self, name, value):
        # Delegate tensor-level compression, then serialize the result as a
        # dict of tensors keyed off the original parameter name.
        compressed = Sparse24BitMaskTensor.from_dense(
            value, self.config.sparsity_structure
        )
        return compressed.dict(name_prefix=name, device="cpu")

    def decompress_weight(self, weight_data):
        # Rebuild the compressed wrapper from its on-disk pieces and expand
        # it back to a dense tensor.
        return Sparse24BitMaskTensor.from_compressed_data(**weight_data).decompress()
60
+
61
+
62
@dataclass
class Sparse24BitMaskTensor:
    """
    Owns compression and decompression for a single 2:4 sparse
    bitmask-compressed tensor.

    :param shape: shape of the original dense tensor
    :param compressed: 2d tensor holding the kept non-zero values
    :param bitmask: packed 2d bitmask locating the kept values
    """

    shape: List[int]
    compressed: Tensor
    bitmask: Tensor

    @staticmethod
    def from_dense(
        tensor: Tensor,
        sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
    ) -> "Sparse24BitMaskTensor":
        """
        :param tensor: dense tensor to compress
        :param sparsity_structure: sparsity structure to enforce; the
            underlying routine only accepts 2:4
        :return: instantiated compressed tensor
        """
        # Compression always happens on CPU; the original shape is captured
        # before compression so decompression can restore it.
        values, mask = sparse24_bitmask_compress(
            tensor.cpu(), sparsity_structure=sparsity_structure
        )
        return Sparse24BitMaskTensor(
            shape=list(tensor.shape),
            compressed=values,
            bitmask=mask,
        )

    @staticmethod
    def from_compressed_data(
        shape: Union[List[int], Tensor], compressed: Tensor, bitmask: Tensor
    ) -> "Sparse24BitMaskTensor":
        """
        :param shape: shape of the dense tensor (can be a list or a tensor)
        :param compressed: 2d tensor of non-zero values
        :param bitmask: 2d bitmask of non-zero values
        :return: instantiated Sparse24BitMaskTensor
        """
        # Normalize shape to a flat python list, whether it arrives as a
        # list or as a (possibly column-shaped) tensor loaded from disk.
        if isinstance(shape, list):
            shape = torch.tensor(shape)
        if isinstance(shape, torch.Tensor):
            shape = shape.flatten().tolist()
        return Sparse24BitMaskTensor(shape=shape, compressed=compressed, bitmask=bitmask)

    def decompress(self) -> Tensor:
        """
        :return: reconstructed dense tensor
        """
        return sparse24_bitmask_decompress(self.compressed, self.bitmask, self.shape)

    def curr_memory_size_bytes(self) -> int:
        """
        :return: size in bytes required to store compressed tensor on disk
        """
        return sum(
            t.element_size() * t.nelement() for t in (self.compressed, self.bitmask)
        )

    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
        """
        :param name_prefix: name of original tensor to store compressed weight as
        :param device: device to move the stored tensors to
        :return: dict of compressed data for the stored weight
        """
        # Strip a trailing ".weight" so keys follow the module-name prefix.
        if name_prefix.endswith(".weight"):
            name_prefix = name_prefix[: -len(".weight")]
        # Shape is stored as a column vector tensor for serialization.
        shape_tensor = torch.tensor(self.shape, device=device).reshape(-1, 1)
        return {
            merge_names(name_prefix, "shape"): shape_tensor,
            merge_names(name_prefix, "compressed"): self.compressed.to(device),
            merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
        }

    def __repr__(self) -> str:
        return f"BitMaskTensor(shape={self.shape}, compressed=True)"
147
+
148
+
149
def sparse24_bitmask_compress(
    tensor: Tensor,
    sparsity_structure: Union[SparsityStructure, str] = SparsityStructure.TWO_FOUR,
) -> Tuple[Tensor, Tensor]:
    """
    Compresses a dense 2:4-sparse tensor using bitmask compression.

    :param tensor: dense 2D tensor to compress
    :param sparsity_structure: structure of sparsity in the tensor; only
        `2:4` is supported (anything else raises via the assert below)
    :return: tuple of (compressed values of shape (rows, cols // 2),
        packed bitmask locating the kept values)
    """
    assert len(tensor.shape) == 2, "Only 2D tensors are supported"
    assert (
        SparsityStructure(sparsity_structure) == SparsityStructure.TWO_FOUR
    ), "Only 2:4 sparsity is supported"

    bytemasks = get_24_bytemasks(tensor=tensor)

    if tensor.dtype == FP8_DTYPE:
        # access raw bytes of the tensor through an int8 view so the fp8
        # elements can be gathered with boolean indexing
        tensor_view = tensor.view(torch.int8)
        values = tensor_view[bytemasks]
        values = values.view(FP8_DTYPE)
    else:
        values = tensor[bytemasks]

    num_rows, num_cols = tensor.shape
    # exactly half of each row survives 2:4 pruning, so the kept values
    # pack into a dense (rows, cols // 2) tensor
    compressed_values = values.reshape(num_rows, num_cols // 2)
    bitmasks_packed = pack_bitmasks(bytemasks)
    return compressed_values, bitmasks_packed
180
+
181
+
182
def sparse24_bitmask_decompress(
    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
) -> Tensor:
    """
    Reconstructs a dense tensor from a compressed one.

    :param values: tensor of non-zero values (flattened before scattering)
    :param bitmasks: 2d int8 packed bitmask flagging locations of non-zero
        values in the tensor's original shape
    :param original_shape: shape of the dense tensor
    :return: decompressed dense tensor
    """
    bytemasks_unpacked = unpack_bitmasks(bitmasks, original_shape)

    decompressed_tensor = torch.zeros(original_shape, dtype=values.dtype)
    decompressed_tensor = decompressed_tensor.to(values.device)
    # Scatter the kept values back into their original positions; this was
    # previously duplicated across both branches of the dtype check.
    decompressed_tensor[bytemasks_unpacked] = values.flatten()
    if decompressed_tensor.dtype == FP8_DTYPE:
        # NOTE(review): fp8 results are unconditionally moved to CUDA here,
        # which will fail on CPU-only hosts — confirm this is intentional.
        decompressed_tensor = decompressed_tensor.cuda()
    return decompressed_tensor
205
+
206
+
207
def get_24_bytemasks(tensor):
    """
    Generate a 2:4 sparsity mask for the given tensor.

    Exactly 2 out of every 4 consecutive elements (in flattened order) are
    preserved, chosen by largest absolute value within each group of 4.

    :param tensor: The input tensor for which the 2:4 sparsity mask is to be
        created. The tensor can be of any shape but its total number of
        elements must be a multiple of 4.
    :return: A boolean tensor of the same shape as the input tensor, where
        `True` marks preserved elements and `False` marks pruned elements.
    :raises ValueError: If the total number of elements in the tensor is not
        a multiple of 4.
    """
    original_dtype = tensor.dtype
    # fp8 tensors are reinterpreted as raw int8 bytes before ranking —
    # presumably because the ops below don't support fp8; TODO confirm
    if tensor.dtype == FP8_DTYPE:
        tensor = tensor.view(torch.int8)
    original_shape = tensor.shape

    if tensor.numel() % 4 != 0:
        raise ValueError("Tensor size must be a multiple of 4 for TWO_FOUR sparsity")

    # Rank magnitudes within each group of 4 and keep the top 2.
    grouped = tensor.view(-1, 4)
    keep_indices = grouped.abs().topk(2, dim=1).indices
    mask = torch.zeros_like(grouped, dtype=torch.bool)
    mask.scatter_(1, keep_indices, True)
    mask = mask.view(original_shape)
    # Restore the local view to the caller's dtype (mirrors the original).
    tensor = tensor.view(original_dtype)

    return mask
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, List, Tuple, Union
16
+
17
+ import torch
18
+ from compressed_tensors.compressors.base import BaseCompressor
19
+ from compressed_tensors.compressors.sparse_compressors.base import BaseSparseCompressor
20
+ from compressed_tensors.config import CompressionFormat
21
+ from compressed_tensors.quantization import FP8_DTYPE
22
+ from compressed_tensors.utils import merge_names, pack_bitmasks, unpack_bitmasks
23
+ from torch import Tensor
24
+
25
+
26
+ __all__ = [
27
+ "BitmaskCompressor",
28
+ "BitmaskTensor",
29
+ "bitmask_compress",
30
+ "bitmask_decompress",
31
+ ]
32
+
33
+
34
@BaseCompressor.register(name=CompressionFormat.sparse_bitmask.value)
class BitmaskCompressor(BaseSparseCompressor):
    """
    Bitmask-based compressor for unstructured sparse models. Non-zero
    values are flattened into a 1d values tensor; their positions live in a
    packed 2d bitmask, with per-row offsets into the values tensor.
    """

    COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]

    def compress_weight(self, name, value):
        # Compress the dense weight, then serialize it as a dict of tensors
        # keyed off the original parameter name.
        return BitmaskTensor.from_dense(value).dict(name_prefix=name, device="cpu")

    def decompress_weight(self, weight_data):
        # Rebuild the compressed wrapper and expand it back to dense.
        return BitmaskTensor(**weight_data).decompress()
52
+
53
+
54
class BitmaskTensor:
    """
    Owns compression and decompression for a single bitmask compressed tensor.
    Adapted from: https://github.com/mgoin/torch_bitmask/tree/main

    :param shape: shape of dense tensor
    :param compressed: flat tensor of non-zero values
    :param bitmask: 2d packed bitmask of non-zero values
    :param row_offsets: flat tensor giving the index in ``compressed`` at
        which each dense row starts
    """

    def __init__(
        self,
        shape: Union[torch.Size, List],
        compressed: Tensor,
        bitmask: Tensor,
        row_offsets: Tensor,
    ):
        # Shape is kept as a plain list so it serializes/compares cleanly.
        self.shape = list(shape)
        self.compressed = compressed
        self.bitmask = bitmask
        self.row_offsets = row_offsets

    @staticmethod
    def from_dense(tensor: Tensor) -> "BitmaskTensor":
        """
        :param tensor: dense tensor to compress
        :return: instantiated compressed tensor
        """
        # Compression always happens on CPU.
        values, mask, offsets = bitmask_compress(tensor.cpu())
        return BitmaskTensor(
            shape=tensor.shape, compressed=values, bitmask=mask, row_offsets=offsets
        )

    def decompress(self) -> Tensor:
        """
        :return: reconstructed dense tensor
        """
        return bitmask_decompress(self.compressed, self.bitmask, self.shape)

    def curr_memory_size_bytes(self):
        """
        :return: size in bytes required to store compressed tensor on disk
        """
        return sum(
            t.element_size() * t.nelement()
            for t in (self.compressed, self.bitmask, self.row_offsets)
        )

    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
        """
        :param name_prefix: name of original tensor to store compressed weight as
        :param device: device to move the stored tensors to
        :return: dict of compressed data for the stored weight
        """
        return {
            merge_names(name_prefix, "shape"): torch.tensor(self.shape, device=device),
            merge_names(name_prefix, "compressed"): self.compressed.to(device),
            merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
            merge_names(name_prefix, "row_offsets"): self.row_offsets.to(device),
        }

    def __repr__(self):
        return f"BitmaskTensor(shape={self.shape}, compressed=True)"
123
+
124
+
125
def bitmask_compress(tensor: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Compresses a dense tensor using bitmask compression.

    :param tensor: dense tensor to compress
    :return: tuple of (1d tensor of non-zero values, packed bitmask,
        per-row offsets into the values tensor)
    """
    bytemasks = tensor != 0
    # row_offsets[i] == number of non-zero elements strictly before row i
    nnz_per_row = bytemasks.sum(dim=-1)
    row_offsets = torch.cumsum(nnz_per_row, 0) - nnz_per_row
    if tensor.dtype == FP8_DTYPE:
        # access raw bytes of the tensor through an int8 view so the fp8
        # elements can be gathered with boolean indexing
        values = tensor.view(torch.int8)[bytemasks].view(FP8_DTYPE)
    else:
        values = tensor[bytemasks]
    return values, pack_bitmasks(bytemasks), row_offsets
144
+
145
+
146
def bitmask_decompress(
    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
) -> Tensor:
    """
    Reconstructs a dense tensor from a compressed one.

    :param values: 1d tensor of non-zero values
    :param bitmasks: 2d int8 tensor flagging locations of non-zero values in
        the tensor's original shape
    :param original_shape: shape of the dense tensor
    :return: decompressed dense tensor
    """
    keep_mask = unpack_bitmasks(bitmasks, original_shape)

    # Start from all zeros and scatter the stored values back in place.
    dense = torch.zeros(original_shape, dtype=values.dtype)
    dense[keep_mask] = values
    return dense
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # flake8: noqa
15
+
16
+ from .marlin_24 import Marlin24Compressor
.venv/lib/python3.11/site-packages/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from typing import Dict, Generator, Tuple
17
+
18
+ import numpy as np
19
+ import torch
20
+ from compressed_tensors.compressors.base import BaseCompressor
21
+ from compressed_tensors.config import CompressionFormat
22
+ from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
23
+ from compressed_tensors.quantization.lifecycle.forward import quantize
24
+ from compressed_tensors.utils import (
25
+ get_permutations_24,
26
+ is_quantization_param,
27
+ merge_names,
28
+ sparse_semi_structured_from_dense_cutlass,
29
+ tensor_follows_mask_structure,
30
+ )
31
+ from torch import Tensor
32
+ from tqdm import tqdm
33
+
34
+
35
+ _LOGGER: logging.Logger = logging.getLogger(__name__)
36
+
37
+
38
@BaseCompressor.register(name=CompressionFormat.marlin_24.value)
class Marlin24Compressor(BaseCompressor):
    """
    Compresses a quantized model with 2:4 sparsity structure for inference with the
    Marlin24 kernel. Decompression is not implemented for this compressor.
    """

    COMPRESSION_PARAM_NAMES = ["weight_packed", "scale_packed", "meta"]

    # NOTE: the spelling "compatability" is kept as-is for API compatibility.
    @staticmethod
    def validate_quant_compatability(
        model_quant_args: Dict[str, QuantizationArgs]
    ) -> bool:
        """
        Checks if every quantized module in the model is compatible with Marlin24
        compression. Quantization must be channel or group strategy with group_size
        of 128. Only symmetric quantization is supported

        :param model_quant_args: dictionary of mapping module names to their
            quantization configuration
        :return: True if all modules are compatible with Marlin24 compression, raises
            a ValueError otherwise
        """
        for name, quant_args in model_quant_args.items():
            strategy = quant_args.strategy
            group_size = quant_args.group_size
            symmetric = quant_args.symmetric
            if (
                strategy is not QuantizationStrategy.GROUP.value
                and strategy is not QuantizationStrategy.CHANNEL.value
            ):
                raise ValueError(
                    f"Marlin24 Compressor is only valid for group and channel "
                    f"quantization strategies, got {strategy} in {name}"
                )

            if group_size is not None and group_size != 128:
                raise ValueError(
                    f"Marlin24 Compressor is only valid for group size 128, "
                    f"got {group_size} in {name}"
                )

            if not symmetric:
                # fixed typo in user-facing message: "quantzation" -> "quantization"
                raise ValueError(
                    f"Marlin24 Compressor is only valid for symmetric quantization, "
                    f"got symmetric={symmetric} in {name}"
                )

        return True

    @staticmethod
    def validate_sparsity_structure(name: str, weight: Tensor) -> bool:
        """
        Checks if a tensor fits the required 2:4 sparsity structure

        :param name: name of the tensor to check
        :param weight: tensor to check for sparsity structure
        :return: True if all rows match the 2:4 sparsity structure, raises
            ValueError otherwise
        """

        if not tensor_follows_mask_structure(weight):
            raise ValueError(
                "Marlin24 Compressor is only compatible with weights that have "
                f"a 2:4 sparsity structure. Found segments in {name} "
                "that do not match the expected structure."
            )

        return True

    def compress(
        self,
        model_state: Dict[str, Tensor],
        names_to_scheme: Dict[str, QuantizationArgs],
        **kwargs,
    ) -> Dict[str, Tensor]:
        """
        Compresses a quantized state_dict with 2:4 sparsity structure for inference
        with the Marlin24 kernel

        :param model_state: state dict of uncompressed model
        :param names_to_scheme: quantization args for each quantized weight, needed for
            quantize function to calculate bit depth
        :return: compressed state dict
        """
        self.validate_quant_compatability(names_to_scheme)

        compressed_dict = {}
        weight_suffix = ".weight"
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )

        for name, value in tqdm(model_state.items(), desc="Compressing model"):
            if name.endswith(weight_suffix):
                prefix = name[: -(len(weight_suffix))]
                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
                if scale is not None:  # weight is quantized, compress it

                    # Marlin24 kernel requires float16 inputs
                    scale = scale.to(torch.float16)
                    value = value.to(torch.float16)

                    # quantize weight, keeping it as a float16 for now
                    quant_args = names_to_scheme[prefix]
                    value = quantize(
                        x=value, scale=scale, zero_point=zp, args=quant_args
                    )

                    # compress based on sparsity structure
                    self.validate_sparsity_structure(prefix, value)
                    value, meta = compress_weight_24(value)
                    meta = meta.cpu()

                    # Marlin24 kernel expects input dim first
                    value = value.t().contiguous().cpu()
                    scale = scale.t().contiguous().cpu()
                    og_weight_shape = value.shape

                    # Marlin24 kernel expects unsigned values, shift zero-point
                    value += (1 << quant_args.num_bits) // 2

                    # pack quantized weight and scale
                    value = pack_weight_24(value, quant_args)
                    packed_scale = pack_scales_24(scale, quant_args, og_weight_shape)
                    meta = meta.resize_(meta.shape[1] // 2, meta.shape[0] * 2)

                    # save compressed values
                    compressed_dict[merge_names(prefix, "scale_packed")] = packed_scale
                    compressed_dict[merge_names(prefix, "weight_packed")] = value
                    compressed_dict[merge_names(prefix, "meta")] = meta
                    continue

            if not is_quantization_param(name):
                # export unquantized parameters without modifying
                compressed_dict[name] = value.to("cpu")

        return compressed_dict

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        raise NotImplementedError(
            "Decompression is not implemented for the Marlin24 Compressor."
        )
184
+
185
+
186
def compress_weight_24(weight: Tensor):
    """
    Compress a dense 2:4-sparse weight into cutlass semi-structured format.

    :param weight: dense weight tensor following the 2:4 mask structure
    :return: tuple of (compressed weight values, metadata tensor)
    """
    compressed, meta = sparse_semi_structured_from_dense_cutlass(weight.contiguous())
    return compressed.contiguous(), meta
191
+
192
+
193
def marlin_permute_weights(q_w, size_k, size_n, perm, tile):
    """
    Rearrange a quantized weight matrix into marlin's tiled layout.

    :param q_w: weight tensor of shape (size_k, size_n)
    :param size_k: input dimension; must be divisible by ``tile``
    :param size_n: output dimension; must be divisible by ``tile``
    :param perm: 1d permutation index tensor applied within row chunks
    :param tile: tile edge length (16 for marlin)
    :return: permuted tensor of shape (size_k // tile, size_n * tile)
    """
    assert q_w.shape == (size_k, size_n)
    assert size_k % tile == 0, f"size_k = {size_k}, tile = {tile}"
    assert size_n % tile == 0, f"size_k = {size_n}, tile = {tile}"

    # Permute weights to 16x64 marlin tiles: split into (tile x tile)
    # blocks and lay each block's rows out contiguously per tile row.
    tiled = q_w.reshape((size_k // tile, tile, size_n // tile, tile))
    tiled = tiled.permute((0, 2, 1, 3))
    tiled = tiled.reshape((size_k // tile, size_n * tile))

    # Apply the marlin permutation within fixed-size chunks of each row.
    return tiled.reshape((-1, perm.numel()))[:, perm].reshape(tiled.shape)
206
+
207
+
208
def pack_weight_24(
    weight: Tensor,
    quantization_args: QuantizationArgs,
    tile: int = 16,
):
    """
    Pack an unsigned-quantized weight into marlin24's int32 layout.

    :param weight: quantized (unsigned) weight of shape (size_k, size_n)
    :param quantization_args: args carrying the quantization bit width
    :param tile: marlin tile size
    :return: int32 tensor of packed weights
    """
    size_k, size_n = weight.shape
    num_bits = quantization_args.num_bits
    # number of quantized values that fit into one 32-bit word
    pack_factor = 32 // num_bits

    # Reshuffle to marlin_24 format
    perm, _, _ = get_permutations_24(num_bits)
    permuted = marlin_permute_weights(weight, size_k, size_n, perm, tile)

    permuted = permuted.cpu().numpy().astype(np.uint32)

    # Interleave pack_factor strided values into each 32-bit word.
    packed = np.zeros((permuted.shape[0], permuted.shape[1] // pack_factor), dtype=np.uint32)
    for shift in range(pack_factor):
        packed |= permuted[:, shift::pack_factor] << num_bits * shift

    return torch.from_numpy(packed.astype(np.int32))
231
+
232
+
233
def pack_scales_24(scales, quantization_args, w_shape):
    """
    Permute and reshape quantization scales into marlin24's expected layout.

    :param scales: scale tensor, input-dim first
    :param quantization_args: args carrying strategy / group size / bit width
    :param w_shape: shape of the (transposed) weight the scales belong to
    :return: contiguous tensor of permuted scales with size_n columns
    """
    size_k = w_shape[0]
    size_n = w_shape[1]
    num_bits = quantization_args.num_bits

    _, scale_perm_2_4, scale_perm_single_2_4 = get_permutations_24(num_bits)

    is_grouped = (
        quantization_args.strategy == QuantizationStrategy.GROUP
        and quantization_args.group_size < size_k
    )
    if is_grouped:
        scales = scales.reshape((-1, len(scale_perm_2_4)))[:, scale_perm_2_4]
    else:  # channelwise
        scales = scales.reshape((-1, len(scale_perm_single_2_4)))[
            :, scale_perm_single_2_4
        ]
    return scales.reshape((-1, size_n)).contiguous()
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__init__.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # flake8: noqa
16
+ # isort: skip_file
17
+
18
+ from .quant_args import *
19
+ from .quant_config import *
20
+ from .quant_scheme import *
21
+ from .lifecycle import *
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (340 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_args.cpython-311.pyc ADDED
Binary file (10.6 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_config.cpython-311.pyc ADDED
Binary file (11.2 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/__pycache__/quant_scheme.cpython-311.pyc ADDED
Binary file (5.46 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__init__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # flake8: noqa
16
+ # isort: skip_file
17
+
18
+ from .forward import *
19
+ from .initialize import *
20
+ from .compressed import *
21
+ from .apply import *
22
+ from .helpers import *
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (368 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/apply.cpython-311.pyc ADDED
Binary file (17.9 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/compressed.cpython-311.pyc ADDED
Binary file (1.92 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/forward.cpython-311.pyc ADDED
Binary file (13.3 kB). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/helpers.cpython-311.pyc ADDED
Binary file (764 Bytes). View file
 
.venv/lib/python3.11/site-packages/compressed_tensors/quantization/lifecycle/__pycache__/initialize.cpython-311.pyc ADDED
Binary file (7.93 kB). View file