danieldk HF Staff committed on
Commit
a8d29bb
·
1 Parent(s): 3631a93

Remove builds incompatible with kernels >= 0.14

Browse files
build/torch29-metal-aarch64-darwin/__init__.py DELETED
@@ -1,165 +0,0 @@
1
- from typing import Optional, Tuple
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
- # Quant type constants (match bitsandbytes DataType_t)
8
- FP4 = 1
9
- NF4 = 2
10
-
11
-
12
def quantize_4bit(
    input: torch.Tensor,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Quantize a tensor to 4 bits, block by block, with the FP4 or NF4 codebook.

    This is a thin wrapper that forwards its arguments unchanged to the
    ``bnb_quantize_4bit`` op.

    Args:
        input: Tensor to quantize, on the MPS device (float16, bfloat16,
            or float32).
        blocksize: Number of elements in each quantization block
            (64 or 128).
        quant_type: Codebook selector, ``FP4`` (1) or ``NF4`` (2).

    Returns:
        A ``(packed, absmax)`` tuple, where ``packed`` is a uint8 tensor of
        packed 4-bit values with ``numel / 2`` entries and ``absmax`` is a
        float32 tensor holding the maximum absolute value of each block.
    """
    quantized = ops.bnb_quantize_4bit(input, blocksize, quant_type)
    return quantized
30
-
31
-
32
def dequantize_4bit(
    packed: torch.Tensor,
    absmax: torch.Tensor,
    blocksize: int = 64,
    quant_type: int = NF4,
    numel: int = -1,
    output_dtype: torch.dtype = torch.float16,
) -> torch.Tensor:
    """Expand blockwise 4-bit data back to a dense tensor (FP4 or NF4 codebook).

    Args:
        packed: uint8 tensor of packed 4-bit values.
        absmax: float32 tensor with the maximum absolute value of each block.
        blocksize: Number of elements in each quantization block
            (64 or 128).
        quant_type: Codebook selector, ``FP4`` (1) or ``NF4`` (2).
        numel: Element count of the original tensor. A negative value
            (the default is -1) means "infer it", in which case
            ``packed.numel() * 2`` is used.
        output_dtype: Scalar type of the returned tensor.

    Returns:
        The dequantized tensor.
    """
    # Each packed byte carries two 4-bit values, hence the factor of two
    # when the caller did not supply an explicit element count.
    element_count = numel if numel >= 0 else packed.numel() * 2
    dequantized = ops.bnb_dequantize_4bit(
        packed,
        absmax,
        blocksize,
        quant_type,
        element_count,
        output_dtype,
    )
    return dequantized
59
-
60
-
61
def gemv_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> torch.Tensor:
    """Multiply a vector by a 4-bit quantized weight matrix in one fused op.

    Evaluates ``y = dequant(W) @ x`` with ``W`` stored in blockwise
    NF4/FP4 form.

    Args:
        x: Input vector of shape ``[..., K]`` on the MPS device.
        w: Packed uint8 weight matrix of shape ``[N, K/2]`` on the MPS
            device.
        absmax: float32 per-block scales of shape
            ``[N, ceil(K/blocksize)]``.
        output_features: Number of output features (``N``).
        blocksize: Quantization block size (64 or 128).
        quant_type: Codebook selector, ``FP4`` (1) or ``NF4`` (2).

    Returns:
        Output tensor of shape ``[..., N]``.
    """
    # The underlying op takes ``output_features`` last, unlike this wrapper.
    product = ops.bnb_gemv_4bit(
        x, w, absmax, blocksize, quant_type, output_features
    )
    return product
85
-
86
-
87
def gemm_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
) -> torch.Tensor:
    """Multiply a matrix by transposed 4-bit quantized weights in one fused op.

    Evaluates ``Y = X @ dequant(W).T`` with ``W`` stored in blockwise
    NF4/FP4 form.

    Args:
        x: Input matrix of shape ``[..., M, K]`` on the MPS device.
        w: Packed uint8 weight matrix of shape ``[N, K/2]`` on the MPS
            device.
        absmax: float32 per-block scales of shape
            ``[N, ceil(K/blocksize)]``.
        output_features: Number of output features (``N``).
        blocksize: Quantization block size (64 or 128).
        quant_type: Codebook selector, ``FP4`` (1) or ``NF4`` (2).

    Returns:
        Output tensor of shape ``[..., M, N]``.
    """
    # The underlying op takes ``output_features`` last, unlike this wrapper.
    product = ops.bnb_gemm_4bit(
        x, w, absmax, blocksize, quant_type, output_features
    )
    return product
111
-
112
-
113
def linear_4bit(
    x: torch.Tensor,
    w: torch.Tensor,
    absmax: torch.Tensor,
    output_features: int,
    blocksize: int = 64,
    quant_type: int = NF4,
    bias: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Apply a 4-bit quantized linear layer, choosing GEMV or GEMM by input shape.

    A 1-D input, or an input whose second-to-last dimension has size 1, is
    routed through ``gemv_4bit``; every other input goes through
    ``gemm_4bit``.

    Args:
        x: Input tensor on the MPS device.
        w: Packed uint8 weight of shape ``[N, K/2]``.
        absmax: float32 scales of shape ``[N, ceil(K/blocksize)]``.
        output_features: ``N``.
        blocksize: 64 or 128.
        quant_type: Codebook selector, ``FP4`` (1) or ``NF4`` (2).
        bias: Optional bias of shape ``[N]``; added to the result when
            given.

    Returns:
        Output tensor.
    """
    rank = x.dim()

    if rank == 1:
        # Plain vector: run GEMV, then drop dimension 0 if it has size 1.
        vec = x.view(x.size(-1))
        y = gemv_4bit(vec, w, absmax, output_features, blocksize, quant_type)
        y = y.squeeze(0)
    elif rank >= 2 and x.size(-2) == 1:
        # Single-row matrix: remove the row axis for GEMV and restore it
        # afterwards so the output keeps the caller's rank.
        vec = x.squeeze(-2)
        y = gemv_4bit(vec, w, absmax, output_features, blocksize, quant_type)
        y = y.unsqueeze(-2)
    else:
        y = gemm_4bit(x, w, absmax, output_features, blocksize, quant_type)

    if bias is None:
        return y
    return y + bias
158
-
159
- __all__ = [
160
- "quantize_4bit",
161
- "dequantize_4bit",
162
- "gemv_4bit",
163
- "gemm_4bit",
164
- "linear_4bit",
165
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-metal-aarch64-darwin/_bitsandbytes_mps_metal_42e0dd1.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1376c17866d5c32339a331a725fbff002041d94d825ccf97795420788cc74f4
3
- size 844504
 
 
 
 
build/torch29-metal-aarch64-darwin/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _bitsandbytes_mps_metal_42e0dd1
3
- ops = torch.ops._bitsandbytes_mps_metal_42e0dd1
4
-
5
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace.

    The result has the form ``<namespace>::<op_name>``.
    """
    qualified_name = f"_bitsandbytes_mps_metal_42e0dd1::{op_name}"
    return qualified_name
 
 
 
 
 
 
 
 
 
 
build/torch29-metal-aarch64-darwin/bitsandbytes_mps/__init__.py DELETED
@@ -1,26 +0,0 @@
1
- import ctypes
2
- import sys
3
-
4
- import importlib
5
- from pathlib import Path
6
- from types import ModuleType
7
-
8
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python source file at ``file_path`` as a uniquely named module.

    The module is registered in ``sys.modules`` under a name derived from
    the hash of the absolute path, executed, and returned.

    Args:
        file_path: Path of the Python file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``file_path``.
        Exception: Whatever the module's own top-level code raises; in that
            case the partially initialised module is removed from
            ``sys.modules`` before the exception propagates.
    """
    # ``import importlib`` alone does not guarantee that the ``util``
    # submodule is bound as an attribute, so import it explicitly.
    import importlib.util

    # We cannot use the module name as-is, after adding it to `sys.modules`,
    # it would also be used for other imports. So, we make a module name that
    # depends on the path for it to be unique using the hex-encoded hash of
    # the path.
    path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
    module_name = path_hash
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    module = importlib.util.module_from_spec(spec)
    # Register before executing so imports that resolve through
    # ``sys.modules`` during execution can find the module.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind on failure.
        sys.modules.pop(module_name, None)
        raise
    return module
24
-
25
-
26
- globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch29-metal-aarch64-darwin/metadata.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "version": 1,
3
- "python-depends": []
4
- }