Spaces:
Sleeping
Sleeping
Commit
·
a49281c
1
Parent(s):
011c926
Fix BaseQuantizationConfig import: add fallback approaches
Browse files
- Try BaseQuantizationConfig import first
- Fallback to dict-based config_groups if import fails
- Fallback to default AWQModifier() if config creation fails
- Handles different compressed_tensors API versions
- quantize_to_awq_colab.ipynb +41 -17
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -254,26 +254,50 @@
|
|
| 254 |
" print(f\" β Starting quantization with LLM Compressor...\")\n",
|
| 255 |
" print(f\" β This may take 30-60 minutes depending on model size...\")\n",
|
| 256 |
" \n",
|
| 257 |
-
" # AWQModifier
|
| 258 |
-
" #
|
| 259 |
-
" from compressed_tensors.quantization import QuantizationConfig, BaseQuantizationConfig\n",
|
| 260 |
-
" \n",
|
| 261 |
" print(f\" β Creating quantization config for 4-bit AWQ...\")\n",
|
| 262 |
-
"
|
| 263 |
-
"
|
| 264 |
-
"
|
| 265 |
-
"
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"
|
| 269 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
" )\n",
|
| 271 |
-
"
|
| 272 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
" \n",
|
| 274 |
-
"
|
| 275 |
-
"
|
| 276 |
-
"
|
|
|
|
| 277 |
" \n",
|
| 278 |
" # Call oneshot with the modifier\n",
|
| 279 |
" print(f\" β Starting quantization process...\")\n",
|
|
|
|
| 254 |
" print(f\" β Starting quantization with LLM Compressor...\")\n",
|
| 255 |
" print(f\" β This may take 30-60 minutes depending on model size...\")\n",
|
| 256 |
" \n",
|
| 257 |
+
" # AWQModifier quantization config\n",
|
| 258 |
+
" # Try multiple approaches to create the config\n",
|
|
|
|
|
|
|
| 259 |
" print(f\" β Creating quantization config for 4-bit AWQ...\")\n",
|
| 260 |
+
" \n",
|
| 261 |
+
" try:\n",
|
| 262 |
+
" # Approach 1: Try importing BaseQuantizationConfig\n",
|
| 263 |
+
" from compressed_tensors.quantization import QuantizationConfig, BaseQuantizationConfig\n",
|
| 264 |
+
" quant_config = QuantizationConfig(\n",
|
| 265 |
+
" config_groups={\n",
|
| 266 |
+
" \"default\": BaseQuantizationConfig(\n",
|
| 267 |
+
" num_bits=4,\n",
|
| 268 |
+
" group_size=128,\n",
|
| 269 |
+
" zero_point=True\n",
|
| 270 |
+
" )\n",
|
| 271 |
+
" }\n",
|
| 272 |
+
" )\n",
|
| 273 |
+
"    print(f\"   ✅ Created QuantizationConfig with BaseQuantizationConfig\")\n",
|
| 274 |
+
" except ImportError:\n",
|
| 275 |
+
" try:\n",
|
| 276 |
+
" # Approach 2: Try using QuantizationConfig with dict directly\n",
|
| 277 |
+
" from compressed_tensors.quantization import QuantizationConfig\n",
|
| 278 |
+
" quant_config = QuantizationConfig(\n",
|
| 279 |
+
" config_groups={\n",
|
| 280 |
+
" \"default\": {\n",
|
| 281 |
+
" \"num_bits\": 4,\n",
|
| 282 |
+
" \"group_size\": 128,\n",
|
| 283 |
+
" \"zero_point\": True\n",
|
| 284 |
+
" }\n",
|
| 285 |
+
" }\n",
|
| 286 |
" )\n",
|
| 287 |
+
"    print(f\"   ✅ Created QuantizationConfig with dict config\")\n",
|
| 288 |
+
" except Exception as e:\n",
|
| 289 |
+
" # Approach 3: Try AWQModifier without quantization_config (use defaults)\n",
|
| 290 |
+
"    print(f\"   ⚠️ Could not create QuantizationConfig: {e}\")\n",
|
| 291 |
+
" print(f\" β Trying AWQModifier with default settings...\")\n",
|
| 292 |
+
" modifiers = [AWQModifier()]\n",
|
| 293 |
+
"    print(f\"   ✅ AWQModifier created with default settings\")\n",
|
| 294 |
+
" # Skip quantization_config creation and use default AWQModifier\n",
|
| 295 |
+
" quant_config = None\n",
|
| 296 |
" \n",
|
| 297 |
+
" if quant_config is not None:\n",
|
| 298 |
+
" print(f\" β Creating AWQModifier with quantization config...\")\n",
|
| 299 |
+
" modifiers = [AWQModifier(quantization_config=quant_config)]\n",
|
| 300 |
+
"    print(f\"   ✅ AWQModifier created successfully\")\n",
|
| 301 |
" \n",
|
| 302 |
" # Call oneshot with the modifier\n",
|
| 303 |
" print(f\" β Starting quantization process...\")\n",
|