Spaces:
Sleeping
Sleeping
Commit
·
3f08592
1
Parent(s):
f3114ba
Fix quantization_config structure: use correct AWQ format
Browse files
- Use dict-based config with config_groups.group_0 structure
- Include targets, weights (num_bits, group_size, zero_point), etc.
- Remove fallback approaches - use correct structure directly
- Fixes ValidationError for AWQModifier
- quantize_to_awq_colab.ipynb +30 -47
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -255,57 +255,40 @@
|
|
| 255 |
" print(f\" → This may take 30-60 minutes depending on model size...\")\n",
|
| 256 |
" \n",
|
| 257 |
" # AWQModifier quantization config\n",
|
| 258 |
-
" #
|
| 259 |
" print(f\" → Creating quantization config for 4-bit AWQ...\")\n",
|
| 260 |
" \n",
|
| 261 |
-
"
|
| 262 |
-
"
|
| 263 |
-
" \n",
|
| 264 |
-
"
|
| 265 |
-
"
|
| 266 |
-
"
|
| 267 |
-
"
|
| 268 |
-
"
|
| 269 |
-
"
|
| 270 |
-
"
|
| 271 |
-
"
|
| 272 |
-
"
|
| 273 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
" }\n",
|
| 275 |
-
"
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
-
"
|
| 279 |
-
"
|
| 280 |
-
"
|
| 281 |
-
" quant_config = QuantizationConfig(\n",
|
| 282 |
-
" config_groups={\n",
|
| 283 |
-
" \"default\": {\n",
|
| 284 |
-
" \"num_bits\": 4,\n",
|
| 285 |
-
" \"group_size\": 128,\n",
|
| 286 |
-
" \"zero_point\": True\n",
|
| 287 |
-
" }\n",
|
| 288 |
-
" }\n",
|
| 289 |
-
" )\n",
|
| 290 |
-
" print(f\" ✅ Created QuantizationConfig with dict config\")\n",
|
| 291 |
-
" except Exception as e:\n",
|
| 292 |
-
" # Approach 3: Try AWQModifier without quantization_config (use defaults)\n",
|
| 293 |
-
" print(f\" ⚠️ Could not create QuantizationConfig: {e}\")\n",
|
| 294 |
-
" print(f\" → Using AWQModifier with default settings...\")\n",
|
| 295 |
-
" modifiers = [AWQModifier()]\n",
|
| 296 |
-
" print(f\" ✅ AWQModifier created with default settings\")\n",
|
| 297 |
" \n",
|
| 298 |
-
"
|
| 299 |
-
"
|
| 300 |
-
"
|
| 301 |
-
"
|
| 302 |
-
" modifiers = [AWQModifier(quantization_config=quant_config)]\n",
|
| 303 |
-
" print(f\" ✅ AWQModifier created successfully\")\n",
|
| 304 |
-
" else:\n",
|
| 305 |
-
" # Final fallback: use default AWQModifier\n",
|
| 306 |
-
" print(f\" → Using AWQModifier with default settings (no config)...\")\n",
|
| 307 |
-
" modifiers = [AWQModifier()]\n",
|
| 308 |
-
" print(f\" ✅ AWQModifier created with default settings\")\n",
|
| 309 |
" \n",
|
| 310 |
" # Call oneshot with the modifier\n",
|
| 311 |
" print(f\" → Starting quantization process...\")\n",
|
|
|
|
| 255 |
" print(f\" → This may take 30-60 minutes depending on model size...\")\n",
|
| 256 |
" \n",
|
| 257 |
" # AWQModifier quantization config\n",
|
| 258 |
+
" # Create quantization config with correct structure for AWQ\n",
|
| 259 |
" print(f\" → Creating quantization config for 4-bit AWQ...\")\n",
|
| 260 |
" \n",
|
| 261 |
+
" # AWQModifier requires quantization_config with proper structure:\n",
|
| 262 |
+
" # - config_groups: dict mapping group names to quantization schemes\n",
|
| 263 |
+
" # - Each group needs: targets (list of module types), weights (dict with num_bits, etc.)\n",
|
| 264 |
+
" quant_config = {\n",
|
| 265 |
+
" \"config_groups\": {\n",
|
| 266 |
+
" \"group_0\": {\n",
|
| 267 |
+
" \"targets\": [\"Linear\"], # Target Linear layers\n",
|
| 268 |
+
" \"weights\": {\n",
|
| 269 |
+
" \"num_bits\": 4, # 4-bit quantization\n",
|
| 270 |
+
" \"group_size\": 128, # Group size for quantization\n",
|
| 271 |
+
" \"zero_point\": True, # Use zero-point quantization\n",
|
| 272 |
+
" \"symmetric\": False, # Asymmetric quantization\n",
|
| 273 |
+
" \"strategy\": \"group\", # Group-wise quantization\n",
|
| 274 |
+
" \"observer\": \"minmax\", # Min-max observer\n",
|
| 275 |
+
" \"type\": \"int\", # Integer quantization\n",
|
| 276 |
+
" \"dynamic\": False # Static quantization\n",
|
| 277 |
+
" },\n",
|
| 278 |
+
" \"input_activations\": None, # No activation quantization\n",
|
| 279 |
+
" \"output_activations\": None # No activation quantization\n",
|
| 280 |
" }\n",
|
| 281 |
+
" },\n",
|
| 282 |
+
" \"ignore\": [\"lm_head\"], # Ignore language model head\n",
|
| 283 |
+
" \"quant_method\": \"compressed-tensors\",\n",
|
| 284 |
+
" \"quantization_status\": \"compressed\",\n",
|
| 285 |
+
" \"format\": \"pack-quantized\"\n",
|
| 286 |
+
" }\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
" \n",
|
| 288 |
+
" print(f\" ✅ Created quantization config with correct structure\")\n",
|
| 289 |
+
" print(f\" → Creating AWQModifier with quantization config...\")\n",
|
| 290 |
+
" modifiers = [AWQModifier(quantization_config=quant_config)]\n",
|
| 291 |
+
" print(f\" ✅ AWQModifier created successfully\")\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
" \n",
|
| 293 |
" # Call oneshot with the modifier\n",
|
| 294 |
" print(f\" → Starting quantization process...\")\n",
|