Alikestocode committed on
Commit
3f08592
·
1 Parent(s): f3114ba

Fix quantization_config structure: use correct AWQ format

Browse files

- Use dict-based config with config_groups.group_0 structure
- Include targets, weights (num_bits, group_size, zero_point), etc.
- Remove fallback approaches - use correct structure directly
- Fixes ValidationError for AWQModifier

Files changed (1) hide show
  1. quantize_to_awq_colab.ipynb +30 -47
quantize_to_awq_colab.ipynb CHANGED
@@ -255,57 +255,40 @@
255
  " print(f\" β†’ This may take 30-60 minutes depending on model size...\")\n",
256
  " \n",
257
  " # AWQModifier quantization config\n",
258
- " # Try multiple approaches to create the config\n",
259
  " print(f\" β†’ Creating quantization config for 4-bit AWQ...\")\n",
260
  " \n",
261
- " modifiers = None\n",
262
- " quant_config = None\n",
263
- " \n",
264
- " try:\n",
265
- " # Approach 1: Try importing BaseQuantizationConfig\n",
266
- " from compressed_tensors.quantization import QuantizationConfig, BaseQuantizationConfig\n",
267
- " quant_config = QuantizationConfig(\n",
268
- " config_groups={\n",
269
- " \"default\": BaseQuantizationConfig(\n",
270
- " num_bits=4,\n",
271
- " group_size=128,\n",
272
- " zero_point=True\n",
273
- " )\n",
 
 
 
 
 
 
274
  " }\n",
275
- " )\n",
276
- " print(f\" βœ… Created QuantizationConfig with BaseQuantizationConfig\")\n",
277
- " except ImportError:\n",
278
- " try:\n",
279
- " # Approach 2: Try using QuantizationConfig with dict directly\n",
280
- " from compressed_tensors.quantization import QuantizationConfig\n",
281
- " quant_config = QuantizationConfig(\n",
282
- " config_groups={\n",
283
- " \"default\": {\n",
284
- " \"num_bits\": 4,\n",
285
- " \"group_size\": 128,\n",
286
- " \"zero_point\": True\n",
287
- " }\n",
288
- " }\n",
289
- " )\n",
290
- " print(f\" βœ… Created QuantizationConfig with dict config\")\n",
291
- " except Exception as e:\n",
292
- " # Approach 3: Try AWQModifier without quantization_config (use defaults)\n",
293
- " print(f\" ⚠️ Could not create QuantizationConfig: {e}\")\n",
294
- " print(f\" β†’ Using AWQModifier with default settings...\")\n",
295
- " modifiers = [AWQModifier()]\n",
296
- " print(f\" βœ… AWQModifier created with default settings\")\n",
297
  " \n",
298
- " # Create modifiers if we have a quant_config\n",
299
- " if modifiers is None:\n",
300
- " if quant_config is not None:\n",
301
- " print(f\" β†’ Creating AWQModifier with quantization config...\")\n",
302
- " modifiers = [AWQModifier(quantization_config=quant_config)]\n",
303
- " print(f\" βœ… AWQModifier created successfully\")\n",
304
- " else:\n",
305
- " # Final fallback: use default AWQModifier\n",
306
- " print(f\" β†’ Using AWQModifier with default settings (no config)...\")\n",
307
- " modifiers = [AWQModifier()]\n",
308
- " print(f\" βœ… AWQModifier created with default settings\")\n",
309
  " \n",
310
  " # Call oneshot with the modifier\n",
311
  " print(f\" β†’ Starting quantization process...\")\n",
 
255
  " print(f\" β†’ This may take 30-60 minutes depending on model size...\")\n",
256
  " \n",
257
  " # AWQModifier quantization config\n",
258
+ " # Create quantization config with correct structure for AWQ\n",
259
  " print(f\" β†’ Creating quantization config for 4-bit AWQ...\")\n",
260
  " \n",
261
+ " # AWQModifier requires quantization_config with proper structure:\n",
262
+ " # - config_groups: dict mapping group names to quantization schemes\n",
263
+ " # - Each group needs: targets (list of module types), weights (dict with num_bits, etc.)\n",
264
+ " quant_config = {\n",
265
+ " \"config_groups\": {\n",
266
+ " \"group_0\": {\n",
267
+ " \"targets\": [\"Linear\"], # Target Linear layers\n",
268
+ " \"weights\": {\n",
269
+ " \"num_bits\": 4, # 4-bit quantization\n",
270
+ " \"group_size\": 128, # Group size for quantization\n",
271
+ " \"zero_point\": True, # Use zero-point quantization\n",
272
+ " \"symmetric\": False, # Asymmetric quantization\n",
273
+ " \"strategy\": \"group\", # Group-wise quantization\n",
274
+ " \"observer\": \"minmax\", # Min-max observer\n",
275
+ " \"type\": \"int\", # Integer quantization\n",
276
+ " \"dynamic\": False # Static quantization\n",
277
+ " },\n",
278
+ " \"input_activations\": None, # No activation quantization\n",
279
+ " \"output_activations\": None # No activation quantization\n",
280
  " }\n",
281
+ " },\n",
282
+ " \"ignore\": [\"lm_head\"], # Ignore language model head\n",
283
+ " \"quant_method\": \"compressed-tensors\",\n",
284
+ " \"quantization_status\": \"compressed\",\n",
285
+ " \"format\": \"pack-quantized\"\n",
286
+ " }\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  " \n",
288
+ " print(f\" βœ… Created quantization config with correct structure\")\n",
289
+ " print(f\" β†’ Creating AWQModifier with quantization config...\")\n",
290
+ " modifiers = [AWQModifier(quantization_config=quant_config)]\n",
291
+ " print(f\" βœ… AWQModifier created successfully\")\n",
 
 
 
 
 
 
 
292
  " \n",
293
  " # Call oneshot with the modifier\n",
294
  " print(f\" β†’ Starting quantization process...\")\n",