{
  "quantization_method": "awq_4bit",
  "linear_layers": "0/115 (4-bit AWQ)",
  "embedding_layers": "0/0 (not quantized by AWQ)",
  "total_quantized": "0/115",
  "original_model": "luca-deandrea/MNLP_M3_mcqa_model",
  "calibration_data_path": "smoothquant/smoothquant/calibration_prompts.json",
  "calibration_samples": 100,
  "quantization_timestamp": "2025-06-10 20:48:02",
  "pytorch_version": "2.6.0+cu118",
  "estimated_compression_ratio": "4.0x",
  "estimated_size_mb": 284.08837890625,
  "original_size_mb": 1136.353515625,
  "formats_included": [
    "awq_safetensors"
  ],
  "lighteval_compatible": true,
  "awq_config": {
    "zero_point": true,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM"
  },
  "notes": "4-bit AWQ quantization with activation-aware weight optimization"
}