Upload quantization_info.json with huggingface_hub
Browse files- quantization_info.json +66 -0
quantization_info.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"pipeline": "AWQ_4bit_then_SmoothQuant",
|
| 3 |
+
"original_model": "luca-deandrea/MNLP_M3_mcqa_model",
|
| 4 |
+
"timestamp": "2025-06-10 22:57:37",
|
| 5 |
+
"processing_time_seconds": 143.2722406387329,
|
| 6 |
+
"awq_config": {
|
| 7 |
+
"q_group_size": 32,
|
| 8 |
+
"w_bit": 4,
|
| 9 |
+
"zero_point": true
|
| 10 |
+
},
|
| 11 |
+
"smoothquant_optimizations": [
|
| 12 |
+
"mixed_precision",
|
| 13 |
+
"cache_enabled",
|
| 14 |
+
"deterministic_sampling"
|
| 15 |
+
],
|
| 16 |
+
"activation_layers_smoothed": 0,
|
| 17 |
+
"model_sizes": {
|
| 18 |
+
"awq_only_mb": 554.3882436752319,
|
| 19 |
+
"awq_smoothquant_mb": 554.3902568817139
|
| 20 |
+
},
|
| 21 |
+
"validation": {
|
| 22 |
+
"success_rate": 0.0,
|
| 23 |
+
"results": [
|
| 24 |
+
{
|
| 25 |
+
"test": "Calculate 2+2=",
|
| 26 |
+
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
|
| 27 |
+
"success": false
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"test": "What is the derivative of x\u00b2?",
|
| 31 |
+
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
|
| 32 |
+
"success": false
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"test": "Solve: 2x + 3 = 7",
|
| 36 |
+
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
|
| 37 |
+
"success": false
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"test": "What is F=ma?",
|
| 41 |
+
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
|
| 42 |
+
"success": false
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"test": "Balance: H\u2082 + O\u2082 \u2192 ?",
|
| 46 |
+
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
|
| 47 |
+
"success": false
|
| 48 |
+
}
|
| 49 |
+
]
|
| 50 |
+
},
|
| 51 |
+
"calibration_info": {
|
| 52 |
+
"samples_used": 105,
|
| 53 |
+
"source": "stem_calibration_data.json"
|
| 54 |
+
},
|
| 55 |
+
"lighteval_compatible": true,
|
| 56 |
+
"optimized_for": "STEM_reasoning_tasks",
|
| 57 |
+
"usage": {
|
| 58 |
+
"loading": "AutoAWQForCausalLM.from_quantized('awq_smoothquant_combined/awq_smoothquant_combined', fuse_layers=True)",
|
| 59 |
+
"library": "awq"
|
| 60 |
+
},
|
| 61 |
+
"expected_improvements": [
|
| 62 |
+
"AWQ: ~4x compression with good accuracy retention",
|
| 63 |
+
"SmoothQuant: +1-3% better activation stability",
|
| 64 |
+
"Combined: Better STEM reasoning than AWQ alone"
|
| 65 |
+
]
|
| 66 |
+
}
|