MNLP_M3_quantized_model / quantization_info.json
Thomaschtl's picture
Upload quantization_info.json with huggingface_hub
3e77439 verified
{
"pipeline": "AWQ_4bit_then_SmoothQuant",
"original_model": "luca-deandrea/MNLP_M3_mcqa_model",
"timestamp": "2025-06-10 22:57:37",
"processing_time_seconds": 143.2722406387329,
"awq_config": {
"q_group_size": 32,
"w_bit": 4,
"zero_point": true
},
"smoothquant_optimizations": [
"mixed_precision",
"cache_enabled",
"deterministic_sampling"
],
"activation_layers_smoothed": 0,
"model_sizes": {
"awq_only_mb": 554.3882436752319,
"awq_smoothquant_mb": 554.3902568817139
},
"validation": {
"success_rate": 0.0,
"results": [
{
"test": "Calculate 2+2=",
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
"success": false
},
{
"test": "What is the derivative of x\u00b2?",
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
"success": false
},
{
"test": "Solve: 2x + 3 = 7",
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
"success": false
},
{
"test": "What is F=ma?",
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
"success": false
},
{
"test": "Balance: H\u2082 + O\u2082 \u2192 ?",
"error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
"success": false
}
]
},
"calibration_info": {
"samples_used": 105,
"source": "stem_calibration_data.json"
},
"lighteval_compatible": true,
"optimized_for": "STEM_reasoning_tasks",
"usage": {
"loading": "AutoAWQForCausalLM.from_quantized('awq_smoothquant_combined/awq_smoothquant_combined', fuse_layers=True)",
"library": "awq"
},
"expected_improvements": [
"AWQ: ~4x compression with good accuracy retention",
"SmoothQuant: +1-3% better activation stability",
"Combined: Better STEM reasoning than AWQ alone"
]
}