| { | |
| "pipeline": "AWQ_4bit_then_SmoothQuant", | |
| "original_model": "luca-deandrea/MNLP_M3_mcqa_model", | |
| "timestamp": "2025-06-10 22:57:37", | |
| "processing_time_seconds": 143.2722406387329, | |
| "awq_config": { | |
| "q_group_size": 32, | |
| "w_bit": 4, | |
| "zero_point": true | |
| }, | |
| "smoothquant_optimizations": [ | |
| "mixed_precision", | |
| "cache_enabled", | |
| "deterministic_sampling" | |
| ], | |
| "activation_layers_smoothed": 0, | |
| "model_sizes": { | |
| "awq_only_mb": 554.3882436752319, | |
| "awq_smoothquant_mb": 554.3902568817139 | |
| }, | |
| "validation": { | |
| "success_rate": 0.0, | |
| "results": [ | |
| { | |
| "test": "Calculate 2+2=", | |
| "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", | |
| "success": false | |
| }, | |
| { | |
| "test": "What is the derivative of x\u00b2?", | |
| "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", | |
| "success": false | |
| }, | |
| { | |
| "test": "Solve: 2x + 3 = 7", | |
| "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", | |
| "success": false | |
| }, | |
| { | |
| "test": "What is F=ma?", | |
| "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", | |
| "success": false | |
| }, | |
| { | |
| "test": "Balance: H\u2082 + O\u2082 \u2192 ?", | |
| "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", | |
| "success": false | |
| } | |
| ] | |
| }, | |
| "calibration_info": { | |
| "samples_used": 105, | |
| "source": "stem_calibration_data.json" | |
| }, | |
| "lighteval_compatible": true, | |
| "optimized_for": "STEM_reasoning_tasks", | |
| "usage": { | |
| "loading": "AutoAWQForCausalLM.from_quantized('awq_smoothquant_combined/awq_smoothquant_combined', fuse_layers=True)", | |
| "library": "awq" | |
| }, | |
| "expected_improvements": [ | |
| "AWQ: ~4x compression with good accuracy retention", | |
| "SmoothQuant: +1-3% better activation stability", | |
| "Combined: Better STEM reasoning than AWQ alone" | |
| ] | |
| } |