{ "pipeline": "AWQ_4bit_then_SmoothQuant", "original_model": "luca-deandrea/MNLP_M3_mcqa_model", "timestamp": "2025-06-10 22:57:37", "processing_time_seconds": 143.2722406387329, "awq_config": { "q_group_size": 32, "w_bit": 4, "zero_point": true }, "smoothquant_optimizations": [ "mixed_precision", "cache_enabled", "deterministic_sampling" ], "activation_layers_smoothed": 0, "model_sizes": { "awq_only_mb": 554.3882436752319, "awq_smoothquant_mb": 554.3902568817139 }, "validation": { "success_rate": 0.0, "results": [ { "test": "Calculate 2+2=", "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", "success": false }, { "test": "What is the derivative of x\u00b2?", "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", "success": false }, { "test": "Solve: 2x + 3 = 7", "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", "success": false }, { "test": "What is F=ma?", "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", "success": false }, { "test": "Balance: H\u2082 + O\u2082 \u2192 ?", "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!", "success": false } ] }, "calibration_info": { "samples_used": 105, "source": "stem_calibration_data.json" }, "lighteval_compatible": true, "optimized_for": "STEM_reasoning_tasks", "usage": { "loading": "AutoAWQForCausalLM.from_quantized('awq_smoothquant_combined/awq_smoothquant_combined', fuse_layers=True)", "library": "awq" }, "expected_improvements": [ "AWQ: ~4x compression with good accuracy retention", "SmoothQuant: +1-3% better activation stability", "Combined: Better STEM reasoning than AWQ alone" ] }