Thomaschtl
/

MNLP_M3_quantized_model

+{
+  "pipeline": "AWQ_4bit_then_SmoothQuant",
+  "original_model": "luca-deandrea/MNLP_M3_mcqa_model",
+  "timestamp": "2025-06-10 22:57:37",
+  "processing_time_seconds": 143.2722406387329,
+  "awq_config": {
+    "q_group_size": 32,
+    "w_bit": 4,
+    "zero_point": true
+  },
+  "smoothquant_optimizations": [
+    "mixed_precision",
+    "cache_enabled",
+    "deterministic_sampling"
+  ],
+  "activation_layers_smoothed": 0,
+  "model_sizes": {
+    "awq_only_mb": 554.3882436752319,
+    "awq_smoothquant_mb": 554.3902568817139
+  },
+  "validation": {
+    "success_rate": 0.0,
+    "results": [
+      {
+        "test": "Calculate 2+2=",
+        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
+        "success": false
+      },
+      {
+        "test": "What is the derivative of x\u00b2?",
+        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
+        "success": false
+      },
+      {
+        "test": "Solve: 2x + 3 = 7",
+        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
+        "success": false
+      },
+      {
+        "test": "What is F=ma?",
+        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
+        "success": false
+      },
+      {
+        "test": "Balance: H\u2082 + O\u2082 \u2192 ?",
+        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
+        "success": false
+      }
+    ]
+  },
+  "calibration_info": {
+    "samples_used": 105,
+    "source": "stem_calibration_data.json"
+  },
+  "lighteval_compatible": true,
+  "optimized_for": "STEM_reasoning_tasks",
+  "usage": {
+    "loading": "AutoAWQForCausalLM.from_quantized('awq_smoothquant_combined/awq_smoothquant_combined', fuse_layers=True)",
+    "library": "awq"
+  },
+  "expected_improvements": [
+    "AWQ: ~4x compression with good accuracy retention",
+    "SmoothQuant: +1-3% better activation stability",
+    "Combined: Better STEM reasoning than AWQ alone"
+  ]
+}