File size: 2,125 Bytes
3e77439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
{
  "pipeline": "AWQ_4bit_then_SmoothQuant",
  "original_model": "luca-deandrea/MNLP_M3_mcqa_model",
  "timestamp": "2025-06-10 22:57:37",
  "processing_time_seconds": 143.2722406387329,
  "awq_config": {
    "q_group_size": 32,
    "w_bit": 4,
    "zero_point": true
  },
  "smoothquant_optimizations": [
    "mixed_precision",
    "cache_enabled",
    "deterministic_sampling"
  ],
  "activation_layers_smoothed": 0,
  "model_sizes": {
    "awq_only_mb": 554.3882436752319,
    "awq_smoothquant_mb": 554.3902568817139
  },
  "validation": {
    "success_rate": 0.0,
    "results": [
      {
        "test": "Calculate 2+2=",
        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
        "success": false
      },
      {
        "test": "What is the derivative of x\u00b2?",
        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
        "success": false
      },
      {
        "test": "Solve: 2x + 3 = 7",
        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
        "success": false
      },
      {
        "test": "What is F=ma?",
        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
        "success": false
      },
      {
        "test": "Balance: H\u2082 + O\u2082 \u2192 ?",
        "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
        "success": false
      }
    ]
  },
  "calibration_info": {
    "samples_used": 105,
    "source": "stem_calibration_data.json"
  },
  "lighteval_compatible": true,
  "optimized_for": "STEM_reasoning_tasks",
  "usage": {
    "loading": "AutoAWQForCausalLM.from_quantized('awq_smoothquant_combined/awq_smoothquant_combined', fuse_layers=True)",
    "library": "awq"
  },
  "expected_improvements": [
    "AWQ: ~4x compression with good accuracy retention",
    "SmoothQuant: +1-3% better activation stability",
    "Combined: Better STEM reasoning than AWQ alone"
  ]
}