Thomaschtl committed on
Commit
3e77439
·
verified ·
1 Parent(s): 572fb58

Upload quantization_info.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. quantization_info.json +66 -0
quantization_info.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "pipeline": "AWQ_4bit_then_SmoothQuant",
3
+ "original_model": "luca-deandrea/MNLP_M3_mcqa_model",
4
+ "timestamp": "2025-06-10 22:57:37",
5
+ "processing_time_seconds": 143.2722406387329,
6
+ "awq_config": {
7
+ "q_group_size": 32,
8
+ "w_bit": 4,
9
+ "zero_point": true
10
+ },
11
+ "smoothquant_optimizations": [
12
+ "mixed_precision",
13
+ "cache_enabled",
14
+ "deterministic_sampling"
15
+ ],
16
+ "activation_layers_smoothed": 0,
17
+ "model_sizes": {
18
+ "awq_only_mb": 554.3882436752319,
19
+ "awq_smoothquant_mb": 554.3902568817139
20
+ },
21
+ "validation": {
22
+ "success_rate": 0.0,
23
+ "results": [
24
+ {
25
+ "test": "Calculate 2+2=",
26
+ "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
27
+ "success": false
28
+ },
29
+ {
30
+ "test": "What is the derivative of x\u00b2?",
31
+ "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
32
+ "success": false
33
+ },
34
+ {
35
+ "test": "Solve: 2x + 3 = 7",
36
+ "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
37
+ "success": false
38
+ },
39
+ {
40
+ "test": "What is F=ma?",
41
+ "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
42
+ "success": false
43
+ },
44
+ {
45
+ "test": "Balance: H\u2082 + O\u2082 \u2192 ?",
46
+ "error": "Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!",
47
+ "success": false
48
+ }
49
+ ]
50
+ },
51
+ "calibration_info": {
52
+ "samples_used": 105,
53
+ "source": "stem_calibration_data.json"
54
+ },
55
+ "lighteval_compatible": true,
56
+ "optimized_for": "STEM_reasoning_tasks",
57
+ "usage": {
58
+ "loading": "AutoAWQForCausalLM.from_quantized('awq_smoothquant_combined/awq_smoothquant_combined', fuse_layers=True)",
59
+ "library": "awq"
60
+ },
61
+ "expected_improvements": [
62
+ "AWQ: ~4x compression with good accuracy retention",
63
+ "SmoothQuant: +1-3% better activation stability",
64
+ "Combined: Better STEM reasoning than AWQ alone"
65
+ ]
66
+ }