ilessio-aiflowlab commited on
Commit
ad7e9a1
·
verified ·
1 Parent(s): a097f2c

Upload benchmarks/bench_05_quantization.json with huggingface_hub

Browse files
benchmarks/bench_05_quantization.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "benchmark": "quantization",
3
+ "timestamp": "2026-03-19T12:05:01.774531+00:00",
4
+ "device": "NVIDIA L4",
5
+ "model": "forge-nano",
6
+ "params": 967930567,
7
+ "params_m": 967.9,
8
+ "fp32_size_mb": 3871.7,
9
+ "n_calibration_samples": 5,
10
+ "n_quality_samples": 10,
11
+ "calibration_time_s": 1.6,
12
+ "calibrated_modules": 569,
13
+ "quantization_results": {
14
+ "int8_ah8": {
15
+ "target_bits": 8,
16
+ "action_head_bits": 8,
17
+ "quantize_time_s": 119.0,
18
+ "fp32_size_mb": 3871.7,
19
+ "estimated_size_mb": 967.9,
20
+ "compression_ratio": 4.0,
21
+ "quality": {
22
+ "action_mse": 2.87556,
23
+ "temporal_coherence_delta": 0.0,
24
+ "max_step_drift": 4.812052,
25
+ "per_step_error": [
26
+ 2.87556
27
+ ]
28
+ },
29
+ "latency_p50_ms": 136.19,
30
+ "latency_p95_ms": 137.93,
31
+ "latency_mean_ms": 135.84,
32
+ "fps": 7.4,
33
+ "gpu_mem_gb": 7.83
34
+ },
35
+ "int4_ah8": {
36
+ "target_bits": 4,
37
+ "action_head_bits": 8,
38
+ "quantize_time_s": 118.2,
39
+ "fp32_size_mb": 3871.7,
40
+ "estimated_size_mb": 484.0,
41
+ "compression_ratio": 8.0,
42
+ "quality": {
43
+ "action_mse": 2.769791,
44
+ "temporal_coherence_delta": 0.0,
45
+ "max_step_drift": 4.702052,
46
+ "per_step_error": [
47
+ 2.769791
48
+ ]
49
+ },
50
+ "latency_p50_ms": 134.46,
51
+ "latency_p95_ms": 136.75,
52
+ "latency_mean_ms": 134.27,
53
+ "fps": 7.4,
54
+ "gpu_mem_gb": 7.83
55
+ },
56
+ "int4_ah4": {
57
+ "target_bits": 4,
58
+ "action_head_bits": 4,
59
+ "quantize_time_s": 118.8,
60
+ "fp32_size_mb": 3871.7,
61
+ "estimated_size_mb": 484.0,
62
+ "compression_ratio": 8.0,
63
+ "quality": {
64
+ "action_mse": 4.312251,
65
+ "temporal_coherence_delta": 0.0,
66
+ "max_step_drift": 8.544949,
67
+ "per_step_error": [
68
+ 4.312251
69
+ ]
70
+ },
71
+ "latency_p50_ms": 133.47,
72
+ "latency_p95_ms": 138.39,
73
+ "latency_mean_ms": 133.92,
74
+ "fps": 7.5,
75
+ "gpu_mem_gb": 7.83
76
+ },
77
+ "int3_ah8": {
78
+ "target_bits": 3,
79
+ "action_head_bits": 8,
80
+ "quantize_time_s": 118.1,
81
+ "fp32_size_mb": 3871.7,
82
+ "estimated_size_mb": 363.0,
83
+ "compression_ratio": 10.7,
84
+ "quality": {
85
+ "action_mse": 2.91003,
86
+ "temporal_coherence_delta": 0.0,
87
+ "max_step_drift": 8.698943,
88
+ "per_step_error": [
89
+ 2.91003
90
+ ]
91
+ },
92
+ "latency_p50_ms": 133.91,
93
+ "latency_p95_ms": 138.35,
94
+ "latency_mean_ms": 134.24,
95
+ "fps": 7.4,
96
+ "gpu_mem_gb": 7.83
97
+ }
98
+ }
99
+ }