File size: 2,508 Bytes
ad7e9a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "benchmark": "quantization",
  "timestamp": "2026-03-19T12:05:01.774531+00:00",
  "device": "NVIDIA L4",
  "model": "forge-nano",
  "params": 967930567,
  "params_m": 967.9,
  "fp32_size_mb": 3871.7,
  "n_calibration_samples": 5,
  "n_quality_samples": 10,
  "calibration_time_s": 1.6,
  "calibrated_modules": 569,
  "quantization_results": {
    "int8_ah8": {
      "target_bits": 8,
      "action_head_bits": 8,
      "quantize_time_s": 119.0,
      "fp32_size_mb": 3871.7,
      "estimated_size_mb": 967.9,
      "compression_ratio": 4.0,
      "quality": {
        "action_mse": 2.87556,
        "temporal_coherence_delta": 0.0,
        "max_step_drift": 4.812052,
        "per_step_error": [
          2.87556
        ]
      },
      "latency_p50_ms": 136.19,
      "latency_p95_ms": 137.93,
      "latency_mean_ms": 135.84,
      "fps": 7.4,
      "gpu_mem_gb": 7.83
    },
    "int4_ah8": {
      "target_bits": 4,
      "action_head_bits": 8,
      "quantize_time_s": 118.2,
      "fp32_size_mb": 3871.7,
      "estimated_size_mb": 484.0,
      "compression_ratio": 8.0,
      "quality": {
        "action_mse": 2.769791,
        "temporal_coherence_delta": 0.0,
        "max_step_drift": 4.702052,
        "per_step_error": [
          2.769791
        ]
      },
      "latency_p50_ms": 134.46,
      "latency_p95_ms": 136.75,
      "latency_mean_ms": 134.27,
      "fps": 7.4,
      "gpu_mem_gb": 7.83
    },
    "int4_ah4": {
      "target_bits": 4,
      "action_head_bits": 4,
      "quantize_time_s": 118.8,
      "fp32_size_mb": 3871.7,
      "estimated_size_mb": 484.0,
      "compression_ratio": 8.0,
      "quality": {
        "action_mse": 4.312251,
        "temporal_coherence_delta": 0.0,
        "max_step_drift": 8.544949,
        "per_step_error": [
          4.312251
        ]
      },
      "latency_p50_ms": 133.47,
      "latency_p95_ms": 138.39,
      "latency_mean_ms": 133.92,
      "fps": 7.5,
      "gpu_mem_gb": 7.83
    },
    "int3_ah8": {
      "target_bits": 3,
      "action_head_bits": 8,
      "quantize_time_s": 118.1,
      "fp32_size_mb": 3871.7,
      "estimated_size_mb": 363.0,
      "compression_ratio": 10.7,
      "quality": {
        "action_mse": 2.91003,
        "temporal_coherence_delta": 0.0,
        "max_step_drift": 8.698943,
        "per_step_error": [
          2.91003
        ]
      },
      "latency_p50_ms": 133.91,
      "latency_p95_ms": 138.35,
      "latency_mean_ms": 134.24,
      "fps": 7.4,
      "gpu_mem_gb": 7.83
    }
  }
}