| { |
| "benchmark": "quantization", |
| "timestamp": "2026-03-19T12:05:01.774531+00:00", |
| "device": "NVIDIA L4", |
| "model": "forge-nano", |
| "params": 967930567, |
| "params_m": 967.9, |
| "fp32_size_mb": 3871.7, |
| "n_calibration_samples": 5, |
| "n_quality_samples": 10, |
| "calibration_time_s": 1.6, |
| "calibrated_modules": 569, |
| "quantization_results": { |
| "int8_ah8": { |
| "target_bits": 8, |
| "action_head_bits": 8, |
| "quantize_time_s": 119.0, |
| "fp32_size_mb": 3871.7, |
| "estimated_size_mb": 967.9, |
| "compression_ratio": 4.0, |
| "quality": { |
| "action_mse": 2.87556, |
| "temporal_coherence_delta": 0.0, |
| "max_step_drift": 4.812052, |
| "per_step_error": [ |
| 2.87556 |
| ] |
| }, |
| "latency_p50_ms": 136.19, |
| "latency_p95_ms": 137.93, |
| "latency_mean_ms": 135.84, |
| "fps": 7.4, |
| "gpu_mem_gb": 7.83 |
| }, |
| "int4_ah8": { |
| "target_bits": 4, |
| "action_head_bits": 8, |
| "quantize_time_s": 118.2, |
| "fp32_size_mb": 3871.7, |
| "estimated_size_mb": 484.0, |
| "compression_ratio": 8.0, |
| "quality": { |
| "action_mse": 2.769791, |
| "temporal_coherence_delta": 0.0, |
| "max_step_drift": 4.702052, |
| "per_step_error": [ |
| 2.769791 |
| ] |
| }, |
| "latency_p50_ms": 134.46, |
| "latency_p95_ms": 136.75, |
| "latency_mean_ms": 134.27, |
| "fps": 7.4, |
| "gpu_mem_gb": 7.83 |
| }, |
| "int4_ah4": { |
| "target_bits": 4, |
| "action_head_bits": 4, |
| "quantize_time_s": 118.8, |
| "fp32_size_mb": 3871.7, |
| "estimated_size_mb": 484.0, |
| "compression_ratio": 8.0, |
| "quality": { |
| "action_mse": 4.312251, |
| "temporal_coherence_delta": 0.0, |
| "max_step_drift": 8.544949, |
| "per_step_error": [ |
| 4.312251 |
| ] |
| }, |
| "latency_p50_ms": 133.47, |
| "latency_p95_ms": 138.39, |
| "latency_mean_ms": 133.92, |
| "fps": 7.5, |
| "gpu_mem_gb": 7.83 |
| }, |
| "int3_ah8": { |
| "target_bits": 3, |
| "action_head_bits": 8, |
| "quantize_time_s": 118.1, |
| "fp32_size_mb": 3871.7, |
| "estimated_size_mb": 363.0, |
| "compression_ratio": 10.7, |
| "quality": { |
| "action_mse": 2.91003, |
| "temporal_coherence_delta": 0.0, |
| "max_step_drift": 8.698943, |
| "per_step_error": [ |
| 2.91003 |
| ] |
| }, |
| "latency_p50_ms": 133.91, |
| "latency_p95_ms": 138.35, |
| "latency_mean_ms": 134.24, |
| "fps": 7.4, |
| "gpu_mem_gb": 7.83 |
| } |
| } |
| } |