{ "benchmark": "quantization", "timestamp": "2026-03-19T12:05:01.774531+00:00", "device": "NVIDIA L4", "model": "forge-nano", "params": 967930567, "params_m": 967.9, "fp32_size_mb": 3871.7, "n_calibration_samples": 5, "n_quality_samples": 10, "calibration_time_s": 1.6, "calibrated_modules": 569, "quantization_results": { "int8_ah8": { "target_bits": 8, "action_head_bits": 8, "quantize_time_s": 119.0, "fp32_size_mb": 3871.7, "estimated_size_mb": 967.9, "compression_ratio": 4.0, "quality": { "action_mse": 2.87556, "temporal_coherence_delta": 0.0, "max_step_drift": 4.812052, "per_step_error": [ 2.87556 ] }, "latency_p50_ms": 136.19, "latency_p95_ms": 137.93, "latency_mean_ms": 135.84, "fps": 7.4, "gpu_mem_gb": 7.83 }, "int4_ah8": { "target_bits": 4, "action_head_bits": 8, "quantize_time_s": 118.2, "fp32_size_mb": 3871.7, "estimated_size_mb": 484.0, "compression_ratio": 8.0, "quality": { "action_mse": 2.769791, "temporal_coherence_delta": 0.0, "max_step_drift": 4.702052, "per_step_error": [ 2.769791 ] }, "latency_p50_ms": 134.46, "latency_p95_ms": 136.75, "latency_mean_ms": 134.27, "fps": 7.4, "gpu_mem_gb": 7.83 }, "int4_ah4": { "target_bits": 4, "action_head_bits": 4, "quantize_time_s": 118.8, "fp32_size_mb": 3871.7, "estimated_size_mb": 484.0, "compression_ratio": 8.0, "quality": { "action_mse": 4.312251, "temporal_coherence_delta": 0.0, "max_step_drift": 8.544949, "per_step_error": [ 4.312251 ] }, "latency_p50_ms": 133.47, "latency_p95_ms": 138.39, "latency_mean_ms": 133.92, "fps": 7.5, "gpu_mem_gb": 7.83 }, "int3_ah8": { "target_bits": 3, "action_head_bits": 8, "quantize_time_s": 118.1, "fp32_size_mb": 3871.7, "estimated_size_mb": 363.0, "compression_ratio": 10.7, "quality": { "action_mse": 2.91003, "temporal_coherence_delta": 0.0, "max_step_drift": 8.698943, "per_step_error": [ 2.91003 ] }, "latency_p50_ms": 133.91, "latency_p95_ms": 138.35, "latency_mean_ms": 134.24, "fps": 7.4, "gpu_mem_gb": 7.83 } } }