FORGE-Nano-Benchmark / benchmarks /bench_05_quantization.json
ilessio-aiflowlab's picture
Upload benchmarks/bench_05_quantization.json with huggingface_hub
ad7e9a1 verified
{
"benchmark": "quantization",
"timestamp": "2026-03-19T12:05:01.774531+00:00",
"device": "NVIDIA L4",
"model": "forge-nano",
"params": 967930567,
"params_m": 967.9,
"fp32_size_mb": 3871.7,
"n_calibration_samples": 5,
"n_quality_samples": 10,
"calibration_time_s": 1.6,
"calibrated_modules": 569,
"quantization_results": {
"int8_ah8": {
"target_bits": 8,
"action_head_bits": 8,
"quantize_time_s": 119.0,
"fp32_size_mb": 3871.7,
"estimated_size_mb": 967.9,
"compression_ratio": 4.0,
"quality": {
"action_mse": 2.87556,
"temporal_coherence_delta": 0.0,
"max_step_drift": 4.812052,
"per_step_error": [
2.87556
]
},
"latency_p50_ms": 136.19,
"latency_p95_ms": 137.93,
"latency_mean_ms": 135.84,
"fps": 7.4,
"gpu_mem_gb": 7.83
},
"int4_ah8": {
"target_bits": 4,
"action_head_bits": 8,
"quantize_time_s": 118.2,
"fp32_size_mb": 3871.7,
"estimated_size_mb": 484.0,
"compression_ratio": 8.0,
"quality": {
"action_mse": 2.769791,
"temporal_coherence_delta": 0.0,
"max_step_drift": 4.702052,
"per_step_error": [
2.769791
]
},
"latency_p50_ms": 134.46,
"latency_p95_ms": 136.75,
"latency_mean_ms": 134.27,
"fps": 7.4,
"gpu_mem_gb": 7.83
},
"int4_ah4": {
"target_bits": 4,
"action_head_bits": 4,
"quantize_time_s": 118.8,
"fp32_size_mb": 3871.7,
"estimated_size_mb": 484.0,
"compression_ratio": 8.0,
"quality": {
"action_mse": 4.312251,
"temporal_coherence_delta": 0.0,
"max_step_drift": 8.544949,
"per_step_error": [
4.312251
]
},
"latency_p50_ms": 133.47,
"latency_p95_ms": 138.39,
"latency_mean_ms": 133.92,
"fps": 7.5,
"gpu_mem_gb": 7.83
},
"int3_ah8": {
"target_bits": 3,
"action_head_bits": 8,
"quantize_time_s": 118.1,
"fp32_size_mb": 3871.7,
"estimated_size_mb": 363.0,
"compression_ratio": 10.7,
"quality": {
"action_mse": 2.91003,
"temporal_coherence_delta": 0.0,
"max_step_drift": 8.698943,
"per_step_error": [
2.91003
]
},
"latency_p50_ms": 133.91,
"latency_p95_ms": 138.35,
"latency_mean_ms": 134.24,
"fps": 7.4,
"gpu_mem_gb": 7.83
}
}
}