FORGE-Nano-Benchmark / benchmarks /bench_08_e2e_pipeline.json
ilessio-aiflowlab's picture
Upload benchmarks/bench_08_e2e_pipeline.json with huggingface_hub
881f7c6 verified
{
"benchmark": "e2e_pipeline",
"timestamp": "2026-03-19T11:21:30.482673+00:00",
"device": "NVIDIA L4",
"model": "forge-nano",
"total_pipeline_time_s": 166.8,
"steps": {
"build": {
"time_s": 5.96,
"params_m": 967.9,
"gpu_mem_gb": 3.87
},
"train": {
"n_steps": 30,
"time_s": 11.4,
"steps_per_sec": 2.64,
"loss_start": 5.3176,
"loss_end": 1.878,
"loss_reduction_pct": 64.7,
"gpu_mem_gb": 9.65
},
"prune": {
"importance_time_s": 11.32,
"prune_time_s": 0.9,
"layers_before": 27,
"layers_after": 20,
"layers_removed": [
3,
4,
7,
10,
15,
18,
20
],
"params_before_m": 967.9,
"params_after_m": 861.3,
"params_retained_pct": 89.0
},
"quantize": {
"time_s": 125.6,
"fp32_size_mb": 3445.0,
"int4_size_mb": 430.6,
"compression_ratio": 8.0
},
"inference": {
"fp32_p50_ms": 109.56,
"fp32_p95_ms": 111.62,
"fp32_fps": 9.1,
"fp16_p50_ms": 84.74,
"fp16_p95_ms": 86.35,
"fp16_fps": 11.8,
"fp16_speedup": 1.29,
"gpu_mem_gb": 16.29
}
},
"summary": {
"original_params_m": 967.9,
"pruned_params_m": 861.3,
"int4_size_mb": 430.6,
"compression_ratio": 8.0,
"loss_reduction_pct": 64.7,
"fp32_latency_ms": 109.56,
"fp16_latency_ms": 84.74,
"fp16_fps": 11.8
}
}