| { | |
| "benchmark": "e2e_pipeline", | |
| "timestamp": "2026-03-19T11:21:30.482673+00:00", | |
| "device": "NVIDIA L4", | |
| "model": "forge-nano", | |
| "total_pipeline_time_s": 166.8, | |
| "steps": { | |
| "build": { | |
| "time_s": 5.96, | |
| "params_m": 967.9, | |
| "gpu_mem_gb": 3.87 | |
| }, | |
| "train": { | |
| "n_steps": 30, | |
| "time_s": 11.4, | |
| "steps_per_sec": 2.64, | |
| "loss_start": 5.3176, | |
| "loss_end": 1.878, | |
| "loss_reduction_pct": 64.7, | |
| "gpu_mem_gb": 9.65 | |
| }, | |
| "prune": { | |
| "importance_time_s": 11.32, | |
| "prune_time_s": 0.9, | |
| "layers_before": 27, | |
| "layers_after": 20, | |
| "layers_removed": [ | |
| 3, | |
| 4, | |
| 7, | |
| 10, | |
| 15, | |
| 18, | |
| 20 | |
| ], | |
| "params_before_m": 967.9, | |
| "params_after_m": 861.3, | |
| "params_retained_pct": 89.0 | |
| }, | |
| "quantize": { | |
| "time_s": 125.6, | |
| "fp32_size_mb": 3445.0, | |
| "int4_size_mb": 430.6, | |
| "compression_ratio": 8.0 | |
| }, | |
| "inference": { | |
| "fp32_p50_ms": 109.56, | |
| "fp32_p95_ms": 111.62, | |
| "fp32_fps": 9.1, | |
| "fp16_p50_ms": 84.74, | |
| "fp16_p95_ms": 86.35, | |
| "fp16_fps": 11.8, | |
| "fp16_speedup": 1.29, | |
| "gpu_mem_gb": 16.29 | |
| } | |
| }, | |
| "summary": { | |
| "original_params_m": 967.9, | |
| "pruned_params_m": 861.3, | |
| "int4_size_mb": 430.6, | |
| "compression_ratio": 8.0, | |
| "loss_reduction_pct": 64.7, | |
| "fp32_latency_ms": 109.56, | |
| "fp16_latency_ms": 84.74, | |
| "fp16_fps": 11.8 | |
| } | |
| } |