FORGE-Nano-Benchmark / benchmarks /bench_01_vision_encoder.json
ilessio-aiflowlab's picture
Upload benchmarks/bench_01_vision_encoder.json with huggingface_hub
b27e755 verified
{
"benchmark": "vision_encoder",
"timestamp": "2026-03-19T11:52:22.651298+00:00",
"model": "google/siglip-so400m-patch14-384",
"device": "NVIDIA L4",
"params": 428225600,
"params_m": 428.2,
"load_time_cpu_s": 0.9,
"load_time_gpu_s": 3.87,
"gpu_mem_loaded_gb": 1.71,
"gpu_mem_peak_gb": 2.05,
"output_shape": [
1,
729,
1152
],
"d_output": 1152,
"n_tokens": 729,
"latency_fp32_b1": {
"mean_ms": 99.28,
"std_ms": 1.65,
"p50_ms": 99.62,
"p95_ms": 101.58,
"p99_ms": 102.49,
"min_ms": 94.69,
"max_ms": 103.6,
"fps": 10.1,
"n_samples": 100
},
"latency_fp32_b4": {
"mean_ms": 383.47,
"std_ms": 8.47,
"p50_ms": 384.3,
"p95_ms": 390.79,
"p99_ms": 406.82,
"min_ms": 358.68,
"max_ms": 422.0,
"fps": 2.6,
"n_samples": 50
},
"latency_fp32_b8": {
"mean_ms": 761.34,
"std_ms": 5.73,
"p50_ms": 759.6,
"p95_ms": 771.53,
"p99_ms": 778.34,
"min_ms": 752.19,
"max_ms": 780.31,
"fps": 1.3,
"n_samples": 25
},
"latency_fp16_b1": {
"mean_ms": 31.86,
"std_ms": 31.38,
"p50_ms": 28.61,
"p95_ms": 29.56,
"p99_ms": 32.85,
"min_ms": 24.78,
"max_ms": 344.06,
"fps": 31.4,
"n_samples": 100
},
"fp16_speedup": 3.12
}