File size: 1,285 Bytes
b27e755 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | {
"benchmark": "vision_encoder",
"timestamp": "2026-03-19T11:52:22.651298+00:00",
"model": "google/siglip-so400m-patch14-384",
"device": "NVIDIA L4",
"params": 428225600,
"params_m": 428.2,
"load_time_cpu_s": 0.9,
"load_time_gpu_s": 3.87,
"gpu_mem_loaded_gb": 1.71,
"gpu_mem_peak_gb": 2.05,
"output_shape": [
1,
729,
1152
],
"d_output": 1152,
"n_tokens": 729,
"latency_fp32_b1": {
"mean_ms": 99.28,
"std_ms": 1.65,
"p50_ms": 99.62,
"p95_ms": 101.58,
"p99_ms": 102.49,
"min_ms": 94.69,
"max_ms": 103.6,
"fps": 10.1,
"n_samples": 100
},
"latency_fp32_b4": {
"mean_ms": 383.47,
"std_ms": 8.47,
"p50_ms": 384.3,
"p95_ms": 390.79,
"p99_ms": 406.82,
"min_ms": 358.68,
"max_ms": 422.0,
"fps": 2.6,
"n_samples": 50
},
"latency_fp32_b8": {
"mean_ms": 761.34,
"std_ms": 5.73,
"p50_ms": 759.6,
"p95_ms": 771.53,
"p99_ms": 778.34,
"min_ms": 752.19,
"max_ms": 780.31,
"fps": 1.3,
"n_samples": 25
},
"latency_fp16_b1": {
"mean_ms": 31.86,
"std_ms": 31.38,
"p50_ms": 28.61,
"p95_ms": 29.56,
"p99_ms": 32.85,
"min_ms": 24.78,
"max_ms": 344.06,
"fps": 31.4,
"n_samples": 100
},
"fp16_speedup": 3.12
} |