{ "benchmark": "vision_encoder", "timestamp": "2026-03-19T11:52:22.651298+00:00", "model": "google/siglip-so400m-patch14-384", "device": "NVIDIA L4", "params": 428225600, "params_m": 428.2, "load_time_cpu_s": 0.9, "load_time_gpu_s": 3.87, "gpu_mem_loaded_gb": 1.71, "gpu_mem_peak_gb": 2.05, "output_shape": [ 1, 729, 1152 ], "d_output": 1152, "n_tokens": 729, "latency_fp32_b1": { "mean_ms": 99.28, "std_ms": 1.65, "p50_ms": 99.62, "p95_ms": 101.58, "p99_ms": 102.49, "min_ms": 94.69, "max_ms": 103.6, "fps": 10.1, "n_samples": 100 }, "latency_fp32_b4": { "mean_ms": 383.47, "std_ms": 8.47, "p50_ms": 384.3, "p95_ms": 390.79, "p99_ms": 406.82, "min_ms": 358.68, "max_ms": 422.0, "fps": 2.6, "n_samples": 50 }, "latency_fp32_b8": { "mean_ms": 761.34, "std_ms": 5.73, "p50_ms": 759.6, "p95_ms": 771.53, "p99_ms": 778.34, "min_ms": 752.19, "max_ms": 780.31, "fps": 1.3, "n_samples": 25 }, "latency_fp16_b1": { "mean_ms": 31.86, "std_ms": 31.38, "p50_ms": 28.61, "p95_ms": 29.56, "p99_ms": 32.85, "min_ms": 24.78, "max_ms": 344.06, "fps": 31.4, "n_samples": 100 }, "fp16_speedup": 3.12 }