File size: 1,285 Bytes
b27e755
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
{
  "benchmark": "vision_encoder",
  "timestamp": "2026-03-19T11:52:22.651298+00:00",
  "model": "google/siglip-so400m-patch14-384",
  "device": "NVIDIA L4",
  "params": 428225600,
  "params_m": 428.2,
  "load_time_cpu_s": 0.9,
  "load_time_gpu_s": 3.87,
  "gpu_mem_loaded_gb": 1.71,
  "gpu_mem_peak_gb": 2.05,
  "output_shape": [
    1,
    729,
    1152
  ],
  "d_output": 1152,
  "n_tokens": 729,
  "latency_fp32_b1": {
    "mean_ms": 99.28,
    "std_ms": 1.65,
    "p50_ms": 99.62,
    "p95_ms": 101.58,
    "p99_ms": 102.49,
    "min_ms": 94.69,
    "max_ms": 103.6,
    "fps": 10.1,
    "n_samples": 100
  },
  "latency_fp32_b4": {
    "mean_ms": 383.47,
    "std_ms": 8.47,
    "p50_ms": 384.3,
    "p95_ms": 390.79,
    "p99_ms": 406.82,
    "min_ms": 358.68,
    "max_ms": 422.0,
    "fps": 2.6,
    "n_samples": 50
  },
  "latency_fp32_b8": {
    "mean_ms": 761.34,
    "std_ms": 5.73,
    "p50_ms": 759.6,
    "p95_ms": 771.53,
    "p99_ms": 778.34,
    "min_ms": 752.19,
    "max_ms": 780.31,
    "fps": 1.3,
    "n_samples": 25
  },
  "latency_fp16_b1": {
    "mean_ms": 31.86,
    "std_ms": 31.38,
    "p50_ms": 28.61,
    "p95_ms": 29.56,
    "p99_ms": 32.85,
    "min_ms": 24.78,
    "max_ms": 344.06,
    "fps": 31.4,
    "n_samples": 100
  },
  "fp16_speedup": 3.12
}