Upload benchmarks/bench_01_vision_encoder.json with huggingface_hub
Browse files
benchmarks/bench_01_vision_encoder.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"benchmark": "vision_encoder",
|
| 3 |
+
"timestamp": "2026-03-19T11:52:22.651298+00:00",
|
| 4 |
+
"model": "google/siglip-so400m-patch14-384",
|
| 5 |
+
"device": "NVIDIA L4",
|
| 6 |
+
"params": 428225600,
|
| 7 |
+
"params_m": 428.2,
|
| 8 |
+
"load_time_cpu_s": 0.9,
|
| 9 |
+
"load_time_gpu_s": 3.87,
|
| 10 |
+
"gpu_mem_loaded_gb": 1.71,
|
| 11 |
+
"gpu_mem_peak_gb": 2.05,
|
| 12 |
+
"output_shape": [
|
| 13 |
+
1,
|
| 14 |
+
729,
|
| 15 |
+
1152
|
| 16 |
+
],
|
| 17 |
+
"d_output": 1152,
|
| 18 |
+
"n_tokens": 729,
|
| 19 |
+
"latency_fp32_b1": {
|
| 20 |
+
"mean_ms": 99.28,
|
| 21 |
+
"std_ms": 1.65,
|
| 22 |
+
"p50_ms": 99.62,
|
| 23 |
+
"p95_ms": 101.58,
|
| 24 |
+
"p99_ms": 102.49,
|
| 25 |
+
"min_ms": 94.69,
|
| 26 |
+
"max_ms": 103.6,
|
| 27 |
+
"fps": 10.1,
|
| 28 |
+
"n_samples": 100
|
| 29 |
+
},
|
| 30 |
+
"latency_fp32_b4": {
|
| 31 |
+
"mean_ms": 383.47,
|
| 32 |
+
"std_ms": 8.47,
|
| 33 |
+
"p50_ms": 384.3,
|
| 34 |
+
"p95_ms": 390.79,
|
| 35 |
+
"p99_ms": 406.82,
|
| 36 |
+
"min_ms": 358.68,
|
| 37 |
+
"max_ms": 422.0,
|
| 38 |
+
"fps": 2.6,
|
| 39 |
+
"n_samples": 50
|
| 40 |
+
},
|
| 41 |
+
"latency_fp32_b8": {
|
| 42 |
+
"mean_ms": 761.34,
|
| 43 |
+
"std_ms": 5.73,
|
| 44 |
+
"p50_ms": 759.6,
|
| 45 |
+
"p95_ms": 771.53,
|
| 46 |
+
"p99_ms": 778.34,
|
| 47 |
+
"min_ms": 752.19,
|
| 48 |
+
"max_ms": 780.31,
|
| 49 |
+
"fps": 1.3,
|
| 50 |
+
"n_samples": 25
|
| 51 |
+
},
|
| 52 |
+
"latency_fp16_b1": {
|
| 53 |
+
"mean_ms": 31.86,
|
| 54 |
+
"std_ms": 31.38,
|
| 55 |
+
"p50_ms": 28.61,
|
| 56 |
+
"p95_ms": 29.56,
|
| 57 |
+
"p99_ms": 32.85,
|
| 58 |
+
"min_ms": 24.78,
|
| 59 |
+
"max_ms": 344.06,
|
| 60 |
+
"fps": 31.4,
|
| 61 |
+
"n_samples": 100
|
| 62 |
+
},
|
| 63 |
+
"fp16_speedup": 3.12
|
| 64 |
+
}
|