| { | |
| "benchmarked_devices": [ | |
| "cuda" | |
| ], | |
| "parameters": { | |
| "total_params": 4030606, | |
| "trainable_params": 3178798, | |
| "total_params_m": 4.030606, | |
| "trainable_params_m": 3.178798 | |
| }, | |
| "cuda": { | |
| "memory": { | |
| "baseline_memory_mb": 70.193359375, | |
| "peak_memory_mb": 80.720703125, | |
| "current_memory_mb": 70.76806640625, | |
| "inference_memory_mb": 10.52734375, | |
| "device": "cuda" | |
| }, | |
| "inference_single": { | |
| "batch_size": 1, | |
| "mean_latency_ms": 5.136066239720094, | |
| "std_latency_ms": 0.008697872240662149, | |
| "median_latency_ms": 5.135121497005457, | |
| "p95_latency_ms": 5.151860694604693, | |
| "p99_latency_ms": 5.156857259207754, | |
| "min_latency_ms": 5.121371003042441, | |
| "max_latency_ms": 5.1598530044429936, | |
| "latency_per_image_ms": 5.136066239720094, | |
| "throughput_img_per_sec": 194.70153875089005, | |
| "device": "cuda" | |
| }, | |
| "inference_batch32": { | |
| "batch_size": 32, | |
| "mean_latency_ms": 18.34433564043138, | |
| "std_latency_ms": 0.22169971671972608, | |
| "median_latency_ms": 18.217552998976316, | |
| "p95_latency_ms": 18.770105749717914, | |
| "p99_latency_ms": 18.77734032314038, | |
| "min_latency_ms": 18.15599100518739, | |
| "max_latency_ms": 18.78215800388716, | |
| "latency_per_image_ms": 0.5732604887634807, | |
| "throughput_img_per_sec": 1744.407681326501, | |
| "device": "cuda" | |
| } | |
| } | |
| } |