| { | |
| "benchmarked_devices": [ | |
| "cuda" | |
| ], | |
| "parameters": { | |
| "total_params": 20200546, | |
| "trainable_params": 18378970, | |
| "total_params_m": 20.200546, | |
| "trainable_params_m": 18.37897 | |
| }, | |
| "cuda": { | |
| "memory": { | |
| "baseline_memory_mb": 314.5068359375, | |
| "peak_memory_mb": 321.7763671875, | |
| "current_memory_mb": 315.08154296875, | |
| "inference_memory_mb": 7.26953125, | |
| "device": "cuda" | |
| }, | |
| "inference_single": { | |
| "batch_size": 1, | |
| "mean_latency_ms": 11.141735980272642, | |
| "std_latency_ms": 0.04346183998720415, | |
| "median_latency_ms": 11.13254650044837, | |
| "p95_latency_ms": 11.162551251254627, | |
| "p99_latency_ms": 11.36283751031442, | |
| "min_latency_ms": 11.105517995019909, | |
| "max_latency_ms": 11.442682000051718, | |
| "latency_per_image_ms": 11.141735980272642, | |
| "throughput_img_per_sec": 89.7526203969096, | |
| "device": "cuda" | |
| }, | |
| "inference_batch32": { | |
| "batch_size": 32, | |
| "mean_latency_ms": 41.95240021988866, | |
| "std_latency_ms": 0.12925345881619055, | |
| "median_latency_ms": 41.9302680020337, | |
| "p95_latency_ms": 41.98958034867246, | |
| "p99_latency_ms": 42.48272620359785, | |
| "min_latency_ms": 41.89117100031581, | |
| "max_latency_ms": 42.82661800243659, | |
| "latency_per_image_ms": 1.3110125068715206, | |
| "throughput_img_per_sec": 762.7692297049917, | |
| "device": "cuda" | |
| } | |
| } | |
| } |