| { | |
| "benchmarked_devices": [ | |
| "cuda" | |
| ], | |
| "parameters": { | |
| "total_params": 27833970, | |
| "trainable_params": 14304786, | |
| "total_params_m": 27.83397, | |
| "trainable_params_m": 14.304786 | |
| }, | |
| "cuda": { | |
| "memory": { | |
| "baseline_memory_mb": 288.01806640625, | |
| "peak_memory_mb": 302.52978515625, | |
| "current_memory_mb": 288.5927734375, | |
| "inference_memory_mb": 14.51171875, | |
| "device": "cuda" | |
| }, | |
| "inference_single": { | |
| "batch_size": 1, | |
| "mean_latency_ms": 3.333154039864894, | |
| "std_latency_ms": 0.017885221990080365, | |
| "median_latency_ms": 3.3323254974675365, | |
| "p95_latency_ms": 3.342447954128147, | |
| "p99_latency_ms": 3.4173157204349995, | |
| "min_latency_ms": 3.309674000774976, | |
| "max_latency_ms": 3.4711439948296174, | |
| "latency_per_image_ms": 3.333154039864894, | |
| "throughput_img_per_sec": 300.01613728015224, | |
| "device": "cuda" | |
| }, | |
| "inference_batch32": { | |
| "batch_size": 32, | |
| "mean_latency_ms": 43.753516640135786, | |
| "std_latency_ms": 0.14937650685647402, | |
| "median_latency_ms": 43.77655049756868, | |
| "p95_latency_ms": 43.96002150133427, | |
| "p99_latency_ms": 44.032208782446105, | |
| "min_latency_ms": 43.447225994896144, | |
| "max_latency_ms": 44.07967900624499, | |
| "latency_per_image_ms": 1.3672973950042433, | |
| "throughput_img_per_sec": 731.3697836723345, | |
| "device": "cuda" | |
| } | |
| } | |
| } |