{ "benchmarked_devices": [ "cuda" ], "parameters": { "total_params": 4030606, "trainable_params": 3178798, "total_params_m": 4.030606, "trainable_params_m": 3.178798 }, "cuda": { "memory": { "baseline_memory_mb": 70.193359375, "peak_memory_mb": 80.720703125, "current_memory_mb": 70.76806640625, "inference_memory_mb": 10.52734375, "device": "cuda" }, "inference_single": { "batch_size": 1, "mean_latency_ms": 5.136066239720094, "std_latency_ms": 0.008697872240662149, "median_latency_ms": 5.135121497005457, "p95_latency_ms": 5.151860694604693, "p99_latency_ms": 5.156857259207754, "min_latency_ms": 5.121371003042441, "max_latency_ms": 5.1598530044429936, "latency_per_image_ms": 5.136066239720094, "throughput_img_per_sec": 194.70153875089005, "device": "cuda" }, "inference_batch32": { "batch_size": 32, "mean_latency_ms": 18.34433564043138, "std_latency_ms": 0.22169971671972608, "median_latency_ms": 18.217552998976316, "p95_latency_ms": 18.770105749717914, "p99_latency_ms": 18.77734032314038, "min_latency_ms": 18.15599100518739, "max_latency_ms": 18.78215800388716, "latency_per_image_ms": 0.5732604887634807, "throughput_img_per_sec": 1744.407681326501, "device": "cuda" } } }