{ "benchmarked_devices": [ "cuda" ], "parameters": { "total_params": 27833970, "trainable_params": 14304786, "total_params_m": 27.83397, "trainable_params_m": 14.304786 }, "cuda": { "memory": { "baseline_memory_mb": 288.01806640625, "peak_memory_mb": 302.52978515625, "current_memory_mb": 288.5927734375, "inference_memory_mb": 14.51171875, "device": "cuda" }, "inference_single": { "batch_size": 1, "mean_latency_ms": 3.333154039864894, "std_latency_ms": 0.017885221990080365, "median_latency_ms": 3.3323254974675365, "p95_latency_ms": 3.342447954128147, "p99_latency_ms": 3.4173157204349995, "min_latency_ms": 3.309674000774976, "max_latency_ms": 3.4711439948296174, "latency_per_image_ms": 3.333154039864894, "throughput_img_per_sec": 300.01613728015224, "device": "cuda" }, "inference_batch32": { "batch_size": 32, "mean_latency_ms": 43.753516640135786, "std_latency_ms": 0.14937650685647402, "median_latency_ms": 43.77655049756868, "p95_latency_ms": 43.96002150133427, "p99_latency_ms": 44.032208782446105, "min_latency_ms": 43.447225994896144, "max_latency_ms": 44.07967900624499, "latency_per_image_ms": 1.3672973950042433, "throughput_img_per_sec": 731.3697836723345, "device": "cuda" } } }