{ "benchmarked_devices": [ "cuda" ], "parameters": { "total_params": 20200546, "trainable_params": 18378970, "total_params_m": 20.200546, "trainable_params_m": 18.37897 }, "cuda": { "memory": { "baseline_memory_mb": 314.5068359375, "peak_memory_mb": 321.7763671875, "current_memory_mb": 315.08154296875, "inference_memory_mb": 7.26953125, "device": "cuda" }, "inference_single": { "batch_size": 1, "mean_latency_ms": 11.141735980272642, "std_latency_ms": 0.04346183998720415, "median_latency_ms": 11.13254650044837, "p95_latency_ms": 11.162551251254627, "p99_latency_ms": 11.36283751031442, "min_latency_ms": 11.105517995019909, "max_latency_ms": 11.442682000051718, "latency_per_image_ms": 11.141735980272642, "throughput_img_per_sec": 89.7526203969096, "device": "cuda" }, "inference_batch32": { "batch_size": 32, "mean_latency_ms": 41.95240021988866, "std_latency_ms": 0.12925345881619055, "median_latency_ms": 41.9302680020337, "p95_latency_ms": 41.98958034867246, "p99_latency_ms": 42.48272620359785, "min_latency_ms": 41.89117100031581, "max_latency_ms": 42.82661800243659, "latency_per_image_ms": 1.3110125068715206, "throughput_img_per_sec": 762.7692297049917, "device": "cuda" } } }