{ "benchmark": "multi_gpu", "timestamp": "2026-03-19T12:21:06.000529+00:00", "n_gpus_available": 4, "gpu_names": [ "NVIDIA L4", "NVIDIA L4", "NVIDIA L4", "NVIDIA L4" ], "inference": { "gpu_1": { "n_gpus": 1, "batch_results": { "batch_1": { "p50_ms": 128.19, "p95_ms": 130.86, "mean_ms": 128.42, "fps": 7.8, "per_sample_ms": 128.42 }, "batch_4": { "p50_ms": 430.18, "p95_ms": 440.71, "mean_ms": 429.76, "fps": 9.3, "per_sample_ms": 107.44 }, "batch_8": { "p50_ms": 853.99, "p95_ms": 873.4, "mean_ms": 857.72, "fps": 9.3, "per_sample_ms": 107.21 }, "batch_16": { "p50_ms": 1734.01, "p95_ms": 1759.55, "mean_ms": 1727.97, "fps": 9.3, "per_sample_ms": 108.0 } }, "memory": { "gpu_0_allocated_gb": 3.65, "gpu_0_reserved_gb": 5.07 } }, "gpu_2": { "n_gpus": 2, "batch_results": { "batch_1": { "p50_ms": 162.23, "p95_ms": 168.32, "mean_ms": 164.65, "fps": 6.1, "per_sample_ms": 164.65 }, "batch_4": { "p50_ms": 611.69, "p95_ms": 613.66, "mean_ms": 611.27, "fps": 6.5, "per_sample_ms": 152.82 }, "batch_8": { "p50_ms": 799.07, "p95_ms": 802.8, "mean_ms": 799.2, "fps": 10.0, "per_sample_ms": 99.9 }, "batch_16": { "p50_ms": 1185.3, "p95_ms": 1190.2, "mean_ms": 1184.72, "fps": 13.5, "per_sample_ms": 74.04 } }, "memory": { "gpu_0_allocated_gb": 3.66, "gpu_0_reserved_gb": 4.47, "gpu_1_allocated_gb": 0.01, "gpu_1_reserved_gb": 4.47 } }, "gpu_4": { "n_gpus": 4, "batch_results": { "batch_1": { "p50_ms": 164.7, "p95_ms": 171.49, "mean_ms": 167.37, "fps": 6.0, "per_sample_ms": 167.37 }, "batch_4": { "p50_ms": 913.4, "p95_ms": 915.7, "mean_ms": 912.74, "fps": 4.4, "per_sample_ms": 228.19 }, "batch_8": { "p50_ms": 1003.53, "p95_ms": 1007.34, "mean_ms": 1002.41, "fps": 8.0, "per_sample_ms": 125.3 }, "batch_16": { "p50_ms": 1178.17, "p95_ms": 1182.78, "mean_ms": 1178.2, "fps": 13.6, "per_sample_ms": 73.64 } }, "memory": { "gpu_0_allocated_gb": 3.66, "gpu_0_reserved_gb": 4.49, "gpu_1_allocated_gb": 0.01, "gpu_1_reserved_gb": 4.16, "gpu_2_allocated_gb": 0.01, "gpu_2_reserved_gb": 4.16, "gpu_3_allocated_gb": 0.01, "gpu_3_reserved_gb": 4.16 } } }, "training": { "gpu_1": { "n_gpus": 1, "batch_size": 2, "n_steps": 30, "step_time_mean_ms": 432.4, "steps_per_sec": 2.31, "samples_per_sec": 4.63, "loss_start": 4.0196, "loss_end": 1.7553, "loss_reduction_pct": 56.3, "memory": { "gpu_0_peak_gb": 9.0 } }, "gpu_2": { "n_gpus": 2, "batch_size": 4, "n_steps": 30, "step_time_mean_ms": 1264.8, "steps_per_sec": 0.79, "samples_per_sec": 3.16, "loss_start": 1.3165, "loss_end": 1.4857, "loss_reduction_pct": -12.9, "memory": { "gpu_0_peak_gb": 14.59, "gpu_1_peak_gb": 4.07 } }, "gpu_4": { "n_gpus": 4, "batch_size": 8, "n_steps": 30, "step_time_mean_ms": 2005.1, "steps_per_sec": 0.5, "samples_per_sec": 3.99, "loss_start": 6.7918, "loss_end": 1.182, "loss_reduction_pct": 82.6, "memory": { "gpu_0_peak_gb": 14.6, "gpu_1_peak_gb": 4.07, "gpu_2_peak_gb": 4.07, "gpu_3_peak_gb": 4.07 } } }, "fp16": { "fp16_gpu_1": { "n_gpus": 1, "precision": "fp16", "batch_results": { "batch_4": { "p50_ms": 122.14, "fps": 32.7, "per_sample_ms": 30.56 }, "batch_8": { "p50_ms": 234.11, "fps": 34.2, "per_sample_ms": 29.26 }, "batch_16": { "p50_ms": 486.22, "fps": 32.9, "per_sample_ms": 30.43 }, "batch_32": { "p50_ms": 950.24, "fps": 33.6, "per_sample_ms": 29.73 } } }, "fp16_gpu_4": { "n_gpus": 4, "precision": "fp16", "batch_results": { "batch_4": { "p50_ms": 901.49, "fps": 4.4, "per_sample_ms": 225.52 }, "batch_8": { "p50_ms": 903.67, "fps": 8.8, "per_sample_ms": 113.02 }, "batch_16": { "p50_ms": 911.97, "fps": 17.5, "per_sample_ms": 57.07 }, "batch_32": { "p50_ms": 1013.11, "fps": 31.6, "per_sample_ms": 31.67 } } } } }