{ "implementation": "binned_results", "config": { "warmup": 10, "iters": 50, "device": "cuda", "dtype": "torch.float32", "tokens": 100 }, "stats": { "avg_ms": 105.61800455325283, "min_ms": 103.41683897422627, "max_ms": 107.80877695651725, "std_ms": 1.4579030267805855, "p50_ms": 105.04751797998324, "p95_ms": 107.72936256835237, "p99_ms": 107.78988109494094, "num_iters": 50, "tokens_per_s": 946.8082683722715, "throughput_variance": 13.040579048845922 }, "output_sum": -0.5972483158111572 }