kernels-benchmarks
/
moe_benchmarks
/megablocks_yamoe
/artifacts
/megablocks_run
/megablocks_results.json
| { | |
| "implementation": "megablocks_results", | |
| "config": { | |
| "warmup": 10, | |
| "iters": 50, | |
| "device": "cuda", | |
| "dtype": "torch.float32", | |
| "tokens": 100, | |
| "vary_inputs": true | |
| }, | |
| "stats": { | |
| "avg_ms": 3.8478457200017147, | |
| "min_ms": 0.8121239999354657, | |
| "max_ms": 8.535666000057063, | |
| "std_ms": 3.697659288553723, | |
| "p50_ms": 0.8394504999955643, | |
| "p95_ms": 8.499624499950187, | |
| "p99_ms": 8.528520820026415, | |
| "num_iters": 50, | |
| "tokens_per_s": 25988.567961595778, | |
| "throughput_variance": 53035.39729321811 | |
| }, | |
| "output_sum": 6.4738850593566895 | |
| } |