Spaces:
Running
Running
# /// script
# dependencies = [
#   "torch",
#   "numpy",
# ]
# ///
"""Configuration for MoE benchmarks.

Defines the model, benchmark, and seed constants as module-level names and
prints a summary of them whenever the module is executed or imported.
"""
import torch

# Model configuration
NUM_EXPERTS = 128
HIDDEN_SIZE = 1152
TOP_K = 4  # presumably the number of experts selected per token -- TODO confirm

# Benchmark configuration
BATCH_SIZE = 8
SEQ_LEN = 512
# Stored as a dtype *name* (plain string), not as a torch.dtype object.
DTYPE = "bfloat16"
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Seeds for reproducibility (one per named purpose; how each one is consumed
# is not visible in this file).
WEIGHT_SEED = 999
EXPERT_SEED = 777
INPUT_SEED = 123
GENERAL_SEED = 42

# Echo the active configuration. The header has nothing to interpolate, so it
# is a plain string literal; only the value lines are f-strings.
print("Configuration:")
print(f" Experts: {NUM_EXPERTS}")
print(f" Hidden size: {HIDDEN_SIZE}")
print(f" Top-k: {TOP_K}")
print(f" Batch size: {BATCH_SIZE}")
print(f" Sequence length: {SEQ_LEN}")
print(f" Device: {DEVICE}")
print(f" Dtype: {DTYPE}")