drbh's picture
drbh HF Staff
Upload folder using huggingface_hub
bad4ddc verified
raw
history blame contribute delete
687 Bytes
# /// script
# dependencies = [
# "torch",
# "numpy",
# ]
# ///
"""Configuration for MoE benchmarks."""
import torch
# ---------------------------------------------------------------------------
# Model shape
# ---------------------------------------------------------------------------
NUM_EXPERTS: int = 128
HIDDEN_SIZE: int = 1152
# NOTE(review): presumably the number of experts routed per token -- confirm
# against the benchmark code that consumes this value.
TOP_K: int = 4

# ---------------------------------------------------------------------------
# Benchmark workload
# ---------------------------------------------------------------------------
BATCH_SIZE: int = 8
SEQ_LEN: int = 512
# Kept as a plain string name rather than a torch dtype object.
DTYPE: str = "bfloat16"
# Use the GPU when torch reports CUDA as available; otherwise run on the CPU.
DEVICE: str = "cuda" if torch.cuda.is_available() else "cpu"

# ---------------------------------------------------------------------------
# Fixed, distinct seeds so runs are reproducible
# ---------------------------------------------------------------------------
WEIGHT_SEED: int = 999
EXPERT_SEED: int = 777
INPUT_SEED: int = 123
GENERAL_SEED: int = 42

# Report the active settings once, one entry per line (seeds are not printed).
print(
    "Configuration:",
    f" Experts: {NUM_EXPERTS}",
    f" Hidden size: {HIDDEN_SIZE}",
    f" Top-k: {TOP_K}",
    f" Batch size: {BATCH_SIZE}",
    f" Sequence length: {SEQ_LEN}",
    f" Device: {DEVICE}",
    f" Dtype: {DTYPE}",
    sep="\n",
)