""" Constants for FlashAttention Explorer Space. Model configurations and GPU specifications. """ # Real HuggingFace models with their configurations MODEL_CONFIGS = { "SmolLM2-360M": { "model_id": "HuggingFaceTB/SmolLM2-360M-Instruct", "layers": 32, "q_heads": 15, "kv_heads": 5, "head_dim": 64, "hidden_dim": 960, "vocab_size": 49152, "description": "Fast demos, scaling curves", "license": "Apache 2.0", "gqa_ratio": 3, # 15/5 = 3 Q heads per KV head }, "Qwen2.5-0.5B": { "model_id": "Qwen/Qwen2.5-0.5B-Instruct", "layers": 24, "q_heads": 14, "kv_heads": 2, "head_dim": 64, "hidden_dim": 896, "vocab_size": 151936, "description": "Prefill/Decode comparison", "license": "Apache 2.0", "gqa_ratio": 7, # 14/2 = 7 Q heads per KV head }, "Llama-3.2-1B": { "model_id": "meta-llama/Llama-3.2-1B-Instruct", "layers": 16, "q_heads": 32, "kv_heads": 8, "head_dim": 64, "hidden_dim": 2048, "vocab_size": 128256, "description": "GQA demonstration", "license": "Llama 3.2", "gqa_ratio": 4, # 32/8 = 4 Q heads per KV head }, } # GPU specifications for roofline analysis GPU_SPECS = { "A10G": { "name": "NVIDIA A10G", "tflops_fp16": 125, "bandwidth_gbps": 600, # GB/s "memory_gb": 24, "sram_kb": 192, # Shared memory per SM }, "A100_80GB": { "name": "NVIDIA A100 (80GB)", "tflops_fp16": 312, "bandwidth_gbps": 2000, "memory_gb": 80, "sram_kb": 192, }, "H100": { "name": "NVIDIA H100 (80GB)", "tflops_fp16": 989, "bandwidth_gbps": 3350, "memory_gb": 80, "sram_kb": 256, }, "H200": { "name": "NVIDIA H200 (141GB)", "tflops_fp16": 989, # Same compute as H100 "bandwidth_gbps": 4800, # HBM3e: 4.8 TB/s "memory_gb": 141, "sram_kb": 256, }, "L40S": { "name": "NVIDIA L40S", "tflops_fp16": 362, "bandwidth_gbps": 864, "memory_gb": 48, "sram_kb": 192, }, "L4": { "name": "NVIDIA L4", "tflops_fp16": 121, "bandwidth_gbps": 300, "memory_gb": 24, "sram_kb": 96, }, } # Default GPU for Zero GPU Spaces DEFAULT_GPU = "A10G" # Attention backend names ATTENTION_BACKENDS = { "math": "Math (Naive)", "flash": "FlashAttention", "mem_efficient": "Memory Efficient", } # Sequence length options for benchmarks SEQ_LENGTH_OPTIONS = [512, 1024, 2048, 4096] # Batch size options BATCH_SIZE_OPTIONS = [1, 2, 4, 8] # Default values DEFAULT_SEQ_LEN = 1024 DEFAULT_BATCH_SIZE = 1 DEFAULT_MODEL = "SmolLM2-360M"