"""
Constants for FlashAttention Explorer Space.
Model configurations and GPU specifications.
"""

# Presets for real HuggingFace models, keyed by the short model name.
#
# Every preset carries the same fields:
#   model_id     HuggingFace Hub repository id
#   layers       layer count
#   q_heads      number of query heads
#   kv_heads     number of key/value heads
#   head_dim     per-head dimension (q_heads * head_dim == hidden_dim below)
#   hidden_dim   model hidden size
#   vocab_size   vocabulary size
#   description  short note on what the model is used for here
#   license      model license name
#   gqa_ratio    query heads per KV head, i.e. q_heads / kv_heads
MODEL_CONFIGS = {
    "SmolLM2-360M": dict(
        model_id="HuggingFaceTB/SmolLM2-360M-Instruct",
        layers=32,
        q_heads=15,
        kv_heads=5,
        head_dim=64,
        hidden_dim=960,
        vocab_size=49152,
        description="Fast demos, scaling curves",
        license="Apache 2.0",
        gqa_ratio=3,  # 15 query heads shared across 5 KV heads
    ),
    "Qwen2.5-0.5B": dict(
        model_id="Qwen/Qwen2.5-0.5B-Instruct",
        layers=24,
        q_heads=14,
        kv_heads=2,
        head_dim=64,
        hidden_dim=896,
        vocab_size=151936,
        description="Prefill/Decode comparison",
        license="Apache 2.0",
        gqa_ratio=7,  # 14 query heads shared across 2 KV heads
    ),
    "Llama-3.2-1B": dict(
        model_id="meta-llama/Llama-3.2-1B-Instruct",
        layers=16,
        q_heads=32,
        kv_heads=8,
        head_dim=64,
        hidden_dim=2048,
        vocab_size=128256,
        description="GQA demonstration",
        license="Llama 3.2",
        gqa_ratio=4,  # 32 query heads shared across 8 KV heads
    ),
}

# GPU specifications for roofline analysis, keyed by a short GPU identifier.
#
# Every entry carries the same fields:
#   name            human-readable GPU name
#   tflops_fp16     peak FP16 throughput in TFLOPS
#                   (NOTE(review): presumably dense Tensor Core datasheet
#                   figures -- confirm against the vendor datasheets)
#   bandwidth_gbps  peak memory bandwidth in GB/s (gigabytes per second)
#   memory_gb       device memory capacity in GB
#   sram_kb         on-chip SRAM per SM in KB, expressed uniformly as the
#                   combined L1 cache / shared memory capacity listed in
#                   NVIDIA's architecture tuning guides
GPU_SPECS = {
    "A10G": {
        "name": "NVIDIA A10G",
        "tflops_fp16": 125,
        "bandwidth_gbps": 600,  # GB/s
        "memory_gb": 24,
        "sram_kb": 128,  # Ampere GA10x (compute capability 8.6)
    },
    "A100_80GB": {
        "name": "NVIDIA A100 (80GB)",
        "tflops_fp16": 312,
        "bandwidth_gbps": 2000,
        "memory_gb": 80,
        "sram_kb": 192,  # Ampere GA100 (compute capability 8.0)
    },
    "H100": {
        "name": "NVIDIA H100 (80GB)",
        "tflops_fp16": 989,
        "bandwidth_gbps": 3350,
        "memory_gb": 80,
        "sram_kb": 256,  # Hopper (compute capability 9.0)
    },
    "H200": {
        "name": "NVIDIA H200 (141GB)",
        "tflops_fp16": 989,  # Same compute as H100
        "bandwidth_gbps": 4800,  # HBM3e: 4.8 TB/s
        "memory_gb": 141,
        "sram_kb": 256,  # Hopper (compute capability 9.0)
    },
    "L40S": {
        "name": "NVIDIA L40S",
        "tflops_fp16": 362,
        "bandwidth_gbps": 864,
        "memory_gb": 48,
        "sram_kb": 128,  # Ada Lovelace (compute capability 8.9)
    },
    "L4": {
        "name": "NVIDIA L4",
        "tflops_fp16": 121,
        "bandwidth_gbps": 300,
        "memory_gb": 24,
        "sram_kb": 128,  # Ada Lovelace (compute capability 8.9)
    },
}

# GPU selected by default for Zero GPU Spaces; the value is a GPU_SPECS key.
DEFAULT_GPU: str = "A10G"

# Attention backends: short backend key -> display name.
ATTENTION_BACKENDS = dict(
    math="Math (Naive)",
    flash="FlashAttention",
    mem_efficient="Memory Efficient",
)

# Benchmark sequence lengths: powers of two, 2**9 .. 2**12
# -> [512, 1024, 2048, 4096]
SEQ_LENGTH_OPTIONS = [1 << exponent for exponent in range(9, 13)]

# Batch sizes: powers of two, 2**0 .. 2**3 -> [1, 2, 4, 8]
BATCH_SIZE_OPTIONS = [1 << exponent for exponent in range(4)]

# Defaults. The sequence length and batch size each appear in the option
# lists above.
DEFAULT_SEQ_LEN: int = 1024
DEFAULT_BATCH_SIZE: int = 1
DEFAULT_MODEL: str = "SmolLM2-360M"