"""
Model Configuration Registry

Defines metadata for all supported code generation models.
"""

from typing import Any, Dict, List, Optional, TypedDict
from dataclasses import dataclass


class ModelConfig(TypedDict):
    """Configuration metadata for a model"""

    hf_path: str  # Model path/identifier, e.g. "Salesforce/codegen-350M-mono"
    display_name: str  # Human-readable name
    architecture: str
    size: str  # Parameter-count label, e.g. "350M" or "7B"
    num_layers: int
    num_heads: int
    num_kv_heads: Optional[int]  # For GQA models; None for standard MHA
    vocab_size: int
    context_length: int
    attention_type: str  # "multi_head" or "grouped_query"
    requires_gpu: bool
    min_vram_gb: float
    min_ram_gb: float


# Supported models registry, keyed by model identifier.
SUPPORTED_MODELS: Dict[str, ModelConfig] = {
    "codegen-350m": {
        "hf_path": "Salesforce/codegen-350M-mono",
        "display_name": "CodeGen 350M",
        "architecture": "gpt_neox",
        "size": "350M",
        "num_layers": 20,
        "num_heads": 16,
        "num_kv_heads": None,  # Standard MHA
        "vocab_size": 51200,
        "context_length": 2048,
        "attention_type": "multi_head",
        "requires_gpu": False,
        "min_vram_gb": 2.0,
        "min_ram_gb": 4.0,
    },
    "code-llama-7b": {
        "hf_path": "codellama/CodeLlama-7b-hf",
        "display_name": "Code Llama 7B",
        "architecture": "llama",
        "size": "7B",
        "num_layers": 32,
        "num_heads": 32,
        # 32 Q heads, 32 KV heads: the KV head count equals the Q head count.
        # NOTE(review): this entry is still labelled "grouped_query" below;
        # confirm that downstream consumers expect that label here.
        "num_kv_heads": 32,
        "vocab_size": 32000,
        "context_length": 16384,
        "attention_type": "grouped_query",
        "requires_gpu": True,  # Strongly recommended for usable performance
        "min_vram_gb": 14.0,  # FP16 requires ~14GB VRAM
        "min_ram_gb": 18.0,  # FP16 requires ~18GB RAM for CPU fallback
    },
}


def get_model_config(model_id: str) -> Optional[ModelConfig]:
    """
    Get configuration for a specific model

    Args:
        model_id: Model identifier (e.g., "codegen-350m")

    Returns:
        ModelConfig dict or None if model not found
    """
    return SUPPORTED_MODELS.get(model_id)


def get_available_models(device_type: str = "cpu", available_vram_gb: float = 0) -> List[str]:
    """
    Filter models by hardware constraints

    A model is excluded when it requires a GPU and ``device_type`` is "cpu",
    or when the device is "cuda"/"mps", a positive ``available_vram_gb`` was
    given, and that amount is below the model's ``min_vram_gb``.
    ``min_ram_gb`` is not consulted here.

    Args:
        device_type: "cpu", "cuda", or "mps"
        available_vram_gb: Available VRAM in GB (0 for CPU). A value of 0
            (or less) disables the VRAM check entirely.

    Returns:
        List of model IDs that can run on the hardware, in registry order
    """
    available = []
    for model_id, config in SUPPORTED_MODELS.items():
        # Check if GPU is required but not available
        if config["requires_gpu"] and device_type == "cpu":
            continue

        # Check VRAM requirements; only enforced when a positive VRAM figure
        # was supplied for a GPU-type device.
        if device_type in ("cuda", "mps") and 0 < available_vram_gb < config["min_vram_gb"]:
            continue

        available.append(model_id)
    return available


def list_all_models() -> List[Dict[str, Any]]:
    """
    List all supported models with their metadata

    Returns:
        List of model info dicts, one per registry entry, each with the keys
        "id", "name", "size", "architecture", "attention_type",
        "num_layers", "num_heads" and "requires_gpu"
    """
    return [
        {
            "id": model_id,
            "name": config["display_name"],
            "size": config["size"],
            "architecture": config["architecture"],
            "attention_type": config["attention_type"],
            "num_layers": config["num_layers"],
            "num_heads": config["num_heads"],
            "requires_gpu": config["requires_gpu"],
        }
        for model_id, config in SUPPORTED_MODELS.items()
    ]