| """ | |
| Model Configuration Registry | |
| Defines metadata for all supported code generation models | |
| """ | |
| from typing import Dict, List, Optional, TypedDict | |
| from dataclasses import dataclass | |


class ModelConfig(TypedDict):
    """Configuration metadata for a model"""
    hf_path: str
    display_name: str
    architecture: str
    size: str
    num_layers: int
    num_heads: int
    num_kv_heads: Optional[int]  # Number of KV heads for GQA models; None for standard MHA
    vocab_size: int
    context_length: int
    attention_type: str  # "multi_head" or "grouped_query"
    requires_gpu: bool
    min_vram_gb: float
    min_ram_gb: float


# Supported models registry
SUPPORTED_MODELS: Dict[str, ModelConfig] = {
    "codegen-350m": {
        "hf_path": "Salesforce/codegen-350M-mono",
        "display_name": "CodeGen 350M",
        "architecture": "codegen",
        "size": "350M",
        "num_layers": 20,
        "num_heads": 16,
        "num_kv_heads": None,  # Standard MHA
        "vocab_size": 51200,
        "context_length": 2048,
        "attention_type": "multi_head",
        "requires_gpu": False,
        "min_vram_gb": 2.0,
        "min_ram_gb": 4.0,
    },
| "code-llama-7b": { | |
| "hf_path": "codellama/CodeLlama-7b-hf", | |
| "display_name": "Code Llama 7B", | |
| "architecture": "llama", | |
| "size": "7B", | |
| "num_layers": 32, | |
| "num_heads": 32, | |
| "num_kv_heads": 32, # GQA: 32 Q heads, 32 KV heads | |
| "vocab_size": 32000, | |
| "context_length": 16384, | |
| "attention_type": "grouped_query", | |
| "requires_gpu": True, # Strongly recommended for usable performance | |
| "min_vram_gb": 14.0, # FP16 requires ~14GB VRAM | |
| "min_ram_gb": 18.0 # FP16 requires ~18GB RAM for CPU fallback | |
| } | |
| } | |
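

# The min_vram_gb figures above track FP16 weight size: roughly 2 bytes per
# parameter (7B params * 2 bytes ~= 14 GB), with activations and the KV cache
# needing headroom on top. A minimal sketch of that arithmetic; the helper
# name and the bytes-per-param default are illustrative, not part of the
# registry API:
def estimate_weight_memory_gb(num_params_billions: float, bytes_per_param: float = 2.0) -> float:
    """Approximate weight memory in GB for a dense model at the given precision."""
    return num_params_billions * bytes_per_param

# e.g. estimate_weight_memory_gb(7) -> 14.0, matching the Code Llama floor
# above; estimate_weight_memory_gb(7, bytes_per_param=0.5) -> 3.5 for a
# 4-bit quantized variant.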


def get_model_config(model_id: str) -> Optional[ModelConfig]:
    """
    Get the configuration for a specific model.

    Args:
        model_id: Model identifier (e.g., "codegen-350m")

    Returns:
        ModelConfig dict, or None if the model is not found
    """
    return SUPPORTED_MODELS.get(model_id)
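

# A minimal sketch of how a caller might consume this registry to load
# weights; assumes the `transformers` package is installed. `load_model_sketch`
# is illustrative and not part of this module's API.
def load_model_sketch(model_id: str):
    from transformers import AutoModelForCausalLM, AutoTokenizer

    config = get_model_config(model_id)
    if config is None:
        raise ValueError(f"Unknown model id: {model_id}")
    tokenizer = AutoTokenizer.from_pretrained(config["hf_path"])
    model = AutoModelForCausalLM.from_pretrained(config["hf_path"])
    return tokenizer, model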


def get_available_models(device_type: str = "cpu", available_vram_gb: float = 0) -> List[str]:
    """
    Filter supported models by hardware constraints.

    Args:
        device_type: "cpu", "cuda", or "mps"
        available_vram_gb: Available VRAM in GB (0 skips the VRAM check)

    Returns:
        List of model IDs that can run on the given hardware
    """
    available = []
    for model_id, config in SUPPORTED_MODELS.items():
        # Skip models that require a GPU when only a CPU is available
        if config["requires_gpu"] and device_type == "cpu":
            continue
        # Skip models whose VRAM requirement exceeds what is available
        if device_type in ("cuda", "mps") and available_vram_gb > 0:
            if available_vram_gb < config["min_vram_gb"]:
                continue
        available.append(model_id)
    return available
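

# A hedged usage sketch: detect the runtime device with torch (assumed to be
# installed) and filter the registry accordingly. `detect_and_filter` is
# illustrative, not part of this module's API; free-VRAM probing via
# torch.cuda.mem_get_info is only available on CUDA builds.
def detect_and_filter() -> List[str]:
    import torch

    if torch.cuda.is_available():
        free_bytes, _total_bytes = torch.cuda.mem_get_info()
        return get_available_models("cuda", free_bytes / 1024**3)
    if torch.backends.mps.is_available():
        # MPS shares system memory, so pass 0 to skip the VRAM check
        return get_available_models("mps", 0)
    return get_available_models("cpu", 0)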


def list_all_models() -> List[Dict[str, Any]]:
    """
    List all supported models with their metadata.

    Returns:
        List of model info dicts
    """
    models = []
    for model_id, config in SUPPORTED_MODELS.items():
        models.append({
            "id": model_id,
            "name": config["display_name"],
            "size": config["size"],
            "architecture": config["architecture"],
            "attention_type": config["attention_type"],
            "num_layers": config["num_layers"],
            "num_heads": config["num_heads"],
            "requires_gpu": config["requires_gpu"],
        })
    return models
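

# Simple smoke test when the module is run directly; the output format is
# illustrative only.
if __name__ == "__main__":
    for info in list_all_models():
        gpu_note = "GPU required" if info["requires_gpu"] else "CPU-capable"
        print(f"{info['id']}: {info['name']} ({info['size']}, {gpu_note})")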