# NOTE: removed non-Python page-scrape residue (Hugging Face Spaces UI text,
# file-size line, commit hashes, and a line-number gutter) that preceded the
# module docstring and made the file syntactically invalid.
"""
Model Configuration Registry
Defines metadata for all supported code generation models
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, TypedDict
class ModelConfig(TypedDict):
    """Configuration metadata for a model.

    Static per-model facts used elsewhere in this module to look up,
    filter, and describe the supported code-generation models. Every
    entry in SUPPORTED_MODELS conforms to this shape.
    """
    # Hugging Face Hub repository path, e.g. "Salesforce/codegen-350M-mono".
    hf_path: str
    # Human-readable name for display in UIs.
    display_name: str
    # Architecture family label, e.g. "llama", "mistral".
    architecture: str
    # Parameter-count label, e.g. "350M", "7B".
    size: str
    # Number of transformer decoder layers.
    num_layers: int
    # Number of attention (query) heads per layer.
    num_heads: int
    num_kv_heads: Optional[int]  # For GQA models; None means standard MHA
    # Tokenizer vocabulary size.
    vocab_size: int
    # Maximum context window, in tokens.
    context_length: int
    attention_type: str  # "multi_head" or "grouped_query"
    # True when GPU is required/strongly recommended for usable performance.
    requires_gpu: bool
    # Minimum GPU VRAM (GB) at the recommended dtype.
    min_vram_gb: float
    # Minimum system RAM (GB) for CPU fallback.
    min_ram_gb: float
    recommended_dtype: str  # "fp16", "bf16", or "fp32"
# Supported models registry: maps a short, stable model id (used throughout
# the app and by get_model_config/get_available_models) to its ModelConfig.
SUPPORTED_MODELS: Dict[str, ModelConfig] = {
    "codegen-350m": {
        "hf_path": "Salesforce/codegen-350M-mono",
        "display_name": "CodeGen 350M",
        # NOTE(review): HF publishes CodeGen under its own "codegen"
        # architecture (GPT-J derived), not gpt_neox — confirm what label
        # downstream loaders expect before changing this value.
        "architecture": "gpt_neox",
        "size": "350M",
        "num_layers": 20,
        "num_heads": 16,
        "num_kv_heads": None,  # Standard MHA
        "vocab_size": 51200,
        "context_length": 2048,
        "attention_type": "multi_head",
        "requires_gpu": False,  # Small enough for CPU inference
        "min_vram_gb": 2.0,
        "min_ram_gb": 4.0,
        "recommended_dtype": "fp16"  # fp16 for GPU, fp32 for CPU
    },
    "code-llama-7b": {
        "hf_path": "codellama/CodeLlama-7b-hf",
        "display_name": "Code Llama 7B",
        "architecture": "llama",
        "size": "7B",
        "num_layers": 32,
        "num_heads": 32,
        # NOTE(review): 32 KV heads with 32 Q heads is effectively standard
        # multi-head attention, yet attention_type below says
        # "grouped_query" — verify against the HF model config.
        "num_kv_heads": 32,  # GQA: 32 Q heads, 32 KV heads
        "vocab_size": 32000,
        "context_length": 16384,
        "attention_type": "grouped_query",
        "requires_gpu": True,  # Strongly recommended for usable performance
        "min_vram_gb": 14.0,  # FP16 requires ~14GB VRAM
        "min_ram_gb": 18.0,  # FP16 requires ~18GB RAM for CPU fallback
        "recommended_dtype": "fp16"
    },
    "devstral-small": {
        "hf_path": "mistralai/Devstral-Small-2507",
        "display_name": "Devstral Small 24B",
        "architecture": "mistral",
        "size": "24B",
        "num_layers": 40,
        "num_heads": 32,
        "num_kv_heads": 8,  # GQA: 32 Q heads, 8 KV heads
        "vocab_size": 131072,
        "context_length": 131072,
        "attention_type": "grouped_query",
        "requires_gpu": True,  # BF16 required, GPU strongly recommended
        "min_vram_gb": 48.0,  # BF16 requires ~48GB VRAM
        "min_ram_gb": 96.0,  # BF16 requires ~96GB RAM for CPU fallback
        "recommended_dtype": "bf16"  # Devstral requires bfloat16
    }
}
def get_model_config(model_id: str) -> Optional[ModelConfig]:
    """Look up the configuration for a single model.

    Args:
        model_id: Model identifier (e.g. "codegen-350m")

    Returns:
        The matching ModelConfig dict, or None when the id is unknown.
    """
    # EAFP lookup: unknown ids are an expected case, mapped to None.
    try:
        return SUPPORTED_MODELS[model_id]
    except KeyError:
        return None
def get_available_models(device_type: str = "cpu", available_vram_gb: float = 0) -> List[str]:
    """Filter models by hardware constraints.

    Args:
        device_type: "cpu", "cuda", or "mps"
        available_vram_gb: Available VRAM in GB (0 for CPU)

    Returns:
        List of model IDs that can run on the hardware
    """
    def _runnable(cfg):
        # GPU-only models are excluded on CPU hosts.
        if device_type == "cpu" and cfg["requires_gpu"]:
            return False
        # Enforce the VRAM floor only when a GPU is present AND the caller
        # supplied a concrete VRAM figure; 0 means "unknown", so no check.
        vram_known = device_type in ("cuda", "mps") and available_vram_gb > 0
        if vram_known and available_vram_gb < cfg["min_vram_gb"]:
            return False
        return True

    return [model_id for model_id, cfg in SUPPORTED_MODELS.items() if _runnable(cfg)]
def list_all_models() -> List[Dict[str, Any]]:
    """List all supported models with their metadata.

    Returns:
        List of model info dicts, one per registry entry, in registry
        (insertion) order. Each dict carries: id, name, size,
        architecture, attention_type, num_layers, num_heads,
        requires_gpu.
    """
    # Fix: the annotation previously used the builtin function `any` as a
    # type (`Dict[str, any]`); `typing.Any` is the correct spelling.
    # Hardware-sizing fields (min_vram_gb, min_ram_gb, recommended_dtype)
    # are intentionally not exposed here, matching the original projection.
    return [
        {
            "id": model_id,
            "name": config["display_name"],
            "size": config["size"],
            "architecture": config["architecture"],
            "attention_type": config["attention_type"],
            "num_layers": config["num_layers"],
            "num_heads": config["num_heads"],
            "requires_gpu": config["requires_gpu"],
        }
        for model_id, config in SUPPORTED_MODELS.items()
    ]