"""
Model Configuration Registry
Defines metadata for all supported code generation models
"""
from typing import Any, Dict, List, Optional, TypedDict


class ModelConfig(TypedDict):
"""Configuration metadata for a model"""
hf_path: str
display_name: str
architecture: str
size: str
num_layers: int
num_heads: int
num_kv_heads: Optional[int] # For GQA models
vocab_size: int
context_length: int
attention_type: str # "multi_head" or "grouped_query"
requires_gpu: bool
min_vram_gb: float
min_ram_gb: float


# Supported models registry
SUPPORTED_MODELS: Dict[str, ModelConfig] = {
    "codegen-350m": {
        "hf_path": "Salesforce/codegen-350M-mono",
        "display_name": "CodeGen 350M",
        "architecture": "codegen",
        "size": "350M",
        "num_layers": 20,
        "num_heads": 16,
        "num_kv_heads": None,  # Standard MHA
        "vocab_size": 51200,
        "context_length": 2048,
        "attention_type": "multi_head",
        "requires_gpu": False,
        "min_vram_gb": 2.0,
        "min_ram_gb": 4.0
    },
    "code-llama-7b": {
        "hf_path": "codellama/CodeLlama-7b-hf",
        "display_name": "Code Llama 7B",
        "architecture": "llama",
        "size": "7B",
        "num_layers": 32,
        "num_heads": 32,
        "num_kv_heads": 32,  # 32 KV heads == 32 query heads, i.e. standard MHA (Code Llama uses GQA only at 34B/70B)
        "vocab_size": 32000,
        "context_length": 16384,
        "attention_type": "multi_head",
        "requires_gpu": True,  # Strongly recommended for usable performance
        "min_vram_gb": 14.0,  # FP16 weights alone need ~14 GB of VRAM
        "min_ram_gb": 18.0  # ~18 GB of RAM for FP16 CPU fallback
    }
}
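

# Illustrative sanity check, not part of the original module: verifies that each
# registry entry's attention_type agrees with its head counts (GQA requires
# strictly fewer KV heads than query heads). The name `_validate_registry` is
# a hypothetical helper added here for illustration.
def _validate_registry(models: Dict[str, ModelConfig]) -> None:
    """Assert that attention metadata in each config is internally consistent."""
    for model_id, config in models.items():
        kv_heads = config["num_kv_heads"]
        if config["attention_type"] == "grouped_query":
            # GQA: an explicit KV head count smaller than the query head count
            assert kv_heads is not None and kv_heads < config["num_heads"], model_id
        else:
            # Standard MHA: either no explicit KV head count, or equal counts
            assert kv_heads is None or kv_heads == config["num_heads"], model_id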


def get_model_config(model_id: str) -> Optional[ModelConfig]:
    """
    Get configuration for a specific model.

    Args:
        model_id: Model identifier (e.g., "codegen-350m")

    Returns:
        ModelConfig dict, or None if the model is not found
    """
    return SUPPORTED_MODELS.get(model_id)


def get_available_models(device_type: str = "cpu", available_vram_gb: float = 0) -> List[str]:
    """
    Filter models by hardware constraints.

    Args:
        device_type: "cpu", "cuda", or "mps"
        available_vram_gb: Available VRAM in GB (0 for CPU)

    Returns:
        List of model IDs that can run on the hardware
    """
    available = []
    for model_id, config in SUPPORTED_MODELS.items():
        # Skip models that require a GPU when running on CPU
        if config["requires_gpu"] and device_type == "cpu":
            continue
        # Enforce VRAM requirements when the caller reports a known amount
        if device_type in ["cuda", "mps"] and available_vram_gb > 0:
            if available_vram_gb < config["min_vram_gb"]:
                continue
        available.append(model_id)
    return available
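

# A minimal sketch of how a caller might detect hardware before filtering
# models. Assumes PyTorch is installed; `detect_hardware` is an illustrative
# helper added here, not part of the original module.
def detect_hardware():
    """Return (device_type, available_vram_gb) for the current machine."""
    import torch  # assumed dependency

    if torch.cuda.is_available():
        props = torch.cuda.get_device_properties(0)
        return "cuda", props.total_memory / 1024**3
    if torch.backends.mps.is_available():
        # MPS shares system memory; report 0 so the VRAM check is skipped
        return "mps", 0.0
    return "cpu", 0.0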


def list_all_models() -> List[Dict[str, Any]]:
    """
    List all supported models with their metadata.

    Returns:
        List of model info dicts
    """
    models = []
    for model_id, config in SUPPORTED_MODELS.items():
        models.append({
            "id": model_id,
            "name": config["display_name"],
            "size": config["size"],
            "architecture": config["architecture"],
            "attention_type": config["attention_type"],
            "num_layers": config["num_layers"],
            "num_heads": config["num_heads"],
            "requires_gpu": config["requires_gpu"]
        })
    return models
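

# Usage sketch: a quick smoke test when the module is run directly. The printed
# output format below is illustrative only.
if __name__ == "__main__":
    for info in list_all_models():
        print(f"{info['id']}: {info['name']} ({info['size']}, {info['architecture']})")

    # e.g. a CPU-only machine sees only models that don't require a GPU
    print("CPU-capable models:", get_available_models(device_type="cpu"))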