import torch

# ─── Model Registry ───────────────────────────────────────────────────────────
# loader_type:
#   "multimodal"    → AutoModelForMultimodalLM + AutoProcessor (Gemma 4)
#   "vision_causal" → AutoModelForCausalLM + AutoProcessor (Gemma 3 vision)
#   "causal"        → AutoModelForCausalLM + AutoTokenizer (text-only)
#
# Each entry maps a Hugging Face model id to display/runtime metadata used by
# the UI (name, family, colors, badges) and by the loader (loader_type,
# torch_dtype, vram estimate).
MODELS = {
    # ── Gemma 4 ───────────────────────────────────────────────────────────────
    "google/gemma-4-E2B-it": {
        "name": "Gemma 4 E2B", "short": "E2B",
        "family": "Gemma 4", "family_color": "#1a73e8",
        "params": "2.3B active / 5.1B total", "params_short": "2.3B",
        "context": "128K", "context_k": 128,
        "gpu_size": "large", "supports_vision": True,
        "loader_type": "multimodal", "torch_dtype": torch.bfloat16,
        "description": "Most compact Gemma 4. PLE architecture with image support. Fast and efficient.",
        "release_year": 2026, "license": "Apache 2.0", "license_open": True,
        "vram": "~10 GB", "lmarena": None,
        "architecture": "Transformer + PLE", "badge": "NEW",
    },
    "google/gemma-4-E4B-it": {
        "name": "Gemma 4 E4B", "short": "E4B",
        "family": "Gemma 4", "family_color": "#1a73e8",
        "params": "4.5B active / 8B total", "params_short": "4.5B",
        "context": "128K", "context_k": 128,
        "gpu_size": "large", "supports_vision": True,
        "loader_type": "multimodal", "torch_dtype": torch.bfloat16,
        "description": "Greater capacity with PLE and Shared KV Cache. Image + text. Great balance.",
        "release_year": 2026, "license": "Apache 2.0", "license_open": True,
        "vram": "~16 GB", "lmarena": None,
        "architecture": "Transformer + PLE", "badge": "NEW",
    },
    "google/gemma-4-26B-A4B-it": {
        "name": "Gemma 4 26B MoE", "short": "26B MoE",
        "family": "Gemma 4", "family_color": "#1a73e8",
        "params": "4B active / 26B total", "params_short": "26B MoE",
        "context": "256K", "context_k": 256,
        "gpu_size": "large", "supports_vision": True,
        "loader_type": "multimodal", "torch_dtype": torch.bfloat16,
        "description": "Mixture-of-Experts with only 4B active parameters. LMArena ~1441. Image + text.",
        "release_year": 2026, "license": "Apache 2.0", "license_open": True,
        "vram": "~52 GB", "lmarena": 1441,
        "architecture": "MoE Transformer", "badge": "NEW",
    },
    "google/gemma-4-31B-it": {
        "name": "Gemma 4 31B", "short": "31B",
        "family": "Gemma 4", "family_color": "#1a73e8",
        "params": "31B parameters", "params_short": "31B",
        "context": "256K", "context_k": 256,
        "gpu_size": "xlarge", "supports_vision": True,
        "loader_type": "multimodal", "torch_dtype": torch.bfloat16,
        "description": "Most powerful Gemma 4. Dense Transformer. LMArena ~1452. On par with models 30× larger.",
        "release_year": 2026, "license": "Apache 2.0", "license_open": True,
        "vram": "~62 GB", "lmarena": 1452,
        "architecture": "Dense Transformer", "badge": "FLAGSHIP",
    },
    # ── Gemma 3 ───────────────────────────────────────────────────────────────
    "google/gemma-3-1b-it": {
        "name": "Gemma 3 1B", "short": "1B",
        "family": "Gemma 3", "family_color": "#137333",
        "params": "1B parameters", "params_short": "1B",
        "context": "32K", "context_k": 32,
        "gpu_size": "large", "supports_vision": False,
        "loader_type": "causal", "torch_dtype": torch.bfloat16,
        "description": "Ultra-lightweight. Ideal for edge devices and low-latency tasks. Text only.",
        "release_year": 2025, "license": "Gemma", "license_open": False,
        "vram": "~2 GB", "lmarena": None,
        "architecture": "Transformer", "badge": None,
    },
    "google/gemma-3-4b-it": {
        "name": "Gemma 3 4B", "short": "4B",
        "family": "Gemma 3", "family_color": "#137333",
        "params": "4B parameters", "params_short": "4B",
        "context": "128K", "context_k": 128,
        "gpu_size": "large", "supports_vision": True,
        "loader_type": "vision_causal", "torch_dtype": torch.bfloat16,
        "description": "Perfect balance between size and capability. Image + text. 128K context.",
        "release_year": 2025, "license": "Gemma", "license_open": False,
        "vram": "~8 GB", "lmarena": None,
        "architecture": "Transformer", "badge": None,
    },
    "google/gemma-3-12b-it": {
        "name": "Gemma 3 12B", "short": "12B",
        "family": "Gemma 3", "family_color": "#137333",
        "params": "12B parameters", "params_short": "12B",
        "context": "128K", "context_k": 128,
        "gpu_size": "large", "supports_vision": True,
        "loader_type": "vision_causal", "torch_dtype": torch.bfloat16,
        "description": "High-capacity multimodal. Complex reasoning and image analysis.",
        "release_year": 2025, "license": "Gemma", "license_open": False,
        "vram": "~24 GB", "lmarena": None,
        "architecture": "Transformer", "badge": None,
    },
    "google/gemma-3-27b-it": {
        "name": "Gemma 3 27B", "short": "27B",
        "family": "Gemma 3", "family_color": "#137333",
        "params": "27B parameters", "params_short": "27B",
        "context": "128K", "context_k": 128,
        "gpu_size": "large", "supports_vision": True,
        "loader_type": "vision_causal", "torch_dtype": torch.bfloat16,
        "description": "Most capable Gemma 3. Advanced vision and high-level reasoning.",
        "release_year": 2025, "license": "Gemma", "license_open": False,
        "vram": "~54 GB", "lmarena": None,
        "architecture": "Transformer", "badge": None,
    },
    # ── Gemma 2 ───────────────────────────────────────────────────────────────
    "google/gemma-2-2b-it": {
        "name": "Gemma 2 2B", "short": "2B",
        "family": "Gemma 2", "family_color": "#e37400",
        "params": "2B parameters", "params_short": "2B",
        "context": "8K", "context_k": 8,
        "gpu_size": "large", "supports_vision": False,
        "loader_type": "causal", "torch_dtype": torch.bfloat16,
        "description": "Fast and efficient. Sliding Window Attention. Text only.",
        "release_year": 2024, "license": "Gemma", "license_open": False,
        "vram": "~4 GB", "lmarena": None,
        "architecture": "Sliding Window Attn", "badge": None,
    },
    "google/gemma-2-9b-it": {
        "name": "Gemma 2 9B", "short": "9B",
        "family": "Gemma 2", "family_color": "#e37400",
        "params": "9B parameters", "params_short": "9B",
        "context": "8K", "context_k": 8,
        "gpu_size": "large", "supports_vision": False,
        "loader_type": "causal", "torch_dtype": torch.bfloat16,
        "description": "Solid text performance. Efficient architecture with sliding window.",
        "release_year": 2024, "license": "Gemma", "license_open": False,
        "vram": "~18 GB", "lmarena": None,
        "architecture": "Sliding Window Attn", "badge": None,
    },
    "google/gemma-2-27b-it": {
        "name": "Gemma 2 27B", "short": "27B",
        "family": "Gemma 2", "family_color": "#e37400",
        "params": "27B parameters", "params_short": "27B",
        "context": "8K", "context_k": 8,
        "gpu_size": "large", "supports_vision": False,
        "loader_type": "causal", "torch_dtype": torch.bfloat16,
        "description": "Largest Gemma 2. High performance on complex text tasks.",
        "release_year": 2024, "license": "Gemma", "license_open": False,
        "vram": "~54 GB", "lmarena": None,
        "architecture": "Sliding Window Attn", "badge": None,
    },
    # ── Gemma 1 ───────────────────────────────────────────────────────────────
    "google/gemma-1.1-2b-it": {
        "name": "Gemma 1.1 2B", "short": "2B",
        "family": "Gemma 1", "family_color": "#c5221f",
        "params": "2B parameters", "params_short": "2B",
        "context": "8K", "context_k": 8,
        "gpu_size": "large", "supports_vision": False,
        # Gemma 1 entries use float16 (not bfloat16 like later generations).
        "loader_type": "causal", "torch_dtype": torch.float16,
        "description": "The original foundation model. Where it all began. Text only.",
        "release_year": 2024, "license": "Gemma", "license_open": False,
        "vram": "~4 GB", "lmarena": None,
        "architecture": "Transformer", "badge": None,
    },
    "google/gemma-1.1-7b-it": {
        "name": "Gemma 1.1 7B", "short": "7B",
        "family": "Gemma 1", "family_color": "#c5221f",
        "params": "7B parameters", "params_short": "7B",
        "context": "8K", "context_k": 8,
        "gpu_size": "large", "supports_vision": False,
        "loader_type": "causal", "torch_dtype": torch.float16,
        "description": "The original 7B. The historical base of the entire Gemma family.",
        "release_year": 2024, "license": "Gemma", "license_open": False,
        "vram": "~14 GB", "lmarena": None,
        "architecture": "Transformer", "badge": None,
    },
}

# Per-family display metadata (accent color, background, icon, blurb).
# Keys match the "family" field of MODELS entries.
FAMILIES = {
    "Gemma 4": {
        "color": "#1a73e8", "bg": "#e8f0fe", "year": 2026,
        "description": "The newest generation. Full multimodal (image + text). Apache 2.0. Just launched!",
        "icon": "✦", "new": True,
    },
    "Gemma 3": {
        "color": "#137333", "bg": "#e6f4ea", "year": 2025,
        "description": "Second generation with vision. Long contexts up to 128K tokens.",
        "icon": "◆", "new": False,
    },
    "Gemma 2": {
        "color": "#e37400", "bg": "#fef7e0", "year": 2024,
        "description": "Optimized for text with Sliding Window Attention. Efficient and fast.",
        "icon": "●", "new": False,
    },
    "Gemma 1": {
        "color": "#c5221f", "bg": "#fce8e6", "year": 2024,
        "description": "The original foundation models from Google DeepMind.",
        "icon": "◉", "new": False,
    },
}


def get_models_by_family(family: str) -> dict:
    """Return the subset of MODELS whose "family" field equals *family*.

    Keys are model ids, values are the metadata dicts (not copies).
    An unknown family name yields an empty dict.
    """
    return {k: v for k, v in MODELS.items() if v["family"] == family}