""" Priority Models List - 70 high-signal models for initial registry This list is used programmatically by the ingestion pipeline """ # Tier 1: Frontier Closed Models FRONTIER_CLOSED = { "OpenAI": [ "GPT-3", "GPT-3.5", "GPT-4", "GPT-4 Turbo", "GPT-4o", "GPT-4.1", "GPT-4.1 Preview", "GPT-4.1 Mini", "o1", "o3", ], "Anthropic": [ "Claude 1", "Claude 2", "Claude 2.1", "Claude 3 Haiku", "Claude 3 Sonnet", "Claude 3 Opus", "Claude 3.5 Haiku", "Claude 3.5 Sonnet", "Claude 3.5 Opus", ], "Google DeepMind": [ "PaLM", "PaLM-2", "Gemini 1.0 Nano", "Gemini 1.0 Pro", "Gemini 1.0 Ultra", "Gemini 1.5 Flash", "Gemini 1.5 Pro", "Gemini 1.5 Ultra", "Gemini 2.0", "Gemini Next", ], } # Tier 1B: Major Open-Weight Models OPEN_WEIGHT = { "Meta": [ "Llama-1-7B", "Llama-1-13B", "Llama-1-30B", "Llama-1-65B", "Llama-2-7B", "Llama-2-13B", "Llama-2-70B", "Llama-3-8B", "Llama-3-70B", "Llama-3.1-8B", "Llama-3.1-70B", "Llama-3.1-405B", ], "Mistral AI": [ "Mistral-7B", "Mixtral-8x7B", "Mixtral-8x22B", "Mistral Nemo", "Mistral Small", "Mistral Medium", "Mistral Large", ], "xAI": [ "Grok-1", "Grok-1.5", "Grok-1.5 Vision", "Grok-2", ], } # Tier 2: Chinese Frontier Labs CHINESE_FRONTIER = { "Alibaba / Qwen": [ "Qwen-1", "Qwen-1.5", "Qwen-2-7B", "Qwen-2-57B", "Qwen-2-70B", "Qwen-2.5", "Qwen-VL", ], "DeepSeek": [ "DeepSeek LLM", "DeepSeek V2", "DeepSeek V3", "DeepSeek Coder", ], "Baidu / ERNIE": [ "ERNIE 3.0", "ERNIE 4.0", "ERNIE 4.0 Turbo", ], "SenseTime": [ "SenseNova 5.0", ], "Other Chinese": [ "Baichuan-2-7B", "Baichuan-2-13B", "Baichuan-3", "Yi-34B", "Yi-1.5", ], } # Tier 3: Regional Open Models REGIONAL_OPEN = { "Middle East": [ "Falcon-7B", "Falcon-40B", "Falcon-180B", ], "Korea": [ "Exaone-2.0", ], "Japan": [ "NICT LLM", "Sakana", ], "EU / UK": [ "BLOOM-560B", "BLOOMZ", "T5-XXL", "OPT-175B", "Gopher", "Chinchilla", "U-PALM", ], } def get_all_priority_models() -> list[dict]: """ Returns a flat list of all priority models with provider information Returns: List of dicts with 'model_id', 'provider', 'tier', 'family' keys """ models = [] # Tier 1: Frontier Closed for provider, model_list in FRONTIER_CLOSED.items(): for model in model_list: models.append({ "model_id": model, "provider": provider, "tier": "Tier 1: Frontier Closed", "family": _extract_family(model, provider), }) # Tier 1B: Open Weight for provider, model_list in OPEN_WEIGHT.items(): for model in model_list: models.append({ "model_id": model, "provider": provider, "tier": "Tier 1B: Open Weight", "family": _extract_family(model, provider), }) # Tier 2: Chinese Frontier for provider, model_list in CHINESE_FRONTIER.items(): for model in model_list: models.append({ "model_id": model, "provider": provider, "tier": "Tier 2: Chinese Frontier", "family": _extract_family(model, provider), }) # Tier 3: Regional Open for provider, model_list in REGIONAL_OPEN.items(): for model in model_list: models.append({ "model_id": model, "provider": provider, "tier": "Tier 3: Regional Open", "family": _extract_family(model, provider), }) return models def _extract_family(model_id: str, provider: str) -> str: """Extract model family from model ID""" # GPT family if "GPT" in model_id: if "GPT-4" in model_id: return "GPT-4" elif "GPT-3" in model_id: return "GPT-3" return "GPT" # Claude family if "Claude" in model_id: if "3.5" in model_id: return "Claude 3.5" elif "3" in model_id: return "Claude 3" elif "2" in model_id: return "Claude 2" return "Claude" # Gemini family if "Gemini" in model_id: if "2.0" in model_id or "Next" in model_id: return "Gemini 2.0" elif "1.5" in model_id: return "Gemini 1.5" return "Gemini 1.0" # Llama family if "Llama" in model_id: if "3.1" in model_id: return "Llama 3.1" elif "3" in model_id: return "Llama 3" elif "2" in model_id: return "Llama 2" return "Llama 1" # Qwen family if "Qwen" in model_id: if "2.5" in model_id: return "Qwen 2.5" elif "2" in model_id: return "Qwen 2" elif "1.5" in model_id: return "Qwen 1.5" return "Qwen 1" # Mistral/Mixtral if "Mixtral" in model_id: return "Mixtral" if "Mistral" in model_id: return "Mistral" # Grok if "Grok" in model_id: return "Grok" # DeepSeek if "DeepSeek" in model_id: return "DeepSeek" # ERNIE if "ERNIE" in model_id: return "ERNIE" # Falcon if "Falcon" in model_id: return "Falcon" # Default: use provider as family return provider def get_model_count() -> int: """Get total count of priority models""" return len(get_all_priority_models()) if __name__ == "__main__": models = get_all_priority_models() print(f"Total priority models: {len(models)}") print("\nBreakdown by tier:") from collections import Counter tier_counts = Counter(m["tier"] for m in models) for tier, count in tier_counts.items(): print(f" {tier}: {count}") print("\nBreakdown by provider:") provider_counts = Counter(m["provider"] for m in models) for provider, count in provider_counts.most_common(): print(f" {provider}: {count}")