import argparse import os from typing import Any import requests from dotenv import load_dotenv load_dotenv() GROQ_MODELS_URL = "https://api.groq.com/openai/v1/models" # Models selected from Groq's rate-limit table for this workload: relevant # chat/reasoning models with at least 1K requests per day. PREFERRED_PRODUCTION_CHAT_MODELS = [ "llama-3.3-70b-versatile", "openai/gpt-oss-120b", "qwen/qwen3-32b", "llama-3.1-8b-instant", "openai/gpt-oss-20b", "meta-llama/llama-4-scout-17b-16e-instruct", "meta-llama/llama-prompt-guard-2-22m", "meta-llama/llama-prompt-guard-2-86m" ] EXCLUDED_MODEL_TERMS = [ "whisper", "tts", "audio", "moderation", "embed", "compound", "safeguard" ] PREFERRED_MODEL_IDS = {model_id.lower() for model_id in PREFERRED_PRODUCTION_CHAT_MODELS} def fetch_groq_models(api_key: str) -> list[dict[str, Any]]: """Fetch the current Groq model catalog using the OpenAI-compatible API.""" headers = { "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", } response = requests.get(GROQ_MODELS_URL, headers=headers, timeout=30) response.raise_for_status() return response.json().get("data", []) def is_active_chat_model(model: dict[str, Any]) -> bool: """Keep only active preferred chat models that are suitable for judging.""" model_id = str(model.get("id", "")).lower() if not model_id: return False if model.get("active") is False: return False if model_id not in PREFERRED_MODEL_IDS: return False return not any(term in model_id for term in EXCLUDED_MODEL_TERMS) def rank_model(model: dict[str, Any]) -> tuple[int, int, int, str]: """Sort models by preferred production order, then by recency/capacity.""" model_id = str(model.get("id", "")) model_id_lower = model_id.lower() try: preferred_rank = [item.lower() for item in PREFERRED_PRODUCTION_CHAT_MODELS].index(model_id_lower) except ValueError: preferred_rank = len(PREFERRED_PRODUCTION_CHAT_MODELS) created = int(model.get("created") or 0) context_window = int(model.get("context_window") or 0) # Lower tuple wins. Known production models come first; unknown active chat # models then prefer newer creation dates and larger context windows. return (preferred_rank, -created, -context_window, model_id) def select_groq_chat_models(limit: int = len(PREFERRED_PRODUCTION_CHAT_MODELS), strategy: str = "stable") -> list[str]: """Return a comma-ready fallback list for GROQ_MODEL.""" api_key = os.getenv("GROQ_API_KEY") if not api_key: raise RuntimeError("GROQ_API_KEY is missing. Add it to .env first.") models = fetch_groq_models(api_key) chat_models = [model for model in models if is_active_chat_model(model)] if strategy == "latest": ranked_models = sorted( chat_models, key=lambda model: ( -int(model.get("created") or 0), -int(model.get("context_window") or 0), str(model.get("id", "")), ), ) else: ranked_models = sorted(chat_models, key=rank_model) return [model["id"] for model in ranked_models[:limit]] def main() -> None: """CLI entry point used when refreshing the recommended Groq model list.""" parser = argparse.ArgumentParser(description="Select currently available Groq chat models.") parser.add_argument("--limit", type=int, default=len(PREFERRED_PRODUCTION_CHAT_MODELS), help="Number of fallback models to print.") parser.add_argument( "--strategy", choices=["stable", "latest"], default="stable", help="stable prefers known production chat models; latest picks newest active chat models.", ) args = parser.parse_args() models = select_groq_chat_models(limit=args.limit, strategy=args.strategy) if not models: raise RuntimeError("No active Groq chat models were returned by the Models API.") print("Recommended GROQ_MODEL value:") print(f"GROQ_MODEL={','.join(models)}") print("\nSelected fallback order:") for index, model_id in enumerate(models, start=1): print(f"{index}. {model_id}") if __name__ == "__main__": main()