# Page header captured along with this file: "Spaces: Running".
| import argparse | |
| import os | |
| from typing import Any | |
| import requests | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
# Endpoint that fetch_groq_models() queries to list the Groq model catalog.
GROQ_MODELS_URL = "https://api.groq.com/openai/v1/models"

# Models selected from Groq's rate-limit table for this workload: relevant
# chat/reasoning models with at least 1K requests per day.
# The list order is the "stable" ranking order (earlier entries sort first),
# and its length is the default for the `limit` argument / `--limit` flag.
# NOTE(review): the two "prompt-guard" entries look like classifier models
# rather than chat models -- confirm they belong in a chat fallback list.
PREFERRED_PRODUCTION_CHAT_MODELS = [
    "llama-3.3-70b-versatile",
    "openai/gpt-oss-120b",
    "qwen/qwen3-32b",
    "llama-3.1-8b-instant",
    "openai/gpt-oss-20b",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "meta-llama/llama-prompt-guard-2-22m",
    "meta-llama/llama-prompt-guard-2-86m",
]

# A model whose lower-cased id contains any of these substrings is rejected
# by is_active_chat_model().
EXCLUDED_MODEL_TERMS = [
    "whisper",
    "tts",
    "audio",
    "moderation",
    "embed",
    "compound",
    "safeguard",
]

# Lower-cased preferred ids, kept as a set for case-insensitive membership tests.
PREFERRED_MODEL_IDS = set(map(str.lower, PREFERRED_PRODUCTION_CHAT_MODELS))
def fetch_groq_models(api_key: str) -> list[dict[str, Any]]:
    """Download the Groq model catalog from the OpenAI-compatible models endpoint.

    Args:
        api_key: Groq API key, sent as a Bearer token in the Authorization header.

    Returns:
        The value stored under the "data" key of the JSON response, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status
            (raised by ``raise_for_status``).
    """
    request_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    # A 30-second timeout keeps the call from blocking indefinitely.
    reply = requests.get(GROQ_MODELS_URL, headers=request_headers, timeout=30)
    reply.raise_for_status()
    payload = reply.json()
    return payload.get("data", [])
def is_active_chat_model(model: dict[str, Any]) -> bool:
    """Tell whether a catalog entry is an active, preferred, non-excluded model.

    Args:
        model: One model entry; its "id" and "active" keys are read.

    Returns:
        True only when the entry has a non-empty id, is not explicitly marked
        ``active: False``, its lower-cased id is in PREFERRED_MODEL_IDS, and
        that id contains none of EXCLUDED_MODEL_TERMS.
    """
    identifier = str(model.get("id", "")).lower()

    # Only an explicit False deactivates a model; a missing "active" key passes.
    if identifier == "" or model.get("active") is False:
        return False
    if identifier not in PREFERRED_MODEL_IDS:
        return False

    for banned_term in EXCLUDED_MODEL_TERMS:
        if banned_term in identifier:
            return False
    return True
def rank_model(model: dict[str, Any]) -> tuple[int, int, int, str]:
    """Build the sort key used for the "stable" ordering of models.

    Args:
        model: One model entry; its "id", "created" and "context_window" keys
            are read. Missing or falsy numeric fields count as 0.

    Returns:
        ``(preferred_rank, -created, -context_window, model_id)``. Smaller
        tuples sort first, so entries earlier in
        PREFERRED_PRODUCTION_CHAT_MODELS win, then newer creation values,
        then larger context windows, then the id as a final tie-breaker.
    """
    raw_id = str(model.get("id", ""))
    lowered_id = raw_id.lower()

    # Position of the first case-insensitive match in the preferred list;
    # ids that are not listed rank after every listed one.
    position = next(
        (
            index
            for index, preferred in enumerate(PREFERRED_PRODUCTION_CHAT_MODELS)
            if preferred.lower() == lowered_id
        ),
        len(PREFERRED_PRODUCTION_CHAT_MODELS),
    )

    created_at = int(model.get("created") or 0)
    window_size = int(model.get("context_window") or 0)

    # Negated so that larger (newer / bigger) values sort earlier.
    return (position, -created_at, -window_size, raw_id)
def select_groq_chat_models(limit: int = len(PREFERRED_PRODUCTION_CHAT_MODELS), strategy: str = "stable") -> list[str]:
    """Return a comma-ready fallback list for GROQ_MODEL.

    Fetches the model catalog, keeps the entries accepted by
    is_active_chat_model(), orders them and returns the first ``limit`` ids.

    Args:
        limit: Maximum number of model ids to return. Must not be negative.
        strategy: "latest" orders by newest ``created`` value, then largest
            ``context_window``, then id. Any other value uses rank_model(),
            i.e. the preferred production order.

    Returns:
        The selected model ids, best candidate first. May be empty.

    Raises:
        ValueError: If ``limit`` is negative.
        RuntimeError: If the GROQ_API_KEY environment variable is unset or empty.
    """
    # A negative limit would make the final slice drop models from the end of
    # the ranking instead of capping the result, so reject it up front,
    # before any network request is made.
    if limit < 0:
        raise ValueError(f"limit must be non-negative, got {limit}")

    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY is missing. Add it to .env first.")

    chat_models = [model for model in fetch_groq_models(api_key) if is_active_chat_model(model)]

    if strategy == "latest":
        def sort_key(model: dict[str, Any]) -> tuple[int, int, str]:
            # Negated so that newer / larger values sort first.
            return (
                -int(model.get("created") or 0),
                -int(model.get("context_window") or 0),
                str(model.get("id", "")),
            )
    else:
        sort_key = rank_model

    ranked_models = sorted(chat_models, key=sort_key)
    return [model["id"] for model in ranked_models[:limit]]
def main() -> None:
    """CLI entry point used when refreshing the recommended Groq model list.

    Parses ``--limit`` and ``--strategy``, selects models with
    select_groq_chat_models(), then prints a ready-to-paste ``GROQ_MODEL=...``
    line followed by the numbered fallback order.

    Raises:
        SystemExit: Via argparse when arguments are invalid, including a
            ``--limit`` below 1.
        RuntimeError: If no model was selected.
    """
    parser = argparse.ArgumentParser(description="Select currently available Groq chat models.")
    parser.add_argument(
        "--limit",
        type=int,
        default=len(PREFERRED_PRODUCTION_CHAT_MODELS),
        help="Number of fallback models to print.",
    )
    parser.add_argument(
        "--strategy",
        choices=["stable", "latest"],
        default="stable",
        help="stable prefers known production chat models; latest picks newest active chat models.",
    )
    args = parser.parse_args()

    # A limit of 0 would otherwise surface as the misleading "no models
    # returned" error below, and a negative one would drop models from the
    # end of the ranking. Report it as a usage error instead.
    if args.limit < 1:
        parser.error("--limit must be a positive integer")

    models = select_groq_chat_models(limit=args.limit, strategy=args.strategy)
    if not models:
        raise RuntimeError("No active Groq chat models were returned by the Models API.")

    print("Recommended GROQ_MODEL value:")
    print(f"GROQ_MODEL={','.join(models)}")
    print("\nSelected fallback order:")
    for index, model_id in enumerate(models, start=1):
        print(f"{index}. {model_id}")


if __name__ == "__main__":
    main()