# NOTE: this file was extracted from a web file viewer. The viewer residue
# (status banner "Running", file size "4,310 Bytes", revision hashes
# dc06d4c / c6a3f44, and the 1-129 line-number gutter) had been fused onto the
# first import line; it is not part of the program.
import argparse
import os
from typing import Any
import requests
from dotenv import load_dotenv
# Load a local .env file at import time so that GROQ_API_KEY can be read from
# the environment later via os.getenv.
load_dotenv()
# Endpoint queried for the Groq model catalog (OpenAI-compatible API).
GROQ_MODELS_URL = "https://api.groq.com/openai/v1/models"

# Allow-list of model IDs, in fallback priority order (earlier entries rank
# first). Picked from Groq's rate-limit table for this workload: relevant
# chat/reasoning models with at least 1K requests per day.
PREFERRED_PRODUCTION_CHAT_MODELS = [
    "llama-3.3-70b-versatile",
    "openai/gpt-oss-120b",
    "qwen/qwen3-32b",
    "llama-3.1-8b-instant",
    "openai/gpt-oss-20b",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "meta-llama/llama-prompt-guard-2-22m",
    "meta-llama/llama-prompt-guard-2-86m",
]

# Substrings that disqualify a model; they are matched against the lowercased
# model ID.
EXCLUDED_MODEL_TERMS = [
    "whisper",
    "tts",
    "audio",
    "moderation",
    "embed",
    "compound",
    "safeguard",
]

# Lowercased allow-list, kept as a set for case-insensitive membership tests.
PREFERRED_MODEL_IDS = set(map(str.lower, PREFERRED_PRODUCTION_CHAT_MODELS))
def fetch_groq_models(api_key: str) -> list[dict[str, Any]]:
    """Download the Groq model catalog from the OpenAI-compatible endpoint.

    Args:
        api_key: Groq API key, sent as a Bearer token.

    Returns:
        The value stored under the "data" key of the JSON response, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the API answers with an error status
            (raised by ``raise_for_status``).
    """
    reply = requests.get(
        GROQ_MODELS_URL,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        timeout=30,  # seconds; keeps the CLI from hanging on a stalled request
    )
    reply.raise_for_status()
    payload = reply.json()
    return payload.get("data", [])
def is_active_chat_model(model: dict[str, Any]) -> bool:
    """Decide whether a catalog entry is an allowed, active chat model.

    A model is accepted only when all of the following hold:

    * it has a non-empty ID,
    * its "active" field is not explicitly ``False`` (a missing field is
      treated as active),
    * its lowercased ID appears in ``PREFERRED_MODEL_IDS``,
    * its lowercased ID contains none of ``EXCLUDED_MODEL_TERMS``.

    Args:
        model: One entry of the model catalog.

    Returns:
        ``True`` when the model passes every check above, else ``False``.
    """
    normalized_id = str(model.get("id", "")).lower()
    explicitly_inactive = model.get("active") is False

    if not normalized_id or explicitly_inactive or normalized_id not in PREFERRED_MODEL_IDS:
        return False

    for blocked_term in EXCLUDED_MODEL_TERMS:
        if blocked_term in normalized_id:
            return False
    return True
def rank_model(model: dict[str, Any]) -> tuple[int, int, int, str]:
    """Build the sort key used by the "stable" ordering.

    Args:
        model: One entry of the model catalog.

    Returns:
        A tuple ``(preferred_rank, -created, -context_window, model_id)``.
        Smaller tuples sort first, so models are ordered by their position in
        ``PREFERRED_PRODUCTION_CHAT_MODELS`` (compared case-insensitively);
        models not in that list share the rank ``len(list)`` and are then
        ordered newest first, largest context window first, and finally by ID.
    """
    model_id = str(model.get("id", ""))
    wanted = model_id.lower()

    # First matching position in the preferred list, or one-past-the-end when
    # the model is not listed.
    preferred_rank = next(
        (
            position
            for position, candidate in enumerate(PREFERRED_PRODUCTION_CHAT_MODELS)
            if candidate.lower() == wanted
        ),
        len(PREFERRED_PRODUCTION_CHAT_MODELS),
    )

    # Missing or falsy fields count as 0; values are negated so that larger
    # numbers sort earlier.
    created_at = int(model.get("created") or 0)
    window_size = int(model.get("context_window") or 0)

    return (preferred_rank, -created_at, -window_size, model_id)
def select_groq_chat_models(
    limit: int = len(PREFERRED_PRODUCTION_CHAT_MODELS),
    strategy: str = "stable",
) -> list[str]:
    """Return the ordered list of model IDs to use as the GROQ_MODEL fallbacks.

    The catalog is fetched from the Groq API, filtered with
    ``is_active_chat_model``, ordered according to ``strategy`` and truncated
    to ``limit`` entries.

    Args:
        limit: Maximum number of model IDs to return. Must not be negative.
        strategy: ``"latest"`` orders by newest creation time, then largest
            context window, then ID. Any other value (including the default
            ``"stable"``) orders with ``rank_model``.

    Returns:
        Model IDs exactly as reported by the API, best candidate first.

    Raises:
        ValueError: If ``limit`` is negative.
        RuntimeError: If the GROQ_API_KEY environment variable is unset or
            empty.

    Errors raised by ``fetch_groq_models`` propagate unchanged.
    """
    # A negative limit would otherwise be used as a slice bound and silently
    # drop models from the *end* of the ranking instead of limiting the count.
    # ``None`` is still tolerated because slicing treats it as "no limit".
    if limit is not None and limit < 0:
        raise ValueError(f"limit must be non-negative, got {limit}.")

    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY is missing. Add it to .env first.")

    models = fetch_groq_models(api_key)
    chat_models = [model for model in models if is_active_chat_model(model)]

    if strategy == "latest":
        ranked_models = sorted(
            chat_models,
            key=lambda model: (
                -int(model.get("created") or 0),
                -int(model.get("context_window") or 0),
                str(model.get("id", "")),
            ),
        )
    else:
        ranked_models = sorted(chat_models, key=rank_model)

    return [model["id"] for model in ranked_models[:limit]]
def main() -> None:
    """CLI entry point used when refreshing the recommended Groq model list.

    Parses ``--limit`` and ``--strategy``, selects the models, and prints a
    ready-to-paste ``GROQ_MODEL=...`` line followed by the numbered fallback
    order.

    Raises:
        SystemExit: Via ``parser.error`` when ``--limit`` is smaller than 1.
        RuntimeError: If no model survives the selection.
    """
    parser = argparse.ArgumentParser(description="Select currently available Groq chat models.")
    parser.add_argument(
        "--limit",
        type=int,
        default=len(PREFERRED_PRODUCTION_CHAT_MODELS),
        help="Number of fallback models to print.",
    )
    parser.add_argument(
        "--strategy",
        choices=["stable", "latest"],
        default="stable",
        help="stable prefers known production chat models; latest picks newest active chat models.",
    )
    args = parser.parse_args()

    # Reject unusable limits before touching the network: 0 would otherwise be
    # reported as "no models returned by the API", and a negative value would
    # silently trim models from the end of the ranking.
    if args.limit < 1:
        parser.error("--limit must be at least 1.")

    models = select_groq_chat_models(limit=args.limit, strategy=args.strategy)
    if not models:
        raise RuntimeError("No active Groq chat models were returned by the Models API.")

    print("Recommended GROQ_MODEL value:")
    print(f"GROQ_MODEL={','.join(models)}")
    print("\nSelected fallback order:")
    for index, model_id in enumerate(models, start=1):
        print(f"{index}. {model_id}")


if __name__ == "__main__":
    main()