File size: 4,310 Bytes
dc06d4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6a3f44
dc06d4c
 
 
 
 
 
 
 
 
 
c6a3f44
dc06d4c
 
 
 
 
 
 
 
 
 
 
c6a3f44
dc06d4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6a3f44
dc06d4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6a3f44
dc06d4c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import argparse
import os
from typing import Any

import requests
from dotenv import load_dotenv


# Runs at import time: load variables from a .env file into the process
# environment so GROQ_API_KEY can later be read with os.getenv().
load_dotenv()

# Endpoint of Groq's OpenAI-compatible model catalog.
GROQ_MODELS_URL = "https://api.groq.com/openai/v1/models"

# Models selected from Groq's rate-limit table for this workload: relevant
# chat/reasoning models with at least 1K requests per day.
# The order of this list matters: rank_model() uses each entry's index as its
# priority, and the list length is the default value of the `limit` arguments.
# NOTE(review): the two "prompt-guard" entries look like guard/classifier
# models rather than chat models -- confirm they belong in this list.
PREFERRED_PRODUCTION_CHAT_MODELS = [
    "llama-3.3-70b-versatile",
    "openai/gpt-oss-120b",
    "qwen/qwen3-32b",
    "llama-3.1-8b-instant",
    "openai/gpt-oss-20b",
    "meta-llama/llama-4-scout-17b-16e-instruct",
    "meta-llama/llama-prompt-guard-2-22m",
    "meta-llama/llama-prompt-guard-2-86m"
]

# Substrings that disqualify a model. is_active_chat_model() rejects any model
# whose lower-cased id contains one of these terms.
EXCLUDED_MODEL_TERMS = [
    "whisper",
    "tts",
    "audio",
    "moderation",
    "embed",
    "compound",
    "safeguard"
]

# Lower-cased set of the preferred ids, used for case-insensitive membership
# checks against ids returned by the API.
PREFERRED_MODEL_IDS = {model_id.lower() for model_id in PREFERRED_PRODUCTION_CHAT_MODELS}


def fetch_groq_models(api_key: str) -> list[dict[str, Any]]:
    """Download the Groq model catalog from the OpenAI-compatible endpoint.

    Args:
        api_key: Groq API key, sent as a bearer token.

    Returns:
        The value stored under the ``data`` key of the JSON response, or an
        empty list when that key is absent.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    reply = requests.get(
        GROQ_MODELS_URL,
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        timeout=30,
    )
    # Surface 4xx/5xx responses as exceptions instead of parsing an error body.
    reply.raise_for_status()
    payload = reply.json()
    return payload.get("data", [])


def is_active_chat_model(model: dict[str, Any]) -> bool:
    """Decide whether a catalog entry is an eligible preferred chat model.

    An entry is accepted only when all of the following hold:

    * it has a non-empty ``id``;
    * its ``active`` field is not literally ``False`` (a missing field passes);
    * its lower-cased id is one of ``PREFERRED_MODEL_IDS``;
    * its lower-cased id contains none of ``EXCLUDED_MODEL_TERMS``.

    Args:
        model: One entry of the model catalog.

    Returns:
        ``True`` when the entry passes every check above, else ``False``.
    """
    candidate = str(model.get("id", "")).lower()

    # Cheap rejections first: no id at all, or explicitly deactivated.
    if not candidate or model.get("active") is False:
        return False

    if candidate not in PREFERRED_MODEL_IDS:
        return False

    # Any excluded substring in the id disqualifies the model.
    for term in EXCLUDED_MODEL_TERMS:
        if term in candidate:
            return False
    return True


def rank_model(model: dict[str, Any]) -> tuple[int, int, int, str]:
    """Build the sort key for one catalog entry (smaller tuples sort first).

    Args:
        model: One entry of the model catalog.

    Returns:
        A tuple ``(position, -created, -context_window, id)`` where
        ``position`` is the case-insensitive index of the id in
        ``PREFERRED_PRODUCTION_CHAT_MODELS``, or the length of that list when
        the id is not present. Missing or falsy ``created`` and
        ``context_window`` values count as 0.
    """
    identifier = str(model.get("id", ""))
    needle = identifier.lower()
    lowered_preferences = [name.lower() for name in PREFERRED_PRODUCTION_CHAT_MODELS]

    if needle in lowered_preferences:
        position = lowered_preferences.index(needle)
    else:
        # Ids outside the preferred list sort after every listed model.
        position = len(PREFERRED_PRODUCTION_CHAT_MODELS)

    # Negated so that, within the same position, newer models and larger
    # context windows come first under an ascending sort.
    newest_first = -int(model.get("created") or 0)
    widest_first = -int(model.get("context_window") or 0)

    return (position, newest_first, widest_first, identifier)


def select_groq_chat_models(limit: int = len(PREFERRED_PRODUCTION_CHAT_MODELS), strategy: str = "stable") -> list[str]:
    """Fetch, filter and order Groq model ids for use as a GROQ_MODEL fallback list.

    Args:
        limit: Upper bound applied to the ordered result via slicing.
        strategy: ``"latest"`` orders by newest ``created`` value, then by
            largest ``context_window``, then by id. Any other value orders
            with ``rank_model``.

    Returns:
        Ids of the models accepted by ``is_active_chat_model``, in order.

    Raises:
        RuntimeError: If the ``GROQ_API_KEY`` environment variable is unset
            or empty.
    """
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY is missing. Add it to .env first.")

    def newest_first(entry):
        # Negated numbers so an ascending sort yields newest/largest first.
        return (
            -int(entry.get("created") or 0),
            -int(entry.get("context_window") or 0),
            str(entry.get("id", "")),
        )

    sort_key = newest_first if strategy == "latest" else rank_model

    eligible = [entry for entry in fetch_groq_models(api_key) if is_active_chat_model(entry)]
    ordered = sorted(eligible, key=sort_key)

    return [entry["id"] for entry in ordered[:limit]]


def main() -> None:
    """CLI entry point used when refreshing the recommended Groq model list.

    Parses ``--limit`` and ``--strategy``, asks ``select_groq_chat_models``
    for the ordered model ids, and prints them both as a ready-to-paste
    ``GROQ_MODEL=...`` line and as a numbered list.

    Raises:
        SystemExit: If the command-line arguments are invalid, including a
            ``--limit`` smaller than 1.
        RuntimeError: If the selection comes back empty.
    """
    parser = argparse.ArgumentParser(description="Select currently available Groq chat models.")
    parser.add_argument("--limit", type=int, default=len(PREFERRED_PRODUCTION_CHAT_MODELS), help="Number of fallback models to print.")
    # NOTE(review): is_active_chat_model() only accepts ids from the preferred
    # list, so "latest" currently re-orders those same models rather than
    # discovering new ones -- confirm that matches the help text's intent.
    parser.add_argument(
        "--strategy",
        choices=["stable", "latest"],
        default="stable",
        help="stable prefers known production chat models; latest picks newest active chat models.",
    )
    args = parser.parse_args()

    # A zero limit would always end in the "no models" error below, blaming
    # the API for an empty result the user asked for, and a negative limit
    # would silently slice models off the end of the ranking. Reject both up
    # front with a normal usage error.
    if args.limit < 1:
        parser.error("--limit must be a positive integer.")

    models = select_groq_chat_models(limit=args.limit, strategy=args.strategy)
    if not models:
        raise RuntimeError("No active Groq chat models were returned by the Models API.")

    print("Recommended GROQ_MODEL value:")
    print(f"GROQ_MODEL={','.join(models)}")
    print("\nSelected fallback order:")
    for index, model_id in enumerate(models, start=1):
        print(f"{index}. {model_id}")


if __name__ == "__main__":
    main()