"""Minimal Groq client for benchmark-only LLM baselines.""" from __future__ import annotations import json import time from dataclasses import dataclass from typing import cast import httpx from tenacity import retry, retry_if_exception, stop_after_attempt, wait_fixed def _is_rate_limit_error(exc: BaseException) -> bool: """Return whether an exception is a Groq 429 response.""" return isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code == 429 @dataclass(frozen=True, kw_only=True) class GroqCompletion: """Completion payload plus conservative usage accounting.""" text: str prompt_tokens: int completion_tokens: int warnings: tuple[str, ...] class GroqBenchClient: """Sequential Groq client with fixed 429 retry and spacing.""" def __init__( self, *, api_key: str, model: str = "llama-3.3-70b-versatile", min_interval_s: float = 2.0, ) -> None: self._api_key = api_key self._model = model self._min_interval_s = min_interval_s self._last_success_at: float | None = None @property def model(self) -> str: """Return the configured Groq model name.""" return self._model def _respect_spacing(self) -> None: """Sleep long enough to keep requests sequential with a fixed gap.""" if self._last_success_at is None: return elapsed = time.monotonic() - self._last_success_at remaining = self._min_interval_s - elapsed if remaining > 0: time.sleep(remaining) @retry( retry=retry_if_exception(_is_rate_limit_error), wait=wait_fixed(2), stop=stop_after_attempt(3), reraise=True, ) def _post(self, messages: list[dict[str, str]]) -> dict[str, object]: """Issue the underlying Groq chat-completions request.""" payload = { "model": self._model, "messages": messages, "temperature": 0.0, } with httpx.Client(timeout=60.0) as client: response = client.post( "https://api.groq.com/openai/v1/chat/completions", json=payload, headers={ "Authorization": f"Bearer {self._api_key}", "Content-Type": "application/json", }, ) response.raise_for_status() return dict(response.json()) def complete(self, messages: list[dict[str, str]]) -> GroqCompletion: """Send one benchmark completion request to Groq.""" self._respect_spacing() payload = self._post(messages) self._last_success_at = time.monotonic() warnings: list[str] = [] usage = payload.get("usage", {}) prompt_tokens = int(usage.get("prompt_tokens", 0)) if isinstance(usage, dict) else 0 completion_tokens = int(usage.get("completion_tokens", 0)) if isinstance(usage, dict) else 0 if not usage: warnings.append("missing_usage_payload") try: choices = cast(list[dict[str, object]], payload["choices"]) message = cast(dict[str, object], choices[0]["message"]) content = str(message["content"]) except (KeyError, IndexError, TypeError) as exc: raise ValueError(f"Unexpected Groq response payload: {json.dumps(payload)}") from exc return GroqCompletion( text=content, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, warnings=tuple(warnings), )