"""Async helper for Groq chat completions with retry and rate-limit back-off."""

import asyncio
import logging
import os
import re
from typing import Optional

from groq import AsyncGroq

from app.utils.key_manager import key_manager

logger = logging.getLogger(__name__)

# Wait used when the error message carries no parsable retry hint.
_DEFAULT_WAIT_SECONDS = 1.0
# Lower bound applied to any parsed rate-limit wait.
_MIN_RATE_LIMIT_WAIT_SECONDS = 0.5
# Short pause between attempts after a non-rate-limit failure.
_OTHER_ERROR_DELAY_SECONDS = 0.5

# A decimal number: "7", "7.66", "7." or ".5".
_NUMBER = r"(?:\d+(?:\.\d*)?|\.\d+)"
# "ms" must be tried before "m" so that "430ms" is not read as 430 minutes.
_UNIT = r"(?:ms|h|m|s)"
# Whole hint, e.g. "try again in 7.66s" or "try again in 2m59.56s".
_RETRY_HINT_RE = re.compile(r"try again in ((?:" + _NUMBER + _UNIT + r")+)")
# One "<number><unit>" component inside the hint.
_DURATION_PART_RE = re.compile(r"(" + _NUMBER + r")(" + _UNIT + r")")
_UNIT_SECONDS = {"h": 3600.0, "m": 60.0, "s": 1.0, "ms": 0.001}


def extract_wait_time(error_msg: str) -> float:
    """Extract the suggested wait time, in seconds, from a Groq error message.

    Looks for a "try again in <duration>" hint (case-insensitive). The
    duration may be a plain seconds value such as ``7.66s`` or a compound
    value built from ``h``, ``m``, ``s`` and ``ms`` components such as
    ``2m59.56s`` or ``430ms``.

    Args:
        error_msg: Text of the error raised by the Groq client.

    Returns:
        The hinted wait in seconds, or 1.0 when no hint can be parsed.
    """
    match = _RETRY_HINT_RE.search(error_msg.lower())
    if match is None:
        return _DEFAULT_WAIT_SECONDS

    return sum(
        float(value) * _UNIT_SECONDS[unit]
        for value, unit in _DURATION_PART_RE.findall(match.group(1))
    )


async def get_groq_completion(messages: list, model: Optional[str] = None) -> str:
    """Request a chat completion from Groq, retrying on failure.

    Every attempt asks ``key_manager`` for the next API key and builds a
    fresh ``AsyncGroq`` client with it. The number of attempts is the number
    of keys reported by ``key_manager.key_count()``, but never fewer than 3.

    Args:
        messages: Chat messages forwarded unchanged to the Groq API.
        model: Model name. When ``None``, the ``GROQ_MODEL`` environment
            variable is used, falling back to ``"llama3-70b-8192"``.

    Returns:
        The message content of the first choice in the response.

    Raises:
        Exception: When every attempt fails. The last underlying error is
            included in the message and attached as ``__cause__``.
    """
    if model is None:
        model = os.getenv("GROQ_MODEL", "llama3-70b-8192")

    max_retries = max(key_manager.key_count(), 3)
    last_error = None

    for attempt in range(1, max_retries + 1):
        try:
            api_key = key_manager.get_next_key()
            client = AsyncGroq(api_key=api_key)
            response = await client.chat.completions.create(
                messages=messages,
                model=model,
                temperature=0.2,
                max_tokens=800,  # reduced from 2048
            )
            return response.choices[0].message.content
        except Exception as e:
            last_error = e
            logger.warning(
                "[Groq] Attempt %d/%d failed: %s", attempt, max_retries, e
            )

            # Nothing left to retry: skip the back-off and fail right away.
            if attempt == max_retries:
                break

            error_msg = str(e).lower()
            if "rate_limit" in error_msg:
                # Honour the server's retry hint, with a small lower bound.
                wait_time = max(
                    extract_wait_time(error_msg), _MIN_RATE_LIMIT_WAIT_SECONDS
                )
                logger.warning(
                    "[Groq] Rate limited. Waiting %.2fs...", wait_time
                )
                await asyncio.sleep(wait_time)
            else:
                # Small delay for other errors.
                await asyncio.sleep(_OTHER_ERROR_DELAY_SECONDS)

    raise Exception(
        f"[Groq] All retries failed. Last error: {last_error}"
    ) from last_error