Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| import asyncio | |
| import re | |
| from groq import AsyncGroq | |
| from app.utils.key_manager import key_manager | |
| logger = logging.getLogger(__name__) | |
def extract_wait_time(error_msg: str) -> float:
    """Extract the suggested retry delay, in seconds, from a Groq error message.

    The message is searched case-insensitively for ``try again in <duration>``.
    The duration may be a plain seconds value (``7.66s``) or a compound value
    built from hour/minute/second/millisecond components (``2m59.56s``,
    ``250ms``, ``1h2m3s``).

    Args:
        error_msg: Text of the error returned by the API.

    Returns:
        The delay in seconds, or ``1.0`` when no parseable duration is found.
    """
    fallback = 1.0
    # "ms" must be listed before "m" so that "250ms" is not read as minutes.
    component = r"(\d+\.?\d*|\.\d+)(ms|h|m|s)"
    match = re.search(
        r"try again in\s+((?:(?:\d+\.?\d*|\.\d+)(?:ms|h|m|s))+)",
        error_msg.lower(),
    )
    if not match:
        return fallback

    seconds_per_unit = {"h": 3600.0, "m": 60.0, "s": 1.0, "ms": 0.001}
    return sum(
        float(value) * seconds_per_unit[unit]
        for value, unit in re.findall(component, match.group(1))
    )
async def get_groq_completion(messages: list, model: str = None) -> str:
    """Request a chat completion from Groq, retrying with rotating API keys.

    Every attempt takes the next key from ``key_manager``, opens a fresh
    ``AsyncGroq`` client and asks for a completion with ``temperature=0.2``
    and ``max_tokens=800``. On failure the error is logged and, unless this
    was the final attempt, the coroutine sleeps before retrying: for the
    delay suggested by the error message (at least 0.5s) when the error text
    contains ``rate_limit``, otherwise for 0.5s.

    Args:
        messages: Chat messages, passed unchanged to
            ``client.chat.completions.create``.
        model: Model name. When ``None``, the ``GROQ_MODEL`` environment
            variable is used, defaulting to ``"llama3-70b-8192"``.

    Returns:
        The ``content`` of the first choice in the response.

    Raises:
        Exception: When every attempt fails. The message contains the last
            error, which is also chained as ``__cause__``.
    """
    if model is None:
        model = os.getenv("GROQ_MODEL", "llama3-70b-8192")

    # Never fewer than three attempts; more when key_count() is larger
    # (presumably the number of configured keys -- confirm in key_manager).
    max_retries = max(key_manager.key_count(), 3)
    last_error = None

    for attempt in range(max_retries):
        try:
            api_key = key_manager.get_next_key()
            # The context manager closes the client's HTTP resources once
            # the attempt is over instead of leaving that to the GC.
            async with AsyncGroq(api_key=api_key) as client:
                response = await client.chat.completions.create(
                    messages=messages,
                    model=model,
                    temperature=0.2,
                    max_tokens=800,
                )
            return response.choices[0].message.content
        except Exception as e:
            error_msg = str(e).lower()
            logger.warning(
                "[Groq] Attempt %d/%d failed: %s", attempt + 1, max_retries, e
            )
            last_error = e

            # Nothing left to retry: skip the back-off and fail right away.
            if attempt + 1 >= max_retries:
                break

            if "rate_limit" in error_msg:
                # Honour the server's suggested delay, with a 0.5s floor.
                wait_time = max(extract_wait_time(error_msg), 0.5)
                logger.warning(
                    "[Groq] Rate limited. Waiting %.2fs...", wait_time
                )
                await asyncio.sleep(wait_time)
            else:
                # Short pause before retrying after any other error.
                await asyncio.sleep(0.5)

    raise Exception(
        f"[Groq] All retries failed. Last error: {last_error}"
    ) from last_error