Spaces:
Sleeping
Sleeping
File size: 1,790 Bytes
ba18317 394c1b4 ba18317 394c1b4 ba18317 394c1b4 ba18317 394c1b4 ba18317 394c1b4 ba18317 394c1b4 ba18317 394c1b4 ba18317 394c1b4 ba18317 394c1b4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | import os
import logging
import asyncio
import re
from groq import AsyncGroq
from app.utils.key_manager import key_manager
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def extract_wait_time(error_msg: str) -> float:
    """Extract the suggested wait time, in seconds, from a Groq error message.

    Looks for a "try again in <duration>" hint (case-insensitive). The
    duration may be a single component such as ``2.5s`` or ``500ms``, or a
    compound one such as ``1m30s`` / ``1h2m3s``; all components are summed.

    Args:
        error_msg: Text of the error to inspect.

    Returns:
        The wait time in seconds, or ``1.0`` when no parseable hint is found.
    """
    # A component is a decimal number immediately followed by a unit. "ms" is
    # listed before "m" so milliseconds are not read as minutes, and the
    # lookahead keeps a unit from matching the first letter of a longer word.
    number = r"(?:\d+(?:\.\d*)?|\.\d+)"
    unit = r"(?:ms|h|m|s)(?![a-z])"

    hint = re.search(rf"try again in ((?:{number}{unit})+)", error_msg.lower())
    if hint is None:
        return 1.0  # fallback

    seconds_per_unit = {"h": 3600.0, "m": 60.0, "s": 1.0, "ms": 0.001}
    return sum(
        float(value) * seconds_per_unit[name]
        for value, name in re.findall(rf"({number})({unit})", hint.group(1))
    )
async def get_groq_completion(messages: list, model: str = None) -> str:
    """Request a chat completion from Groq, retrying on failure.

    Every attempt asks ``key_manager.get_next_key()`` for an API key and
    builds a fresh ``AsyncGroq`` client with it. After a failed attempt the
    coroutine sleeps before retrying: for errors whose text contains
    ``"rate_limit"`` it sleeps for the time reported by
    ``extract_wait_time`` (at least 0.5 s), otherwise for 0.5 s. No sleep
    happens after the final attempt, since nothing would follow it.

    Args:
        messages: Chat messages, passed unchanged to
            ``client.chat.completions.create``.
        model: Model name. When ``None``, the ``GROQ_MODEL`` environment
            variable is used, defaulting to ``"llama3-70b-8192"``.

    Returns:
        The ``content`` of the first choice's message in the response.

    Raises:
        Exception: When every attempt failed. The message includes the last
            error, which is also chained as ``__cause__``.
    """
    if model is None:
        model = os.getenv("GROQ_MODEL", "llama3-70b-8192")

    # Always allow at least three attempts; more when key_count() is larger
    # (presumably so each configured key gets a try -- confirm against
    # key_manager).
    max_retries = max(key_manager.key_count(), 3)
    last_error = None

    for attempt in range(max_retries):
        try:
            api_key = key_manager.get_next_key()
            client = AsyncGroq(api_key=api_key)
            response = await client.chat.completions.create(
                messages=messages,
                model=model,
                temperature=0.2,
                max_tokens=800,  # reduced from 2048
            )
            return response.choices[0].message.content
        except Exception as e:
            error_msg = str(e).lower()
            logger.warning(f"[Groq] Attempt {attempt + 1}/{max_retries} failed: {e}")
            last_error = e

            # Out of attempts: skip the back-off, it would only delay the error.
            if attempt + 1 >= max_retries:
                break

            if "rate_limit" in error_msg:
                # Honour the server's retry hint, but never spin faster than 0.5 s.
                wait_time = max(extract_wait_time(error_msg), 0.5)
                logger.warning(f"[Groq] Rate limited. Waiting {wait_time:.2f}s...")
                await asyncio.sleep(wait_time)
            else:
                # Small delay for other errors.
                await asyncio.sleep(0.5)

    raise Exception(
        f"[Groq] All retries failed. Last error: {last_error}"
    ) from last_error