cloud450 committed on
Commit
394c1b4
·
verified ·
1 Parent(s): 112ba22

Update app/utils/groq_client.py

Browse files
Files changed (1) hide show
  1. app/utils/groq_client.py +30 -8
app/utils/groq_client.py CHANGED
@@ -1,37 +1,59 @@
1
  import os
2
  import logging
 
 
3
  from groq import AsyncGroq
4
  from app.utils.key_manager import key_manager
5
 
6
  logger = logging.getLogger(__name__)
7
 
8
 
 
 
 
 
 
 
 
 
9
  async def get_groq_completion(messages: list, model: str = None) -> str:
10
- """
11
- Calls Groq API with automatic key rotation on failure.
12
- Retries across all available keys before raising.
13
- """
14
  if model is None:
15
  model = os.getenv("GROQ_MODEL", "llama3-70b-8192")
16
 
17
- max_retries = max(key_manager.key_count(), 1)
18
  last_error = None
19
 
20
  for attempt in range(max_retries):
21
  try:
22
  api_key = key_manager.get_next_key()
23
  client = AsyncGroq(api_key=api_key)
 
24
  response = await client.chat.completions.create(
25
  messages=messages,
26
  model=model,
27
- temperature=0.2, # Low temp for deterministic structured output
28
- max_tokens=2048,
29
  )
 
30
  return response.choices[0].message.content
31
 
32
  except Exception as e:
 
33
  logger.warning(f"[Groq] Attempt {attempt + 1}/{max_retries} failed: {e}")
34
  last_error = e
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  continue
36
 
37
- raise Exception(f"[Groq] All API keys exhausted. Last error: {last_error}")
 
1
  import os
2
  import logging
3
+ import asyncio
4
+ import re
5
  from groq import AsyncGroq
6
  from app.utils.key_manager import key_manager
7
 
8
  logger = logging.getLogger(__name__)
9
 
10
 
11
def extract_wait_time(error_msg: str) -> float:
    """Extract the suggested wait time, in seconds, from a Groq error message.

    Looks for the phrase "try again in <duration>" (case-insensitive), where
    the duration is one or more number+unit components such as "2.5s",
    "120ms", "7m12.5s" or "1h2m3s".

    Args:
        error_msg: Text of the error raised by the Groq client.

    Returns:
        The total duration in seconds, or 1.0 when no well-formed duration
        is found. The value is not capped, so callers may want to bound it
        before sleeping.
    """
    # One numeric component; accepts "5", "5.", "5.25" and ".5" so that
    # float() below can never raise on what the pattern matched.
    number = r"(?:\d+\.?\d*|\.\d+)"
    # "ms" must be tried before "m" and "s" so "120ms" is not read as minutes.
    unit = r"(?:ms|h|m|s)"

    match = re.search(
        rf"try again in ((?:{number}{unit})+)", error_msg.lower()
    )
    if not match:
        return 1.0  # fallback

    seconds_per_unit = {"h": 3600.0, "m": 60.0, "s": 1.0, "ms": 0.001}
    components = re.findall(rf"({number})(ms|h|m|s)", match.group(1))
    return sum(float(amount) * seconds_per_unit[suffix] for amount, suffix in components)
17
+
18
+
19
  async def get_groq_completion(messages: list, model: str = None) -> str:
 
 
 
 
20
  if model is None:
21
  model = os.getenv("GROQ_MODEL", "llama3-70b-8192")
22
 
23
+ max_retries = max(key_manager.key_count(), 3)
24
  last_error = None
25
 
26
  for attempt in range(max_retries):
27
  try:
28
  api_key = key_manager.get_next_key()
29
  client = AsyncGroq(api_key=api_key)
30
+
31
  response = await client.chat.completions.create(
32
  messages=messages,
33
  model=model,
34
+ temperature=0.2,
35
+ max_tokens=800, # ✅ reduced from 2048
36
  )
37
+
38
  return response.choices[0].message.content
39
 
40
  except Exception as e:
41
+ error_msg = str(e).lower()
42
  logger.warning(f"[Groq] Attempt {attempt + 1}/{max_retries} failed: {e}")
43
  last_error = e
44
+
45
+ # ✅ Handle rate limit properly
46
+ if "rate_limit" in error_msg:
47
+ wait_time = extract_wait_time(error_msg)
48
+ wait_time = max(wait_time, 0.5)
49
+
50
+ logger.warning(f"[Groq] Rate limited. Waiting {wait_time:.2f}s...")
51
+ await asyncio.sleep(wait_time)
52
+
53
+ else:
54
+ # small delay for other errors
55
+ await asyncio.sleep(0.5)
56
+
57
  continue
58
 
59
+ raise Exception(f"[Groq] All retries failed. Last error: {last_error}")