Pulastya B committed on
Commit
3e672a1
·
1 Parent(s): 7c2ff18

Fix TPM rate limiting: Add automatic retry with delay parsing

Browse files

- Catch 429 TPM errors and automatically retry after delay
- Parse retry delay from Groq error message (e.g., 'retry in 45s')
- Default to 60s wait for TPM limits
- Distinguish TPM (retry) from TPD (fail immediately)
- Explain rolling window: previous requests in 60s window still count

Root cause: The container starts with tokens_this_minute=0, but Groq uses an account-wide rolling window, so previous requests from any session still count toward the limit.

Solution: Graceful retry instead of failing on first request.

Files changed (1) hide show
  1. src/orchestrator.py +47 -12
src/orchestrator.py CHANGED
@@ -1690,20 +1690,55 @@ You are a DOER. Complete workflows based on user intent."""
1690
  # Check if it's a rate limit error (429)
1691
  error_str = str(groq_error)
1692
  if "rate_limit" in error_str.lower() or "429" in error_str:
1693
- # Detailed rate limit error
1694
- if "tokens per day" in error_str or "TPD" in error_str:
1695
- print(f"❌ GROQ DAILY TOKEN LIMIT EXHAUSTED (100K tokens/day)")
1696
- print(f" Your daily quota resets in a few hours")
1697
- print(f" Error: {error_str[:300]}")
 
 
 
1698
  elif "tokens per minute" in error_str or "TPM" in error_str:
1699
- print(f"❌ GROQ TOKENS PER MINUTE LIMIT (12K tokens/min)")
1700
- print(f" Wait 60 seconds and try again")
1701
- print(f" Error: {error_str[:300]}")
1702
- else:
1703
- print(f"❌ GROQ RATE LIMIT")
1704
- print(f" Error: {error_str[:300]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1705
 
1706
- raise ValueError(f"Groq rate limit exceeded. Please wait and try again.\n{error_str[:500]}")
 
 
1707
  else:
1708
  # Not a rate limit error, re-raise
1709
  raise
 
1690
  # Check if it's a rate limit error (429)
1691
  error_str = str(groq_error)
1692
  if "rate_limit" in error_str.lower() or "429" in error_str:
1693
+ # Parse retry delay from error message if available
1694
+ retry_delay = 60 # Default to 60s for TPM limit
1695
+
1696
+ # Try to extract retry delay from error
1697
+ import re
1698
+ delay_match = re.search(r'retry.*?(\d+).*?second', error_str, re.IGNORECASE)
1699
+ if delay_match:
1700
+ retry_delay = int(delay_match.group(1))
1701
  elif "tokens per minute" in error_str or "TPM" in error_str:
1702
+ retry_delay = 60
1703
+ elif "tokens per day" in error_str or "TPD" in error_str:
1704
+ # Daily limit - give up immediately
1705
+ print(f"❌ GROQ DAILY TOKEN LIMIT EXHAUSTED (100K tokens/day)")
1706
+ print(f" Your daily quota resets at UTC midnight")
1707
+ print(f" Error: {error_str[:400]}")
1708
+ raise ValueError(f"Groq daily quota exhausted. Please wait for reset.\n{error_str[:500]}")
1709
+
1710
+ # TPM limit - wait and retry
1711
+ print(f"⚠️ GROQ TPM RATE LIMIT (rolling 60s window)")
1712
+ print(f" Groq uses account-wide rolling window - previous requests still count")
1713
+ print(f" Waiting {retry_delay}s and retrying...")
1714
+ print(f" Error: {error_str[:300]}")
1715
+
1716
+ time.sleep(retry_delay)
1717
+
1718
+ # Retry the request
1719
+ print(f"🔄 Retrying after {retry_delay}s delay...")
1720
+ response = self.groq_client.chat.completions.create(
1721
+ model=self.model,
1722
+ messages=messages,
1723
+ tools=tools_to_use,
1724
+ tool_choice="auto",
1725
+ parallel_tool_calls=False,
1726
+ temperature=0.1,
1727
+ max_tokens=4096
1728
+ )
1729
+
1730
+ self.api_calls_made += 1
1731
+ self.last_api_call_time = time.time()
1732
+
1733
+ # Track tokens used
1734
+ if hasattr(response, 'usage') and response.usage:
1735
+ tokens_used = response.usage.total_tokens
1736
+ self.tokens_this_minute += tokens_used
1737
+ print(f"📊 Tokens: {tokens_used} this call | {self.tokens_this_minute}/{self.tpm_limit} this minute")
1738
 
1739
+ response_message = response.choices[0].message
1740
+ tool_calls = response_message.tool_calls
1741
+ final_content = response_message.content
1742
  else:
1743
  # Not a rate limit error, re-raise
1744
  raise