Pulastya B committed on
Commit
7c2ff18
·
1 Parent(s): 523d661

Remove Gemini fallback and improve rate limit error messages

Browse files

- Removed automatic Gemini fallback (was causing more errors than helping)
- Added detailed rate limit detection:
  - Daily token limit (TPD): 100K tokens/day
  - Per-minute token limit (TPM): 12K tokens/minute
  - Request limit (RPM): 30 requests/minute
- Clear error messages explaining which limit was hit
- Raise ValueError immediately instead of trying fallback

The Groq free-tier daily token limit is exhausted quickly during testing. Users should wait for the quota to reset rather than cascading to Gemini, which has even lower limits (5 RPM).

Files changed (1) hide show
  1. src/orchestrator.py +15 -131
src/orchestrator.py CHANGED
@@ -1688,141 +1688,25 @@ You are a DOER. Complete workflows based on user intent."""
1688
 
1689
  except Exception as groq_error:
1690
  # Check if it's a rate limit error (429)
1691
- if "rate_limit" in str(groq_error).lower() or "429" in str(groq_error):
1692
- print(f"⚠️ Groq rate limit exceeded! Automatically switching to Gemini...")
1693
- print(f" Groq error: {str(groq_error)[:200]}")
1694
-
1695
- # Switch to Gemini fallback
1696
- if not hasattr(self, 'gemini_model') or self.gemini_model is None:
1697
- # Initialize Gemini if not already done
1698
- import google.generativeai as genai
1699
- api_key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
1700
- if not api_key:
1701
- raise ValueError("Groq exhausted and no Gemini API key available for fallback")
1702
-
1703
- genai.configure(api_key=api_key)
1704
- gemini_model_name = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
1705
-
1706
- # Safety settings
1707
- safety_settings = [
1708
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
1709
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
1710
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
1711
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
1712
- ]
1713
-
1714
- self.gemini_model = genai.GenerativeModel(
1715
- model_name=gemini_model_name,
1716
- safety_settings=safety_settings
1717
- )
1718
- print(f" ✅ Gemini fallback initialized: {gemini_model_name}")
1719
-
1720
- # Switch provider for this session
1721
- self.provider = "gemini"
1722
- self.use_compact_prompts = False # Gemini has large context
1723
- gemini_chat = self.gemini_model.start_chat(history=[])
1724
- print(f" 🔄 Now using Gemini for remaining workflow")
1725
 
1726
- # Make the Gemini API call immediately
1727
- try:
1728
- if iteration == 1:
1729
- # First iteration: send system + user message
1730
- combined_message = f"{messages[0]['content']}\n\n{messages[1]['content']}"
1731
- response = gemini_chat.send_message(combined_message)
1732
- else:
1733
- # Subsequent iterations: send tool results as plain text
1734
- last_tool_msg = messages[-1]
1735
- if last_tool_msg.get("role") == "tool":
1736
- # Format tool result as text for Gemini
1737
- result_message = f"Tool '{last_tool_msg['name']}' executed successfully.\n\nResult:\n{last_tool_msg['content']}\n\nWhat's the next step?"
1738
- response = gemini_chat.send_message(result_message)
1739
- else:
1740
- # Fallback
1741
- response = gemini_chat.send_message("Continue with the next step.")
1742
-
1743
- self.api_calls_made += 1
1744
- self.last_api_call_time = time.time()
1745
-
1746
- # Extract tool calls from Gemini TEXT response
1747
- tool_calls = []
1748
- if response.candidates and response.candidates[0].content.parts:
1749
- for part in response.candidates[0].content.parts:
1750
- if hasattr(part, 'text') and part.text:
1751
- text_response = part.text
1752
- final_content = text_response
1753
-
1754
- # Parse tool calls from text
1755
- parsed_calls = self._parse_text_tool_calls(text_response)
1756
- if parsed_calls:
1757
- for call in parsed_calls:
1758
- tool_call_obj = type('ToolCall', (), {
1759
- 'id': call['id'],
1760
- 'name': call['function']['name'],
1761
- 'args': json.loads(call['function']['arguments']) if isinstance(call['function']['arguments'], str) else call['function']['arguments']
1762
- })()
1763
- tool_calls.append(tool_call_obj)
1764
- except Exception as gemini_error:
1765
- # If Gemini also fails, log and continue with empty response
1766
- print(f"⚠️ Gemini fallback also failed: {str(gemini_error)[:200]}")
1767
- final_content = "Analysis interrupted due to API errors."
1768
- tool_calls = []
1769
  else:
1770
  # Not a rate limit error, re-raise
1771
  raise
1772
-
1773
- elif self.provider == "gemini":
1774
- # Send messages WITHOUT tools parameter (tools already configured on model)
1775
- try:
1776
- if iteration == 1:
1777
- # First iteration: send system + user message
1778
- combined_message = f"{messages[0]['content']}\n\n{messages[1]['content']}"
1779
- response = gemini_chat.send_message(combined_message)
1780
- else:
1781
- # Subsequent iterations: send tool results as plain text
1782
- last_tool_msg = messages[-1]
1783
- if last_tool_msg.get("role") == "tool":
1784
- # Format tool result as text for Gemini
1785
- result_message = f"Tool '{last_tool_msg['name']}' executed successfully.\n\nResult:\n{last_tool_msg['content']}\n\nWhat's the next step?"
1786
- response = gemini_chat.send_message(result_message)
1787
- else:
1788
- # Fallback
1789
- response = gemini_chat.send_message("Continue with the next step.")
1790
- except Exception as gemini_error:
1791
- # Handle StopCandidateException (finish_reason: 12 = blocked/filtered)
1792
- error_str = str(gemini_error)
1793
- if "finish_reason" in error_str or "StopCandidateException" in str(type(gemini_error)):
1794
- print(f"⚠️ Gemini response blocked (safety filter/content policy). Retrying with simplified prompt...")
1795
- # Retry with a much shorter message
1796
- simplified_msg = "Please provide the next step in data analysis using available tools."
1797
- response = gemini_chat.send_message(simplified_msg)
1798
- else:
1799
- raise
1800
-
1801
- self.api_calls_made += 1
1802
- self.last_api_call_time = time.time()
1803
-
1804
- # Extract tool calls from Gemini TEXT response (text-based tool calling)
1805
- tool_calls = []
1806
- final_content = None
1807
-
1808
- if response.candidates and response.candidates[0].content.parts:
1809
- for part in response.candidates[0].content.parts:
1810
- if hasattr(part, 'text') and part.text:
1811
- text_response = part.text
1812
- final_content = text_response
1813
-
1814
- # Parse tool calls from text using JSON blocks or function syntax
1815
- parsed_calls = self._parse_text_tool_calls(text_response)
1816
- if parsed_calls:
1817
- # Convert to tool_call objects matching Gemini expected format
1818
- for call in parsed_calls:
1819
- # Create object with attributes matching line 1543: tool_call.name and tool_call.args
1820
- tool_call_obj = type('ToolCall', (), {
1821
- 'id': call['id'],
1822
- 'name': call['function']['name'],
1823
- 'args': json.loads(call['function']['arguments']) if isinstance(call['function']['arguments'], str) else call['function']['arguments']
1824
- })()
1825
- tool_calls.append(tool_call_obj)
1826
 
1827
  # Check if done (no tool calls)
1828
  if not tool_calls:
 
1688
 
1689
  except Exception as groq_error:
1690
  # Check if it's a rate limit error (429)
1691
+ error_str = str(groq_error)
1692
+ if "rate_limit" in error_str.lower() or "429" in error_str:
1693
+ # Detailed rate limit error
1694
+ if "tokens per day" in error_str or "TPD" in error_str:
1695
+ print(f"❌ GROQ DAILY TOKEN LIMIT EXHAUSTED (100K tokens/day)")
1696
+ print(f" Your daily quota resets in a few hours")
1697
+ print(f" Error: {error_str[:300]}")
1698
+ elif "tokens per minute" in error_str or "TPM" in error_str:
1699
+ print(f" GROQ TOKENS PER MINUTE LIMIT (12K tokens/min)")
1700
+ print(f" Wait 60 seconds and try again")
1701
+ print(f" Error: {error_str[:300]}")
1702
+ else:
1703
+ print(f"❌ GROQ RATE LIMIT")
1704
+ print(f" Error: {error_str[:300]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1705
 
1706
+ raise ValueError(f"Groq rate limit exceeded. Please wait and try again.\n{error_str[:500]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1707
  else:
1708
  # Not a rate limit error, re-raise
1709
  raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1710
 
1711
  # Check if done (no tool calls)
1712
  if not tool_calls: