Spaces:
Running
Running
Pulastya B
committed on
Commit
·
7c2ff18
1
Parent(s):
523d661
Remove Gemini fallback and improve rate limit error messages
Browse files
- Removed automatic Gemini fallback (it was causing more errors than it prevented)
- Added detailed rate limit detection:
* Daily token limit (TPD): 100K tokens/day
* Per-minute limit (TPM): 12K tokens/minute
* Request limit (RPM): 30 requests/minute (reported through the generic rate-limit message)
- Clear error messages explaining which limit was hit
- Raise ValueError immediately instead of trying fallback
Groq's free-tier daily limit is exhausted quickly during testing. Users should wait for the limit to reset instead of cascading to Gemini, which has even lower limits (5 RPM).
- src/orchestrator.py +15 -131
src/orchestrator.py
CHANGED
|
@@ -1688,141 +1688,25 @@ You are a DOER. Complete workflows based on user intent."""
|
|
| 1688 |
|
| 1689 |
except Exception as groq_error:
|
| 1690 |
# Check if it's a rate limit error (429)
|
| 1691 |
-
|
| 1692 |
-
|
| 1693 |
-
|
| 1694 |
-
|
| 1695 |
-
|
| 1696 |
-
|
| 1697 |
-
|
| 1698 |
-
|
| 1699 |
-
|
| 1700 |
-
|
| 1701 |
-
|
| 1702 |
-
|
| 1703 |
-
|
| 1704 |
-
|
| 1705 |
-
|
| 1706 |
-
# Safety settings
|
| 1707 |
-
safety_settings = [
|
| 1708 |
-
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
|
| 1709 |
-
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
|
| 1710 |
-
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
|
| 1711 |
-
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"}
|
| 1712 |
-
]
|
| 1713 |
-
|
| 1714 |
-
self.gemini_model = genai.GenerativeModel(
|
| 1715 |
-
model_name=gemini_model_name,
|
| 1716 |
-
safety_settings=safety_settings
|
| 1717 |
-
)
|
| 1718 |
-
print(f" ✅ Gemini fallback initialized: {gemini_model_name}")
|
| 1719 |
-
|
| 1720 |
-
# Switch provider for this session
|
| 1721 |
-
self.provider = "gemini"
|
| 1722 |
-
self.use_compact_prompts = False # Gemini has large context
|
| 1723 |
-
gemini_chat = self.gemini_model.start_chat(history=[])
|
| 1724 |
-
print(f" 🔄 Now using Gemini for remaining workflow")
|
| 1725 |
|
| 1726 |
-
|
| 1727 |
-
try:
|
| 1728 |
-
if iteration == 1:
|
| 1729 |
-
# First iteration: send system + user message
|
| 1730 |
-
combined_message = f"{messages[0]['content']}\n\n{messages[1]['content']}"
|
| 1731 |
-
response = gemini_chat.send_message(combined_message)
|
| 1732 |
-
else:
|
| 1733 |
-
# Subsequent iterations: send tool results as plain text
|
| 1734 |
-
last_tool_msg = messages[-1]
|
| 1735 |
-
if last_tool_msg.get("role") == "tool":
|
| 1736 |
-
# Format tool result as text for Gemini
|
| 1737 |
-
result_message = f"Tool '{last_tool_msg['name']}' executed successfully.\n\nResult:\n{last_tool_msg['content']}\n\nWhat's the next step?"
|
| 1738 |
-
response = gemini_chat.send_message(result_message)
|
| 1739 |
-
else:
|
| 1740 |
-
# Fallback
|
| 1741 |
-
response = gemini_chat.send_message("Continue with the next step.")
|
| 1742 |
-
|
| 1743 |
-
self.api_calls_made += 1
|
| 1744 |
-
self.last_api_call_time = time.time()
|
| 1745 |
-
|
| 1746 |
-
# Extract tool calls from Gemini TEXT response
|
| 1747 |
-
tool_calls = []
|
| 1748 |
-
if response.candidates and response.candidates[0].content.parts:
|
| 1749 |
-
for part in response.candidates[0].content.parts:
|
| 1750 |
-
if hasattr(part, 'text') and part.text:
|
| 1751 |
-
text_response = part.text
|
| 1752 |
-
final_content = text_response
|
| 1753 |
-
|
| 1754 |
-
# Parse tool calls from text
|
| 1755 |
-
parsed_calls = self._parse_text_tool_calls(text_response)
|
| 1756 |
-
if parsed_calls:
|
| 1757 |
-
for call in parsed_calls:
|
| 1758 |
-
tool_call_obj = type('ToolCall', (), {
|
| 1759 |
-
'id': call['id'],
|
| 1760 |
-
'name': call['function']['name'],
|
| 1761 |
-
'args': json.loads(call['function']['arguments']) if isinstance(call['function']['arguments'], str) else call['function']['arguments']
|
| 1762 |
-
})()
|
| 1763 |
-
tool_calls.append(tool_call_obj)
|
| 1764 |
-
except Exception as gemini_error:
|
| 1765 |
-
# If Gemini also fails, log and continue with empty response
|
| 1766 |
-
print(f"⚠️ Gemini fallback also failed: {str(gemini_error)[:200]}")
|
| 1767 |
-
final_content = "Analysis interrupted due to API errors."
|
| 1768 |
-
tool_calls = []
|
| 1769 |
else:
|
| 1770 |
# Not a rate limit error, re-raise
|
| 1771 |
raise
|
| 1772 |
-
|
| 1773 |
-
elif self.provider == "gemini":
|
| 1774 |
-
# Send messages WITHOUT tools parameter (tools already configured on model)
|
| 1775 |
-
try:
|
| 1776 |
-
if iteration == 1:
|
| 1777 |
-
# First iteration: send system + user message
|
| 1778 |
-
combined_message = f"{messages[0]['content']}\n\n{messages[1]['content']}"
|
| 1779 |
-
response = gemini_chat.send_message(combined_message)
|
| 1780 |
-
else:
|
| 1781 |
-
# Subsequent iterations: send tool results as plain text
|
| 1782 |
-
last_tool_msg = messages[-1]
|
| 1783 |
-
if last_tool_msg.get("role") == "tool":
|
| 1784 |
-
# Format tool result as text for Gemini
|
| 1785 |
-
result_message = f"Tool '{last_tool_msg['name']}' executed successfully.\n\nResult:\n{last_tool_msg['content']}\n\nWhat's the next step?"
|
| 1786 |
-
response = gemini_chat.send_message(result_message)
|
| 1787 |
-
else:
|
| 1788 |
-
# Fallback
|
| 1789 |
-
response = gemini_chat.send_message("Continue with the next step.")
|
| 1790 |
-
except Exception as gemini_error:
|
| 1791 |
-
# Handle StopCandidateException (finish_reason: 12 = blocked/filtered)
|
| 1792 |
-
error_str = str(gemini_error)
|
| 1793 |
-
if "finish_reason" in error_str or "StopCandidateException" in str(type(gemini_error)):
|
| 1794 |
-
print(f"⚠️ Gemini response blocked (safety filter/content policy). Retrying with simplified prompt...")
|
| 1795 |
-
# Retry with a much shorter message
|
| 1796 |
-
simplified_msg = "Please provide the next step in data analysis using available tools."
|
| 1797 |
-
response = gemini_chat.send_message(simplified_msg)
|
| 1798 |
-
else:
|
| 1799 |
-
raise
|
| 1800 |
-
|
| 1801 |
-
self.api_calls_made += 1
|
| 1802 |
-
self.last_api_call_time = time.time()
|
| 1803 |
-
|
| 1804 |
-
# Extract tool calls from Gemini TEXT response (text-based tool calling)
|
| 1805 |
-
tool_calls = []
|
| 1806 |
-
final_content = None
|
| 1807 |
-
|
| 1808 |
-
if response.candidates and response.candidates[0].content.parts:
|
| 1809 |
-
for part in response.candidates[0].content.parts:
|
| 1810 |
-
if hasattr(part, 'text') and part.text:
|
| 1811 |
-
text_response = part.text
|
| 1812 |
-
final_content = text_response
|
| 1813 |
-
|
| 1814 |
-
# Parse tool calls from text using JSON blocks or function syntax
|
| 1815 |
-
parsed_calls = self._parse_text_tool_calls(text_response)
|
| 1816 |
-
if parsed_calls:
|
| 1817 |
-
# Convert to tool_call objects matching Gemini expected format
|
| 1818 |
-
for call in parsed_calls:
|
| 1819 |
-
# Create object with attributes matching line 1543: tool_call.name and tool_call.args
|
| 1820 |
-
tool_call_obj = type('ToolCall', (), {
|
| 1821 |
-
'id': call['id'],
|
| 1822 |
-
'name': call['function']['name'],
|
| 1823 |
-
'args': json.loads(call['function']['arguments']) if isinstance(call['function']['arguments'], str) else call['function']['arguments']
|
| 1824 |
-
})()
|
| 1825 |
-
tool_calls.append(tool_call_obj)
|
| 1826 |
|
| 1827 |
# Check if done (no tool calls)
|
| 1828 |
if not tool_calls:
|
|
|
|
| 1688 |
|
| 1689 |
except Exception as groq_error:
|
| 1690 |
# Check if it's a rate limit error (429)
|
| 1691 |
+
error_str = str(groq_error)
|
| 1692 |
+
if "rate_limit" in error_str.lower() or "429" in error_str:
|
| 1693 |
+
# Detailed rate limit error
|
| 1694 |
+
if "tokens per day" in error_str or "TPD" in error_str:
|
| 1695 |
+
print(f"❌ GROQ DAILY TOKEN LIMIT EXHAUSTED (100K tokens/day)")
|
| 1696 |
+
print(f" Your daily quota resets in a few hours")
|
| 1697 |
+
print(f" Error: {error_str[:300]}")
|
| 1698 |
+
elif "tokens per minute" in error_str or "TPM" in error_str:
|
| 1699 |
+
print(f"❌ GROQ TOKENS PER MINUTE LIMIT (12K tokens/min)")
|
| 1700 |
+
print(f" Wait 60 seconds and try again")
|
| 1701 |
+
print(f" Error: {error_str[:300]}")
|
| 1702 |
+
else:
|
| 1703 |
+
print(f"❌ GROQ RATE LIMIT")
|
| 1704 |
+
print(f" Error: {error_str[:300]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1705 |
|
| 1706 |
+
raise ValueError(f"Groq rate limit exceeded. Please wait and try again.\n{error_str[:500]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1707 |
else:
|
| 1708 |
# Not a rate limit error, re-raise
|
| 1709 |
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1710 |
|
| 1711 |
# Check if done (no tool calls)
|
| 1712 |
if not tool_calls:
|