Spaces:
Sleeping
Sleeping
Jose-Maria Segui committed on
Commit ·
d161c86
1
Parent(s): 4c8ff37
Switch to llama-3.3-70b-versatile and add rate limit handling
Browse files
agent.py
CHANGED
|
@@ -742,9 +742,9 @@ def build_graph():
|
|
| 742 |
"""Build the graph"""
|
| 743 |
|
| 744 |
# Use Groq (fast, reliable, free tier)
|
| 745 |
-
# Model:
|
| 746 |
llm = ChatGroq(
|
| 747 |
-
model="
|
| 748 |
temperature=0,
|
| 749 |
api_key=os.environ.get("GROQ_API_KEY")
|
| 750 |
)
|
|
@@ -755,26 +755,34 @@ def build_graph():
|
|
| 755 |
# Node
|
| 756 |
def assistant(state: MessagesState):
|
| 757 |
"""Assistant node"""
|
|
|
|
| 758 |
messages = state["messages"]
|
| 759 |
# Ensure system prompt is first
|
| 760 |
if not messages or not isinstance(messages[0], SystemMessage):
|
| 761 |
messages = [sys_msg] + messages
|
| 762 |
|
| 763 |
-
# Retry mechanism for 504
|
| 764 |
-
max_retries =
|
| 765 |
for attempt in range(max_retries):
|
| 766 |
try:
|
| 767 |
response = llm_with_tools.invoke(messages)
|
| 768 |
return {"messages": [response]}
|
| 769 |
except Exception as e:
|
| 770 |
error_str = str(e)
|
| 771 |
-
|
|
|
|
| 772 |
if attempt < max_retries - 1:
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
time.sleep(
|
| 776 |
continue
|
| 777 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 778 |
return {"messages": [HumanMessage(content=f"Error communicating with LLM: {e}")]}
|
| 779 |
|
| 780 |
def retriever(state: MessagesState):
|
|
|
|
| 742 |
"""Build the graph"""
|
| 743 |
|
| 744 |
# Use Groq (fast, reliable, free tier)
|
| 745 |
+
# Model: llama-3.3-70b-versatile has higher rate limits than qwen
|
| 746 |
llm = ChatGroq(
|
| 747 |
+
model="llama-3.3-70b-versatile",
|
| 748 |
temperature=0,
|
| 749 |
api_key=os.environ.get("GROQ_API_KEY")
|
| 750 |
)
|
|
|
|
# Node
def assistant(state: MessagesState):
    """Assistant node: invoke the tool-bound LLM, retrying transient failures.

    Rate-limit errors (429) back off 30s, 60s, 90s, ...; server errors
    (500/504) retry after a flat 5s. Anything unrecoverable is surfaced
    back into the conversation as a message.
    """
    import time

    convo = state["messages"]
    # Guarantee the system prompt leads the conversation.
    if not convo or not isinstance(convo[0], SystemMessage):
        convo = [sys_msg] + convo

    # Retry mechanism for errors (504, 429 rate limit, etc)
    max_retries = 5
    for attempt in range(max_retries):
        try:
            reply = llm_with_tools.invoke(convo)
            return {"messages": [reply]}
        except Exception as e:
            error_str = str(e)
            can_retry = attempt < max_retries - 1
            hit_rate_limit = "429" in error_str or "rate_limit" in error_str.lower()
            hit_server_err = (
                "504" in error_str
                or "Gateway Time-out" in error_str
                or "500" in error_str
            )

            # Rate limits get progressively longer waits.
            if hit_rate_limit and can_retry:
                wait_time = 30 * (attempt + 1)  # 30s, 60s, 90s...
                print(f"⚠️ Rate limit hit (Attempt {attempt+1}/{max_retries}). Waiting {wait_time}s...")
                time.sleep(wait_time)
                continue

            # Server-side errors get a short fixed pause.
            if not hit_rate_limit and hit_server_err and can_retry:
                print(f"⚠️ Server error (Attempt {attempt+1}/{max_retries}). Retrying in 5s...")
                time.sleep(5)
                continue

            # If we can't recover, return the error
            return {"messages": [HumanMessage(content=f"Error communicating with LLM: {e}")]}
|
| 787 |
|
| 788 |
def retriever(state: MessagesState):
|