Jose-Maria Segui committed on
Commit
d161c86
·
1 Parent(s): 4c8ff37

Switch to llama-3.3-70b-versatile and add rate limit handling

Browse files
Files changed (1) hide show
  1. agent.py +17 -9
agent.py CHANGED
@@ -742,9 +742,9 @@ def build_graph():
742
  """Build the graph"""
743
 
744
  # Use Groq (fast, reliable, free tier)
745
- # Model: qwen/qwen3-32b (same as reference implementation)
746
  llm = ChatGroq(
747
- model="qwen/qwen3-32b",
748
  temperature=0,
749
  api_key=os.environ.get("GROQ_API_KEY")
750
  )
@@ -755,26 +755,34 @@ def build_graph():
755
  # Node
756
  def assistant(state: MessagesState):
757
  """Assistant node"""
 
758
  messages = state["messages"]
759
  # Ensure system prompt is first
760
  if not messages or not isinstance(messages[0], SystemMessage):
761
  messages = [sys_msg] + messages
762
 
763
- # Retry mechanism for 504 errors
764
- max_retries = 3
765
  for attempt in range(max_retries):
766
  try:
767
  response = llm_with_tools.invoke(messages)
768
  return {"messages": [response]}
769
  except Exception as e:
770
  error_str = str(e)
771
- if "504" in error_str or "Gateway Time-out" in error_str or "500" in error_str:
 
772
  if attempt < max_retries - 1:
773
- print(f"⚠️ LLM Timeout (Attempt {attempt+1}/{max_retries}). Retrying...")
774
- import time
775
- time.sleep(2)
776
  continue
777
- # If it's not a timeout or we ran out of retries, raise or return error
 
 
 
 
 
 
778
  return {"messages": [HumanMessage(content=f"Error communicating with LLM: {e}")]}
779
 
780
  def retriever(state: MessagesState):
 
742
  """Build the graph"""
743
 
744
  # Use Groq (fast, reliable, free tier)
745
+ # Model: llama-3.3-70b-versatile has higher rate limits than qwen
746
  llm = ChatGroq(
747
+ model="llama-3.3-70b-versatile",
748
  temperature=0,
749
  api_key=os.environ.get("GROQ_API_KEY")
750
  )
 
755
  # Node
756
  def assistant(state: MessagesState):
757
  """Assistant node"""
758
+ import time
759
  messages = state["messages"]
760
  # Ensure system prompt is first
761
  if not messages or not isinstance(messages[0], SystemMessage):
762
  messages = [sys_msg] + messages
763
 
764
+ # Retry mechanism for errors (504, 429 rate limit, etc)
765
+ max_retries = 5
766
  for attempt in range(max_retries):
767
  try:
768
  response = llm_with_tools.invoke(messages)
769
  return {"messages": [response]}
770
  except Exception as e:
771
  error_str = str(e)
772
+ # Handle rate limits with longer waits
773
+ if "429" in error_str or "rate_limit" in error_str.lower():
774
  if attempt < max_retries - 1:
775
+ wait_time = 30 * (attempt + 1) # 30s, 60s, 90s...
776
+ print(f"⚠️ Rate limit hit (Attempt {attempt+1}/{max_retries}). Waiting {wait_time}s...")
777
+ time.sleep(wait_time)
778
  continue
779
+ # Handle server errors
780
+ elif "504" in error_str or "Gateway Time-out" in error_str or "500" in error_str:
781
+ if attempt < max_retries - 1:
782
+ print(f"⚠️ Server error (Attempt {attempt+1}/{max_retries}). Retrying in 5s...")
783
+ time.sleep(5)
784
+ continue
785
+ # If we can't recover, return the error
786
  return {"messages": [HumanMessage(content=f"Error communicating with LLM: {e}")]}
787
 
788
  def retriever(state: MessagesState):