AgileAndy Claude committed on
Commit
43ccb47
·
1 Parent(s): 86e609e

Add exponential backoff retry logic for rate limiting

Browse files

- Added retry_with_backoff function with up to 60s delay and 5 attempts
- Applied retry logic to all OpenRouter LLM calls
- Applied retry logic to Tavily API calls
- Applied retry logic to Exa API calls
- Includes jitter to prevent thundering herd issues

πŸ€– Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. .DS_Store +0 -0
  2. speed_optimized_gaia_agent.py +59 -18
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
speed_optimized_gaia_agent.py CHANGED
@@ -14,6 +14,7 @@ import pandas as pd
14
  from datetime import datetime
15
  import time
16
  import hashlib
 
17
 
18
  # Core imports
19
  from ddgs import DDGS
@@ -58,10 +59,11 @@ class SpeedOptimizedGAIAAgent:
58
  - Reduced search overhead
59
  - Vector similarity for answer retrieval
60
  - Parallel processing optimizations
 
61
  """
62
 
63
  def __init__(self):
64
- print("πŸš€ Initializing Speed-Optimized GAIA Agent")
65
 
66
  # API setup
67
  self.openrouter_key = os.getenv("OPENROUTER_API_KEY")
@@ -86,7 +88,7 @@ class SpeedOptimizedGAIAAgent:
86
  }
87
  }
88
 
89
- print("πŸ€– Using 2 optimized models for speed")
90
 
91
  # Initialize vector similarity if available
92
  self.vector_cache = {}
@@ -112,6 +114,26 @@ class SpeedOptimizedGAIAAgent:
112
  base_url="https://openrouter.ai/api/v1"
113
  )
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  def setup_search_engines(self):
116
  """Setup search engines in priority order"""
117
  print("πŸ” Setting up optimized search engines...")
@@ -157,22 +179,39 @@ class SpeedOptimizedGAIAAgent:
157
  self.answer_cache[question] = answer
158
 
159
  def fast_search(self, query: str, max_results: int = 3) -> str:
160
- """Optimized search using only the fastest engines"""
161
  print(f"πŸ” Fast search: {query[:50]}...")
162
  all_results = []
163
 
164
- # Try Tavily first (usually fastest)
165
  if self.tavily:
166
  try:
167
- tavily_results = self.tavily.search(query[:350], max_results=2)
 
 
 
168
  if tavily_results and 'results' in tavily_results:
169
  for result in tavily_results['results']:
170
  all_results.append(f"Source: {result.get('title', '')}\n{result.get('content', '')}")
171
  print(f"πŸ“Š Tavily: {len(tavily_results.get('results', []))} results")
172
  except Exception as e:
173
- print(f"❌ Tavily error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- # If not enough results, try DuckDuckGo (skip Exa for speed)
176
  if len(all_results) < max_results:
177
  try:
178
  remaining = max_results - len(all_results)
@@ -204,7 +243,7 @@ class SpeedOptimizedGAIAAgent:
204
  return "standard"
205
 
206
  def get_fast_response(self, model_key: str, question: str, context: str = "") -> Dict[str, Any]:
207
- """Get response with optimized parameters for speed"""
208
  model = self.models[model_key]
209
 
210
  print(f"πŸ€– {model_key} processing...")
@@ -221,16 +260,18 @@ Respond with ONLY the answer, no explanation unless specifically requested."""
221
  user_prompt = f"Question: {question}\n\nContext: {context}\n\nAnswer:"
222
 
223
  try:
224
- response = model["client"].chat.completions.create(
225
- model=model["name"],
226
- messages=[
227
- {"role": "system", "content": system_prompt},
228
- {"role": "user", "content": user_prompt}
229
- ],
230
- max_tokens=100, # Reduced for speed
231
- temperature=0.1
232
- )
 
233
 
 
234
  answer = response.choices[0].message.content.strip()
235
 
236
  return {
@@ -240,7 +281,7 @@ Respond with ONLY the answer, no explanation unless specifically requested."""
240
  }
241
 
242
  except Exception as e:
243
- print(f"❌ {model_key} error: {e}")
244
  return {
245
  "model": model_key,
246
  "answer": f"Error: {e}",
 
14
  from datetime import datetime
15
  import time
16
  import hashlib
17
+ import random
18
 
19
  # Core imports
20
  from ddgs import DDGS
 
59
  - Reduced search overhead
60
  - Vector similarity for answer retrieval
61
  - Parallel processing optimizations
62
+ - Exponential backoff retry for rate limiting
63
  """
64
 
65
  def __init__(self):
66
+ print("πŸš€ Initializing Speed-Optimized GAIA Agent with Retry Logic")
67
 
68
  # API setup
69
  self.openrouter_key = os.getenv("OPENROUTER_API_KEY")
 
88
  }
89
  }
90
 
91
+ print("πŸ€– Using 2 optimized models with retry logic")
92
 
93
  # Initialize vector similarity if available
94
  self.vector_cache = {}
 
114
  base_url="https://openrouter.ai/api/v1"
115
  )
116
 
117
+ def retry_with_backoff(self, func, *args, max_attempts=5, max_delay=60, **kwargs):
118
+ """Exponential backoff retry with jitter"""
119
+ for attempt in range(max_attempts):
120
+ try:
121
+ return func(*args, **kwargs)
122
+ except Exception as e:
123
+ if attempt == max_attempts - 1:
124
+ print(f"❌ Final attempt failed: {e}")
125
+ raise e
126
+
127
+ # Calculate delay with exponential backoff + jitter
128
+ base_delay = min(2 ** attempt, max_delay // 4) # Cap base delay
129
+ jitter = random.uniform(0.1, 0.3) * base_delay
130
+ delay = min(base_delay + jitter, max_delay)
131
+
132
+ print(f"⏳ Rate limited (attempt {attempt + 1}/{max_attempts}), retrying in {delay:.1f}s...")
133
+ time.sleep(delay)
134
+
135
+ raise Exception("Max retry attempts exceeded")
136
+
137
  def setup_search_engines(self):
138
  """Setup search engines in priority order"""
139
  print("πŸ” Setting up optimized search engines...")
 
179
  self.answer_cache[question] = answer
180
 
181
  def fast_search(self, query: str, max_results: int = 3) -> str:
182
+ """Optimized search using only the fastest engines with retry logic"""
183
  print(f"πŸ” Fast search: {query[:50]}...")
184
  all_results = []
185
 
186
+ # Try Tavily first (usually fastest) with retry
187
  if self.tavily:
188
  try:
189
+ def tavily_search():
190
+ return self.tavily.search(query[:350], max_results=2)
191
+
192
+ tavily_results = self.retry_with_backoff(tavily_search)
193
  if tavily_results and 'results' in tavily_results:
194
  for result in tavily_results['results']:
195
  all_results.append(f"Source: {result.get('title', '')}\n{result.get('content', '')}")
196
  print(f"πŸ“Š Tavily: {len(tavily_results.get('results', []))} results")
197
  except Exception as e:
198
+ print(f"❌ Tavily error after retries: {e}")
199
+
200
+ # If not enough results, try Exa with retry
201
+ if self.exa and len(all_results) < max_results:
202
+ try:
203
+ def exa_search():
204
+ return self.exa.search_and_contents(query, num_results=max_results-len(all_results))
205
+
206
+ exa_results = self.retry_with_backoff(exa_search)
207
+ if exa_results and hasattr(exa_results, 'results'):
208
+ for result in exa_results.results:
209
+ all_results.append(f"Source: {getattr(result, 'title', '')}\n{getattr(result, 'text', '')}")
210
+ print(f"πŸ“Š Exa: {len(exa_results.results)} results")
211
+ except Exception as e:
212
+ print(f"❌ Exa error after retries: {e}")
213
 
214
+ # If still not enough results, try DuckDuckGo (no API limits)
215
  if len(all_results) < max_results:
216
  try:
217
  remaining = max_results - len(all_results)
 
243
  return "standard"
244
 
245
  def get_fast_response(self, model_key: str, question: str, context: str = "") -> Dict[str, Any]:
246
+ """Get response with optimized parameters for speed and retry logic"""
247
  model = self.models[model_key]
248
 
249
  print(f"πŸ€– {model_key} processing...")
 
260
  user_prompt = f"Question: {question}\n\nContext: {context}\n\nAnswer:"
261
 
262
  try:
263
+ def make_llm_call():
264
+ return model["client"].chat.completions.create(
265
+ model=model["name"],
266
+ messages=[
267
+ {"role": "system", "content": system_prompt},
268
+ {"role": "user", "content": user_prompt}
269
+ ],
270
+ max_tokens=100, # Reduced for speed
271
+ temperature=0.1
272
+ )
273
 
274
+ response = self.retry_with_backoff(make_llm_call)
275
  answer = response.choices[0].message.content.strip()
276
 
277
  return {
 
281
  }
282
 
283
  except Exception as e:
284
+ print(f"❌ {model_key} error after retries: {e}")
285
  return {
286
  "model": model_key,
287
  "answer": f"Error: {e}",