Spaces:
Sleeping
Sleeping
Add exponential backoff retry logic for rate limiting
Browse files
- Added retry_with_backoff function with up to 60s delay and 5 attempts
- Applied retry logic to all OpenRouter LLM calls
- Applied retry logic to Tavily API calls
- Applied retry logic to Exa API calls
- Includes jitter to prevent thundering herd issues
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
- .DS_Store +0 -0
- speed_optimized_gaia_agent.py +59 -18
.DS_Store
CHANGED
|
Binary files a/.DS_Store and b/.DS_Store differ
|
|
|
speed_optimized_gaia_agent.py
CHANGED
|
@@ -14,6 +14,7 @@ import pandas as pd
|
|
| 14 |
from datetime import datetime
|
| 15 |
import time
|
| 16 |
import hashlib
|
|
|
|
| 17 |
|
| 18 |
# Core imports
|
| 19 |
from ddgs import DDGS
|
|
@@ -58,10 +59,11 @@ class SpeedOptimizedGAIAAgent:
|
|
| 58 |
- Reduced search overhead
|
| 59 |
- Vector similarity for answer retrieval
|
| 60 |
- Parallel processing optimizations
|
|
|
|
| 61 |
"""
|
| 62 |
|
| 63 |
def __init__(self):
|
| 64 |
-
print("π Initializing Speed-Optimized GAIA Agent")
|
| 65 |
|
| 66 |
# API setup
|
| 67 |
self.openrouter_key = os.getenv("OPENROUTER_API_KEY")
|
|
@@ -86,7 +88,7 @@ class SpeedOptimizedGAIAAgent:
|
|
| 86 |
}
|
| 87 |
}
|
| 88 |
|
| 89 |
-
print("π€ Using 2 optimized models
|
| 90 |
|
| 91 |
# Initialize vector similarity if available
|
| 92 |
self.vector_cache = {}
|
|
@@ -112,6 +114,26 @@ class SpeedOptimizedGAIAAgent:
|
|
| 112 |
base_url="https://openrouter.ai/api/v1"
|
| 113 |
)
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
def setup_search_engines(self):
|
| 116 |
"""Setup search engines in priority order"""
|
| 117 |
print("π Setting up optimized search engines...")
|
|
@@ -157,22 +179,39 @@ class SpeedOptimizedGAIAAgent:
|
|
| 157 |
self.answer_cache[question] = answer
|
| 158 |
|
| 159 |
def fast_search(self, query: str, max_results: int = 3) -> str:
|
| 160 |
-
"""Optimized search using only the fastest engines"""
|
| 161 |
print(f"π Fast search: {query[:50]}...")
|
| 162 |
all_results = []
|
| 163 |
|
| 164 |
-
# Try Tavily first (usually fastest)
|
| 165 |
if self.tavily:
|
| 166 |
try:
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
| 168 |
if tavily_results and 'results' in tavily_results:
|
| 169 |
for result in tavily_results['results']:
|
| 170 |
all_results.append(f"Source: {result.get('title', '')}\n{result.get('content', '')}")
|
| 171 |
print(f"π Tavily: {len(tavily_results.get('results', []))} results")
|
| 172 |
except Exception as e:
|
| 173 |
-
print(f"β Tavily error: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
-
# If not enough results, try DuckDuckGo (
|
| 176 |
if len(all_results) < max_results:
|
| 177 |
try:
|
| 178 |
remaining = max_results - len(all_results)
|
|
@@ -204,7 +243,7 @@ class SpeedOptimizedGAIAAgent:
|
|
| 204 |
return "standard"
|
| 205 |
|
| 206 |
def get_fast_response(self, model_key: str, question: str, context: str = "") -> Dict[str, Any]:
|
| 207 |
-
"""Get response with optimized parameters for speed"""
|
| 208 |
model = self.models[model_key]
|
| 209 |
|
| 210 |
print(f"π€ {model_key} processing...")
|
|
@@ -221,16 +260,18 @@ Respond with ONLY the answer, no explanation unless specifically requested."""
|
|
| 221 |
user_prompt = f"Question: {question}\n\nContext: {context}\n\nAnswer:"
|
| 222 |
|
| 223 |
try:
|
| 224 |
-
|
| 225 |
-
model
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
|
|
|
| 233 |
|
|
|
|
| 234 |
answer = response.choices[0].message.content.strip()
|
| 235 |
|
| 236 |
return {
|
|
@@ -240,7 +281,7 @@ Respond with ONLY the answer, no explanation unless specifically requested."""
|
|
| 240 |
}
|
| 241 |
|
| 242 |
except Exception as e:
|
| 243 |
-
print(f"β {model_key} error: {e}")
|
| 244 |
return {
|
| 245 |
"model": model_key,
|
| 246 |
"answer": f"Error: {e}",
|
|
|
|
| 14 |
from datetime import datetime
|
| 15 |
import time
|
| 16 |
import hashlib
|
| 17 |
+
import random
|
| 18 |
|
| 19 |
# Core imports
|
| 20 |
from ddgs import DDGS
|
|
|
|
| 59 |
- Reduced search overhead
|
| 60 |
- Vector similarity for answer retrieval
|
| 61 |
- Parallel processing optimizations
|
| 62 |
+
- Exponential backoff retry for rate limiting
|
| 63 |
"""
|
| 64 |
|
| 65 |
def __init__(self):
|
| 66 |
+
print("π Initializing Speed-Optimized GAIA Agent with Retry Logic")
|
| 67 |
|
| 68 |
# API setup
|
| 69 |
self.openrouter_key = os.getenv("OPENROUTER_API_KEY")
|
|
|
|
| 88 |
}
|
| 89 |
}
|
| 90 |
|
| 91 |
+
print("π€ Using 2 optimized models with retry logic")
|
| 92 |
|
| 93 |
# Initialize vector similarity if available
|
| 94 |
self.vector_cache = {}
|
|
|
|
| 114 |
base_url="https://openrouter.ai/api/v1"
|
| 115 |
)
|
| 116 |
|
| 117 |
+
def retry_with_backoff(self, func, *args, max_attempts=5, max_delay=60, **kwargs):
|
| 118 |
+
"""Exponential backoff retry with jitter"""
|
| 119 |
+
for attempt in range(max_attempts):
|
| 120 |
+
try:
|
| 121 |
+
return func(*args, **kwargs)
|
| 122 |
+
except Exception as e:
|
| 123 |
+
if attempt == max_attempts - 1:
|
| 124 |
+
print(f"β Final attempt failed: {e}")
|
| 125 |
+
raise e
|
| 126 |
+
|
| 127 |
+
# Calculate delay with exponential backoff + jitter
|
| 128 |
+
base_delay = min(2 ** attempt, max_delay // 4) # Cap base delay
|
| 129 |
+
jitter = random.uniform(0.1, 0.3) * base_delay
|
| 130 |
+
delay = min(base_delay + jitter, max_delay)
|
| 131 |
+
|
| 132 |
+
print(f"β³ Rate limited (attempt {attempt + 1}/{max_attempts}), retrying in {delay:.1f}s...")
|
| 133 |
+
time.sleep(delay)
|
| 134 |
+
|
| 135 |
+
raise Exception("Max retry attempts exceeded")
|
| 136 |
+
|
| 137 |
def setup_search_engines(self):
|
| 138 |
"""Setup search engines in priority order"""
|
| 139 |
print("π Setting up optimized search engines...")
|
|
|
|
| 179 |
self.answer_cache[question] = answer
|
| 180 |
|
| 181 |
def fast_search(self, query: str, max_results: int = 3) -> str:
|
| 182 |
+
"""Optimized search using only the fastest engines with retry logic"""
|
| 183 |
print(f"π Fast search: {query[:50]}...")
|
| 184 |
all_results = []
|
| 185 |
|
| 186 |
+
# Try Tavily first (usually fastest) with retry
|
| 187 |
if self.tavily:
|
| 188 |
try:
|
| 189 |
+
def tavily_search():
|
| 190 |
+
return self.tavily.search(query[:350], max_results=2)
|
| 191 |
+
|
| 192 |
+
tavily_results = self.retry_with_backoff(tavily_search)
|
| 193 |
if tavily_results and 'results' in tavily_results:
|
| 194 |
for result in tavily_results['results']:
|
| 195 |
all_results.append(f"Source: {result.get('title', '')}\n{result.get('content', '')}")
|
| 196 |
print(f"π Tavily: {len(tavily_results.get('results', []))} results")
|
| 197 |
except Exception as e:
|
| 198 |
+
print(f"β Tavily error after retries: {e}")
|
| 199 |
+
|
| 200 |
+
# If not enough results, try Exa with retry
|
| 201 |
+
if self.exa and len(all_results) < max_results:
|
| 202 |
+
try:
|
| 203 |
+
def exa_search():
|
| 204 |
+
return self.exa.search_and_contents(query, num_results=max_results-len(all_results))
|
| 205 |
+
|
| 206 |
+
exa_results = self.retry_with_backoff(exa_search)
|
| 207 |
+
if exa_results and hasattr(exa_results, 'results'):
|
| 208 |
+
for result in exa_results.results:
|
| 209 |
+
all_results.append(f"Source: {getattr(result, 'title', '')}\n{getattr(result, 'text', '')}")
|
| 210 |
+
print(f"π Exa: {len(exa_results.results)} results")
|
| 211 |
+
except Exception as e:
|
| 212 |
+
print(f"β Exa error after retries: {e}")
|
| 213 |
|
| 214 |
+
# If still not enough results, try DuckDuckGo (no API limits)
|
| 215 |
if len(all_results) < max_results:
|
| 216 |
try:
|
| 217 |
remaining = max_results - len(all_results)
|
|
|
|
| 243 |
return "standard"
|
| 244 |
|
| 245 |
def get_fast_response(self, model_key: str, question: str, context: str = "") -> Dict[str, Any]:
|
| 246 |
+
"""Get response with optimized parameters for speed and retry logic"""
|
| 247 |
model = self.models[model_key]
|
| 248 |
|
| 249 |
print(f"π€ {model_key} processing...")
|
|
|
|
| 260 |
user_prompt = f"Question: {question}\n\nContext: {context}\n\nAnswer:"
|
| 261 |
|
| 262 |
try:
|
| 263 |
+
def make_llm_call():
|
| 264 |
+
return model["client"].chat.completions.create(
|
| 265 |
+
model=model["name"],
|
| 266 |
+
messages=[
|
| 267 |
+
{"role": "system", "content": system_prompt},
|
| 268 |
+
{"role": "user", "content": user_prompt}
|
| 269 |
+
],
|
| 270 |
+
max_tokens=100, # Reduced for speed
|
| 271 |
+
temperature=0.1
|
| 272 |
+
)
|
| 273 |
|
| 274 |
+
response = self.retry_with_backoff(make_llm_call)
|
| 275 |
answer = response.choices[0].message.content.strip()
|
| 276 |
|
| 277 |
return {
|
|
|
|
| 281 |
}
|
| 282 |
|
| 283 |
except Exception as e:
|
| 284 |
+
print(f"β {model_key} error after retries: {e}")
|
| 285 |
return {
|
| 286 |
"model": model_key,
|
| 287 |
"answer": f"Error: {e}",
|