HuggingFace_Agent_Cert

Sleeping

AgileAndy Claude commited on Jul 13, 2025

Commit

43ccb47

1 Parent(s): 86e609e

Add exponential backoff retry logic for rate limiting

- Added retry_with_backoff function with up to 60s delay and 5 attempts
- Applied retry logic to all OpenRouter LLM calls
- Applied retry logic to Tavily API calls
- Applied retry logic to Exa API calls
- Includes jitter to prevent thundering herd issues

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show

.DS_Store +0 -0
speed_optimized_gaia_agent.py +59 -18

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

speed_optimized_gaia_agent.py CHANGED Viewed

@@ -14,6 +14,7 @@ import pandas as pd
 from datetime import datetime
 import time
 import hashlib
 # Core imports
 from ddgs import DDGS
@@ -58,10 +59,11 @@ class SpeedOptimizedGAIAAgent:
     - Reduced search overhead
     - Vector similarity for answer retrieval
     - Parallel processing optimizations
     """
     def __init__(self):
-        print("🚀 Initializing Speed-Optimized GAIA Agent")
         # API setup
         self.openrouter_key = os.getenv("OPENROUTER_API_KEY")
@@ -86,7 +88,7 @@ class SpeedOptimizedGAIAAgent:
             }
         }
-        print("🤖 Using 2 optimized models for speed")
         # Initialize vector similarity if available
         self.vector_cache = {}
@@ -112,6 +114,26 @@ class SpeedOptimizedGAIAAgent:
             base_url="https://openrouter.ai/api/v1"
         )
     def setup_search_engines(self):
         """Setup search engines in priority order"""
         print("🔍 Setting up optimized search engines...")
@@ -157,22 +179,39 @@ class SpeedOptimizedGAIAAgent:
             self.answer_cache[question] = answer
     def fast_search(self, query: str, max_results: int = 3) -> str:
-        """Optimized search using only the fastest engines"""
         print(f"🔍 Fast search: {query[:50]}...")
         all_results = []
-        # Try Tavily first (usually fastest)
         if self.tavily:
             try:
-                tavily_results = self.tavily.search(query[:350], max_results=2)
                 if tavily_results and 'results' in tavily_results:
                     for result in tavily_results['results']:
                         all_results.append(f"Source: {result.get('title', '')}\n{result.get('content', '')}")
                 print(f"📊 Tavily: {len(tavily_results.get('results', []))} results")
             except Exception as e:
-                print(f"❌ Tavily error: {e}")
-        # If not enough results, try DuckDuckGo (skip Exa for speed)
         if len(all_results) < max_results:
             try:
                 remaining = max_results - len(all_results)
@@ -204,7 +243,7 @@ class SpeedOptimizedGAIAAgent:
         return "standard"
     def get_fast_response(self, model_key: str, question: str, context: str = "") -> Dict[str, Any]:
-        """Get response with optimized parameters for speed"""
         model = self.models[model_key]
         print(f"🤖 {model_key} processing...")
@@ -221,16 +260,18 @@ Respond with ONLY the answer, no explanation unless specifically requested."""
         user_prompt = f"Question: {question}\n\nContext: {context}\n\nAnswer:"
         try:
-            response = model["client"].chat.completions.create(
-                model=model["name"],
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_prompt}
-                ],
-                max_tokens=100,  # Reduced for speed
-                temperature=0.1
-            )
             answer = response.choices[0].message.content.strip()
             return {
@@ -240,7 +281,7 @@ Respond with ONLY the answer, no explanation unless specifically requested."""
             }
         except Exception as e:
-            print(f"❌ {model_key} error: {e}")
             return {
                 "model": model_key,
                 "answer": f"Error: {e}",

 from datetime import datetime
 import time
 import hashlib
+import random
 # Core imports
 from ddgs import DDGS
     - Reduced search overhead
     - Vector similarity for answer retrieval
     - Parallel processing optimizations
+    - Exponential backoff retry for rate limiting
     """
     def __init__(self):
+        print("🚀 Initializing Speed-Optimized GAIA Agent with Retry Logic")
         # API setup
         self.openrouter_key = os.getenv("OPENROUTER_API_KEY")
             }
         }
+        print("🤖 Using 2 optimized models with retry logic")
         # Initialize vector similarity if available
         self.vector_cache = {}
             base_url="https://openrouter.ai/api/v1"
         )
+    def retry_with_backoff(self, func, *args, max_attempts=5, max_delay=60, **kwargs):
+        """Exponential backoff retry with jitter"""
+        for attempt in range(max_attempts):
+            try:
+                return func(*args, **kwargs)
+            except Exception as e:
+                if attempt == max_attempts - 1:
+                    print(f"❌ Final attempt failed: {e}")
+                    raise e
+                # Calculate delay with exponential backoff + jitter
+                base_delay = min(2 ** attempt, max_delay // 4)  # Cap base delay
+                jitter = random.uniform(0.1, 0.3) * base_delay
+                delay = min(base_delay + jitter, max_delay)
+                print(f"⏳ Rate limited (attempt {attempt + 1}/{max_attempts}), retrying in {delay:.1f}s...")
+                time.sleep(delay)
+        raise Exception("Max retry attempts exceeded")
     def setup_search_engines(self):
         """Setup search engines in priority order"""
         print("🔍 Setting up optimized search engines...")
             self.answer_cache[question] = answer
     def fast_search(self, query: str, max_results: int = 3) -> str:
+        """Optimized search using only the fastest engines with retry logic"""
         print(f"🔍 Fast search: {query[:50]}...")
         all_results = []
+        # Try Tavily first (usually fastest) with retry
         if self.tavily:
             try:
+                def tavily_search():
+                    return self.tavily.search(query[:350], max_results=2)
+                tavily_results = self.retry_with_backoff(tavily_search)
                 if tavily_results and 'results' in tavily_results:
                     for result in tavily_results['results']:
                         all_results.append(f"Source: {result.get('title', '')}\n{result.get('content', '')}")
                 print(f"📊 Tavily: {len(tavily_results.get('results', []))} results")
             except Exception as e:
+                print(f"❌ Tavily error after retries: {e}")
+        # If not enough results, try Exa with retry
+        if self.exa and len(all_results) < max_results:
+            try:
+                def exa_search():
+                    return self.exa.search_and_contents(query, num_results=max_results-len(all_results))
+                exa_results = self.retry_with_backoff(exa_search)
+                if exa_results and hasattr(exa_results, 'results'):
+                    for result in exa_results.results:
+                        all_results.append(f"Source: {getattr(result, 'title', '')}\n{getattr(result, 'text', '')}")
+                print(f"📊 Exa: {len(exa_results.results)} results")
+            except Exception as e:
+                print(f"❌ Exa error after retries: {e}")
+        # If still not enough results, try DuckDuckGo (no API limits)
         if len(all_results) < max_results:
             try:
                 remaining = max_results - len(all_results)
         return "standard"
     def get_fast_response(self, model_key: str, question: str, context: str = "") -> Dict[str, Any]:
+        """Get response with optimized parameters for speed and retry logic"""
         model = self.models[model_key]
         print(f"🤖 {model_key} processing...")
         user_prompt = f"Question: {question}\n\nContext: {context}\n\nAnswer:"
         try:
+            def make_llm_call():
+                return model["client"].chat.completions.create(
+                    model=model["name"],
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": user_prompt}
+                    ],
+                    max_tokens=100,  # Reduced for speed
+                    temperature=0.1
+                )
+            response = self.retry_with_backoff(make_llm_call)
             answer = response.choices[0].message.content.strip()
             return {
             }
         except Exception as e:
+            print(f"❌ {model_key} error after retries: {e}")
             return {
                 "model": model_key,
                 "answer": f"Error: {e}",