Spaces:
Sleeping
Sleeping
Fix: Add rate limit handling with retry and delays
1. LLM client: Add exponential backoff retry on 429 errors
- MAX_RETRIES=3, delays of 2s, 4s, 8s
- New _request_with_retry() helper method
2. Critic: Add 2s delay before LLM call
- Avoids hitting rate limit after Analyzer's call
3. Analyzer: Add 2s delay before revision LLM call
- Avoids hitting rate limit after Critic's call
This prevents 429 errors from cascading through the workflow.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- src/llm_client.py +37 -7
- src/nodes/analyzer.py +6 -0
- src/nodes/critic.py +5 -0
src/llm_client.py
CHANGED
|
@@ -6,8 +6,14 @@ Adopts pattern from Enterprise-AI-Gateway for resilient LLM access.
|
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
import requests
|
|
|
|
| 9 |
from typing import Optional, Tuple
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
class LLMClient:
|
| 12 |
"""LLM client with automatic provider fallback."""
|
| 13 |
|
|
@@ -93,8 +99,35 @@ class LLMClient:
|
|
| 93 |
|
| 94 |
return None, None, f"All LLM providers failed: {'; '.join(errors)}", providers_failed
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
def _call_provider(self, provider: dict, prompt: str, temperature: float, max_tokens: int) -> Tuple[Optional[str], Optional[str]]:
|
| 97 |
-
"""Call a specific LLM provider."""
|
| 98 |
headers = {"Content-Type": "application/json"}
|
| 99 |
|
| 100 |
if provider["name"] == "groq":
|
|
@@ -105,8 +138,7 @@ class LLMClient:
|
|
| 105 |
"max_tokens": max_tokens,
|
| 106 |
"temperature": temperature,
|
| 107 |
}
|
| 108 |
-
response =
|
| 109 |
-
response.raise_for_status()
|
| 110 |
data = response.json()
|
| 111 |
if data and "choices" in data and data["choices"]:
|
| 112 |
return data["choices"][0]["message"]["content"], None
|
|
@@ -121,8 +153,7 @@ class LLMClient:
|
|
| 121 |
"maxOutputTokens": max_tokens,
|
| 122 |
}
|
| 123 |
}
|
| 124 |
-
response =
|
| 125 |
-
response.raise_for_status()
|
| 126 |
data = response.json()
|
| 127 |
if data and "candidates" in data and data["candidates"]:
|
| 128 |
first_candidate = data["candidates"][0]
|
|
@@ -142,8 +173,7 @@ class LLMClient:
|
|
| 142 |
"max_tokens": max_tokens,
|
| 143 |
"temperature": temperature,
|
| 144 |
}
|
| 145 |
-
response =
|
| 146 |
-
response.raise_for_status()
|
| 147 |
data = response.json()
|
| 148 |
if data and "choices" in data and data["choices"]:
|
| 149 |
return data["choices"][0]["message"]["content"], None
|
|
|
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
import requests
|
| 9 |
+
from requests.exceptions import HTTPError
|
| 10 |
from typing import Optional, Tuple
|
| 11 |
|
| 12 |
+
# Retry configuration for rate limits
|
| 13 |
+
MAX_RETRIES = 3
|
| 14 |
+
INITIAL_BACKOFF = 2 # seconds
|
| 15 |
+
|
| 16 |
+
|
| 17 |
class LLMClient:
|
| 18 |
"""LLM client with automatic provider fallback."""
|
| 19 |
|
|
|
|
| 99 |
|
| 100 |
return None, None, f"All LLM providers failed: {'; '.join(errors)}", providers_failed
|
| 101 |
|
def _request_with_retry(self, url: str, headers: dict, payload: dict, provider_name: str) -> requests.Response:
    """POST *payload* to *url*, retrying with exponential backoff on HTTP 429.

    Makes up to MAX_RETRIES attempts, sleeping INITIAL_BACKOFF * 2**attempt
    seconds between rate-limited attempts (2s, 4s, 8s). Any non-429 HTTP
    error, or a 429 on the final attempt, is re-raised to the caller so the
    provider-fallback logic above can record it.

    Args:
        url: Provider endpoint to POST to.
        headers: HTTP headers for the request.
        payload: JSON-serializable request body.
        provider_name: Human-readable provider name, used in log output only.

    Returns:
        The successful requests.Response.

    Raises:
        HTTPError: On any non-429 status, or on a 429 after the last retry.
    """
    for attempt in range(MAX_RETRIES):
        try:
            response = requests.post(url, headers=headers, json=payload, timeout=30)
            response.raise_for_status()
            return response
        except HTTPError as e:
            is_rate_limit = e.response is not None and e.response.status_code == 429
            # Re-raise immediately for non-429 errors, or a 429 on the final
            # attempt. (The original also had a dead `except Exception: raise`
            # clause and `last_error` bookkeeping; both removed — re-raising
            # here makes the tail below genuinely unreachable.)
            if not is_rate_limit or attempt == MAX_RETRIES - 1:
                raise
            backoff = INITIAL_BACKOFF * (2 ** attempt)  # 2s, 4s, 8s
            # NOTE(review): could honor the server's Retry-After header here
            # instead of a fixed schedule — confirm whether providers send it.
            print(f"Rate limited by {provider_name}, retrying in {backoff}s (attempt {attempt + 1}/{MAX_RETRIES})...")
            time.sleep(backoff)
    # Defensive: unreachable, since the loop either returns or raises above.
    raise RuntimeError(f"Request failed after {MAX_RETRIES} attempts")
| 128 |
+
|
| 129 |
def _call_provider(self, provider: dict, prompt: str, temperature: float, max_tokens: int) -> Tuple[Optional[str], Optional[str]]:
|
| 130 |
+
"""Call a specific LLM provider with retry on rate limit."""
|
| 131 |
headers = {"Content-Type": "application/json"}
|
| 132 |
|
| 133 |
if provider["name"] == "groq":
|
|
|
|
| 138 |
"max_tokens": max_tokens,
|
| 139 |
"temperature": temperature,
|
| 140 |
}
|
| 141 |
+
response = self._request_with_retry(provider["url"], headers, payload, provider["name"])
|
|
|
|
| 142 |
data = response.json()
|
| 143 |
if data and "choices" in data and data["choices"]:
|
| 144 |
return data["choices"][0]["message"]["content"], None
|
|
|
|
| 153 |
"maxOutputTokens": max_tokens,
|
| 154 |
}
|
| 155 |
}
|
| 156 |
+
response = self._request_with_retry(url, headers, payload, provider["name"])
|
|
|
|
| 157 |
data = response.json()
|
| 158 |
if data and "candidates" in data and data["candidates"]:
|
| 159 |
first_candidate = data["candidates"][0]
|
|
|
|
| 173 |
"max_tokens": max_tokens,
|
| 174 |
"temperature": temperature,
|
| 175 |
}
|
| 176 |
+
response = self._request_with_retry(provider["url"], headers, payload, provider["name"])
|
|
|
|
| 177 |
data = response.json()
|
| 178 |
if data and "choices" in data and data["choices"]:
|
| 179 |
return data["choices"][0]["message"]["content"], None
|
src/nodes/analyzer.py
CHANGED
|
@@ -1019,6 +1019,12 @@ def analyzer_node(state, workflow_id=None, progress_store=None):
|
|
| 1019 |
prompt = _build_analyzer_prompt(company, ticker, formatted_data, is_financial)
|
| 1020 |
current_revision = 0
|
| 1021 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1022 |
start_time = time.time()
|
| 1023 |
response, provider, error, providers_failed = llm.query(prompt, temperature=0)
|
| 1024 |
elapsed = time.time() - start_time
|
|
|
|
| 1019 |
prompt = _build_analyzer_prompt(company, ticker, formatted_data, is_financial)
|
| 1020 |
current_revision = 0
|
| 1021 |
|
| 1022 |
+
# In revision mode, add delay before LLM call to avoid rate limits
|
| 1023 |
+
# (Critic just called LLM, so we need to wait)
|
| 1024 |
+
if is_revision:
|
| 1025 |
+
print("Waiting 2s before revision LLM call (rate limit buffer)...")
|
| 1026 |
+
time.sleep(2)
|
| 1027 |
+
|
| 1028 |
start_time = time.time()
|
| 1029 |
response, provider, error, providers_failed = llm.query(prompt, temperature=0)
|
| 1030 |
elapsed = time.time() - start_time
|
src/nodes/critic.py
CHANGED
|
@@ -320,6 +320,11 @@ def critic_node(state, workflow_id=None, progress_store=None):
|
|
| 320 |
# Run LLM evaluation
|
| 321 |
print(f"Running LLM evaluation (iteration {iteration})...")
|
| 322 |
llm = get_llm_client()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
_add_activity_log(workflow_id, progress_store, "critic", "Calling LLM for quality evaluation...")
|
| 324 |
start_time = time.time()
|
| 325 |
|
|
|
|
| 320 |
# Run LLM evaluation
|
| 321 |
print(f"Running LLM evaluation (iteration {iteration})...")
|
| 322 |
llm = get_llm_client()
|
| 323 |
+
|
| 324 |
+
# Add delay before LLM call to avoid rate limits (Analyzer just called LLM)
|
| 325 |
+
print("Waiting 2s before Critic LLM call (rate limit buffer)...")
|
| 326 |
+
time.sleep(2)
|
| 327 |
+
|
| 328 |
_add_activity_log(workflow_id, progress_store, "critic", "Calling LLM for quality evaluation...")
|
| 329 |
start_time = time.time()
|
| 330 |
|