galbendavids committed on
Commit
2aea9f9
·
1 Parent(s): 126332e

Generation: use only OpenRouter when OPENROUTER_API_KEY is set (no Gemini fallback)

Browse files
Files changed (1) hide show
  1. rag_engine.py +16 -6
rag_engine.py CHANGED
@@ -682,15 +682,25 @@ class RAGEngine:
682
  return None
683
 
684
  def _call_api_with_backoff(self, system_prompt: str, prompt: str, models: List[str]):
685
- """Try OpenRouter first (fast), then Gemini API with retry + backoff."""
686
  PIPELINE_LOG.info("_call_api_with_backoff START models=%s prompt_len=%d", models, len(prompt))
687
 
688
- # Try OpenRouter first for speed (when key is set)
689
- openrouter_result = self._call_openrouter(system_prompt, prompt)
690
- if openrouter_result:
691
- return openrouter_result
 
 
 
 
 
 
 
 
 
692
 
693
- # Fall back to Gemini
 
694
  max_attempts_per_model = 8
695
  max_rate_limit_wait_s = 180 # wait up to 3 minutes per attempt before retry
696
 
 
682
  return None
683
 
684
  def _call_api_with_backoff(self, system_prompt: str, prompt: str, models: List[str]):
685
+ """When OPENROUTER_API_KEY is set: use only OpenRouter (no Gemini). Else: use Gemini with backoff."""
686
  PIPELINE_LOG.info("_call_api_with_backoff START models=%s prompt_len=%d", models, len(prompt))
687
 
688
+ openrouter_key = self._get_openrouter_key()
689
+ if openrouter_key:
690
+ # Generation: use only OpenRouter when key is set (avoid Gemini rate limit)
691
+ PIPELINE_LOG.info("OpenRouter key present - using OpenRouter only for generation (no Gemini)")
692
+ for attempt in range(2):
693
+ result = self._call_openrouter(system_prompt, prompt, timeout_seconds=35)
694
+ if result:
695
+ return result
696
+ PIPELINE_LOG.warning("OpenRouter attempt %d failed, retrying...", attempt + 1)
697
+ return (
698
+ "❌ OpenRouter request failed after retries. Check OPENROUTER_API_KEY and OPENROUTER_MODEL in Space secrets. "
699
+ "See logs for details."
700
+ )
701
 
702
+ # No OpenRouter key: use Gemini
703
+ PIPELINE_LOG.info("OpenRouter key not set - using Gemini for generation")
704
  max_attempts_per_model = 8
705
  max_rate_limit_wait_s = 180 # wait up to 3 minutes per attempt before retry
706