Spaces:
Sleeping
Sleeping
Commit ·
2aea9f9
1
Parent(s): 126332e
Generation: use only OpenRouter when OPENROUTER_API_KEY is set (no Gemini fallback)
Browse files- rag_engine.py +16 -6
rag_engine.py
CHANGED
|
@@ -682,15 +682,25 @@ class RAGEngine:
|
|
| 682 |
return None
|
| 683 |
|
| 684 |
def _call_api_with_backoff(self, system_prompt: str, prompt: str, models: List[str]):
|
| 685 |
-
"""
|
| 686 |
PIPELINE_LOG.info("_call_api_with_backoff START models=%s prompt_len=%d", models, len(prompt))
|
| 687 |
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 692 |
|
| 693 |
-
#
|
|
|
|
| 694 |
max_attempts_per_model = 8
|
| 695 |
max_rate_limit_wait_s = 180 # wait up to 3 minutes per attempt before retry
|
| 696 |
|
|
|
|
| 682 |
return None
|
| 683 |
|
| 684 |
def _call_api_with_backoff(self, system_prompt: str, prompt: str, models: List[str]):
|
| 685 |
+
"""When OPENROUTER_API_KEY is set: use only OpenRouter (no Gemini). Else: use Gemini with backoff."""
|
| 686 |
PIPELINE_LOG.info("_call_api_with_backoff START models=%s prompt_len=%d", models, len(prompt))
|
| 687 |
|
| 688 |
+
openrouter_key = self._get_openrouter_key()
|
| 689 |
+
if openrouter_key:
|
| 690 |
+
# Generation: use only OpenRouter when key is set (avoid Gemini rate limit)
|
| 691 |
+
PIPELINE_LOG.info("OpenRouter key present - using OpenRouter only for generation (no Gemini)")
|
| 692 |
+
for attempt in range(2):
|
| 693 |
+
result = self._call_openrouter(system_prompt, prompt, timeout_seconds=35)
|
| 694 |
+
if result:
|
| 695 |
+
return result
|
| 696 |
+
PIPELINE_LOG.warning("OpenRouter attempt %d failed, retrying...", attempt + 1)
|
| 697 |
+
return (
|
| 698 |
+
"❌ OpenRouter request failed after retries. Check OPENROUTER_API_KEY and OPENROUTER_MODEL in Space secrets. "
|
| 699 |
+
"See logs for details."
|
| 700 |
+
)
|
| 701 |
|
| 702 |
+
# No OpenRouter key: use Gemini
|
| 703 |
+
PIPELINE_LOG.info("OpenRouter key not set - using Gemini for generation")
|
| 704 |
max_attempts_per_model = 8
|
| 705 |
max_rate_limit_wait_s = 180 # wait up to 3 minutes per attempt before retry
|
| 706 |
|