Spaces:
Running
Running
Asish Karthikeya Gogineni committed on
Commit ·
5d60881
1
Parent(s): 27fbe66
fix: Robust 429 handling in Linear RAG
Browse files
- Added automatic rate limit handling to standard chat
- Swaps to next Gemini model (e.g. 2.5-flash -> 1.5-flash) on RESOURCE_EXHAUSTED
- Prevents user-facing errors when free tier quota is hit
- code_chatbot/rag.py +7 -0
code_chatbot/rag.py
CHANGED
|
@@ -374,6 +374,13 @@ class ChatEngine:
|
|
| 374 |
response_msg = self.llm.invoke(messages)
|
| 375 |
answer = response_msg.content
|
| 376 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
logger.error(f"Error in linear chat invoke: {e}")
|
| 378 |
return f"Error consuming LLM: {e}", []
|
| 379 |
|
|
|
|
| 374 |
response_msg = self.llm.invoke(messages)
|
| 375 |
answer = response_msg.content
|
| 376 |
except Exception as e:
|
| 377 |
+
# Check for Rate Limit in Linear Chat
|
| 378 |
+
error_str = str(e)
|
| 379 |
+
if any(err in error_str for err in ["429", "RESOURCE_EXHAUSTED", "quota"]):
|
| 380 |
+
if self.provider == "gemini" and self._try_next_gemini_model():
|
| 381 |
+
logger.info(f"Linear RAG: Switched to {self.model_name} due to rate limit, retrying...")
|
| 382 |
+
return self._linear_chat(question) # Retry with new model
|
| 383 |
+
|
| 384 |
logger.error(f"Error in linear chat invoke: {e}")
|
| 385 |
return f"Error consuming LLM: {e}", []
|
| 386 |
|