Asish Karthikeya Gogineni committed on
Commit
5d60881
·
1 Parent(s): 27fbe66

fix: Robust 429 handling in Linear RAG

Browse files

- Adds automatic rate-limit handling to the standard (linear) chat path
- Swaps to the next Gemini model (e.g. 2.5-flash -> 1.5-flash) and retries when the error mentions 429, RESOURCE_EXHAUSTED, or quota
- Prevents user-facing errors when the free-tier quota is hit

Files changed (1) hide show
  1. code_chatbot/rag.py +7 -0
code_chatbot/rag.py CHANGED
@@ -374,6 +374,13 @@ class ChatEngine:
374
  response_msg = self.llm.invoke(messages)
375
  answer = response_msg.content
376
  except Exception as e:
 
 
 
 
 
 
 
377
  logger.error(f"Error in linear chat invoke: {e}")
378
  return f"Error consuming LLM: {e}", []
379
 
 
374
  response_msg = self.llm.invoke(messages)
375
  answer = response_msg.content
376
  except Exception as e:
377
+ # Check for Rate Limit in Linear Chat
378
+ error_str = str(e)
379
+ if any(err in error_str for err in ["429", "RESOURCE_EXHAUSTED", "quota"]):
380
+ if self.provider == "gemini" and self._try_next_gemini_model():
381
+ logger.info(f"Linear RAG: Switched to {self.model_name} due to rate limit, retrying...")
382
+ return self._linear_chat(question) # Retry with new model
383
+
384
  logger.error(f"Error in linear chat invoke: {e}")
385
  return f"Error consuming LLM: {e}", []
386