Asish Karthikeya Gogineni committed
Commit 5f1e9e9 · 1 Parent(s): f10ec60

feat: Add runtime Gemini model switching on rate limits - automatically tries next model

Files changed (1):
  1. code_chatbot/rag.py +97 -5
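
For orientation before the diff: the pattern this commit implements is an ordered list of model names plus an index that advances on quota errors. The sketch below is a minimal, self-contained illustration of that pattern, not the committed code; StubClient stands in for langchain_google_genai.ChatGoogleGenerativeAI, and the substring matching mirrors the diff's quota-error check.

# Minimal sketch of the fallback-on-rate-limit pattern (illustrative only).
FALLBACK_MODELS = ["gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-1.5-flash"]

def is_rate_limit(err: Exception) -> bool:
    # Same substring matching the diff uses to recognize quota errors.
    return any(tok in str(err) for tok in ("429", "RESOURCE_EXHAUSTED", "quota"))

class StubClient:
    """Stand-in for the real LLM client; the first model always 'rate limits'."""
    def __init__(self, model: str):
        self.model = model

    def invoke(self, prompt: str) -> str:
        if self.model == FALLBACK_MODELS[0]:
            raise RuntimeError("429 RESOURCE_EXHAUSTED: quota exceeded")
        return f"[{self.model}] answer to: {prompt}"

def ask_with_fallback(question: str) -> str:
    for model in FALLBACK_MODELS:  # tried in order, as in rag.py
        try:
            return StubClient(model).invoke(question)
        except Exception as e:
            if is_rate_limit(e):
                continue  # advance to the next model
            raise  # non-quota errors propagate unchanged
    raise RuntimeError("All fallback models exhausted")

print(ask_with_fallback("How does the retriever work?"))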
code_chatbot/rag.py CHANGED
@@ -15,6 +15,22 @@ import os
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# Gemini models fallback list (tried in order)
+GEMINI_FALLBACK_MODELS = [
+    "gemini-3-flash-preview",
+    "gemini-3-pro-preview",
+    "gemini-2.5-flash",
+    "gemini-2.5-pro",
+    "gemini-2.5-flash-preview-09-2025",
+    "gemini-2.5-flash-lite",
+    "gemini-2.5-flash-lite-preview-09-2025",
+    "gemini-2.0-flash",
+    "gemini-2.0-flash-lite",
+    "gemini-1.5-flash",
+    "gemini-1.5-pro",
+    "gemini-pro",
+]
+
 class ChatEngine:
     def __init__(
         self,
@@ -40,6 +56,9 @@ class ChatEngine:
         self.repo_files = repo_files
         self.repo_dir = repo_dir
 
+        # Track current model index for fallback
+        self._gemini_model_index = 0
+
         # Initialize LLM
         self.llm = self._get_llm()
 
@@ -148,6 +167,8 @@ class ChatEngine:
 
         # Try each model until one works
         last_error = None
+        last_working_model = None
+
         for model_name in GEMINI_MODELS_TO_TRY:
             try:
                 logger.info(f"Attempting to use Gemini model: {model_name}")
@@ -157,12 +178,19 @@ class ChatEngine:
                     temperature=0.2,
                     convert_system_message_to_human=True
                 )
-                # Test the model with a simple call
-                llm.invoke("test")
-                logger.info(f"Successfully initialized Gemini model: {model_name}")
+                # Don't test the model here - it uses up quota!
+                # Just return it and let the actual call determine if it works
+                logger.info(f"Initialized Gemini model: {model_name}")
                 return llm
             except Exception as e:
-                logger.warning(f"Model {model_name} failed: {str(e)[:100]}")
+                error_str = str(e).lower()
+                # Check for specific error types
+                if "not_found" in error_str or "404" in error_str:
+                    logger.warning(f"Model {model_name} not found, trying next...")
+                elif "resource_exhausted" in error_str or "429" in error_str or "quota" in error_str:
+                    logger.warning(f"Model {model_name} rate limited, trying next...")
+                else:
+                    logger.warning(f"Model {model_name} failed: {str(e)[:100]}")
                 last_error = e
                 continue
 
@@ -181,6 +209,49 @@ class ChatEngine:
         else:
             raise ValueError(f"Provider {self.provider} not supported. Only 'groq' and 'gemini' are supported.")
 
+    def _try_next_gemini_model(self) -> bool:
+        """
+        Try to switch to the next Gemini model in the fallback list.
+        Returns True if a new model was set, False if all models exhausted.
+        """
+        if self.provider != "gemini":
+            return False
+
+        self._gemini_model_index += 1
+
+        if self._gemini_model_index >= len(GEMINI_FALLBACK_MODELS):
+            logger.error("All Gemini models exhausted!")
+            return False
+
+        next_model = GEMINI_FALLBACK_MODELS[self._gemini_model_index]
+        logger.info(f"Switching to next Gemini model: {next_model} (index {self._gemini_model_index})")
+
+        api_key = self.api_key or os.getenv("GOOGLE_API_KEY")
+        try:
+            self.llm = ChatGoogleGenerativeAI(
+                model=next_model,
+                google_api_key=api_key,
+                temperature=0.2,
+                convert_system_message_to_human=True
+            )
+            self.model_name = next_model
+
+            # Rebuild agent if using agents
+            if self.use_agent:
+                try:
+                    from code_chatbot.agent_workflow import create_agent_graph
+                    self.agent_executor = create_agent_graph(
+                        llm=self.llm,
+                        retriever=self.vector_retriever,
+                        code_analyzer=self.code_analyzer
+                    )
+                except Exception as e:
+                    logger.warning(f"Could not rebuild agent: {e}")
+
+            return True
+        except Exception as e:
+            logger.error(f"Failed to switch to model {next_model}: {e}")
+            return self._try_next_gemini_model()  # Recursively try next
 
     def _build_rag_chain(self):
         """Builds a simplified RAG chain with history-aware retrieval."""
@@ -258,7 +329,21 @@ class ChatEngine:
             except Exception as e:
                 # Fallback for Groq/LLM Tool Errors & Rate Limits
                 error_str = str(e)
-                if any(err in error_str for err in ["tool_use_failed", "invalid_request_error", "400", "429", "RESOURCE_EXHAUSTED"]):
+
+                # Check if it's a rate limit error
+                if any(err in error_str for err in ["429", "RESOURCE_EXHAUSTED", "quota"]):
+                    logger.warning(f"Rate limit hit on {self.model_name}: {error_str[:100]}")
+
+                    # Try switching to next Gemini model
+                    if self.provider == "gemini" and self._try_next_gemini_model():
+                        logger.info(f"Switched to {self.model_name}, retrying...")
+                        return self.chat(question)  # Retry with new model
+                    else:
+                        logger.warning("No more models to try, falling back to Linear RAG")
+                        return self._linear_chat(question)
+
+                # Handle tool use errors
+                if any(err in error_str for err in ["tool_use_failed", "invalid_request_error", "400"]):
                     logger.warning(f"Agent failed ({error_str}), falling back to Linear RAG.")
                     return self._linear_chat(question)
                 raise e
@@ -267,6 +352,13 @@ class ChatEngine:
             return self._linear_chat(question)
 
         except Exception as e:
+            # Check for rate limits in outer exception too
+            error_str = str(e)
+            if any(err in error_str for err in ["429", "RESOURCE_EXHAUSTED", "quota"]):
+                if self.provider == "gemini" and self._try_next_gemini_model():
+                    logger.info(f"Switched to {self.model_name} after outer error, retrying...")
+                    return self.chat(question)
+
             logger.error(f"Error during chat: {e}", exc_info=True)
             return f"Error: {str(e)}", []
 
364