Spaces:
Running
Running
Asish Karthikeya Gogineni committed on
Commit ·
5f1e9e9
1
Parent(s): f10ec60
feat: Add runtime Gemini model switching on rate limits - automatically tries next model
Browse files- code_chatbot/rag.py +97 -5
code_chatbot/rag.py
CHANGED
|
@@ -15,6 +15,22 @@ import os
|
|
| 15 |
logging.basicConfig(level=logging.INFO)
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
class ChatEngine:
|
| 19 |
def __init__(
|
| 20 |
self,
|
|
@@ -40,6 +56,9 @@ class ChatEngine:
|
|
| 40 |
self.repo_files = repo_files
|
| 41 |
self.repo_dir = repo_dir
|
| 42 |
|
|
|
|
|
|
|
|
|
|
| 43 |
# Initialize LLM
|
| 44 |
self.llm = self._get_llm()
|
| 45 |
|
|
@@ -148,6 +167,8 @@ class ChatEngine:
|
|
| 148 |
|
| 149 |
# Try each model until one works
|
| 150 |
last_error = None
|
|
|
|
|
|
|
| 151 |
for model_name in GEMINI_MODELS_TO_TRY:
|
| 152 |
try:
|
| 153 |
logger.info(f"Attempting to use Gemini model: {model_name}")
|
|
@@ -157,12 +178,19 @@ class ChatEngine:
|
|
| 157 |
temperature=0.2,
|
| 158 |
convert_system_message_to_human=True
|
| 159 |
)
|
| 160 |
-
#
|
| 161 |
-
|
| 162 |
-
logger.info(f"
|
| 163 |
return llm
|
| 164 |
except Exception as e:
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
last_error = e
|
| 167 |
continue
|
| 168 |
|
|
@@ -181,6 +209,49 @@ class ChatEngine:
|
|
| 181 |
else:
|
| 182 |
raise ValueError(f"Provider {self.provider} not supported. Only 'groq' and 'gemini' are supported.")
|
| 183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
def _build_rag_chain(self):
|
| 186 |
"""Builds a simplified RAG chain with history-aware retrieval."""
|
|
@@ -258,7 +329,21 @@ class ChatEngine:
|
|
| 258 |
except Exception as e:
|
| 259 |
# Fallback for Groq/LLM Tool Errors & Rate Limits
|
| 260 |
error_str = str(e)
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
logger.warning(f"Agent failed ({error_str}), falling back to Linear RAG.")
|
| 263 |
return self._linear_chat(question)
|
| 264 |
raise e
|
|
@@ -267,6 +352,13 @@ class ChatEngine:
|
|
| 267 |
return self._linear_chat(question)
|
| 268 |
|
| 269 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
logger.error(f"Error during chat: {e}", exc_info=True)
|
| 271 |
return f"Error: {str(e)}", []
|
| 272 |
|
|
|
|
| 15 |
logging.basicConfig(level=logging.INFO)
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
| 18 |
# Gemini models fallback list (tried in order).
# Order matters: _try_next_gemini_model walks this list forward by index,
# so entries are arranged from most-preferred to last-resort.
# NOTE(review): availability of the preview model names is account/region
# dependent — confirm against the current Gemini model catalog.
GEMINI_FALLBACK_MODELS = [
    "gemini-3-flash-preview",
    "gemini-3-pro-preview",
    "gemini-2.5-flash",
    "gemini-2.5-pro",
    "gemini-2.5-flash-preview-09-2025",
    "gemini-2.5-flash-lite",
    "gemini-2.5-flash-lite-preview-09-2025",
    "gemini-2.0-flash",
    "gemini-2.0-flash-lite",
    "gemini-1.5-flash",
    "gemini-1.5-pro",
    "gemini-pro",
]
|
| 33 |
+
|
| 34 |
class ChatEngine:
|
| 35 |
def __init__(
|
| 36 |
self,
|
|
|
|
| 56 |
self.repo_files = repo_files
|
| 57 |
self.repo_dir = repo_dir
|
| 58 |
|
| 59 |
+
# Track current model index for fallback
|
| 60 |
+
self._gemini_model_index = 0
|
| 61 |
+
|
| 62 |
# Initialize LLM
|
| 63 |
self.llm = self._get_llm()
|
| 64 |
|
|
|
|
| 167 |
|
| 168 |
# Try each model until one works
|
| 169 |
last_error = None
|
| 170 |
+
last_working_model = None
|
| 171 |
+
|
| 172 |
for model_name in GEMINI_MODELS_TO_TRY:
|
| 173 |
try:
|
| 174 |
logger.info(f"Attempting to use Gemini model: {model_name}")
|
|
|
|
| 178 |
temperature=0.2,
|
| 179 |
convert_system_message_to_human=True
|
| 180 |
)
|
| 181 |
+
# Don't test the model here - it uses up quota!
|
| 182 |
+
# Just return it and let the actual call determine if it works
|
| 183 |
+
logger.info(f"Initialized Gemini model: {model_name}")
|
| 184 |
return llm
|
| 185 |
except Exception as e:
|
| 186 |
+
error_str = str(e).lower()
|
| 187 |
+
# Check for specific error types
|
| 188 |
+
if "not_found" in error_str or "404" in error_str:
|
| 189 |
+
logger.warning(f"Model {model_name} not found, trying next...")
|
| 190 |
+
elif "resource_exhausted" in error_str or "429" in error_str or "quota" in error_str:
|
| 191 |
+
logger.warning(f"Model {model_name} rate limited, trying next...")
|
| 192 |
+
else:
|
| 193 |
+
logger.warning(f"Model {model_name} failed: {str(e)[:100]}")
|
| 194 |
last_error = e
|
| 195 |
continue
|
| 196 |
|
|
|
|
| 209 |
else:
|
| 210 |
raise ValueError(f"Provider {self.provider} not supported. Only 'groq' and 'gemini' are supported.")
|
| 211 |
|
| 212 |
+
def _try_next_gemini_model(self) -> bool:
|
| 213 |
+
"""
|
| 214 |
+
Try to switch to the next Gemini model in the fallback list.
|
| 215 |
+
Returns True if a new model was set, False if all models exhausted.
|
| 216 |
+
"""
|
| 217 |
+
if self.provider != "gemini":
|
| 218 |
+
return False
|
| 219 |
+
|
| 220 |
+
self._gemini_model_index += 1
|
| 221 |
+
|
| 222 |
+
if self._gemini_model_index >= len(GEMINI_FALLBACK_MODELS):
|
| 223 |
+
logger.error("All Gemini models exhausted!")
|
| 224 |
+
return False
|
| 225 |
+
|
| 226 |
+
next_model = GEMINI_FALLBACK_MODELS[self._gemini_model_index]
|
| 227 |
+
logger.info(f"Switching to next Gemini model: {next_model} (index {self._gemini_model_index})")
|
| 228 |
+
|
| 229 |
+
api_key = self.api_key or os.getenv("GOOGLE_API_KEY")
|
| 230 |
+
try:
|
| 231 |
+
self.llm = ChatGoogleGenerativeAI(
|
| 232 |
+
model=next_model,
|
| 233 |
+
google_api_key=api_key,
|
| 234 |
+
temperature=0.2,
|
| 235 |
+
convert_system_message_to_human=True
|
| 236 |
+
)
|
| 237 |
+
self.model_name = next_model
|
| 238 |
+
|
| 239 |
+
# Rebuild agent if using agents
|
| 240 |
+
if self.use_agent:
|
| 241 |
+
try:
|
| 242 |
+
from code_chatbot.agent_workflow import create_agent_graph
|
| 243 |
+
self.agent_executor = create_agent_graph(
|
| 244 |
+
llm=self.llm,
|
| 245 |
+
retriever=self.vector_retriever,
|
| 246 |
+
code_analyzer=self.code_analyzer
|
| 247 |
+
)
|
| 248 |
+
except Exception as e:
|
| 249 |
+
logger.warning(f"Could not rebuild agent: {e}")
|
| 250 |
+
|
| 251 |
+
return True
|
| 252 |
+
except Exception as e:
|
| 253 |
+
logger.error(f"Failed to switch to model {next_model}: {e}")
|
| 254 |
+
return self._try_next_gemini_model() # Recursively try next
|
| 255 |
|
| 256 |
def _build_rag_chain(self):
|
| 257 |
"""Builds a simplified RAG chain with history-aware retrieval."""
|
|
|
|
| 329 |
except Exception as e:
|
| 330 |
# Fallback for Groq/LLM Tool Errors & Rate Limits
|
| 331 |
error_str = str(e)
|
| 332 |
+
|
| 333 |
+
# Check if it's a rate limit error
|
| 334 |
+
if any(err in error_str for err in ["429", "RESOURCE_EXHAUSTED", "quota"]):
|
| 335 |
+
logger.warning(f"Rate limit hit on {self.model_name}: {error_str[:100]}")
|
| 336 |
+
|
| 337 |
+
# Try switching to next Gemini model
|
| 338 |
+
if self.provider == "gemini" and self._try_next_gemini_model():
|
| 339 |
+
logger.info(f"Switched to {self.model_name}, retrying...")
|
| 340 |
+
return self.chat(question) # Retry with new model
|
| 341 |
+
else:
|
| 342 |
+
logger.warning("No more models to try, falling back to Linear RAG")
|
| 343 |
+
return self._linear_chat(question)
|
| 344 |
+
|
| 345 |
+
# Handle tool use errors
|
| 346 |
+
if any(err in error_str for err in ["tool_use_failed", "invalid_request_error", "400"]):
|
| 347 |
logger.warning(f"Agent failed ({error_str}), falling back to Linear RAG.")
|
| 348 |
return self._linear_chat(question)
|
| 349 |
raise e
|
|
|
|
| 352 |
return self._linear_chat(question)
|
| 353 |
|
| 354 |
except Exception as e:
|
| 355 |
+
# Check for rate limits in outer exception too
|
| 356 |
+
error_str = str(e)
|
| 357 |
+
if any(err in error_str for err in ["429", "RESOURCE_EXHAUSTED", "quota"]):
|
| 358 |
+
if self.provider == "gemini" and self._try_next_gemini_model():
|
| 359 |
+
logger.info(f"Switched to {self.model_name} after outer error, retrying...")
|
| 360 |
+
return self.chat(question)
|
| 361 |
+
|
| 362 |
logger.error(f"Error during chat: {e}", exc_info=True)
|
| 363 |
return f"Error: {str(e)}", []
|
| 364 |
|