nusaibah0110 committed on
Commit cb3b2ec · 1 Parent(s): dbbb9cd

Prioritize Gemini 2.5 models and cache quota-blocked model fallbacks

Files changed (1)
  1. backend/app.py +51 -30
backend/app.py CHANGED
@@ -80,6 +80,43 @@ def get_supported_gemini_models() -> List[str]:
             seen.add(name)
     return unique_models
 
+
+# Cache models that fail due to quota so we skip them on subsequent requests.
+QUOTA_BLOCKED_MODELS: set[str] = set()
+
+
+def get_ordered_model_candidates(available_models: List[str]) -> List[str]:
+    """Order models by preference and exclude quota-blocked models."""
+    preferred_models = [
+        # Put models that are usually available on free keys first.
+        "models/gemini-2.5-flash",
+        "gemini-2.5-flash",
+        "models/gemini-flash-latest",
+        "gemini-flash-latest",
+        "models/gemini-2.5-flash-lite",
+        "gemini-2.5-flash-lite",
+        "models/gemini-flash-lite-latest",
+        "gemini-flash-lite-latest",
+        # Keep older families as fallback.
+        "models/gemini-2.0-flash",
+        "gemini-2.0-flash",
+        "models/gemini-2.0-flash-lite",
+        "gemini-2.0-flash-lite",
+        "models/gemini-1.5-flash",
+        "gemini-1.5-flash",
+        "models/gemini-1.5-pro",
+        "gemini-1.5-pro",
+        "models/gemini-pro-latest",
+        "gemini-pro-latest",
+        "models/gemini-pro",
+        "gemini-pro",
+    ]
+
+    available = [m for m in available_models if m not in QUOTA_BLOCKED_MODELS]
+    ordered = [m for m in preferred_models if m in available]
+    ordered.extend(m for m in available if m not in ordered)
+    return ordered
+
 # Pydantic models for LLM endpoints
 class ChatMessage(BaseModel):
     role: str
@@ -168,21 +205,9 @@ Be professional, evidence-based, and concise."""
             "Check API key permissions and Gemini API enablement."
         )
 
-    preferred_models = [
-        "models/gemini-2.0-flash",
-        "gemini-2.0-flash",
-        "models/gemini-2.0-flash-lite",
-        "gemini-2.0-flash-lite",
-        "models/gemini-1.5-flash",
-        "gemini-1.5-flash",
-        "models/gemini-1.5-pro",
-        "gemini-1.5-pro",
-        "models/gemini-pro",
-        "gemini-pro",
-    ]
-    model_names = [m for m in preferred_models if m in available_models]
-    model_names.extend(m for m in available_models if m not in model_names)
+    model_names = get_ordered_model_candidates(available_models)
     print(f"✅ Chat available models: {available_models}")
+    print(f"✅ Chat candidate models: {model_names}")
 
     response_text = None
     used_model = None
@@ -215,7 +240,11 @@ Be professional, evidence-based, and concise."""
                 print(f"✅ Successfully used chat model: {model_name}")
                 break
             except Exception as model_err:
-                print(f"⚠️ Chat model {model_name} failed: {str(model_err)}")
+                err_str = str(model_err)
+                if "429" in err_str or "quota exceeded" in err_str.lower():
+                    QUOTA_BLOCKED_MODELS.add(model_name)
+                    print(f"⏭️ Skipping quota-blocked chat model: {model_name}")
+                print(f"⚠️ Chat model {model_name} failed: {err_str}")
                 continue
 
     if not response_text:
@@ -298,21 +327,9 @@ Analyse ALL the clinical data and the attached colposcopy images to generate a p
             "Check API key permissions and Gemini API enablement."
         )
 
-    preferred_models = [
-        "models/gemini-2.0-flash",
-        "gemini-2.0-flash",
-        "models/gemini-2.0-flash-lite",
-        "gemini-2.0-flash-lite",
-        "models/gemini-1.5-flash",
-        "gemini-1.5-flash",
-        "models/gemini-1.5-pro",
-        "gemini-1.5-pro",
-        "models/gemini-pro",
-        "gemini-pro",
-    ]
-    model_names = [m for m in preferred_models if m in available_models]
-    model_names.extend(m for m in available_models if m not in model_names)
+    model_names = get_ordered_model_candidates(available_models)
     print(f"✅ Report available models: {available_models}")
+    print(f"✅ Report candidate models: {model_names}")
 
     response_text = None
     used_model = None
@@ -333,7 +350,11 @@ Analyse ALL the clinical data and the attached colposcopy images to generate a p
                 print(f"✅ Successfully used model: {model_name}")
                 break
             except Exception as model_err:
-                print(f"⚠️ Model {model_name} failed: {str(model_err)}")
+                err_str = str(model_err)
+                if "429" in err_str or "quota exceeded" in err_str.lower():
+                    QUOTA_BLOCKED_MODELS.add(model_name)
+                    print(f"⏭️ Skipping quota-blocked report model: {model_name}")
+                print(f"⚠️ Model {model_name} failed: {err_str}")
                 continue
 
     if not response_text:
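Read as a whole, the change works in two stages: get_ordered_model_candidates ranks whatever the API reports as available against a static preference list, and the except handlers feed QUOTA_BLOCKED_MODELS so any model that returned a 429 is dropped from every later ranking. Below is a minimal standalone sketch of that behavior, not the app itself: the helper body and the quota check are copied from the diff, while the shortened preference list, the is_quota_error name, and the hard-coded available list are illustrative stand-ins (backend/app.py presumably builds available_models from the live model listing via get_supported_gemini_models()).

from typing import List

# Module-level cache of models that have hit quota errors (as in the diff).
QUOTA_BLOCKED_MODELS: set[str] = set()

# Shortened stand-in for the full preference list in the diff.
PREFERRED_MODELS = [
    "models/gemini-2.5-flash",
    "gemini-2.5-flash",
    "models/gemini-2.0-flash",
    "gemini-2.0-flash",
]


def get_ordered_model_candidates(available_models: List[str]) -> List[str]:
    """Order models by preference and exclude quota-blocked models."""
    available = [m for m in available_models if m not in QUOTA_BLOCKED_MODELS]
    ordered = [m for m in PREFERRED_MODELS if m in available]
    ordered.extend(m for m in available if m not in ordered)
    return ordered


def is_quota_error(err: Exception) -> bool:
    """The same string check the diff performs inline on str(model_err)."""
    text = str(err)
    return "429" in text or "quota exceeded" in text.lower()


# Hypothetical list standing in for what the API reports as available.
available = ["gemini-2.0-flash", "gemini-2.5-flash", "gemini-exp-1206"]

print(get_ordered_model_candidates(available))
# -> ['gemini-2.5-flash', 'gemini-2.0-flash', 'gemini-exp-1206']

# Simulate the top candidate returning a 429; later requests skip it.
err = Exception("429 Resource has been exhausted (e.g. check quota).")
if is_quota_error(err):
    QUOTA_BLOCKED_MODELS.add("gemini-2.5-flash")

print(get_ordered_model_candidates(available))
# -> ['gemini-2.0-flash', 'gemini-exp-1206']

One trade-off worth noting: the cache lives for the lifetime of the process, so a model added to QUOTA_BLOCKED_MODELS stays skipped until restart, which matches the commit's intent of not re-probing quota-exhausted models on every request.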