mtornani Claude Sonnet 4.6 committed on
Commit
59aab12
·
1 Parent(s): 6667bcf

Add Gemini 429 retry with exponential backoff in HF fallback

Browse files

Root cause: all 6 parallel agents call Gemini simultaneously when HF is exhausted.
The free-tier rate limit (15 RPM) causes 429 errors on unlucky agents (strutturali,
consistency, financial). Previously: 429 → return empty content, no retry.

Fix: retry loop (3 attempts) with exponential backoff (2s, 4s + jitter).
Only retries on 429/rate-limit errors; non-rate errors fail immediately.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. agents.py +31 -21
agents.py CHANGED
@@ -1194,28 +1194,38 @@ e soggette a revisione post-allineamento.
1194
  # Gemini fallback quando tutti i modelli HF falliscono
1195
  _gapi_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY") or GEMINI_API_KEY
1196
  if GENAI_AVAILABLE and _gapi_key:
1197
- try:
1198
- genai.configure(api_key=_gapi_key)
1199
- _fallback_model = genai.GenerativeModel("gemini-2.0-flash")
1200
- _response = _fallback_model.generate_content(
1201
- prompt_content,
1202
- generation_config=genai.types.GenerationConfig(
1203
- temperature=MODEL_CONFIG.temperature,
1204
- max_output_tokens=MODEL_CONFIG.max_tokens,
 
 
 
 
 
 
 
1205
  )
1206
- )
1207
- raw_content = _response.text or ""
1208
- citations = []
1209
- if not raw_content.strip():
1210
- logger.error(f"Agent {self.spec.name}: Gemini fallback returned EMPTY content (finish_reason={getattr(_response.candidates[0] if _response.candidates else None, 'finish_reason', 'unknown')})")
1211
- else:
1212
- logger.info(f"Agent {self.spec.name}: Gemini fallback OK ({len(raw_content)} chars)")
1213
- self.cache.set(prompt_content, raw_content)
1214
- except Exception as gemini_e:
1215
- logger.error(f"Agent {self.spec.name}: Gemini fallback FAILED — {type(gemini_e).__name__}: {gemini_e}")
1216
- wrapped = handle_exception(gemini_e, context=f"agent_{self.spec.name}_gemini_fallback")
1217
- log_exception(wrapped, context=f"agent_{self.spec.name}")
1218
- return {'content': '', 'sources': [], 'unverified_claims': [], 'metadata': {'error_id': wrapped.error_id, 'error_msg': wrapped.user_message}}
 
 
 
1219
  else:
1220
  wrapped = handle_exception(e, context=f"agent_{self.spec.name}_hf")
1221
  log_exception(wrapped, context=f"agent_{self.spec.name}")
 
1194
  # Gemini fallback quando tutti i modelli HF falliscono
1195
  _gapi_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY") or GEMINI_API_KEY
1196
  if GENAI_AVAILABLE and _gapi_key:
1197
+ citations = []
1198
+ for _gemini_attempt in range(3):
1199
+ try:
1200
+ if _gemini_attempt > 0:
1201
+ _retry_delay = (2 ** _gemini_attempt) + __import__('random').uniform(0, 1)
1202
+ logger.warning(f"Agent {self.spec.name}: Gemini retry {_gemini_attempt}/2 in {_retry_delay:.1f}s")
1203
+ time.sleep(_retry_delay)
1204
+ genai.configure(api_key=_gapi_key)
1205
+ _fallback_model = genai.GenerativeModel("gemini-2.0-flash")
1206
+ _response = _fallback_model.generate_content(
1207
+ prompt_content,
1208
+ generation_config=genai.types.GenerationConfig(
1209
+ temperature=MODEL_CONFIG.temperature,
1210
+ max_output_tokens=MODEL_CONFIG.max_tokens,
1211
+ )
1212
  )
1213
+ raw_content = _response.text or ""
1214
+ if not raw_content.strip():
1215
+ logger.error(f"Agent {self.spec.name}: Gemini fallback returned EMPTY content (finish_reason={getattr(_response.candidates[0] if _response.candidates else None, 'finish_reason', 'unknown')})")
1216
+ else:
1217
+ logger.info(f"Agent {self.spec.name}: Gemini fallback OK ({len(raw_content)} chars)")
1218
+ self.cache.set(prompt_content, raw_content)
1219
+ break # success exit retry loop
1220
+ except Exception as gemini_e:
1221
+ _is_rate_limit = '429' in str(gemini_e) or 'exhausted' in str(gemini_e).lower()
1222
+ if _is_rate_limit and _gemini_attempt < 2:
1223
+ logger.warning(f"Agent {self.spec.name}: Gemini 429 attempt {_gemini_attempt+1}/3, will retry")
1224
+ continue
1225
+ logger.error(f"Agent {self.spec.name}: Gemini fallback FAILED {type(gemini_e).__name__}: {gemini_e}")
1226
+ wrapped = handle_exception(gemini_e, context=f"agent_{self.spec.name}_gemini_fallback")
1227
+ log_exception(wrapped, context=f"agent_{self.spec.name}")
1228
+ return {'content': '', 'sources': [], 'unverified_claims': [], 'metadata': {'error_id': wrapped.error_id, 'error_msg': wrapped.user_message}}
1229
  else:
1230
  wrapped = handle_exception(e, context=f"agent_{self.spec.name}_hf")
1231
  log_exception(wrapped, context=f"agent_{self.spec.name}")