Spaces:
Sleeping
Sleeping
Add Gemini 429 retry with exponential backoff in HF fallback
Browse files
Root cause: 6 parallel agents all call Gemini simultaneously when HF is exhausted.
Free-tier rate limit (15 RPM) causes 429 on unlucky agents (strutturali,
consistency, financial). Previously: 429 → return empty content, no retry.
Fix: retry loop (3 attempts) with exponential backoff (about 2s, then about 4s, each plus up to 1s of random jitter).
Only retries on 429/rate-limit errors; non-rate errors fail immediately.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
agents.py
CHANGED
|
@@ -1194,28 +1194,38 @@ e soggette a revisione post-allineamento.
|
|
| 1194 |
# Gemini fallback quando tutti i modelli HF falliscono
|
| 1195 |
_gapi_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY") or GEMINI_API_KEY
|
| 1196 |
if GENAI_AVAILABLE and _gapi_key:
|
| 1197 |
-
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1205 |
)
|
| 1206 |
-
|
| 1207 |
-
|
| 1208 |
-
|
| 1209 |
-
|
| 1210 |
-
|
| 1211 |
-
|
| 1212 |
-
|
| 1213 |
-
|
| 1214 |
-
|
| 1215 |
-
|
| 1216 |
-
|
| 1217 |
-
|
| 1218 |
-
|
|
|
|
|
|
|
|
|
|
| 1219 |
else:
|
| 1220 |
wrapped = handle_exception(e, context=f"agent_{self.spec.name}_hf")
|
| 1221 |
log_exception(wrapped, context=f"agent_{self.spec.name}")
|
|
|
|
| 1194 |
# Gemini fallback quando tutti i modelli HF falliscono
|
| 1195 |
_gapi_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY") or GEMINI_API_KEY
|
| 1196 |
if GENAI_AVAILABLE and _gapi_key:
|
| 1197 |
+
citations = []
|
| 1198 |
+
for _gemini_attempt in range(3):
|
| 1199 |
+
try:
|
| 1200 |
+
if _gemini_attempt > 0:
|
| 1201 |
+
_retry_delay = (2 ** _gemini_attempt) + __import__('random').uniform(0, 1)
|
| 1202 |
+
logger.warning(f"Agent {self.spec.name}: Gemini retry {_gemini_attempt}/2 in {_retry_delay:.1f}s")
|
| 1203 |
+
time.sleep(_retry_delay)
|
| 1204 |
+
genai.configure(api_key=_gapi_key)
|
| 1205 |
+
_fallback_model = genai.GenerativeModel("gemini-2.0-flash")
|
| 1206 |
+
_response = _fallback_model.generate_content(
|
| 1207 |
+
prompt_content,
|
| 1208 |
+
generation_config=genai.types.GenerationConfig(
|
| 1209 |
+
temperature=MODEL_CONFIG.temperature,
|
| 1210 |
+
max_output_tokens=MODEL_CONFIG.max_tokens,
|
| 1211 |
+
)
|
| 1212 |
)
|
| 1213 |
+
raw_content = _response.text or ""
|
| 1214 |
+
if not raw_content.strip():
|
| 1215 |
+
logger.error(f"Agent {self.spec.name}: Gemini fallback returned EMPTY content (finish_reason={getattr(_response.candidates[0] if _response.candidates else None, 'finish_reason', 'unknown')})")
|
| 1216 |
+
else:
|
| 1217 |
+
logger.info(f"Agent {self.spec.name}: Gemini fallback OK ({len(raw_content)} chars)")
|
| 1218 |
+
self.cache.set(prompt_content, raw_content)
|
| 1219 |
+
break # success — exit retry loop
|
| 1220 |
+
except Exception as gemini_e:
|
| 1221 |
+
_is_rate_limit = '429' in str(gemini_e) or 'exhausted' in str(gemini_e).lower()
|
| 1222 |
+
if _is_rate_limit and _gemini_attempt < 2:
|
| 1223 |
+
logger.warning(f"Agent {self.spec.name}: Gemini 429 attempt {_gemini_attempt+1}/3, will retry")
|
| 1224 |
+
continue
|
| 1225 |
+
logger.error(f"Agent {self.spec.name}: Gemini fallback FAILED — {type(gemini_e).__name__}: {gemini_e}")
|
| 1226 |
+
wrapped = handle_exception(gemini_e, context=f"agent_{self.spec.name}_gemini_fallback")
|
| 1227 |
+
log_exception(wrapped, context=f"agent_{self.spec.name}")
|
| 1228 |
+
return {'content': '', 'sources': [], 'unverified_claims': [], 'metadata': {'error_id': wrapped.error_id, 'error_msg': wrapped.user_message}}
|
| 1229 |
else:
|
| 1230 |
wrapped = handle_exception(e, context=f"agent_{self.spec.name}_hf")
|
| 1231 |
log_exception(wrapped, context=f"agent_{self.spec.name}")
|