Reset Gemini circuit breaker after failed startup probe
Browse files
A single 429 on the probe was circuit-breaking for the entire day,
leaving the simulation in routine-only mode. Now the breaker resets
so the sim loop can retry gracefully per-tick.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- src/soci/api/server.py +7 -0
src/soci/api/server.py
CHANGED
|
@@ -334,6 +334,10 @@ async def lifespan(app: FastAPI):
|
|
| 334 |
if not probe:
|
| 335 |
last_err = getattr(llm, "_last_error", "") or getattr(llm, "_auth_error", "")
|
| 336 |
logger.warning(f"Provider '{_llm_provider}' failed probe ({last_err}) — trying fallbacks")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
for fallback in _fallback_order:
|
| 338 |
if fallback == _llm_provider:
|
| 339 |
continue
|
|
@@ -349,6 +353,9 @@ async def lifespan(app: FastAPI):
|
|
| 349 |
continue
|
| 350 |
else:
|
| 351 |
logger.warning("All provider fallbacks failed — simulation will run in routine-only mode")
|
|
|
|
|
|
|
|
|
|
| 352 |
|
| 353 |
# Default LLM call probability — 0.10 for all providers to conserve daily quotas.
|
| 354 |
# At 0.10: ~15 calls/h with Gemini (5 RPM) — stays well within 1500 RPD limit.
|
|
|
|
| 334 |
if not probe:
|
| 335 |
last_err = getattr(llm, "_last_error", "") or getattr(llm, "_auth_error", "")
|
| 336 |
logger.warning(f"Provider '{_llm_provider}' failed probe ({last_err}) — trying fallbacks")
|
| 337 |
+
# Reset circuit breaker — a failed probe shouldn't block the whole day;
|
| 338 |
+
# the simulation loop will handle rate limits gracefully per-tick.
|
| 339 |
+
if hasattr(llm, "_rate_limited_until"):
|
| 340 |
+
llm._rate_limited_until = 0.0
|
| 341 |
for fallback in _fallback_order:
|
| 342 |
if fallback == _llm_provider:
|
| 343 |
continue
|
|
|
|
| 353 |
continue
|
| 354 |
else:
|
| 355 |
logger.warning("All provider fallbacks failed — simulation will run in routine-only mode")
|
| 356 |
+
# Reset circuit breaker on the original provider so it can retry during simulation
|
| 357 |
+
if hasattr(llm, "_rate_limited_until"):
|
| 358 |
+
llm._rate_limited_until = 0.0
|
| 359 |
|
| 360 |
# Default LLM call probability — 0.10 for all providers to conserve daily quotas.
|
| 361 |
# At 0.10: ~15 calls/h with Gemini (5 RPM) — stays well within 1500 RPD limit.
|