Fix Gemini circuit breaker: distinguish per-minute rate limit from daily quota
Browse files
Gemini uses "quota" in ALL 429 responses, even per-minute rate limits.
The circuit breaker was triggering on the first per-minute 429, declaring
daily quota exhausted and blocking all calls until midnight.
Now only circuit-breaks on "per-day"/"per day"/"daily" keywords or retry-after > 120s.
Per-minute throttles wait up to 30s and retry normally.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- src/soci/engine/llm.py +13 -6
src/soci/engine/llm.py
CHANGED
|
@@ -838,15 +838,19 @@ class GeminiClient:
|
|
| 838 |
wait = float(retry_after)
|
| 839 |
except (ValueError, TypeError):
|
| 840 |
wait = 5.0
|
| 841 |
-
#
|
| 842 |
-
#
|
| 843 |
-
|
|
|
|
|
|
|
| 844 |
circuit_wait = self._secs_until_pacific_midnight()
|
| 845 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 846 |
self._last_error = f"daily quota exhausted β resets in {circuit_wait/3600:.1f}h"
|
| 847 |
logger.warning(f"Gemini daily quota exhausted β circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
|
| 848 |
return ""
|
| 849 |
-
|
|
|
|
|
|
|
| 850 |
await asyncio.sleep(wait)
|
| 851 |
elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
|
| 852 |
# Model not available on this endpoint (any status code) — try fallback
|
|
@@ -923,12 +927,15 @@ class GeminiClient:
|
|
| 923 |
wait = float(retry_after)
|
| 924 |
except (ValueError, TypeError):
|
| 925 |
wait = 5.0
|
| 926 |
-
|
|
|
|
|
|
|
| 927 |
circuit_wait = self._secs_until_pacific_midnight()
|
| 928 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 929 |
logger.warning(f"Gemini daily quota exhausted β circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
|
| 930 |
return {}
|
| 931 |
-
|
|
|
|
| 932 |
await asyncio.sleep(wait)
|
| 933 |
elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
|
| 934 |
# Model not available on this endpoint (any status code) — try fallback
|
|
|
|
| 838 |
wait = float(retry_after)
|
| 839 |
except (ValueError, TypeError):
|
| 840 |
wait = 5.0
|
| 841 |
+
# Distinguish daily quota from per-minute rate limit.
|
| 842 |
+
# Gemini uses "quota" in ALL 429 bodies, so check for daily-specific keywords.
|
| 843 |
+
body_lower = body_raw.lower()
|
| 844 |
+
is_daily = "per-day" in body_lower or "per day" in body_lower or "daily" in body_lower or wait > 120
|
| 845 |
+
if is_daily:
|
| 846 |
circuit_wait = self._secs_until_pacific_midnight()
|
| 847 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 848 |
self._last_error = f"daily quota exhausted β resets in {circuit_wait/3600:.1f}h"
|
| 849 |
logger.warning(f"Gemini daily quota exhausted β circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
|
| 850 |
return ""
|
| 851 |
+
# Per-minute rate limit — wait and retry
|
| 852 |
+
wait = min(wait, 30.0)
|
| 853 |
+
logger.info(f"Gemini per-minute rate limit β waiting {wait:.0f}s before retry")
|
| 854 |
await asyncio.sleep(wait)
|
| 855 |
elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
|
| 856 |
# Model not available on this endpoint (any status code) — try fallback
|
|
|
|
| 927 |
wait = float(retry_after)
|
| 928 |
except (ValueError, TypeError):
|
| 929 |
wait = 5.0
|
| 930 |
+
body_lower = body_raw.lower()
|
| 931 |
+
is_daily = "per-day" in body_lower or "per day" in body_lower or "daily" in body_lower or wait > 120
|
| 932 |
+
if is_daily:
|
| 933 |
circuit_wait = self._secs_until_pacific_midnight()
|
| 934 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 935 |
logger.warning(f"Gemini daily quota exhausted β circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
|
| 936 |
return {}
|
| 937 |
+
wait = min(wait, 30.0)
|
| 938 |
+
logger.info(f"Gemini per-minute rate limit β waiting {wait:.0f}s before retry")
|
| 939 |
await asyncio.sleep(wait)
|
| 940 |
elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
|
| 941 |
# Model not available on this endpoint (any status code) — try fallback
|