RayMelius Claude Opus 4.6 committed on
Commit
48ee346
·
1 Parent(s): b709098

Fix Gemini circuit breaker: distinguish per-minute rate limit from daily quota

Browse files

Gemini uses "quota" in ALL 429 responses, even per-minute rate limits.
The circuit breaker was triggering on the first per-minute 429, declaring
daily quota exhausted and blocking all calls until midnight.

Now only circuit-breaks on "per-day"/"per day"/"daily" keywords or retry-after > 120s.
Per-minute throttles wait up to 30s and retry normally.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. src/soci/engine/llm.py +13 -6
src/soci/engine/llm.py CHANGED
@@ -838,15 +838,19 @@ class GeminiClient:
838
  wait = float(retry_after)
839
  except (ValueError, TypeError):
840
  wait = 5.0
841
- # Daily quota exhausted — Gemini sends retry-after:5 even for daily limits,
842
- # so detect via message body and circuit-break until midnight Pacific.
843
- if "quota" in body_raw.lower() or wait > 30:
 
 
844
  circuit_wait = self._secs_until_pacific_midnight()
845
  self._rate_limited_until = time.monotonic() + circuit_wait
846
  self._last_error = f"daily quota exhausted β€” resets in {circuit_wait/3600:.1f}h"
847
  logger.warning(f"Gemini daily quota exhausted β€” circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
848
  return ""
849
- logger.warning(f"Gemini 429: {body} β€” waiting {wait}s")
 
 
850
  await asyncio.sleep(wait)
851
  elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
852
  # Model not available on this endpoint (any status code) — try fallback
@@ -923,12 +927,15 @@ class GeminiClient:
923
  wait = float(retry_after)
924
  except (ValueError, TypeError):
925
  wait = 5.0
926
- if "quota" in body_raw.lower() or wait > 30:
 
 
927
  circuit_wait = self._secs_until_pacific_midnight()
928
  self._rate_limited_until = time.monotonic() + circuit_wait
929
  logger.warning(f"Gemini daily quota exhausted β€” circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
930
  return {}
931
- logger.warning(f"Gemini 429 (json): {body} β€” waiting {wait}s")
 
932
  await asyncio.sleep(wait)
933
  elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
934
  # Model not available on this endpoint (any status code) — try fallback
 
838
  wait = float(retry_after)
839
  except (ValueError, TypeError):
840
  wait = 5.0
841
+ # Distinguish daily quota from per-minute rate limit.
842
+ # Gemini uses "quota" in ALL 429 bodies, so check for daily-specific keywords.
843
+ body_lower = body_raw.lower()
844
+ is_daily = "per-day" in body_lower or "per day" in body_lower or "daily" in body_lower or wait > 120
845
+ if is_daily:
846
  circuit_wait = self._secs_until_pacific_midnight()
847
  self._rate_limited_until = time.monotonic() + circuit_wait
848
  self._last_error = f"daily quota exhausted β€” resets in {circuit_wait/3600:.1f}h"
849
  logger.warning(f"Gemini daily quota exhausted β€” circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
850
  return ""
851
+ # Per-minute rate limit — wait and retry
852
+ wait = min(wait, 30.0)
853
+ logger.info(f"Gemini per-minute rate limit β€” waiting {wait:.0f}s before retry")
854
  await asyncio.sleep(wait)
855
  elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
856
  # Model not available on this endpoint (any status code) — try fallback
 
927
  wait = float(retry_after)
928
  except (ValueError, TypeError):
929
  wait = 5.0
930
+ body_lower = body_raw.lower()
931
+ is_daily = "per-day" in body_lower or "per day" in body_lower or "daily" in body_lower or wait > 120
932
+ if is_daily:
933
  circuit_wait = self._secs_until_pacific_midnight()
934
  self._rate_limited_until = time.monotonic() + circuit_wait
935
  logger.warning(f"Gemini daily quota exhausted β€” circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
936
  return {}
937
+ wait = min(wait, 30.0)
938
+ logger.info(f"Gemini per-minute rate limit β€” waiting {wait:.0f}s before retry")
939
  await asyncio.sleep(wait)
940
  elif any(kw in body_raw.lower() for kw in _GEMINI_MODEL_UNAVAILABLE_KWS):
941
  # Model not available on this endpoint (any status code) — try fallback