RayMelius Claude Sonnet 4.6 committed on
Commit
89df68d
·
1 Parent(s): 8005664

Fix Gemini circuit-breaker: expire at midnight Pacific, not fixed 8h

Browse files

The fixed 28800s (8h) window could block calls even after the daily
quota reset at midnight Pacific. Now computes exact seconds until
midnight Pacific so the circuit-breaker lifts the moment Gemini's
quota refreshes.

Also improves quota warning to show remaining requests and hours
until reset: "750 remaining, resets in 4.2h (midnight Pacific)".

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. src/soci/engine/llm.py +22 -4
src/soci/engine/llm.py CHANGED
@@ -678,6 +678,22 @@ class GeminiClient:
678
  def _is_quota_exhausted(self) -> bool:
679
  return time.monotonic() < self._rate_limited_until
680
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
  def _track_daily_request(self) -> None:
682
  """Increment daily counter and log warnings at 50/70/90/99% of the daily limit."""
683
  import datetime as _dt
@@ -690,12 +706,14 @@ class GeminiClient:
690
  self._warned_thresholds = set()
691
  self._daily_requests += 1
692
  pct = self._daily_requests / self._daily_limit
 
693
  for threshold in (0.50, 0.70, 0.90, 0.99):
694
  if pct >= threshold and threshold not in self._warned_thresholds:
695
  self._warned_thresholds.add(threshold)
 
696
  logger.warning(
697
  f"Gemini daily quota: {self._daily_requests}/{self._daily_limit} requests used "
698
- f"({pct * 100:.0f}%) β€” resets at midnight Pacific Time"
699
  )
700
 
701
  async def _wait_for_rate_limit(self) -> None:
@@ -762,12 +780,12 @@ class GeminiClient:
762
  wait = 5.0
763
  body_raw = e.response.text or ""
764
  # Daily quota exhausted β€” Gemini sends retry-after:5 even for daily limits,
765
- # so detect via message body and circuit-break for 8 hours.
766
  if "quota" in body_raw.lower() or wait > 30:
767
- circuit_wait = max(wait, 28800) # 8 hours
768
  self._rate_limited_until = time.monotonic() + circuit_wait
769
  body = body_raw[:200].replace("{", "(").replace("}", ")")
770
- logger.warning(f"Gemini daily quota exhausted β€” circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
771
  return ""
772
  body = body_raw[:200].replace("{", "(").replace("}", ")")
773
  logger.warning(f"Gemini 429: {body} β€” waiting {wait}s")
 
678
  def _is_quota_exhausted(self) -> bool:
679
  return time.monotonic() < self._rate_limited_until
680
 
681
+ @staticmethod
682
+ def _secs_until_pacific_midnight() -> float:
683
+ """Seconds from now until the next midnight Pacific Time (UTC-8).
684
+
685
+ Gemini free-tier quotas reset at midnight Pacific, so this is the
686
+ correct circuit-breaker duration after daily quota exhaustion.
687
+ """
688
+ import datetime as _dt
689
+ pacific = _dt.timezone(_dt.timedelta(hours=-8))
690
+ now = _dt.datetime.now(pacific)
691
+ midnight = (now + _dt.timedelta(days=1)).replace(
692
+ hour=0, minute=0, second=0, microsecond=0
693
+ )
694
+ secs = (midnight - now).total_seconds()
695
+ return max(secs, 60.0) # at least 60s even if we're right at midnight
696
+
697
  def _track_daily_request(self) -> None:
698
  """Increment daily counter and log warnings at 50/70/90/99% of the daily limit."""
699
  import datetime as _dt
 
706
  self._warned_thresholds = set()
707
  self._daily_requests += 1
708
  pct = self._daily_requests / self._daily_limit
709
+ remaining = self._daily_limit - self._daily_requests
710
  for threshold in (0.50, 0.70, 0.90, 0.99):
711
  if pct >= threshold and threshold not in self._warned_thresholds:
712
  self._warned_thresholds.add(threshold)
713
+ hrs = self._secs_until_pacific_midnight() / 3600
714
  logger.warning(
715
  f"Gemini daily quota: {self._daily_requests}/{self._daily_limit} requests used "
716
+ f"({pct * 100:.0f}%) β€” {remaining} remaining, resets in {hrs:.1f}h (midnight Pacific)"
717
  )
718
 
719
  async def _wait_for_rate_limit(self) -> None:
 
780
  wait = 5.0
781
  body_raw = e.response.text or ""
782
  # Daily quota exhausted — Gemini sends retry-after:5 even for daily limits,
783
+ # so detect via message body and circuit-break until midnight Pacific.
784
  if "quota" in body_raw.lower() or wait > 30:
785
+ circuit_wait = self._secs_until_pacific_midnight()
786
  self._rate_limited_until = time.monotonic() + circuit_wait
787
  body = body_raw[:200].replace("{", "(").replace("}", ")")
788
+ logger.warning(f"Gemini daily quota exhausted β€” circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
789
  return ""
790
  body = body_raw[:200].replace("{", "(").replace("}", ")")
791
  logger.warning(f"Gemini 429: {body} β€” waiting {wait}s")