Fix Gemini circuit-breaker: expire at midnight Pacific, not fixed 8h
Browse files
The fixed 28800s (8h) window could block calls even after the daily
quota reset at midnight Pacific. Now computes exact seconds until
midnight Pacific so the circuit-breaker lifts the moment Gemini's
quota refreshes.
Also improves quota warning to show remaining requests and hours
until reset: "750 remaining, resets in 4.2h (midnight Pacific)".
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- src/soci/engine/llm.py +22 -4
src/soci/engine/llm.py
CHANGED
|
@@ -678,6 +678,22 @@ class GeminiClient:
|
|
| 678 |
def _is_quota_exhausted(self) -> bool:
|
| 679 |
return time.monotonic() < self._rate_limited_until
|
| 680 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
def _track_daily_request(self) -> None:
|
| 682 |
"""Increment daily counter and log warnings at 50/70/90/99% of the daily limit."""
|
| 683 |
import datetime as _dt
|
|
@@ -690,12 +706,14 @@ class GeminiClient:
|
|
| 690 |
self._warned_thresholds = set()
|
| 691 |
self._daily_requests += 1
|
| 692 |
pct = self._daily_requests / self._daily_limit
|
|
|
|
| 693 |
for threshold in (0.50, 0.70, 0.90, 0.99):
|
| 694 |
if pct >= threshold and threshold not in self._warned_thresholds:
|
| 695 |
self._warned_thresholds.add(threshold)
|
|
|
|
| 696 |
logger.warning(
|
| 697 |
f"Gemini daily quota: {self._daily_requests}/{self._daily_limit} requests used "
|
| 698 |
-
f"({pct * 100:.0f}%) — resets
|
| 699 |
)
|
| 700 |
|
| 701 |
async def _wait_for_rate_limit(self) -> None:
|
|
@@ -762,12 +780,12 @@ class GeminiClient:
|
|
| 762 |
wait = 5.0
|
| 763 |
body_raw = e.response.text or ""
|
| 764 |
# Daily quota exhausted — Gemini sends retry-after:5 even for daily limits,
|
| 765 |
-
# so detect via message body and circuit-break
|
| 766 |
if "quota" in body_raw.lower() or wait > 30:
|
| 767 |
-
circuit_wait =
|
| 768 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 769 |
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 770 |
-
logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h: {body}")
|
| 771 |
return ""
|
| 772 |
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 773 |
logger.warning(f"Gemini 429: {body} — waiting {wait}s")
|
|
|
|
| 678 |
def _is_quota_exhausted(self) -> bool:
|
| 679 |
return time.monotonic() < self._rate_limited_until
|
| 680 |
|
| 681 |
+
@staticmethod
|
| 682 |
+
def _secs_until_pacific_midnight() -> float:
|
| 683 |
+
"""Seconds from now until the next midnight Pacific Time (UTC-8).
|
| 684 |
+
|
| 685 |
+
Gemini free-tier quotas reset at midnight Pacific, so this is the
|
| 686 |
+
correct circuit-breaker duration after daily quota exhaustion.
|
| 687 |
+
"""
|
| 688 |
+
import datetime as _dt
|
| 689 |
+
pacific = _dt.timezone(_dt.timedelta(hours=-8))
|
| 690 |
+
now = _dt.datetime.now(pacific)
|
| 691 |
+
midnight = (now + _dt.timedelta(days=1)).replace(
|
| 692 |
+
hour=0, minute=0, second=0, microsecond=0
|
| 693 |
+
)
|
| 694 |
+
secs = (midnight - now).total_seconds()
|
| 695 |
+
return max(secs, 60.0) # at least 60s even if we're right at midnight
|
| 696 |
+
|
| 697 |
def _track_daily_request(self) -> None:
|
| 698 |
"""Increment daily counter and log warnings at 50/70/90/99% of the daily limit."""
|
| 699 |
import datetime as _dt
|
|
|
|
| 706 |
self._warned_thresholds = set()
|
| 707 |
self._daily_requests += 1
|
| 708 |
pct = self._daily_requests / self._daily_limit
|
| 709 |
+
remaining = self._daily_limit - self._daily_requests
|
| 710 |
for threshold in (0.50, 0.70, 0.90, 0.99):
|
| 711 |
if pct >= threshold and threshold not in self._warned_thresholds:
|
| 712 |
self._warned_thresholds.add(threshold)
|
| 713 |
+
hrs = self._secs_until_pacific_midnight() / 3600
|
| 714 |
logger.warning(
|
| 715 |
f"Gemini daily quota: {self._daily_requests}/{self._daily_limit} requests used "
|
| 716 |
+
f"({pct * 100:.0f}%) — {remaining} remaining, resets in {hrs:.1f}h (midnight Pacific)"
|
| 717 |
)
|
| 718 |
|
| 719 |
async def _wait_for_rate_limit(self) -> None:
|
|
|
|
| 780 |
wait = 5.0
|
| 781 |
body_raw = e.response.text or ""
|
| 782 |
# Daily quota exhausted — Gemini sends retry-after:5 even for daily limits,
|
| 783 |
+
# so detect via message body and circuit-break until midnight Pacific.
|
| 784 |
if "quota" in body_raw.lower() or wait > 30:
|
| 785 |
+
circuit_wait = self._secs_until_pacific_midnight()
|
| 786 |
self._rate_limited_until = time.monotonic() + circuit_wait
|
| 787 |
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 788 |
+
logger.warning(f"Gemini daily quota exhausted — circuit-breaking for {circuit_wait/3600:.1f}h (until midnight Pacific): {body}")
|
| 789 |
return ""
|
| 790 |
body = body_raw[:200].replace("{", "(").replace("}", ")")
|
| 791 |
logger.warning(f"Gemini 429: {body} — waiting {wait}s")
|