RayMelius Claude Sonnet 4.6 committed on
Commit
8005664
·
1 Parent(s): 9b3bd67

Fix Gemini RPM (4 not 14) and add daily quota warnings at 50/70/90/99%

Browse files

Free tier is 5 RPM (not 15) — using max_rpm=14 caused every call to 429.
Now uses max_rpm=4 to stay safely under 5 RPM.

Adds _track_daily_request() called on every successful response:
- Warns at 50%, 70%, 90%, 99% of GEMINI_DAILY_LIMIT (default 1500 RPD)
- Resets counter at midnight Pacific Time
- Daily limit overridable via GEMINI_DAILY_LIMIT env var

Free tier facts: 5 RPM, ~1500 RPD, 250k TPM; resets midnight Pacific.
Cost on paid tier: ~$0.000133/request (~$0.20/day at 1500 RPD).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. src/soci/engine/llm.py +39 -3
src/soci/engine/llm.py CHANGED
@@ -625,10 +625,18 @@ class GroqClient:
625
  class GeminiClient:
626
  """Google Gemini via the OpenAI-compatible AI Studio endpoint.
627
 
628
- Free tier (no credit card):
629
- - gemini-2.0-flash: 15 RPM, 1 M tokens/day — plenty for a simulation.
 
 
630
  - Get a free key at https://aistudio.google.com/apikey
631
  Uses the OpenAI-compatible endpoint so no extra SDK is needed.
 
 
 
 
 
 
632
  """
633
 
634
  def __init__(
@@ -636,7 +644,8 @@ class GeminiClient:
636
  api_key: Optional[str] = None,
637
  default_model: str = MODEL_GEMINI_FLASH,
638
  max_retries: int = 3,
639
- max_rpm: int = 14, # stay under the 15 RPM free-tier limit
 
640
  ) -> None:
641
  self.api_key = api_key or os.environ.get("GEMINI_API_KEY", "")
642
  if not self.api_key:
@@ -660,10 +669,35 @@ class GeminiClient:
660
  self._last_request_time: float = 0.0
661
  self._rate_lock = asyncio.Lock()
662
  self._rate_limited_until: float = 0.0
 
 
 
 
 
663
 
664
  def _is_quota_exhausted(self) -> bool:
665
  return time.monotonic() < self._rate_limited_until
666
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
  async def _wait_for_rate_limit(self) -> None:
668
  async with self._rate_lock:
669
  now = time.monotonic()
@@ -717,6 +751,7 @@ class GeminiClient:
717
  data = resp.json()
718
  usage = data.get("usage", {})
719
  self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
 
720
  return data["choices"][0]["message"]["content"]
721
  except httpx.HTTPStatusError as e:
722
  if e.response.status_code == 429:
@@ -787,6 +822,7 @@ class GeminiClient:
787
  data = resp.json()
788
  usage = data.get("usage", {})
789
  self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
 
790
  text = data["choices"][0]["message"]["content"]
791
  return _parse_json_response(text)
792
  except httpx.HTTPStatusError as e:
 
625
  class GeminiClient:
626
  """Google Gemini via the OpenAI-compatible AI Studio endpoint.
627
 
628
+ Free tier (no credit card, as of 2026):
629
+ - gemini-2.0-flash: 5 RPM, ~1,500 RPD, 250,000 TPM
630
+ - Daily quota resets at midnight Pacific Time
631
+ - Paid tier: $0.10/1M input tokens, $0.40/1M output tokens
632
  - Get a free key at https://aistudio.google.com/apikey
633
  Uses the OpenAI-compatible endpoint so no extra SDK is needed.
634
+
635
+ Token/cost guide (typical Soci request ~1,000 input + 90 output tokens):
636
+ - Cost per request (paid): ~$0.000133 ($0.10 input + $0.40 output per 1M)
637
+ - 1,500 RPD free tier ≈ $0.20/day on paid tier
638
+ - 500 RPD usage ≈ $0.07/day
639
+ - Override daily limit via GEMINI_DAILY_LIMIT env var.
640
  """
641
 
642
  def __init__(
 
644
  api_key: Optional[str] = None,
645
  default_model: str = MODEL_GEMINI_FLASH,
646
  max_retries: int = 3,
647
+ max_rpm: int = 4, # stay under 5 RPM free-tier limit (was 14, caused constant 429s)
648
+ daily_limit: int = 1500, # free-tier RPD; override with GEMINI_DAILY_LIMIT
649
  ) -> None:
650
  self.api_key = api_key or os.environ.get("GEMINI_API_KEY", "")
651
  if not self.api_key:
 
669
  self._last_request_time: float = 0.0
670
  self._rate_lock = asyncio.Lock()
671
  self._rate_limited_until: float = 0.0
672
+ # Daily usage tracking — resets at midnight Pacific (UTC-8/-7)
673
+ self._daily_limit: int = int(os.environ.get("GEMINI_DAILY_LIMIT", str(daily_limit)))
674
+ self._daily_requests: int = 0
675
+ self._daily_date: str = "" # "YYYY-MM-DD" in Pacific time
676
+ self._warned_thresholds: set = set() # tracks which % levels were already logged
677
 
678
  def _is_quota_exhausted(self) -> bool:
679
  return time.monotonic() < self._rate_limited_until
680
 
681
+ def _track_daily_request(self) -> None:
682
+ """Increment daily counter and log warnings at 50/70/90/99% of the daily limit."""
683
+ import datetime as _dt
684
+ # Pacific time offset: UTC-8 (PST) / UTC-7 (PDT). Use -8 as a safe conservative value.
685
+ pacific_offset = _dt.timezone(_dt.timedelta(hours=-8))
686
+ today = _dt.datetime.now(pacific_offset).strftime("%Y-%m-%d")
687
+ if today != self._daily_date:
688
+ self._daily_date = today
689
+ self._daily_requests = 0
690
+ self._warned_thresholds = set()
691
+ self._daily_requests += 1
692
+ pct = self._daily_requests / self._daily_limit
693
+ for threshold in (0.50, 0.70, 0.90, 0.99):
694
+ if pct >= threshold and threshold not in self._warned_thresholds:
695
+ self._warned_thresholds.add(threshold)
696
+ logger.warning(
697
+ f"Gemini daily quota: {self._daily_requests}/{self._daily_limit} requests used "
698
+ f"({pct * 100:.0f}%) — resets at midnight Pacific Time"
699
+ )
700
+
701
  async def _wait_for_rate_limit(self) -> None:
702
  async with self._rate_lock:
703
  now = time.monotonic()
 
751
  data = resp.json()
752
  usage = data.get("usage", {})
753
  self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
754
+ self._track_daily_request()
755
  return data["choices"][0]["message"]["content"]
756
  except httpx.HTTPStatusError as e:
757
  if e.response.status_code == 429:
 
822
  data = resp.json()
823
  usage = data.get("usage", {})
824
  self.usage.record(model, usage.get("prompt_tokens", 0), usage.get("completion_tokens", 0))
825
+ self._track_daily_request()
826
  text = data["choices"][0]["message"]["content"]
827
  return _parse_json_response(text)
828
  except httpx.HTTPStatusError as e: