Mirrowel committed on
Commit
1ac7bd0
·
1 Parent(s): 65fe549

refactor(usage): 🔨 sync request_count across quota group models

Browse files

Refactor quota group handling to synchronize request_count across all models
in a quota group instead of recalculating it on each access. This improves
performance and ensures consistency.

The previous implementation iterated through all models in a group to sum
request counts and find baselines on each quota check. The new implementation
proactively syncs these values when requests are recorded.

Changes:
- Sync request_count to all models in a quota group when recording successes
or failures, and when updating quota baselines
- Quota estimation now reads values from a single representative model (the
first model in the group), since all models in the group are kept in sync
- Add credential filename parsing support for OAuth files without paths

Also in this commit:
- chore(gitignore): ignore quota_viewer_config.json

.gitignore CHANGED
@@ -124,9 +124,11 @@ start_proxy.bat
124
  key_usage.json
125
  staged_changes.txt
126
  launcher_config.json
 
127
  cache/antigravity/thought_signatures.json
128
  logs/
129
  cache/
130
  *.env
131
 
132
  oauth_creds/
 
 
124
  key_usage.json
125
  staged_changes.txt
126
  launcher_config.json
127
+ quota_viewer_config.json
128
  cache/antigravity/thought_signatures.json
129
  logs/
130
  cache/
131
  *.env
132
 
133
  oauth_creds/
134
+
src/rotator_library/providers/utilities/antigravity_quota_tracker.py CHANGED
@@ -651,41 +651,31 @@ class AntigravityQuotaTracker:
651
  cred_usage = usage_data[cred_path]
652
  models_usage = cred_usage.get("models", {})
653
 
654
- # Sum up request counts across all models in group
655
- total_requests = 0
656
- baseline_remaining = None
657
- baseline_fetched_at = None
658
- reset_time_iso = None
 
 
 
 
 
 
 
 
 
659
 
660
- for gm in group_models:
661
- # Try with and without provider prefix
662
- prefixed_model = f"antigravity/{gm}"
663
- model_usage = models_usage.get(
664
- prefixed_model
665
- ) or models_usage.get(gm, {})
666
-
667
- total_requests += model_usage.get("request_count", 0)
668
-
669
- # Use the first available baseline
670
- if baseline_remaining is None:
671
- baseline_remaining = model_usage.get(
672
- "baseline_remaining_fraction"
673
- )
674
- baseline_fetched_at = model_usage.get(
675
- "baseline_fetched_at"
676
- )
677
-
678
- # Use earliest reset time
679
- if model_usage.get("quota_reset_ts"):
680
- ts = model_usage["quota_reset_ts"]
681
- try:
682
- iso = datetime.fromtimestamp(
683
- ts, tz=timezone.utc
684
- ).isoformat()
685
- if reset_time_iso is None or iso < reset_time_iso:
686
- reset_time_iso = iso
687
- except (ValueError, OSError):
688
- pass
689
 
690
  # Calculate estimate
691
  # cost_per_request is in percentage (0.4 = 0.4%), convert to fraction
@@ -693,9 +683,11 @@ class AntigravityQuotaTracker:
693
  group_models[0], tier
694
  )
695
  cost_per_request_fraction = cost_per_request_percent / 100.0
696
- max_requests = self.get_max_requests_for_model(
697
- group_models[0], tier
698
- )
 
 
699
 
700
  if baseline_remaining is not None:
701
  estimated_remaining = baseline_remaining - (
 
651
  cred_usage = usage_data[cred_path]
652
  models_usage = cred_usage.get("models", {})
653
 
654
+ # Get request_count from representative model (synced across group)
655
+ # Try with and without provider prefix for first model in group
656
+ representative_model = group_models[0]
657
+ prefixed_model = f"antigravity/{representative_model}"
658
+ model_usage = models_usage.get(
659
+ prefixed_model
660
+ ) or models_usage.get(representative_model, {})
661
+
662
+ total_requests = model_usage.get("request_count", 0)
663
+ baseline_remaining = model_usage.get(
664
+ "baseline_remaining_fraction"
665
+ )
666
+ baseline_fetched_at = model_usage.get("baseline_fetched_at")
667
+ max_requests = model_usage.get("quota_max_requests")
668
 
669
+ # Get reset time from any model in group (also synced)
670
+ reset_time_iso = None
671
+ if model_usage.get("quota_reset_ts"):
672
+ ts = model_usage["quota_reset_ts"]
673
+ try:
674
+ reset_time_iso = datetime.fromtimestamp(
675
+ ts, tz=timezone.utc
676
+ ).isoformat()
677
+ except (ValueError, OSError):
678
+ pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
679
 
680
  # Calculate estimate
681
  # cost_per_request is in percentage (0.4 = 0.4%), convert to fraction
 
683
  group_models[0], tier
684
  )
685
  cost_per_request_fraction = cost_per_request_percent / 100.0
686
+ # Use max_requests from usage data if available, otherwise calculate
687
+ if max_requests is None:
688
+ max_requests = self.get_max_requests_for_model(
689
+ group_models[0], tier
690
+ )
691
 
692
  if baseline_remaining is not None:
693
  estimated_remaining = baseline_remaining - (
src/rotator_library/usage_manager.py CHANGED
@@ -186,6 +186,7 @@ class UsageManager:
186
  Supports multiple credential formats:
187
  - OAuth: "oauth_creds/antigravity_oauth_15.json" -> "antigravity"
188
  - OAuth: "C:\\...\\oauth_creds\\gemini_cli_oauth_1.json" -> "gemini_cli"
 
189
  - API key style: stored with provider prefix metadata
190
 
191
  Args:
@@ -199,7 +200,7 @@ class UsageManager:
199
  # Normalize path separators
200
  normalized = credential.replace("\\", "/")
201
 
202
- # Pattern: {provider}_oauth_{number}.json
203
  match = re.search(r"/([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
204
  if match:
205
  return match.group(1).lower()
@@ -209,6 +210,11 @@ class UsageManager:
209
  if match:
210
  return match.group(1).lower()
211
 
 
 
 
 
 
212
  return None
213
 
214
  def _get_provider_instance(self, provider: str) -> Optional[Any]:
@@ -337,22 +343,20 @@ class UsageManager:
337
  """
338
  Get usage count for credential selection, considering quota groups.
339
 
340
- If the model belongs to a quota group, returns the weighted combined usage
341
- across all models in the group. Otherwise returns individual model usage.
342
-
343
- Weights are applied per-model to account for models that consume more quota
344
- per request (e.g., Opus might count 2x compared to Sonnet).
345
-
346
  For providers in _REQUEST_COUNT_PROVIDERS (e.g., antigravity), uses
347
  request_count instead of success_count since failed requests also
348
  consume quota.
349
 
 
 
 
 
350
  Args:
351
  key: Credential identifier
352
  model: Model name (with provider prefix, e.g., "antigravity/claude-sonnet-4-5")
353
 
354
  Returns:
355
- Weighted combined usage if grouped, otherwise individual model usage
356
  """
357
  # Determine usage field based on provider
358
  # Some providers (antigravity) count failed requests against quota
@@ -363,7 +367,14 @@ class UsageManager:
363
  else "success_count"
364
  )
365
 
366
- # Check if model is in a quota group
 
 
 
 
 
 
 
367
  group = self._get_model_quota_group(key, model)
368
 
369
  if group:
@@ -1571,6 +1582,35 @@ class UsageManager:
1571
  model_data["success_count"] += 1
1572
  model_data["request_count"] = model_data.get("request_count", 0) + 1
1573
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1574
  # Update quota_display if max_requests is set (Antigravity-specific)
1575
  max_req = model_data.get("quota_max_requests")
1576
  if max_req:
@@ -1765,6 +1805,7 @@ class UsageManager:
1765
  # Track failure for quota estimation (request still consumes quota)
1766
  model_data["failure_count"] = model_data.get("failure_count", 0) + 1
1767
  model_data["request_count"] = model_data.get("request_count", 0) + 1
 
1768
 
1769
  # Apply to all models in the same quota group
1770
  group = self._get_model_quota_group(key, model)
@@ -1785,6 +1826,15 @@ class UsageManager:
1785
  },
1786
  )
1787
  group_model_data["quota_reset_ts"] = quota_reset_ts
 
 
 
 
 
 
 
 
 
1788
  # Also set transient cooldown for selection logic
1789
  model_cooldowns[grouped_model] = quota_reset_ts
1790
 
@@ -1887,6 +1937,35 @@ class UsageManager:
1887
  model_data["failure_count"] = model_data.get("failure_count", 0) + 1
1888
  model_data["request_count"] = model_data.get("request_count", 0) + 1
1889
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1890
  key_data["last_failure"] = {
1891
  "timestamp": now_ts,
1892
  "model": model,
@@ -1991,6 +2070,32 @@ class UsageManager:
1991
  model_data["quota_max_requests"] = max_requests
1992
  model_data["quota_display"] = f"{used_requests}/{max_requests}"
1993
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1994
  lib_logger.debug(
1995
  f"Updated quota baseline for {mask_credential(credential)} model={model}: "
1996
  f"remaining={remaining_fraction:.2%}, synced_request_count={used_requests}"
 
186
  Supports multiple credential formats:
187
  - OAuth: "oauth_creds/antigravity_oauth_15.json" -> "antigravity"
188
  - OAuth: "C:\\...\\oauth_creds\\gemini_cli_oauth_1.json" -> "gemini_cli"
189
+ - OAuth filename only: "antigravity_oauth_1.json" -> "antigravity"
190
  - API key style: stored with provider prefix metadata
191
 
192
  Args:
 
200
  # Normalize path separators
201
  normalized = credential.replace("\\", "/")
202
 
203
+ # Pattern: path ending with {provider}_oauth_{number}.json
204
  match = re.search(r"/([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
205
  if match:
206
  return match.group(1).lower()
 
210
  if match:
211
  return match.group(1).lower()
212
 
213
+ # Pattern: filename only {provider}_oauth_{number}.json (no path)
214
+ match = re.match(r"([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
215
+ if match:
216
+ return match.group(1).lower()
217
+
218
  return None
219
 
220
  def _get_provider_instance(self, provider: str) -> Optional[Any]:
 
343
  """
344
  Get usage count for credential selection, considering quota groups.
345
 
 
 
 
 
 
 
346
  For providers in _REQUEST_COUNT_PROVIDERS (e.g., antigravity), uses
347
  request_count instead of success_count since failed requests also
348
  consume quota.
349
 
350
+ If the model belongs to a quota group, the request_count is already
351
+ synced across all models in the group (by record_success/record_failure),
352
+ so we just read from the requested model directly.
353
+
354
  Args:
355
  key: Credential identifier
356
  model: Model name (with provider prefix, e.g., "antigravity/claude-sonnet-4-5")
357
 
358
  Returns:
359
+ Usage count for the model (synced across group if applicable)
360
  """
361
  # Determine usage field based on provider
362
  # Some providers (antigravity) count failed requests against quota
 
367
  else "success_count"
368
  )
369
 
370
+ # For providers with synced quota groups (antigravity), request_count
371
+ # is already synced across all models in the group, so just read directly.
372
+ # For other providers, we still need to sum success_count across group.
373
+ if provider in self._REQUEST_COUNT_PROVIDERS:
374
+ # request_count is synced - just read the model's value
375
+ return self._get_usage_count(key, model, usage_field)
376
+
377
+ # For non-synced providers, check if model is in a quota group and sum
378
  group = self._get_model_quota_group(key, model)
379
 
380
  if group:
 
1582
  model_data["success_count"] += 1
1583
  model_data["request_count"] = model_data.get("request_count", 0) + 1
1584
 
1585
+ # Sync request_count across quota group (for providers with shared quota pools)
1586
+ new_request_count = model_data["request_count"]
1587
+ group = self._get_model_quota_group(key, model)
1588
+ if group:
1589
+ grouped_models = self._get_grouped_models(key, group)
1590
+ for grouped_model in grouped_models:
1591
+ if grouped_model != model:
1592
+ other_model_data = key_data["models"].setdefault(
1593
+ grouped_model,
1594
+ {
1595
+ "window_start_ts": None,
1596
+ "quota_reset_ts": None,
1597
+ "success_count": 0,
1598
+ "failure_count": 0,
1599
+ "request_count": 0,
1600
+ "prompt_tokens": 0,
1601
+ "completion_tokens": 0,
1602
+ "approx_cost": 0.0,
1603
+ },
1604
+ )
1605
+ other_model_data["request_count"] = new_request_count
1606
+ # Also sync quota_max_requests if set
1607
+ max_req = model_data.get("quota_max_requests")
1608
+ if max_req:
1609
+ other_model_data["quota_max_requests"] = max_req
1610
+ other_model_data["quota_display"] = (
1611
+ f"{new_request_count}/{max_req}"
1612
+ )
1613
+
1614
  # Update quota_display if max_requests is set (Antigravity-specific)
1615
  max_req = model_data.get("quota_max_requests")
1616
  if max_req:
 
1805
  # Track failure for quota estimation (request still consumes quota)
1806
  model_data["failure_count"] = model_data.get("failure_count", 0) + 1
1807
  model_data["request_count"] = model_data.get("request_count", 0) + 1
1808
+ new_request_count = model_data["request_count"]
1809
 
1810
  # Apply to all models in the same quota group
1811
  group = self._get_model_quota_group(key, model)
 
1826
  },
1827
  )
1828
  group_model_data["quota_reset_ts"] = quota_reset_ts
1829
+ # Sync request_count across quota group
1830
+ group_model_data["request_count"] = new_request_count
1831
+ # Also sync quota_max_requests if set
1832
+ max_req = model_data.get("quota_max_requests")
1833
+ if max_req:
1834
+ group_model_data["quota_max_requests"] = max_req
1835
+ group_model_data["quota_display"] = (
1836
+ f"{new_request_count}/{max_req}"
1837
+ )
1838
  # Also set transient cooldown for selection logic
1839
  model_cooldowns[grouped_model] = quota_reset_ts
1840
 
 
1937
  model_data["failure_count"] = model_data.get("failure_count", 0) + 1
1938
  model_data["request_count"] = model_data.get("request_count", 0) + 1
1939
 
1940
+ # Sync request_count across quota group
1941
+ new_request_count = model_data["request_count"]
1942
+ group = self._get_model_quota_group(key, model)
1943
+ if group:
1944
+ grouped_models = self._get_grouped_models(key, group)
1945
+ for grouped_model in grouped_models:
1946
+ if grouped_model != model:
1947
+ other_model_data = models_data.setdefault(
1948
+ grouped_model,
1949
+ {
1950
+ "window_start_ts": None,
1951
+ "quota_reset_ts": None,
1952
+ "success_count": 0,
1953
+ "failure_count": 0,
1954
+ "request_count": 0,
1955
+ "prompt_tokens": 0,
1956
+ "completion_tokens": 0,
1957
+ "approx_cost": 0.0,
1958
+ },
1959
+ )
1960
+ other_model_data["request_count"] = new_request_count
1961
+ # Also sync quota_max_requests if set
1962
+ max_req = model_data.get("quota_max_requests")
1963
+ if max_req:
1964
+ other_model_data["quota_max_requests"] = max_req
1965
+ other_model_data["quota_display"] = (
1966
+ f"{new_request_count}/{max_req}"
1967
+ )
1968
+
1969
  key_data["last_failure"] = {
1970
  "timestamp": now_ts,
1971
  "model": model,
 
2070
  model_data["quota_max_requests"] = max_requests
2071
  model_data["quota_display"] = f"{used_requests}/{max_requests}"
2072
 
2073
+ # Sync request_count and quota_max_requests across quota group
2074
+ group = self._get_model_quota_group(credential, model)
2075
+ if group:
2076
+ grouped_models = self._get_grouped_models(credential, group)
2077
+ for grouped_model in grouped_models:
2078
+ if grouped_model != model:
2079
+ other_model_data = key_data["models"].setdefault(
2080
+ grouped_model,
2081
+ {
2082
+ "window_start_ts": None,
2083
+ "quota_reset_ts": None,
2084
+ "success_count": 0,
2085
+ "failure_count": 0,
2086
+ "request_count": 0,
2087
+ "prompt_tokens": 0,
2088
+ "completion_tokens": 0,
2089
+ "approx_cost": 0.0,
2090
+ },
2091
+ )
2092
+ other_model_data["request_count"] = used_requests
2093
+ if max_requests is not None:
2094
+ other_model_data["quota_max_requests"] = max_requests
2095
+ other_model_data["quota_display"] = (
2096
+ f"{used_requests}/{max_requests}"
2097
+ )
2098
+
2099
  lib_logger.debug(
2100
  f"Updated quota baseline for {mask_credential(credential)} model={model}: "
2101
  f"remaining={remaining_fraction:.2%}, synced_request_count={used_requests}"