Spaces:
Paused
refactor(usage): 🔨 sync request_count across quota group models
Browse files
Refactor quota group handling to synchronize request_count across all models
in a quota group instead of recalculating it on each access. This improves
performance and ensures consistency.
The previous implementation iterated through all models in a group to sum
request counts and find baselines on each quota check. The new implementation
proactively syncs these values whenever a request is recorded or a quota
baseline is updated.
Changes:
- Sync request_count to all models in a quota group when recording successes
or failures, or when updating quota baselines
- Quota estimation now reads values from a single representative model (the
first model in the group), since all models in the group are kept synchronized
- Add credential parsing support for bare OAuth filenames with no directory
path (e.g. "antigravity_oauth_1.json" -> "antigravity")
Also in this commit:
- chore(gitignore): ignore quota_viewer_config.json
|
@@ -124,9 +124,11 @@ start_proxy.bat
|
|
| 124 |
key_usage.json
|
| 125 |
staged_changes.txt
|
| 126 |
launcher_config.json
|
|
|
|
| 127 |
cache/antigravity/thought_signatures.json
|
| 128 |
logs/
|
| 129 |
cache/
|
| 130 |
*.env
|
| 131 |
|
| 132 |
oauth_creds/
|
|
|
|
|
|
| 124 |
key_usage.json
|
| 125 |
staged_changes.txt
|
| 126 |
launcher_config.json
|
| 127 |
+
quota_viewer_config.json
|
| 128 |
cache/antigravity/thought_signatures.json
|
| 129 |
logs/
|
| 130 |
cache/
|
| 131 |
*.env
|
| 132 |
|
| 133 |
oauth_creds/
|
| 134 |
+
|
|
@@ -651,41 +651,31 @@ class AntigravityQuotaTracker:
|
|
| 651 |
cred_usage = usage_data[cred_path]
|
| 652 |
models_usage = cred_usage.get("models", {})
|
| 653 |
|
| 654 |
-
#
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
|
| 660 |
-
|
| 661 |
-
|
| 662 |
-
|
| 663 |
-
|
| 664 |
-
|
| 665 |
-
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
if baseline_remaining is None:
|
| 671 |
-
baseline_remaining = model_usage.get(
|
| 672 |
-
"baseline_remaining_fraction"
|
| 673 |
-
)
|
| 674 |
-
baseline_fetched_at = model_usage.get(
|
| 675 |
-
"baseline_fetched_at"
|
| 676 |
-
)
|
| 677 |
-
|
| 678 |
-
# Use earliest reset time
|
| 679 |
-
if model_usage.get("quota_reset_ts"):
|
| 680 |
-
ts = model_usage["quota_reset_ts"]
|
| 681 |
-
try:
|
| 682 |
-
iso = datetime.fromtimestamp(
|
| 683 |
-
ts, tz=timezone.utc
|
| 684 |
-
).isoformat()
|
| 685 |
-
if reset_time_iso is None or iso < reset_time_iso:
|
| 686 |
-
reset_time_iso = iso
|
| 687 |
-
except (ValueError, OSError):
|
| 688 |
-
pass
|
| 689 |
|
| 690 |
# Calculate estimate
|
| 691 |
# cost_per_request is in percentage (0.4 = 0.4%), convert to fraction
|
|
@@ -693,9 +683,11 @@ class AntigravityQuotaTracker:
|
|
| 693 |
group_models[0], tier
|
| 694 |
)
|
| 695 |
cost_per_request_fraction = cost_per_request_percent / 100.0
|
| 696 |
-
max_requests
|
| 697 |
-
|
| 698 |
-
|
|
|
|
|
|
|
| 699 |
|
| 700 |
if baseline_remaining is not None:
|
| 701 |
estimated_remaining = baseline_remaining - (
|
|
|
|
| 651 |
cred_usage = usage_data[cred_path]
|
| 652 |
models_usage = cred_usage.get("models", {})
|
| 653 |
|
| 654 |
+
# Get request_count from representative model (synced across group)
|
| 655 |
+
# Try with and without provider prefix for first model in group
|
| 656 |
+
representative_model = group_models[0]
|
| 657 |
+
prefixed_model = f"antigravity/{representative_model}"
|
| 658 |
+
model_usage = models_usage.get(
|
| 659 |
+
prefixed_model
|
| 660 |
+
) or models_usage.get(representative_model, {})
|
| 661 |
+
|
| 662 |
+
total_requests = model_usage.get("request_count", 0)
|
| 663 |
+
baseline_remaining = model_usage.get(
|
| 664 |
+
"baseline_remaining_fraction"
|
| 665 |
+
)
|
| 666 |
+
baseline_fetched_at = model_usage.get("baseline_fetched_at")
|
| 667 |
+
max_requests = model_usage.get("quota_max_requests")
|
| 668 |
|
| 669 |
+
# Get reset time from any model in group (also synced)
|
| 670 |
+
reset_time_iso = None
|
| 671 |
+
if model_usage.get("quota_reset_ts"):
|
| 672 |
+
ts = model_usage["quota_reset_ts"]
|
| 673 |
+
try:
|
| 674 |
+
reset_time_iso = datetime.fromtimestamp(
|
| 675 |
+
ts, tz=timezone.utc
|
| 676 |
+
).isoformat()
|
| 677 |
+
except (ValueError, OSError):
|
| 678 |
+
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
|
| 680 |
# Calculate estimate
|
| 681 |
# cost_per_request is in percentage (0.4 = 0.4%), convert to fraction
|
|
|
|
| 683 |
group_models[0], tier
|
| 684 |
)
|
| 685 |
cost_per_request_fraction = cost_per_request_percent / 100.0
|
| 686 |
+
# Use max_requests from usage data if available, otherwise calculate
|
| 687 |
+
if max_requests is None:
|
| 688 |
+
max_requests = self.get_max_requests_for_model(
|
| 689 |
+
group_models[0], tier
|
| 690 |
+
)
|
| 691 |
|
| 692 |
if baseline_remaining is not None:
|
| 693 |
estimated_remaining = baseline_remaining - (
|
|
@@ -186,6 +186,7 @@ class UsageManager:
|
|
| 186 |
Supports multiple credential formats:
|
| 187 |
- OAuth: "oauth_creds/antigravity_oauth_15.json" -> "antigravity"
|
| 188 |
- OAuth: "C:\\...\\oauth_creds\\gemini_cli_oauth_1.json" -> "gemini_cli"
|
|
|
|
| 189 |
- API key style: stored with provider prefix metadata
|
| 190 |
|
| 191 |
Args:
|
|
@@ -199,7 +200,7 @@ class UsageManager:
|
|
| 199 |
# Normalize path separators
|
| 200 |
normalized = credential.replace("\\", "/")
|
| 201 |
|
| 202 |
-
# Pattern: {provider}_oauth_{number}.json
|
| 203 |
match = re.search(r"/([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
|
| 204 |
if match:
|
| 205 |
return match.group(1).lower()
|
|
@@ -209,6 +210,11 @@ class UsageManager:
|
|
| 209 |
if match:
|
| 210 |
return match.group(1).lower()
|
| 211 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
return None
|
| 213 |
|
| 214 |
def _get_provider_instance(self, provider: str) -> Optional[Any]:
|
|
@@ -337,22 +343,20 @@ class UsageManager:
|
|
| 337 |
"""
|
| 338 |
Get usage count for credential selection, considering quota groups.
|
| 339 |
|
| 340 |
-
If the model belongs to a quota group, returns the weighted combined usage
|
| 341 |
-
across all models in the group. Otherwise returns individual model usage.
|
| 342 |
-
|
| 343 |
-
Weights are applied per-model to account for models that consume more quota
|
| 344 |
-
per request (e.g., Opus might count 2x compared to Sonnet).
|
| 345 |
-
|
| 346 |
For providers in _REQUEST_COUNT_PROVIDERS (e.g., antigravity), uses
|
| 347 |
request_count instead of success_count since failed requests also
|
| 348 |
consume quota.
|
| 349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
Args:
|
| 351 |
key: Credential identifier
|
| 352 |
model: Model name (with provider prefix, e.g., "antigravity/claude-sonnet-4-5")
|
| 353 |
|
| 354 |
Returns:
|
| 355 |
-
|
| 356 |
"""
|
| 357 |
# Determine usage field based on provider
|
| 358 |
# Some providers (antigravity) count failed requests against quota
|
|
@@ -363,7 +367,14 @@ class UsageManager:
|
|
| 363 |
else "success_count"
|
| 364 |
)
|
| 365 |
|
| 366 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
group = self._get_model_quota_group(key, model)
|
| 368 |
|
| 369 |
if group:
|
|
@@ -1571,6 +1582,35 @@ class UsageManager:
|
|
| 1571 |
model_data["success_count"] += 1
|
| 1572 |
model_data["request_count"] = model_data.get("request_count", 0) + 1
|
| 1573 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1574 |
# Update quota_display if max_requests is set (Antigravity-specific)
|
| 1575 |
max_req = model_data.get("quota_max_requests")
|
| 1576 |
if max_req:
|
|
@@ -1765,6 +1805,7 @@ class UsageManager:
|
|
| 1765 |
# Track failure for quota estimation (request still consumes quota)
|
| 1766 |
model_data["failure_count"] = model_data.get("failure_count", 0) + 1
|
| 1767 |
model_data["request_count"] = model_data.get("request_count", 0) + 1
|
|
|
|
| 1768 |
|
| 1769 |
# Apply to all models in the same quota group
|
| 1770 |
group = self._get_model_quota_group(key, model)
|
|
@@ -1785,6 +1826,15 @@ class UsageManager:
|
|
| 1785 |
},
|
| 1786 |
)
|
| 1787 |
group_model_data["quota_reset_ts"] = quota_reset_ts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1788 |
# Also set transient cooldown for selection logic
|
| 1789 |
model_cooldowns[grouped_model] = quota_reset_ts
|
| 1790 |
|
|
@@ -1887,6 +1937,35 @@ class UsageManager:
|
|
| 1887 |
model_data["failure_count"] = model_data.get("failure_count", 0) + 1
|
| 1888 |
model_data["request_count"] = model_data.get("request_count", 0) + 1
|
| 1889 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1890 |
key_data["last_failure"] = {
|
| 1891 |
"timestamp": now_ts,
|
| 1892 |
"model": model,
|
|
@@ -1991,6 +2070,32 @@ class UsageManager:
|
|
| 1991 |
model_data["quota_max_requests"] = max_requests
|
| 1992 |
model_data["quota_display"] = f"{used_requests}/{max_requests}"
|
| 1993 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1994 |
lib_logger.debug(
|
| 1995 |
f"Updated quota baseline for {mask_credential(credential)} model={model}: "
|
| 1996 |
f"remaining={remaining_fraction:.2%}, synced_request_count={used_requests}"
|
|
|
|
| 186 |
Supports multiple credential formats:
|
| 187 |
- OAuth: "oauth_creds/antigravity_oauth_15.json" -> "antigravity"
|
| 188 |
- OAuth: "C:\\...\\oauth_creds\\gemini_cli_oauth_1.json" -> "gemini_cli"
|
| 189 |
+
- OAuth filename only: "antigravity_oauth_1.json" -> "antigravity"
|
| 190 |
- API key style: stored with provider prefix metadata
|
| 191 |
|
| 192 |
Args:
|
|
|
|
| 200 |
# Normalize path separators
|
| 201 |
normalized = credential.replace("\\", "/")
|
| 202 |
|
| 203 |
+
# Pattern: path ending with {provider}_oauth_{number}.json
|
| 204 |
match = re.search(r"/([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
|
| 205 |
if match:
|
| 206 |
return match.group(1).lower()
|
|
|
|
| 210 |
if match:
|
| 211 |
return match.group(1).lower()
|
| 212 |
|
| 213 |
+
# Pattern: filename only {provider}_oauth_{number}.json (no path)
|
| 214 |
+
match = re.match(r"([a-z_]+)_oauth_\d+\.json$", normalized, re.IGNORECASE)
|
| 215 |
+
if match:
|
| 216 |
+
return match.group(1).lower()
|
| 217 |
+
|
| 218 |
return None
|
| 219 |
|
| 220 |
def _get_provider_instance(self, provider: str) -> Optional[Any]:
|
|
|
|
| 343 |
"""
|
| 344 |
Get usage count for credential selection, considering quota groups.
|
| 345 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
For providers in _REQUEST_COUNT_PROVIDERS (e.g., antigravity), uses
|
| 347 |
request_count instead of success_count since failed requests also
|
| 348 |
consume quota.
|
| 349 |
|
| 350 |
+
If the model belongs to a quota group, the request_count is already
|
| 351 |
+
synced across all models in the group (by record_success/record_failure),
|
| 352 |
+
so we just read from the requested model directly.
|
| 353 |
+
|
| 354 |
Args:
|
| 355 |
key: Credential identifier
|
| 356 |
model: Model name (with provider prefix, e.g., "antigravity/claude-sonnet-4-5")
|
| 357 |
|
| 358 |
Returns:
|
| 359 |
+
Usage count for the model (synced across group if applicable)
|
| 360 |
"""
|
| 361 |
# Determine usage field based on provider
|
| 362 |
# Some providers (antigravity) count failed requests against quota
|
|
|
|
| 367 |
else "success_count"
|
| 368 |
)
|
| 369 |
|
| 370 |
+
# For providers with synced quota groups (antigravity), request_count
|
| 371 |
+
# is already synced across all models in the group, so just read directly.
|
| 372 |
+
# For other providers, we still need to sum success_count across group.
|
| 373 |
+
if provider in self._REQUEST_COUNT_PROVIDERS:
|
| 374 |
+
# request_count is synced - just read the model's value
|
| 375 |
+
return self._get_usage_count(key, model, usage_field)
|
| 376 |
+
|
| 377 |
+
# For non-synced providers, check if model is in a quota group and sum
|
| 378 |
group = self._get_model_quota_group(key, model)
|
| 379 |
|
| 380 |
if group:
|
|
|
|
| 1582 |
model_data["success_count"] += 1
|
| 1583 |
model_data["request_count"] = model_data.get("request_count", 0) + 1
|
| 1584 |
|
| 1585 |
+
# Sync request_count across quota group (for providers with shared quota pools)
|
| 1586 |
+
new_request_count = model_data["request_count"]
|
| 1587 |
+
group = self._get_model_quota_group(key, model)
|
| 1588 |
+
if group:
|
| 1589 |
+
grouped_models = self._get_grouped_models(key, group)
|
| 1590 |
+
for grouped_model in grouped_models:
|
| 1591 |
+
if grouped_model != model:
|
| 1592 |
+
other_model_data = key_data["models"].setdefault(
|
| 1593 |
+
grouped_model,
|
| 1594 |
+
{
|
| 1595 |
+
"window_start_ts": None,
|
| 1596 |
+
"quota_reset_ts": None,
|
| 1597 |
+
"success_count": 0,
|
| 1598 |
+
"failure_count": 0,
|
| 1599 |
+
"request_count": 0,
|
| 1600 |
+
"prompt_tokens": 0,
|
| 1601 |
+
"completion_tokens": 0,
|
| 1602 |
+
"approx_cost": 0.0,
|
| 1603 |
+
},
|
| 1604 |
+
)
|
| 1605 |
+
other_model_data["request_count"] = new_request_count
|
| 1606 |
+
# Also sync quota_max_requests if set
|
| 1607 |
+
max_req = model_data.get("quota_max_requests")
|
| 1608 |
+
if max_req:
|
| 1609 |
+
other_model_data["quota_max_requests"] = max_req
|
| 1610 |
+
other_model_data["quota_display"] = (
|
| 1611 |
+
f"{new_request_count}/{max_req}"
|
| 1612 |
+
)
|
| 1613 |
+
|
| 1614 |
# Update quota_display if max_requests is set (Antigravity-specific)
|
| 1615 |
max_req = model_data.get("quota_max_requests")
|
| 1616 |
if max_req:
|
|
|
|
| 1805 |
# Track failure for quota estimation (request still consumes quota)
|
| 1806 |
model_data["failure_count"] = model_data.get("failure_count", 0) + 1
|
| 1807 |
model_data["request_count"] = model_data.get("request_count", 0) + 1
|
| 1808 |
+
new_request_count = model_data["request_count"]
|
| 1809 |
|
| 1810 |
# Apply to all models in the same quota group
|
| 1811 |
group = self._get_model_quota_group(key, model)
|
|
|
|
| 1826 |
},
|
| 1827 |
)
|
| 1828 |
group_model_data["quota_reset_ts"] = quota_reset_ts
|
| 1829 |
+
# Sync request_count across quota group
|
| 1830 |
+
group_model_data["request_count"] = new_request_count
|
| 1831 |
+
# Also sync quota_max_requests if set
|
| 1832 |
+
max_req = model_data.get("quota_max_requests")
|
| 1833 |
+
if max_req:
|
| 1834 |
+
group_model_data["quota_max_requests"] = max_req
|
| 1835 |
+
group_model_data["quota_display"] = (
|
| 1836 |
+
f"{new_request_count}/{max_req}"
|
| 1837 |
+
)
|
| 1838 |
# Also set transient cooldown for selection logic
|
| 1839 |
model_cooldowns[grouped_model] = quota_reset_ts
|
| 1840 |
|
|
|
|
| 1937 |
model_data["failure_count"] = model_data.get("failure_count", 0) + 1
|
| 1938 |
model_data["request_count"] = model_data.get("request_count", 0) + 1
|
| 1939 |
|
| 1940 |
+
# Sync request_count across quota group
|
| 1941 |
+
new_request_count = model_data["request_count"]
|
| 1942 |
+
group = self._get_model_quota_group(key, model)
|
| 1943 |
+
if group:
|
| 1944 |
+
grouped_models = self._get_grouped_models(key, group)
|
| 1945 |
+
for grouped_model in grouped_models:
|
| 1946 |
+
if grouped_model != model:
|
| 1947 |
+
other_model_data = models_data.setdefault(
|
| 1948 |
+
grouped_model,
|
| 1949 |
+
{
|
| 1950 |
+
"window_start_ts": None,
|
| 1951 |
+
"quota_reset_ts": None,
|
| 1952 |
+
"success_count": 0,
|
| 1953 |
+
"failure_count": 0,
|
| 1954 |
+
"request_count": 0,
|
| 1955 |
+
"prompt_tokens": 0,
|
| 1956 |
+
"completion_tokens": 0,
|
| 1957 |
+
"approx_cost": 0.0,
|
| 1958 |
+
},
|
| 1959 |
+
)
|
| 1960 |
+
other_model_data["request_count"] = new_request_count
|
| 1961 |
+
# Also sync quota_max_requests if set
|
| 1962 |
+
max_req = model_data.get("quota_max_requests")
|
| 1963 |
+
if max_req:
|
| 1964 |
+
other_model_data["quota_max_requests"] = max_req
|
| 1965 |
+
other_model_data["quota_display"] = (
|
| 1966 |
+
f"{new_request_count}/{max_req}"
|
| 1967 |
+
)
|
| 1968 |
+
|
| 1969 |
key_data["last_failure"] = {
|
| 1970 |
"timestamp": now_ts,
|
| 1971 |
"model": model,
|
|
|
|
| 2070 |
model_data["quota_max_requests"] = max_requests
|
| 2071 |
model_data["quota_display"] = f"{used_requests}/{max_requests}"
|
| 2072 |
|
| 2073 |
+
# Sync request_count and quota_max_requests across quota group
|
| 2074 |
+
group = self._get_model_quota_group(credential, model)
|
| 2075 |
+
if group:
|
| 2076 |
+
grouped_models = self._get_grouped_models(credential, group)
|
| 2077 |
+
for grouped_model in grouped_models:
|
| 2078 |
+
if grouped_model != model:
|
| 2079 |
+
other_model_data = key_data["models"].setdefault(
|
| 2080 |
+
grouped_model,
|
| 2081 |
+
{
|
| 2082 |
+
"window_start_ts": None,
|
| 2083 |
+
"quota_reset_ts": None,
|
| 2084 |
+
"success_count": 0,
|
| 2085 |
+
"failure_count": 0,
|
| 2086 |
+
"request_count": 0,
|
| 2087 |
+
"prompt_tokens": 0,
|
| 2088 |
+
"completion_tokens": 0,
|
| 2089 |
+
"approx_cost": 0.0,
|
| 2090 |
+
},
|
| 2091 |
+
)
|
| 2092 |
+
other_model_data["request_count"] = used_requests
|
| 2093 |
+
if max_requests is not None:
|
| 2094 |
+
other_model_data["quota_max_requests"] = max_requests
|
| 2095 |
+
other_model_data["quota_display"] = (
|
| 2096 |
+
f"{used_requests}/{max_requests}"
|
| 2097 |
+
)
|
| 2098 |
+
|
| 2099 |
lib_logger.debug(
|
| 2100 |
f"Updated quota baseline for {mask_credential(credential)} model={model}: "
|
| 2101 |
f"remaining={remaining_fraction:.2%}, synced_request_count={used_requests}"
|