Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Dec 16, 2025

Commit

1af1879

1 Parent(s): 1ac7bd0

refactor(quota-viewer): 🔨 enhance credential sorting and cooldown display

Add natural/numeric sorting for credentials to ensure proper ordering
(e.g., proj-1, proj-2, proj-10 instead of proj-1, proj-10, proj-2).

Improve cooldown display in quota viewer by grouping cooldowns by quota
groups when available, providing clearer visibility into rate limiting
status for grouped models.

Also in this commit:
- refactor(client): improve model stats lookup with alias support
- feat(usage-manager): add quota display formatting for logging

Files changed (3) hide show

src/proxy_app/quota_viewer.py +82 -13
src/rotator_library/client.py +98 -63
src/rotator_library/usage_manager.py +51 -4

src/proxy_app/quota_viewer.py CHANGED Viewed

@@ -6,6 +6,7 @@ Uses only httpx + rich (no heavy rotator_library imports).
 """
 import os
 import sys
 import time
 from datetime import datetime, timezone
@@ -128,6 +129,19 @@ def format_cooldown(seconds: int) -> str:
         return f"{hours}h {mins}m" if mins > 0 else f"{hours}h"
 class QuotaViewer:
     """Main Quota Viewer TUI class."""
@@ -548,6 +562,9 @@ class QuotaViewer:
                 prov_stats = self.cached_stats.get("providers", {}).get(provider, {})
                 credentials = prov_stats.get("credentials", [])
                 if not credentials:
                     self.console.print(
                         "[dim]No credentials configured for this provider.[/dim]"
@@ -584,6 +601,8 @@ class QuotaViewer:
                     if self.cached_stats
                     else []
                 )
                 for idx, cred in enumerate(credentials, 1):
                     identifier = cred.get("identifier", f"credential {idx}")
                     email = cred.get("email", identifier)
@@ -640,6 +659,8 @@ class QuotaViewer:
                     if self.cached_stats
                     else []
                 )
                 if 1 <= idx <= len(credentials):
                     cred = credentials[idx - 1]
                     cred_id = cred.get("identifier", "")
@@ -717,21 +738,69 @@ class QuotaViewer:
             f"[dim]{stats_line}[/dim]",
         ]
-        # Show model cooldowns if any
-        if model_cooldowns:
-            content_lines.append("")
-            content_lines.append("[yellow]Active Cooldowns:[/yellow]")
-            for model_name, cooldown_info in model_cooldowns.items():
-                remaining = cooldown_info.get("remaining_seconds", 0)
-                if remaining > 0:
-                    # Shorten model name for display
-                    short_model = model_name.split("/")[-1][:35]
-                    content_lines.append(
-                        f"  [yellow]⏱️ {short_model}: {format_cooldown(int(remaining))}[/yellow]"
-                    )
         # Model groups (for providers with quota tracking)
         model_groups = cred.get("model_groups", {})
         if model_groups:
             content_lines.append("")
             for group_name, group_stats in model_groups.items():

 """
 import os
+import re
 import sys
 import time
 from datetime import datetime, timezone
         return f"{hours}h {mins}m" if mins > 0 else f"{hours}h"
+def natural_sort_key(item: Dict[str, Any]) -> List:
+    """
+    Generate a sort key for natural/numeric sorting.
+    Sorts credentials like proj-1, proj-2, proj-10 correctly
+    instead of alphabetically (proj-1, proj-10, proj-2).
+    Args:
+        item: Credential dict; its "identifier" value is what gets sorted on.
+            A missing or None identifier is treated as an empty string.
+    Returns:
+        List alternating lowercased text chunks (even positions) and the
+        integer values of digit runs (odd positions).
+    """
+    raw = item.get("identifier")
+    identifier = "" if raw is None else str(raw)
+    # re.split with a capturing group always yields text, digits, text, ...
+    # so odd positions are exactly the r"(\d+)" matches. Deciding by position
+    # (rather than str.isdigit) avoids calling int() on characters such as
+    # "²" that isdigit() accepts but int() rejects, and guarantees that keys
+    # hold the same type at the same index so they always compare cleanly.
+    parts = re.split(r"(\d+)", identifier)
+    return [
+        int(part) if index % 2 else part.lower()
+        for index, part in enumerate(parts)
+    ]
 class QuotaViewer:
     """Main Quota Viewer TUI class."""
                 prov_stats = self.cached_stats.get("providers", {}).get(provider, {})
                 credentials = prov_stats.get("credentials", [])
+                # Sort credentials naturally (1, 2, 10 not 1, 10, 2)
+                credentials = sorted(credentials, key=natural_sort_key)
                 if not credentials:
                     self.console.print(
                         "[dim]No credentials configured for this provider.[/dim]"
                     if self.cached_stats
                     else []
                 )
+                # Sort credentials naturally
+                credentials = sorted(credentials, key=natural_sort_key)
                 for idx, cred in enumerate(credentials, 1):
                     identifier = cred.get("identifier", f"credential {idx}")
                     email = cred.get("email", identifier)
                     if self.cached_stats
                     else []
                 )
+                # Sort credentials naturally to match display order
+                credentials = sorted(credentials, key=natural_sort_key)
                 if 1 <= idx <= len(credentials):
                     cred = credentials[idx - 1]
                     cred_id = cred.get("identifier", "")
             f"[dim]{stats_line}[/dim]",
         ]
         # Model groups (for providers with quota tracking)
         model_groups = cred.get("model_groups", {})
+        # Show cooldowns grouped by quota group (if model_groups exist)
+        if model_cooldowns:
+            if model_groups:
+                # Group cooldowns by quota group
+                group_cooldowns: Dict[
+                    str, int
+                ] = {}  # group_name -> max_remaining_seconds
+                ungrouped_cooldowns: List[Tuple[str, int]] = []
+                for model_name, cooldown_info in model_cooldowns.items():
+                    remaining = cooldown_info.get("remaining_seconds", 0)
+                    if remaining <= 0:
+                        continue
+                    # Find which group this model belongs to
+                    clean_model = model_name.split("/")[-1]
+                    found_group = None
+                    for group_name, group_info in model_groups.items():
+                        group_models = group_info.get("models", [])
+                        if clean_model in group_models:
+                            found_group = group_name
+                            break
+                    if found_group:
+                        group_cooldowns[found_group] = max(
+                            group_cooldowns.get(found_group, 0), remaining
+                        )
+                    else:
+                        ungrouped_cooldowns.append((model_name, remaining))
+                if group_cooldowns or ungrouped_cooldowns:
+                    content_lines.append("")
+                    content_lines.append("[yellow]Active Cooldowns:[/yellow]")
+                    # Show grouped cooldowns
+                    for group_name in sorted(group_cooldowns.keys()):
+                        remaining = group_cooldowns[group_name]
+                        content_lines.append(
+                            f"  [yellow]⏱️ {group_name}: {format_cooldown(remaining)}[/yellow]"
+                        )
+                    # Show ungrouped (shouldn't happen often)
+                    for model_name, remaining in ungrouped_cooldowns:
+                        short_model = model_name.split("/")[-1][:35]
+                        content_lines.append(
+                            f"  [yellow]⏱️ {short_model}: {format_cooldown(remaining)}[/yellow]"
+                        )
+            else:
+                # No model groups - show per-model cooldowns
+                content_lines.append("")
+                content_lines.append("[yellow]Active Cooldowns:[/yellow]")
+                for model_name, cooldown_info in model_cooldowns.items():
+                    remaining = cooldown_info.get("remaining_seconds", 0)
+                    if remaining > 0:
+                        short_model = model_name.split("/")[-1][:35]
+                        content_lines.append(
+                            f"  [yellow]⏱️ {short_model}: {format_cooldown(int(remaining))}[/yellow]"
+                        )
+        # Display model groups with quota info
         if model_groups:
             content_lines.append("")
             for group_name, group_stats in model_groups.items():

src/rotator_library/client.py CHANGED Viewed

@@ -2664,27 +2664,25 @@ class RotatingClient:
                         models_data = cred.get("models", {})
                         group_stats["credentials_total"] += 1
-                        # Find any model from this group
                         for model in group_models:
-                            # Try with and without provider prefix
-                            prefixed_model = f"{provider}/{model}"
-                            model_stats = models_data.get(
-                                prefixed_model
-                            ) or models_data.get(model)
                             if model_stats:
-                                baseline = model_stats.get(
-                                    "baseline_remaining_fraction"
-                                )
-                                if baseline is not None:
-                                    remaining_pct = int(baseline * 100)
-                                    group_stats["total_remaining_pcts"].append(
-                                        remaining_pct
-                                    )
-                                    if baseline <= 0:
-                                        group_stats["credentials_exhausted"] += 1
                                 break
                     # Calculate average remaining percentage
                     if group_stats["total_remaining_pcts"]:
                         group_stats["avg_remaining_pct"] = int(
@@ -2701,56 +2699,53 @@ class RotatingClient:
                     models_data = cred.get("models", {})
                     for group_name, group_models in quota_groups.items():
-                        # Find representative model from this group
                         for model in group_models:
-                            prefixed_model = f"{provider}/{model}"
-                            model_stats = models_data.get(
-                                prefixed_model
-                            ) or models_data.get(model)
                             if model_stats:
-                                baseline = model_stats.get(
-                                    "baseline_remaining_fraction"
-                                )
-                                max_req = model_stats.get("quota_max_requests")
-                                req_count = model_stats.get("request_count", 0)
-                                reset_ts = model_stats.get("quota_reset_ts")
-                                remaining_pct = (
-                                    int(baseline * 100)
-                                    if baseline is not None
-                                    else None
-                                )
-                                is_exhausted = baseline is not None and baseline <= 0
-                                # Format reset time
-                                reset_iso = None
-                                if reset_ts:
-                                    try:
-                                        from datetime import datetime, timezone
-                                        reset_iso = datetime.fromtimestamp(
-                                            reset_ts, tz=timezone.utc
-                                        ).isoformat()
-                                    except (ValueError, OSError):
-                                        pass
-                                cred["model_groups"][group_name] = {
-                                    "remaining_pct": remaining_pct,
-                                    "requests_used": req_count,
-                                    "requests_max": max_req,
-                                    "display": f"{req_count}/{max_req}"
-                                    if max_req
-                                    else f"{req_count}/?",
-                                    "is_exhausted": is_exhausted,
-                                    "reset_time_iso": reset_iso,
-                                    "models": group_models,
-                                    "confidence": self._get_baseline_confidence(
-                                        model_stats
-                                    ),
-                                }
                                 break
                     # Try to get email from provider's cache
                     cred_path = cred.get("full_path", "")
                     if hasattr(provider_instance, "project_tier_cache"):
@@ -2760,6 +2755,46 @@ class RotatingClient:
         return stats
     def _get_baseline_confidence(self, model_stats: Dict) -> str:
         """
         Determine confidence level based on baseline age.

                         models_data = cred.get("models", {})
                         group_stats["credentials_total"] += 1
+                        # Find any model from this group (try all with alias fallback)
+                        model_stats = None
                         for model in group_models:
+                            model_stats = self._find_model_stats_in_data(
+                                models_data, model, provider, provider_instance
+                            )
                             if model_stats:
                                 break
+                        if model_stats:
+                            baseline = model_stats.get("baseline_remaining_fraction")
+                            if baseline is not None:
+                                remaining_pct = int(baseline * 100)
+                                group_stats["total_remaining_pcts"].append(
+                                    remaining_pct
+                                )
+                                if baseline <= 0:
+                                    group_stats["credentials_exhausted"] += 1
                     # Calculate average remaining percentage
                     if group_stats["total_remaining_pcts"]:
                         group_stats["avg_remaining_pct"] = int(
                     models_data = cred.get("models", {})
                     for group_name, group_models in quota_groups.items():
+                        # Find representative model from this group (try all with alias fallback)
+                        model_stats = None
                         for model in group_models:
+                            model_stats = self._find_model_stats_in_data(
+                                models_data, model, provider, provider_instance
+                            )
                             if model_stats:
                                 break
+                        if model_stats:
+                            baseline = model_stats.get("baseline_remaining_fraction")
+                            max_req = model_stats.get("quota_max_requests")
+                            req_count = model_stats.get("request_count", 0)
+                            reset_ts = model_stats.get("quota_reset_ts")
+                            remaining_pct = (
+                                int(baseline * 100) if baseline is not None else None
+                            )
+                            is_exhausted = baseline is not None and baseline <= 0
+                            # Format reset time
+                            reset_iso = None
+                            if reset_ts:
+                                try:
+                                    from datetime import datetime, timezone
+                                    reset_iso = datetime.fromtimestamp(
+                                        reset_ts, tz=timezone.utc
+                                    ).isoformat()
+                                except (ValueError, OSError):
+                                    pass
+                            cred["model_groups"][group_name] = {
+                                "remaining_pct": remaining_pct,
+                                "requests_used": req_count,
+                                "requests_max": max_req,
+                                "display": f"{req_count}/{max_req}"
+                                if max_req
+                                else f"{req_count}/?",
+                                "is_exhausted": is_exhausted,
+                                "reset_time_iso": reset_iso,
+                                "models": group_models,
+                                "confidence": self._get_baseline_confidence(
+                                    model_stats
+                                ),
+                            }
                     # Try to get email from provider's cache
                     cred_path = cred.get("full_path", "")
                     if hasattr(provider_instance, "project_tier_cache"):
         return stats
+    def _find_model_stats_in_data(
+        self,
+        models_data: Dict[str, Any],
+        model: str,
+        provider: str,
+        provider_instance: Any,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Look up a model's stats entry, tolerating prefixed and aliased names.
+        Lookup order:
+            1. "<provider>/<model>", then "<model>" as given.
+            2. If provider_instance exposes _user_to_api_model() and it maps
+               the name to something different, "<provider>/<api_model>",
+               then "<api_model>".
+        Args:
+            models_data: Dict of model_name -> stats from credential
+            model: Model name to look up (user-facing name)
+            provider: Provider name used to build the prefixed key
+            provider_instance: Provider object, optionally offering
+                _user_to_api_model() for alias translation
+        Returns:
+            The first truthy stats entry found; otherwise the falsy result
+            of the last lookup attempted (normally None).
+        """
+        direct_hit = models_data.get(f"{provider}/{model}") or models_data.get(model)
+        if direct_hit:
+            return direct_hit
+        # Without an alias mapper there is nothing further to try
+        if not hasattr(provider_instance, "_user_to_api_model"):
+            return direct_hit
+        api_name = provider_instance._user_to_api_model(model)
+        if api_name != model:
+            # Retry under the API-side name (e.g. gemini-3-pro-preview -> gemini-3-pro-high)
+            return models_data.get(f"{provider}/{api_name}") or models_data.get(
+                api_name
+            )
+        return direct_hit
     def _get_baseline_confidence(self, model_stats: Dict) -> str:
         """
         Determine confidence level based on baseline age.

src/rotator_library/usage_manager.py CHANGED Viewed

@@ -392,6 +392,49 @@ class UsageManager:
         # Not grouped - return individual model usage (no weight applied)
         return self._get_usage_count(key, model, usage_field)
     def _get_usage_field_name(self, credential: str) -> str:
         """
         Get the usage tracking field name for a credential.
@@ -1285,9 +1328,10 @@ class UsageManager:
                                     if credential_tier_names
                                     else "unknown"
                                 )
                                 lib_logger.info(
                                     f"Acquired key {mask_credential(key)} for model {model} "
-                                    f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, usage: {usage})"
                                 )
                                 return key
@@ -1303,9 +1347,10 @@ class UsageManager:
                                     if credential_tier_names
                                     else "unknown"
                                 )
                                 lib_logger.info(
                                     f"Acquired key {mask_credential(key)} for model {model} "
-                                    f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
                                 )
                                 return key
@@ -1421,9 +1466,10 @@ class UsageManager:
                                 else None
                             )
                             tier_info = f"tier: {tier_name}, " if tier_name else ""
                             lib_logger.info(
                                 f"Acquired key {mask_credential(key)} for model {model} "
-                                f"({tier_info}selection: {selection_method}, usage: {usage})"
                             )
                             return key
@@ -1440,9 +1486,10 @@ class UsageManager:
                                 else None
                             )
                             tier_info = f"tier: {tier_name}, " if tier_name else ""
                             lib_logger.info(
                                 f"Acquired key {mask_credential(key)} for model {model} "
-                                f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
                             )
                             return key

         # Not grouped - return individual model usage (no weight applied)
         return self._get_usage_count(key, model, usage_field)
+    def _get_quota_display(self, key: str, model: str) -> str:
+        """
+        Get a formatted quota display string for logging.
+        For antigravity (providers in _REQUEST_COUNT_PROVIDERS), returns:
+            "quota: 170/250 [32%]" format, where the bracketed value is the
+            share of the quota still remaining (never below 0%)
+        For other providers, returns:
+            "usage: 170" format (no max available)
+        Args:
+            key: Credential identifier
+            model: Model name (with provider prefix)
+        Returns:
+            Formatted string for logging
+        """
+        provider = self._get_provider_from_credential(key)
+        if provider not in self._REQUEST_COUNT_PROVIDERS:
+            # Non-antigravity: just show usage count
+            usage = self._get_usage_count(key, model, "success_count")
+            return f"usage: {usage}"
+        # Antigravity: show quota display with remaining percentage
+        if self._usage_data is None:
+            return "quota: 0/? [100%]"
+        key_data = self._usage_data.get(key, {})
+        model_data = key_data.get("models", {}).get(model, {})
+        request_count = model_data.get("request_count", 0)
+        max_requests = model_data.get("quota_max_requests")
+        if not max_requests:
+            # Max is unknown (missing or 0): only the raw count can be shown
+            return f"quota: {request_count}"
+        # Clamp at zero so a count that overshoots the recorded max is logged
+        # as 0% remaining instead of a negative percentage.
+        remaining = max(max_requests - request_count, 0)
+        remaining_pct = (
+            int((remaining / max_requests) * 100) if max_requests > 0 else 0
+        )
+        return f"quota: {request_count}/{max_requests} [{remaining_pct}%]"
     def _get_usage_field_name(self, credential: str) -> str:
         """
         Get the usage tracking field name for a credential.
                                     if credential_tier_names
                                     else "unknown"
                                 )
+                                quota_display = self._get_quota_display(key, model)
                                 lib_logger.info(
                                     f"Acquired key {mask_credential(key)} for model {model} "
+                                    f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, {quota_display})"
                                 )
                                 return key
                                     if credential_tier_names
                                     else "unknown"
                                 )
+                                quota_display = self._get_quota_display(key, model)
                                 lib_logger.info(
                                     f"Acquired key {mask_credential(key)} for model {model} "
+                                    f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, {quota_display})"
                                 )
                                 return key
                                 else None
                             )
                             tier_info = f"tier: {tier_name}, " if tier_name else ""
+                            quota_display = self._get_quota_display(key, model)
                             lib_logger.info(
                                 f"Acquired key {mask_credential(key)} for model {model} "
+                                f"({tier_info}selection: {selection_method}, {quota_display})"
                             )
                             return key
                                 else None
                             )
                             tier_info = f"tier: {tier_name}, " if tier_name else ""
+                            quota_display = self._get_quota_display(key, model)
                             lib_logger.info(
                                 f"Acquired key {mask_credential(key)} for model {model} "
+                                f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, {quota_display})"
                             )
                             return key