Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Dec 16, 2025

Commit

90d4836

1 Parent(s): 06b3f7d

feat(quota): ✨ improve aggregation with tier priorities and fix double-counting

- Adds tier priority metadata for proper sorting of credential tiers in the UI
- Fixes double-counting when models share quota groups by using aggregated group totals
- Enhances reset time display to show expiration for low/exhausted quotas
- Implements provider-specific stats merging to preserve cache during partial updates
- Recalculates summary statistics on-demand instead of full cache replacement

Files changed (3) hide show

src/proxy_app/launcher_tui.py +1 -1
src/proxy_app/quota_viewer.py +216 -25
src/rotator_library/client.py +47 -4

src/proxy_app/launcher_tui.py CHANGED Viewed

@@ -429,7 +429,7 @@ class LauncherTUI:
             self.console.print("   3. 🔑 Manage Credentials")
         self.console.print("   4. 📊 View Provider & Advanced Settings")
-        self.console.print("   5. 📈 View Quota & Usage Stats")
         self.console.print("   6. 🔄 Reload Configuration")
         self.console.print("   7. ℹ️  About")
         self.console.print("   8. 🚪 Exit")

             self.console.print("   3. 🔑 Manage Credentials")
         self.console.print("   4. 📊 View Provider & Advanced Settings")
+        self.console.print("   5. 📈 View Quota & Usage Stats (Alpha)")
         self.console.print("   6. 🔄 Reload Configuration")
         self.console.print("   7. ℹ️  About")
         self.console.print("   8. 🚪 Exit")

src/proxy_app/quota_viewer.py CHANGED Viewed

@@ -3,6 +3,42 @@ Lightweight Quota Stats Viewer TUI.
 Connects to a running proxy to display quota and usage statistics.
 Uses only httpx + rich (no heavy rotator_library imports).
 """
 import os
@@ -257,6 +293,131 @@ class QuotaViewer:
             self.last_error = str(e)
             return None
     def post_action(
         self,
         action: str,
@@ -300,7 +461,14 @@ class QuotaViewer:
                     return None
                 result = response.json()
-                self.cached_stats = result
                 self.last_error = None
                 return result
@@ -424,8 +592,12 @@ class QuotaViewer:
                         tiers = group_stats.get("tiers", {})
                         # Format tier info: "5(15)f/2s" = 5 active out of 15 free, 2 standard all active
                         tier_parts = []
-                        for tier_name, tier_info in sorted(tiers.items()):
                             if tier_name == "unknown":
                                 continue  # Skip unknown tiers in display
                             total_t = tier_info.get("total", 0)
@@ -546,10 +718,13 @@ class QuotaViewer:
         valid_choices = [str(i) for i in range(1, len(provider_list) + 1)]
         valid_choices.extend(["r", "R", "s", "S", "m", "M", "b", "B", "g", "G"])
-        choice = Prompt.ask("Select option", default="B").strip()
         if choice.lower() == "b":
             self.running = False
         elif choice.lower() == "g":
             # Toggle view mode
             self.view_mode = "global" if self.view_mode == "current" else "current"
@@ -659,6 +834,7 @@ class QuotaViewer:
                 ):
                     self.post_action("reload", scope="all")
             elif choice == "F" and has_quota_groups:
                 with self.console.status(
                     f"[bold]Fetching live quota for ALL {provider} credentials...",
                     spinner="dots",
@@ -666,16 +842,17 @@ class QuotaViewer:
                     result = self.post_action(
                         "force_refresh", scope="provider", provider=provider
                     )
-                    if result and result.get("refresh_result"):
-                        rr = result["refresh_result"]
-                        self.console.print(
-                            f"\n[green]Refreshed {rr.get('credentials_refreshed', 0)} credentials "
-                            f"in {rr.get('duration_ms', 0)}ms[/green]"
-                        )
-                        if rr.get("errors"):
-                            for err in rr["errors"]:
-                                self.console.print(f"[red]  Error: {err}[/red]")
-                        Prompt.ask("Press Enter to continue", default="")
             elif choice.startswith("F") and choice[1:].isdigit() and has_quota_groups:
                 idx = int(choice[1:])
                 credentials = (
@@ -691,6 +868,7 @@ class QuotaViewer:
                     cred = credentials[idx - 1]
                     cred_id = cred.get("identifier", "")
                     email = cred.get("email", cred_id)
                     with self.console.status(
                         f"[bold]Fetching live quota for {email}...", spinner="dots"
                     ):
@@ -700,15 +878,16 @@ class QuotaViewer:
                             provider=provider,
                             credential=cred_id,
                         )
-                        if result and result.get("refresh_result"):
-                            rr = result["refresh_result"]
-                            self.console.print(
-                                f"\n[green]Refreshed in {rr.get('duration_ms', 0)}ms[/green]"
-                            )
-                            if rr.get("errors"):
-                                for err in rr["errors"]:
-                                    self.console.print(f"[red]  Error: {err}[/red]")
-                            Prompt.ask("Press Enter to continue", default="")
     def _render_credential_panel(self, idx: int, cred: Dict[str, Any], provider: str):
         """Render a single credential as a panel."""
@@ -841,16 +1020,28 @@ class QuotaViewer:
                 display = group_stats.get("display", f"{requests_used}/?")
                 bar = create_progress_bar(remaining_pct)
                 # Color based on status
                 if is_exhausted:
                     color = "red"
-                    status_text = "⛔ EXHAUSTED"
                 elif remaining_pct is not None and remaining_pct < 20:
                     color = "yellow"
-                    status_text = "⚠️ LOW"
                 else:
                     color = "green"
-                    status_text = f"Resets: {reset_time}"
                 # Confidence indicator
                 conf_indicator = ""

 Connects to a running proxy to display quota and usage statistics.
 Uses only httpx + rich (no heavy rotator_library imports).
+TODO: Missing Features & Improvements
+======================================
+Display Improvements:
+- [ ] Add color legend/help screen explaining status colors and symbols
+- [ ] Show credential email/project ID if available (currently just filename)
+- [ ] Add keyboard shortcut hints (e.g., "Press ? for help")
+- [ ] Support terminal resize / responsive layout
+Global Stats Fix:
+- [ ] HACK: Global requests currently set to current period requests only
+      (see client.py get_quota_stats). This doesn't include archived stats.
+      Fix requires tracking archived requests per quota group in usage_manager.py
+      to avoid double-counting models that share quota groups.
+Data & Refresh:
+- [ ] Auto-refresh option (configurable interval)
+- [ ] Show last refresh timestamp more prominently
+- [ ] Cache invalidation when switching between current/global view
+- [ ] Support for non-OAuth providers (API keys like nvapi-*, gsk_*, etc.)
+Remote Management:
+- [ ] Test connection before saving remote
+- [ ] Import/export remote configurations
+- [ ] SSH tunnel support for remote proxies
+Quota Groups:
+- [ ] Show which models are in each quota group (expandable)
+- [ ] Historical quota usage graphs (if data available)
+- [ ] Alerts/notifications when quota is low
+Credential Details:
+- [ ] Show per-model breakdown within quota groups
+- [ ] Edit credential priority/tier manually
+- [ ] Disable/enable individual credentials
 """
 import os
             self.last_error = str(e)
             return None
+    def _merge_provider_stats(self, provider: str, result: Dict[str, Any]) -> None:
+        """
+        Merge provider-specific stats into the existing cache.
+        Updates just the specified provider's data and recalculates the
+        summary fields to reflect the change.
+        Args:
+            provider: Provider name that was refreshed
+            result: API response containing the refreshed provider data
+        """
+        if not self.cached_stats:
+            self.cached_stats = result
+            return
+        # Merge provider data
+        if "providers" in result and provider in result["providers"]:
+            if "providers" not in self.cached_stats:
+                self.cached_stats["providers"] = {}
+            self.cached_stats["providers"][provider] = result["providers"][provider]
+        # Update timestamp
+        if "timestamp" in result:
+            self.cached_stats["timestamp"] = result["timestamp"]
+        # Recalculate summary from all providers
+        self._recalculate_summary()
+    def _recalculate_summary(self) -> None:
+        """
+        Recalculate summary fields from all provider data in cache.
+        Updates both 'summary' and 'global_summary' based on current
+        provider stats.
+        """
+        providers = self.cached_stats.get("providers", {})
+        if not providers:
+            return
+        # Calculate summary from all providers
+        total_creds = 0
+        active_creds = 0
+        exhausted_creds = 0
+        total_requests = 0
+        total_input_cached = 0
+        total_input_uncached = 0
+        total_output = 0
+        total_cost = 0.0
+        for prov_stats in providers.values():
+            total_creds += prov_stats.get("credential_count", 0)
+            active_creds += prov_stats.get("active_count", 0)
+            exhausted_creds += prov_stats.get("exhausted_count", 0)
+            total_requests += prov_stats.get("total_requests", 0)
+            tokens = prov_stats.get("tokens", {})
+            total_input_cached += tokens.get("input_cached", 0)
+            total_input_uncached += tokens.get("input_uncached", 0)
+            total_output += tokens.get("output", 0)
+            cost = prov_stats.get("approx_cost")
+            if cost:
+                total_cost += cost
+        total_input = total_input_cached + total_input_uncached
+        input_cache_pct = (
+            round(total_input_cached / total_input * 100, 1) if total_input > 0 else 0
+        )
+        self.cached_stats["summary"] = {
+            "total_providers": len(providers),
+            "total_credentials": total_creds,
+            "active_credentials": active_creds,
+            "exhausted_credentials": exhausted_creds,
+            "total_requests": total_requests,
+            "tokens": {
+                "input_cached": total_input_cached,
+                "input_uncached": total_input_uncached,
+                "input_cache_pct": input_cache_pct,
+                "output": total_output,
+            },
+            "approx_total_cost": total_cost if total_cost > 0 else None,
+        }
+        # Also recalculate global_summary if it exists
+        if "global_summary" in self.cached_stats:
+            global_total_requests = 0
+            global_input_cached = 0
+            global_input_uncached = 0
+            global_output = 0
+            global_cost = 0.0
+            for prov_stats in providers.values():
+                global_data = prov_stats.get("global", prov_stats)
+                global_total_requests += global_data.get("total_requests", 0)
+                tokens = global_data.get("tokens", {})
+                global_input_cached += tokens.get("input_cached", 0)
+                global_input_uncached += tokens.get("input_uncached", 0)
+                global_output += tokens.get("output", 0)
+                cost = global_data.get("approx_cost")
+                if cost:
+                    global_cost += cost
+            global_total_input = global_input_cached + global_input_uncached
+            global_cache_pct = (
+                round(global_input_cached / global_total_input * 100, 1)
+                if global_total_input > 0
+                else 0
+            )
+            self.cached_stats["global_summary"] = {
+                "total_providers": len(providers),
+                "total_credentials": total_creds,
+                "total_requests": global_total_requests,
+                "tokens": {
+                    "input_cached": global_input_cached,
+                    "input_uncached": global_input_uncached,
+                    "input_cache_pct": global_cache_pct,
+                    "output": global_output,
+                },
+                "approx_total_cost": global_cost if global_cost > 0 else None,
+            }
     def post_action(
         self,
         action: str,
                     return None
                 result = response.json()
+                # If scope is provider-specific, merge into existing cache
+                if scope == "provider" and provider and self.cached_stats:
+                    self._merge_provider_stats(provider, result)
+                else:
+                    # Full refresh - replace everything
+                    self.cached_stats = result
                 self.last_error = None
                 return result
                         tiers = group_stats.get("tiers", {})
                         # Format tier info: "5(15)f/2s" = 5 active out of 15 free, 2 standard all active
+                        # Sort by priority (lower number = higher priority, appears first)
                         tier_parts = []
+                        sorted_tiers = sorted(
+                            tiers.items(), key=lambda x: x[1].get("priority", 10)
+                        )
+                        for tier_name, tier_info in sorted_tiers:
                             if tier_name == "unknown":
                                 continue  # Skip unknown tiers in display
                             total_t = tier_info.get("total", 0)
         valid_choices = [str(i) for i in range(1, len(provider_list) + 1)]
         valid_choices.extend(["r", "R", "s", "S", "m", "M", "b", "B", "g", "G"])
+        choice = Prompt.ask("Select option", default="").strip()
         if choice.lower() == "b":
             self.running = False
+        elif choice == "":
+            # Empty input - just refresh the screen
+            pass
         elif choice.lower() == "g":
             # Toggle view mode
             self.view_mode = "global" if self.view_mode == "current" else "current"
                 ):
                     self.post_action("reload", scope="all")
             elif choice == "F" and has_quota_groups:
+                result = None
                 with self.console.status(
                     f"[bold]Fetching live quota for ALL {provider} credentials...",
                     spinner="dots",
                     result = self.post_action(
                         "force_refresh", scope="provider", provider=provider
                     )
+                # Handle result OUTSIDE spinner
+                if result and result.get("refresh_result"):
+                    rr = result["refresh_result"]
+                    self.console.print(
+                        f"\n[green]Refreshed {rr.get('credentials_refreshed', 0)} credentials "
+                        f"in {rr.get('duration_ms', 0)}ms[/green]"
+                    )
+                    if rr.get("errors"):
+                        for err in rr["errors"]:
+                            self.console.print(f"[red]  Error: {err}[/red]")
+                    Prompt.ask("Press Enter to continue", default="")
             elif choice.startswith("F") and choice[1:].isdigit() and has_quota_groups:
                 idx = int(choice[1:])
                 credentials = (
                     cred = credentials[idx - 1]
                     cred_id = cred.get("identifier", "")
                     email = cred.get("email", cred_id)
+                    result = None
                     with self.console.status(
                         f"[bold]Fetching live quota for {email}...", spinner="dots"
                     ):
                             provider=provider,
                             credential=cred_id,
                         )
+                    # Handle result OUTSIDE spinner
+                    if result and result.get("refresh_result"):
+                        rr = result["refresh_result"]
+                        self.console.print(
+                            f"\n[green]Refreshed in {rr.get('duration_ms', 0)}ms[/green]"
+                        )
+                        if rr.get("errors"):
+                            for err in rr["errors"]:
+                                self.console.print(f"[red]  Error: {err}[/red]")
+                        Prompt.ask("Press Enter to continue", default="")
     def _render_credential_panel(self, idx: int, cred: Dict[str, Any], provider: str):
         """Render a single credential as a panel."""
                 display = group_stats.get("display", f"{requests_used}/?")
                 bar = create_progress_bar(remaining_pct)
+                # Build status text - always show reset time if available
+                has_reset_time = reset_time and reset_time != "-"
                 # Color based on status
                 if is_exhausted:
                     color = "red"
+                    if has_reset_time:
+                        status_text = f"⛔ Resets: {reset_time}"
+                    else:
+                        status_text = "⛔ EXHAUSTED"
                 elif remaining_pct is not None and remaining_pct < 20:
                     color = "yellow"
+                    if has_reset_time:
+                        status_text = f"⚠️ Resets: {reset_time}"
+                    else:
+                        status_text = "⚠️ LOW"
                 else:
                     color = "green"
+                    if has_reset_time:
+                        status_text = f"Resets: {reset_time}"
+                    else:
+                        status_text = ""  # Hide if unused/no reset time
                 # Confidence indicator
                 conf_indicator = ""

src/rotator_library/client.py CHANGED Viewed

@@ -2678,9 +2678,18 @@ class RotatingClient:
                             tier = provider_instance.project_tier_cache.get(cred_path)
                         tier = tier or "unknown"
-                        # Initialize tier entry if needed
                         if tier not in group_stats["tiers"]:
-                            group_stats["tiers"][tier] = {"total": 0, "active": 0}
                         group_stats["tiers"][tier]["total"] += 1
                         # Find model with VALID baseline (not just any model with stats)
@@ -2745,16 +2754,28 @@ class RotatingClient:
                     for group_name, group_models in quota_groups.items():
                         # Find model with VALID baseline (prefer over any model with stats)
                         model_stats = None
                         for model in group_models:
                             candidate = self._find_model_stats_in_data(
                                 models_data, model, provider, provider_instance
                             )
                             if candidate:
                                 baseline = candidate.get("baseline_remaining_fraction")
                                 if baseline is not None:
                                     model_stats = candidate
-                                    break
                                 # Keep first found as fallback
                                 if model_stats is None:
                                     model_stats = candidate
@@ -2763,7 +2784,10 @@ class RotatingClient:
                             baseline = model_stats.get("baseline_remaining_fraction")
                             max_req = model_stats.get("quota_max_requests")
                             req_count = model_stats.get("request_count", 0)
-                            reset_ts = model_stats.get("quota_reset_ts")
                             remaining_pct = (
                                 int(baseline * 100) if baseline is not None else None
@@ -2797,6 +2821,25 @@ class RotatingClient:
                                 ),
                             }
                     # Try to get email from provider's cache
                     cred_path = cred.get("full_path", "")
                     if hasattr(provider_instance, "project_tier_cache"):

                             tier = provider_instance.project_tier_cache.get(cred_path)
                         tier = tier or "unknown"
+                        # Initialize tier entry if needed with priority for sorting
                         if tier not in group_stats["tiers"]:
+                            priority = 10  # default
+                            if hasattr(provider_instance, "_resolve_tier_priority"):
+                                priority = provider_instance._resolve_tier_priority(
+                                    tier
+                                )
+                            group_stats["tiers"][tier] = {
+                                "total": 0,
+                                "active": 0,
+                                "priority": priority,
+                            }
                         group_stats["tiers"][tier]["total"] += 1
                         # Find model with VALID baseline (not just any model with stats)
                     for group_name, group_models in quota_groups.items():
                         # Find model with VALID baseline (prefer over any model with stats)
+                        # Also track the best reset_ts across all models in the group
                         model_stats = None
+                        best_reset_ts = None
                         for model in group_models:
                             candidate = self._find_model_stats_in_data(
                                 models_data, model, provider, provider_instance
                             )
                             if candidate:
+                                # Track the best (latest) reset_ts from any model in group
+                                candidate_reset_ts = candidate.get("quota_reset_ts")
+                                if candidate_reset_ts:
+                                    if (
+                                        best_reset_ts is None
+                                        or candidate_reset_ts > best_reset_ts
+                                    ):
+                                        best_reset_ts = candidate_reset_ts
                                 baseline = candidate.get("baseline_remaining_fraction")
                                 if baseline is not None:
                                     model_stats = candidate
+                                    # Don't break - continue to find best reset_ts
                                 # Keep first found as fallback
                                 if model_stats is None:
                                     model_stats = candidate
                             baseline = model_stats.get("baseline_remaining_fraction")
                             max_req = model_stats.get("quota_max_requests")
                             req_count = model_stats.get("request_count", 0)
+                            # Use best_reset_ts from any model in the group
+                            reset_ts = best_reset_ts or model_stats.get(
+                                "quota_reset_ts"
+                            )
                             remaining_pct = (
                                 int(baseline * 100) if baseline is not None else None
                                 ),
                             }
+                    # Recalculate credential's requests from model_groups
+                    # This fixes double-counting when models share quota groups
+                    if cred.get("model_groups"):
+                        group_requests = sum(
+                            g.get("requests_used", 0)
+                            for g in cred["model_groups"].values()
+                        )
+                        cred["requests"] = group_requests
+                        # HACK: Fix global requests if present
+                        # This is a simplified fix that sets global.requests = current group_requests.
+                        # TODO: Properly track archived requests per quota group in usage_manager.py
+                        # so that global stats correctly sum: current_period + archived_periods
+                        # without double-counting models that share quota groups.
+                        # See: usage_manager.py lines 2388-2404 where global stats are built
+                        # by iterating all models (causing double-counting for grouped models).
+                        if cred.get("global"):
+                            cred["global"]["requests"] = group_requests
                     # Try to get email from provider's cache
                     cred_path = cred.get("full_path", "")
                     if hasattr(provider_instance, "project_tier_cache"):