Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Dec 16, 2025

Commit

8b4ff52

1 Parent(s): 846c165

feat(quota-viewer): ✨ add quota and usage statistics viewer system

This introduces a comprehensive quota and usage statistics monitoring system with API endpoints and a rich terminal UI for real-time credential consumption visibility.

Key components:
- Add GET/POST /v1/quota-stats API endpoints to main proxy for retrieving and refreshing statistics
- Implement full-featured TUI viewer (quota_viewer.py) with provider summaries, credential details, progress bars, and remote management
- Add configuration system (quota_viewer_config.py) supporting multiple remote proxies with API key management
- Extend RotatingClient with get_quota_stats(), reload_usage_from_disk(), and force_refresh_quota() methods
- Enhance usage_manager with get_stats_for_endpoint() for aggregated statistics
- Integrate viewer into launcher TUI as new menu option 5

Features include toggle between current and global/lifetime stats, token formatting, cooldown timers, quota group visualization, and estimated cost tracking. Supports Antigravity quota group enrichment and live API refresh.

Also in this commit:
- fix(antigravity): increase empty response retry attempts from 4 to 6

Files changed (7) hide show

src/proxy_app/launcher_tui.py +23 -6
src/proxy_app/main.py +139 -0
src/proxy_app/quota_viewer.py +1086 -0
src/proxy_app/quota_viewer_config.py +288 -0
src/rotator_library/client.py +277 -0
src/rotator_library/providers/antigravity_provider.py +1 -1
src/rotator_library/usage_manager.py +476 -0

src/proxy_app/launcher_tui.py CHANGED Viewed

@@ -429,9 +429,10 @@ class LauncherTUI:
             self.console.print("   3. 🔑 Manage Credentials")
         self.console.print("   4. 📊 View Provider & Advanced Settings")
-        self.console.print("   5. 🔄 Reload Configuration")
-        self.console.print("   6. ℹ️  About")
-        self.console.print("   7. 🚪 Exit")
         self.console.print()
         self.console.print("━" * 70)
@@ -439,7 +440,7 @@ class LauncherTUI:
         choice = Prompt.ask(
             "Select option",
-            choices=["1", "2", "3", "4", "5", "6", "7"],
             show_choices=False,
         )
@@ -452,12 +453,14 @@ class LauncherTUI:
         elif choice == "4":
             self.show_provider_settings_menu()
         elif choice == "5":
             load_dotenv(dotenv_path=_get_env_file(), override=True)
             self.config = LauncherConfig()  # Reload config
             self.console.print("\n[green]✅ Configuration reloaded![/green]")
-        elif choice == "6":
-            self.show_about()
         elif choice == "7":
             self.running = False
             sys.exit(0)
@@ -874,6 +877,20 @@ class LauncherTUI:
         # Reload environment after settings tool
         load_dotenv(dotenv_path=_get_env_file(), override=True)
     def show_about(self):
         """Display About page with project information"""
         clear_screen()

             self.console.print("   3. 🔑 Manage Credentials")
         self.console.print("   4. 📊 View Provider & Advanced Settings")
+        self.console.print("   5. 📈 View Quota & Usage Stats")
+        self.console.print("   6. 🔄 Reload Configuration")
+        self.console.print("   7. ℹ️  About")
+        self.console.print("   8. 🚪 Exit")
         self.console.print()
         self.console.print("━" * 70)
         choice = Prompt.ask(
             "Select option",
+            choices=["1", "2", "3", "4", "5", "6", "7", "8"],
             show_choices=False,
         )
         elif choice == "4":
             self.show_provider_settings_menu()
         elif choice == "5":
+            self.launch_quota_viewer()
+        elif choice == "6":
             load_dotenv(dotenv_path=_get_env_file(), override=True)
             self.config = LauncherConfig()  # Reload config
             self.console.print("\n[green]✅ Configuration reloaded![/green]")
         elif choice == "7":
+            self.show_about()
+        elif choice == "8":
             self.running = False
             sys.exit(0)
         # Reload environment after settings tool
         load_dotenv(dotenv_path=_get_env_file(), override=True)
+    def launch_quota_viewer(self):
+        """Show a banner and hand control to the quota/usage stats viewer."""
+        clear_screen()
+        rule = "━" * 70
+        for banner_line in (rule, "Quota & Usage Statistics Viewer", rule):
+            self.console.print(banner_line)
+        self.console.print()
+        # Deferred import: the viewer module is only loaded once this menu
+        # entry is actually selected.
+        from proxy_app.quota_viewer import run_quota_viewer
+        run_quota_viewer()
     def show_about(self):
         """Display About page with project information"""
         clear_screen()

src/proxy_app/main.py CHANGED Viewed

@@ -1148,6 +1148,145 @@ async def list_providers(_=Depends(verify_api_key)):
     return list(PROVIDER_PLUGINS.keys())
 @app.post("/v1/token-count")
 async def token_count(
     request: Request,

     return list(PROVIDER_PLUGINS.keys())
+@app.get("/v1/quota-stats")
+async def get_quota_stats(
+    request: Request,
+    client: RotatingClient = Depends(get_rotating_client),
+    _=Depends(verify_api_key),
+    provider: str = None,
+):
+    """
+    Serve quota and usage statistics for the proxy's credentials.
+    The handler only forwards to ``client.get_quota_stats``; per the endpoint's
+    original description that data comes from the proxy's cache and involves
+    no external API calls. POST to the same path to reload from disk or to
+    force a refresh.
+    Query Parameters:
+        provider: Optional; passed through as the provider filter so only
+            that provider's stats are returned.
+    Returns:
+        {
+            "providers": {
+                "provider_name": {
+                    "credential_count": int,
+                    "active_count": int,
+                    "on_cooldown_count": int,
+                    "exhausted_count": int,
+                    "total_requests": int,
+                    "tokens": {...},
+                    "approx_cost": float | null,
+                    "quota_groups": {...},  // For Antigravity
+                    "credentials": [...]
+                }
+            },
+            "summary": {...},
+            "data_source": "cache",
+            "timestamp": float
+        }
+    Raises:
+        HTTPException: 500 carrying the error text when stats collection fails.
+    """
+    try:
+        return await client.get_quota_stats(provider_filter=provider)
+    except Exception as exc:
+        # Log server-side, then surface the failure to the caller as a 500.
+        logging.error(f"Failed to get quota stats: {exc}")
+        raise HTTPException(status_code=500, detail=str(exc))
+@app.post("/v1/quota-stats")
+async def refresh_quota_stats(
+    request: Request,
+    client: RotatingClient = Depends(get_rotating_client),
+    _=Depends(verify_api_key),
+):
+    """
+    Refresh quota and usage statistics, then return the updated stats.
+    Request body (must be a JSON object):
+        {
+            "action": "reload" | "force_refresh",
+            "scope": "all" | "provider" | "credential",
+            "provider": "antigravity",  // required if scope != "all"
+            "credential": "antigravity_oauth_1.json"  // required if scope == "credential"
+        }
+    Actions:
+        - reload: Re-read data from disk (no external API calls)
+        - force_refresh: For Antigravity, fetch live quota from API.
+                        For other providers, same as reload.
+    Returns:
+        Same as GET, plus a "refresh_result" field with operation details;
+        "data_source" is set to "refreshed".
+    Raises:
+        HTTPException: 400 for a malformed or non-object body or for invalid
+            parameters; 500 for any other failure.
+    """
+    try:
+        # A body that is not valid JSON, or is JSON but not an object, is a
+        # client error: answer 400 rather than letting it fall through to the
+        # generic 500 handler at the bottom.
+        try:
+            data = await request.json()
+        except ValueError:
+            raise HTTPException(
+                status_code=400,
+                detail="Request body must be valid JSON",
+            )
+        if not isinstance(data, dict):
+            raise HTTPException(
+                status_code=400,
+                detail="Request body must be a JSON object",
+            )
+        action = data.get("action", "reload")
+        scope = data.get("scope", "all")
+        provider = data.get("provider")
+        credential = data.get("credential")
+        # Validate parameters
+        if action not in ("reload", "force_refresh"):
+            raise HTTPException(
+                status_code=400,
+                detail="action must be 'reload' or 'force_refresh'",
+            )
+        if scope not in ("all", "provider", "credential"):
+            raise HTTPException(
+                status_code=400,
+                detail="scope must be 'all', 'provider', or 'credential'",
+            )
+        if scope in ("provider", "credential") and not provider:
+            raise HTTPException(
+                status_code=400,
+                detail="'provider' is required when scope is 'provider' or 'credential'",
+            )
+        if scope == "credential" and not credential:
+            raise HTTPException(
+                status_code=400,
+                detail="'credential' is required when scope is 'credential'",
+            )
+        refresh_result = {
+            "action": action,
+            "scope": scope,
+            "provider": provider,
+            "credential": credential,
+        }
+        if action == "reload":
+            # Just reload from disk
+            start_time = time.time()
+            await client.reload_usage_from_disk()
+            refresh_result["duration_ms"] = int((time.time() - start_time) * 1000)
+            refresh_result["success"] = True
+            refresh_result["message"] = "Reloaded usage data from disk"
+        elif action == "force_refresh":
+            # Force refresh from external API (for supported providers like Antigravity)
+            result = await client.force_refresh_quota(
+                provider=provider if scope in ("provider", "credential") else None,
+                credential=credential if scope == "credential" else None,
+            )
+            refresh_result.update(result)
+            # Treat a result without "failed_count" as "nothing failed" instead
+            # of turning the whole request into a 500 via KeyError.
+            refresh_result["success"] = result.get("failed_count", 0) == 0
+        # Get updated stats
+        stats = await client.get_quota_stats(provider_filter=provider)
+        stats["refresh_result"] = refresh_result
+        stats["data_source"] = "refreshed"
+        return stats
+    except HTTPException:
+        # Validation errors raised above must keep their own status code.
+        raise
+    except Exception as e:
+        logging.error(f"Failed to refresh quota stats: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
 @app.post("/v1/token-count")
 async def token_count(
     request: Request,

src/proxy_app/quota_viewer.py ADDED Viewed

	@@ -0,0 +1,1086 @@

+"""
+Lightweight Quota Stats Viewer TUI.
+Connects to a running proxy to display quota and usage statistics.
+Uses only httpx + rich (no heavy rotator_library imports).
+"""
+import os
+import sys
+import time
+from datetime import datetime, timezone
+from typing import Any, Dict, List, Optional, Tuple
+import httpx
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import BarColumn, Progress, TextColumn
+from rich.prompt import Prompt
+from rich.table import Table
+from rich.text import Text
+from .quota_viewer_config import QuotaViewerConfig
+def clear_screen():
+    """Wipe the terminal by shelling out to the platform's clear command."""
+    command = "clear"
+    if os.name == "nt":
+        # Windows shells use `cls` rather than `clear`.
+        command = "cls"
+    os.system(command)
+def format_tokens(count: int) -> str:
+    """Format token count for display (e.g., 125000 -> 125k).
+    Args:
+        count: Number of tokens.
+    Returns:
+        "<x.y>M" for values that round to a million or more, "<n>k" for
+        thousands, otherwise the plain number as a string.
+    """
+    # Switch to the "M" suffix at 999_500 rather than 1_000_000: values in
+    # between would otherwise round up to the odd-looking "1000k".
+    if count >= 999_500:
+        return f"{count / 1_000_000:.1f}M"
+    if count >= 1_000:
+        return f"{count / 1_000:.0f}k"
+    return str(count)
+def format_cost(cost: Optional[float]) -> str:
+    """Render a dollar amount; a missing or zero cost is shown as "-"."""
+    if cost in (None, 0):
+        return "-"
+    # Sub-cent amounts get four decimals so they do not collapse to $0.00.
+    decimals = 4 if cost < 0.01 else 2
+    return f"${cost:.{decimals}f}"
+def format_time_ago(timestamp: Optional[float]) -> str:
+    """Format timestamp as relative time (e.g., '5 min ago').
+    Args:
+        timestamp: Epoch seconds, or None/0 when there is no timestamp.
+    Returns:
+        "Never" for a missing timestamp, "Unknown" when the value cannot be
+        interpreted, otherwise the age in seconds, minutes, hours or days.
+    """
+    if not timestamp:
+        return "Never"
+    try:
+        delta = time.time() - timestamp
+        # A timestamp slightly in the future (e.g. clock skew between this
+        # machine and the one that produced it) would otherwise render as a
+        # negative age such as "-3s ago".
+        if delta < 0:
+            delta = 0
+        if delta < 60:
+            return f"{int(delta)}s ago"
+        elif delta < 3600:
+            return f"{int(delta / 60)} min ago"
+        elif delta < 86400:
+            return f"{int(delta / 3600)}h ago"
+        else:
+            return f"{int(delta / 86400)}d ago"
+    except (TypeError, ValueError, OSError):
+        # TypeError covers non-numeric timestamps (e.g. a string from JSON).
+        return "Unknown"
+def format_reset_time(iso_time: Optional[str]) -> str:
+    """Render an ISO-8601 timestamp as local "Mon DD HH:MM"; "-" when absent."""
+    if not iso_time:
+        return "-"
+    try:
+        # fromisoformat may not accept a trailing "Z", so spell out UTC.
+        normalized = iso_time.replace("Z", "+00:00")
+        parsed = datetime.fromisoformat(normalized)
+    except (ValueError, AttributeError):
+        # Unparseable input: fall back to showing the first 16 characters.
+        return iso_time[:16]
+    # astimezone() with no argument converts to the system's local zone.
+    return parsed.astimezone().strftime("%b %d %H:%M")
+def create_progress_bar(percent: Optional[int], width: int = 10) -> str:
+    """Create a text-based progress bar.
+    Args:
+        percent: Fill level; values outside 0-100 are clamped. None yields an
+            empty bar.
+        width: Total number of characters in the bar.
+    Returns:
+        A string of exactly ``width`` characters, filled cells first.
+    """
+    if percent is None:
+        return "░" * width
+    # Clamp so an out-of-range value cannot make the bar longer than `width`
+    # (e.g. 150 used to give 15 filled cells, -10 gave 11 empty ones).
+    clamped = min(100, max(0, percent))
+    filled = int(clamped / 100 * width)
+    return "▓" * filled + "░" * (width - filled)
+def format_cooldown(seconds: int) -> str:
+    """Turn a cooldown in seconds into a compact string such as "2m 5s" or "1h 2m"."""
+    if seconds < 60:
+        return f"{seconds}s"
+    if seconds < 3600:
+        whole_minutes, leftover_secs = divmod(seconds, 60)
+        if leftover_secs > 0:
+            return f"{whole_minutes}m {leftover_secs}s"
+        return f"{whole_minutes}m"
+    # An hour or more: seconds are dropped, only hours and minutes are shown.
+    whole_hours, remainder = divmod(seconds, 3600)
+    leftover_mins = remainder // 60
+    if leftover_mins > 0:
+        return f"{whole_hours}h {leftover_mins}m"
+    return f"{whole_hours}h"
+class QuotaViewer:
+    """Main Quota Viewer TUI class."""
+    def __init__(self, config: Optional[QuotaViewerConfig] = None):
+        """
+        Set up the console, configuration and initial viewer state.
+        Args:
+            config: Configuration to use; a fresh QuotaViewerConfig is built
+                when none (or a falsy one) is supplied.
+        """
+        self.console = Console()
+        if config:
+            self.config = config
+        else:
+            self.config = QuotaViewerConfig()
+        self.config.sync_with_launcher_config()
+        # No remote selected, nothing fetched and no error recorded yet.
+        self.current_remote: Optional[Dict[str, Any]] = None
+        self.cached_stats: Optional[Dict[str, Any]] = None
+        self.last_error: Optional[str] = None
+        self.running = True
+        # Either "current" or "global".
+        self.view_mode = "current"
+    def _get_headers(self) -> Dict[str, str]:
+        """Build request headers; adds a Bearer token when the remote has an api_key."""
+        remote = self.current_remote
+        if not remote:
+            return {}
+        api_key = remote.get("api_key")
+        if not api_key:
+            return {}
+        return {"Authorization": f"Bearer {api_key}"}
+    def _get_base_url(self) -> str:
+        """Get base URL for the current remote.
+        Returns:
+            "<scheme>://<host>:<port>" for the selected remote, or
+            "http://127.0.0.1:8000" when no remote is selected.
+        """
+        import ipaddress
+        if not self.current_remote:
+            return "http://127.0.0.1:8000"
+        host = self.current_remote.get("host", "127.0.0.1")
+        port = self.current_remote.get("port", 8000)
+        # A dotted host is only a domain name if it is not an IP literal;
+        # otherwise addresses like the default 127.0.0.1 would be forced onto
+        # https even though the local fallback above is plain http.
+        try:
+            ipaddress.ip_address(host)
+            is_ip_literal = True
+        except ValueError:
+            is_ip_literal = False
+        looks_like_domain = "." in host and not is_ip_literal
+        # Use https if port is 443 or host looks like a domain
+        scheme = "https" if port == 443 or looks_like_domain else "http"
+        return f"{scheme}://{host}:{port}"
+    def check_connection(
+        self, remote: Dict[str, Any], timeout: float = 3.0
+    ) -> Tuple[bool, str]:
+        """
+        Check if a remote proxy is reachable.
+        Issues a GET to the remote's root URL and maps the outcome to a short
+        status label.
+        Args:
+            remote: Remote configuration dict
+            timeout: Connection timeout in seconds
+        Returns:
+            Tuple of (is_online, status_message)
+        """
+        import ipaddress
+        host = remote.get("host", "127.0.0.1")
+        port = remote.get("port", 8000)
+        # A dotted host is only a domain name if it is not an IP literal;
+        # otherwise plain-http proxies on addresses such as 127.0.0.1 would be
+        # probed over https and wrongly reported as unreachable.
+        try:
+            ipaddress.ip_address(host)
+            is_ip_literal = True
+        except ValueError:
+            is_ip_literal = False
+        looks_like_domain = "." in host and not is_ip_literal
+        scheme = "https" if port == 443 or looks_like_domain else "http"
+        url = f"{scheme}://{host}:{port}/"
+        headers = {}
+        if remote.get("api_key"):
+            headers["Authorization"] = f"Bearer {remote['api_key']}"
+        try:
+            with httpx.Client(timeout=timeout) as client:
+                response = client.get(url, headers=headers)
+                if response.status_code == 200:
+                    return True, "Online"
+                elif response.status_code == 401:
+                    return False, "Auth failed"
+                else:
+                    return False, f"HTTP {response.status_code}"
+        except httpx.ConnectError:
+            return False, "Offline"
+        except httpx.TimeoutException:
+            return False, "Timeout"
+        except Exception as e:
+            # Keep the label short enough for a status column.
+            return False, str(e)[:20]
+    def fetch_stats(self, provider: Optional[str] = None) -> Optional[Dict[str, Any]]:
+        """
+        Fetch quota stats from the current remote.
+        On success the response is stored in ``self.cached_stats`` and
+        ``self.last_error`` is cleared; on failure ``self.last_error`` is set
+        and the previous cache is left untouched.
+        Args:
+            provider: Optional provider filter
+        Returns:
+            Stats dict or None on failure
+        """
+        url = f"{self._get_base_url()}/v1/quota-stats"
+        # Hand the filter to httpx as a query parameter so it is URL-encoded
+        # instead of being pasted into the URL verbatim.
+        params = {"provider": provider} if provider else None
+        try:
+            with httpx.Client(timeout=30.0) as client:
+                response = client.get(
+                    url, headers=self._get_headers(), params=params
+                )
+                if response.status_code == 401:
+                    self.last_error = "Authentication failed. Check API key."
+                    return None
+                elif response.status_code != 200:
+                    self.last_error = (
+                        f"HTTP {response.status_code}: {response.text[:100]}"
+                    )
+                    return None
+                self.cached_stats = response.json()
+                self.last_error = None
+                return self.cached_stats
+        except httpx.ConnectError:
+            self.last_error = "Connection failed. Is the proxy running?"
+            return None
+        except httpx.TimeoutException:
+            self.last_error = "Request timed out."
+            return None
+        except Exception as e:
+            self.last_error = str(e)
+            return None
+    def post_action(
+        self,
+        action: str,
+        scope: str = "all",
+        provider: Optional[str] = None,
+        credential: Optional[str] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Send a refresh request to the proxy's quota-stats endpoint.
+        A successful response replaces ``self.cached_stats`` and clears
+        ``self.last_error``; any failure records a message in
+        ``self.last_error`` and returns None.
+        Args:
+            action: "reload" or "force_refresh"
+            scope: "all", "provider", or "credential"
+            provider: Provider name (required for scope != "all")
+            credential: Credential identifier (required for scope == "credential")
+        Returns:
+            Response dict or None on failure
+        """
+        payload = {"action": action, "scope": scope}
+        # Optional fields are only sent when they carry a value.
+        for key, value in (("provider", provider), ("credential", credential)):
+            if value:
+                payload[key] = value
+        endpoint = f"{self._get_base_url()}/v1/quota-stats"
+        try:
+            with httpx.Client(timeout=60.0) as http:
+                response = http.post(
+                    endpoint, headers=self._get_headers(), json=payload
+                )
+                status = response.status_code
+                if status == 401:
+                    self.last_error = "Authentication failed. Check API key."
+                    return None
+                if status != 200:
+                    self.last_error = f"HTTP {status}: {response.text[:100]}"
+                    return None
+                self.cached_stats = response.json()
+                self.last_error = None
+                return self.cached_stats
+        except Exception as exc:
+            # Translate the failure into a user-facing message.
+            if isinstance(exc, httpx.ConnectError):
+                self.last_error = "Connection failed. Is the proxy running?"
+            elif isinstance(exc, httpx.TimeoutException):
+                self.last_error = "Request timed out."
+            else:
+                self.last_error = str(exc)
+            return None
+    # =========================================================================
+    # DISPLAY SCREENS
+    # =========================================================================
+    def show_connection_error(self):
+        """Display connection error screen.
+        Shows ``self.last_error`` (or "Unknown error") in a red panel and
+        waits for Enter before returning.
+        """
+        from rich.markup import escape
+        clear_screen()
+        # last_error can contain server-supplied text (response bodies,
+        # exception messages); escape it so stray "[...]" sequences are shown
+        # literally instead of being parsed as rich markup.
+        reason = escape(self.last_error or "Unknown error")
+        self.console.print(
+            Panel(
+                Text.from_markup(
+                    "[bold red]Connection Error[/bold red]\n\n"
+                    f"{reason}\n\n"
+                    "[bold]This tool requires the proxy to be running.[/bold]\n"
+                    "Start the proxy first, then try again.\n\n"
+                    "[dim]Tip: Select option 1 from the main menu to run the proxy.[/dim]"
+                ),
+                border_style="red",
+                expand=False,
+            )
+        )
+        Prompt.ask("\nPress Enter to return to main menu", default="")
+    def show_summary_screen(self):
+        """Display the main summary screen with all providers.
+        Renders a header, one table row per provider found in
+        ``self.cached_stats`` (plus extra rows for additional quota groups),
+        a totals line and the menu, then reads one menu choice and acts on it.
+        Choosing "B" sets ``self.running`` to False; any other choice returns
+        after its action.
+        """
+        clear_screen()
+        # Header
+        remote_name = (
+            self.current_remote.get("name", "Unknown")
+            if self.current_remote
+            else "None"
+        )
+        remote_host = self.current_remote.get("host", "") if self.current_remote else ""
+        remote_port = self.current_remote.get("port", "") if self.current_remote else ""
+        # Calculate data age
+        data_age = ""
+        if self.cached_stats and self.cached_stats.get("timestamp"):
+            age_seconds = int(time.time() - self.cached_stats["timestamp"])
+            data_age = f"Data age: {age_seconds}s"
+        # View mode indicator
+        if self.view_mode == "global":
+            view_label = "[magenta]📊 Global/Lifetime[/magenta]"
+        else:
+            view_label = "[cyan]📈 Current Period[/cyan]"
+        self.console.print("━" * 78)
+        self.console.print(
+            f"[bold cyan]📈 Quota & Usage Statistics[/bold cyan]  |  {view_label}"
+        )
+        self.console.print("━" * 78)
+        self.console.print(
+            f"Connected to: [bold]{remote_name}[/bold] ({remote_host}:{remote_port}) "
+            f"[green]✅[/green] | {data_age}"
+        )
+        self.console.print()
+        if not self.cached_stats:
+            self.console.print("[yellow]No data available. Press R to reload.[/yellow]")
+        else:
+            # Build provider table
+            table = Table(box=None, show_header=True, header_style="bold")
+            table.add_column("Provider", style="cyan", min_width=12)
+            table.add_column("Creds", justify="center", min_width=6)
+            table.add_column("Quota Status", min_width=28)
+            table.add_column("Requests", justify="right", min_width=9)
+            table.add_column("Tokens (in/out)", min_width=22)
+            table.add_column("Cost", justify="right", min_width=8)
+            providers = self.cached_stats.get("providers", {})
+            # NOTE(review): provider_list is not read inside this branch; it is
+            # rebuilt below, just before the menu is printed.
+            provider_list = list(providers.keys())
+            for idx, (provider, prov_stats) in enumerate(providers.items(), 1):
+                cred_count = prov_stats.get("credential_count", 0)
+                # Use global stats if in global mode
+                if self.view_mode == "global":
+                    # Falls back to the provider's top-level stats when the
+                    # response carries no "global" section.
+                    stats_source = prov_stats.get("global", prov_stats)
+                    total_requests = stats_source.get("total_requests", 0)
+                    tokens = stats_source.get("tokens", {})
+                    cost_value = stats_source.get("approx_cost")
+                else:
+                    total_requests = prov_stats.get("total_requests", 0)
+                    tokens = prov_stats.get("tokens", {})
+                    cost_value = prov_stats.get("approx_cost")
+                # Format tokens
+                input_total = tokens.get("input_cached", 0) + tokens.get(
+                    "input_uncached", 0
+                )
+                output = tokens.get("output", 0)
+                cache_pct = tokens.get("input_cache_pct", 0)
+                token_str = f"{format_tokens(input_total)}/{format_tokens(output)} ({cache_pct}% cached)"
+                # Format cost
+                cost_str = format_cost(cost_value)
+                # Build quota status string (for providers with quota groups)
+                quota_groups = prov_stats.get("quota_groups", {})
+                if quota_groups:
+                    quota_lines = []
+                    for group_name, group_stats in quota_groups.items():
+                        avg_pct = group_stats.get("avg_remaining_pct", 0)
+                        exhausted = group_stats.get("credentials_exhausted", 0)
+                        total = group_stats.get("credentials_total", 0)
+                        # Determine color based on remaining
+                        if exhausted > 0:
+                            color = "red"
+                            status = f"({exhausted}/{total} exhausted)"
+                        elif avg_pct < 20:
+                            color = "yellow"
+                            status = ""
+                        else:
+                            color = "green"
+                            status = ""
+                        bar = create_progress_bar(avg_pct)
+                        # Group names are cut to 10 characters for display.
+                        display_name = group_name[:10]
+                        quota_lines.append(
+                            f"[{color}]{display_name}: {avg_pct}% {bar}[/{color}] {status}"
+                        )
+                    # First line goes in the main row
+                    first_quota = quota_lines[0] if quota_lines else "-"
+                    table.add_row(
+                        provider,
+                        str(cred_count),
+                        first_quota,
+                        str(total_requests),
+                        token_str,
+                        cost_str,
+                    )
+                    # Additional quota lines as sub-rows
+                    for quota_line in quota_lines[1:]:
+                        table.add_row("", "", quota_line, "", "", "")
+                else:
+                    # No quota groups
+                    table.add_row(
+                        provider,
+                        str(cred_count),
+                        "-",
+                        str(total_requests),
+                        token_str,
+                        cost_str,
+                    )
+                # Add separator between providers (except last)
+                if idx < len(providers):
+                    table.add_row(
+                        "─" * 10, "─" * 4, "─" * 26, "─" * 7, "─" * 20, "─" * 6
+                    )
+            self.console.print(table)
+            # Summary line - use global_summary if in global mode
+            if self.view_mode == "global":
+                summary = self.cached_stats.get(
+                    "global_summary", self.cached_stats.get("summary", {})
+                )
+            else:
+                summary = self.cached_stats.get("summary", {})
+            total_creds = summary.get("total_credentials", 0)
+            total_requests = summary.get("total_requests", 0)
+            total_tokens = summary.get("tokens", {})
+            total_input = total_tokens.get("input_cached", 0) + total_tokens.get(
+                "input_uncached", 0
+            )
+            total_output = total_tokens.get("output", 0)
+            total_cost = format_cost(summary.get("approx_total_cost"))
+            self.console.print()
+            self.console.print(
+                f"[bold]Total:[/bold] {total_creds} credentials | "
+                f"{total_requests} requests | "
+                f"{format_tokens(total_input)}/{format_tokens(total_output)} tokens | "
+                f"{total_cost} cost"
+            )
+        # Menu
+        self.console.print()
+        self.console.print("━" * 78)
+        self.console.print()
+        # Build provider menu options
+        providers = self.cached_stats.get("providers", {}) if self.cached_stats else {}
+        provider_list = list(providers.keys())
+        for idx, provider in enumerate(provider_list, 1):
+            self.console.print(f"   {idx}. View [cyan]{provider}[/cyan] details")
+        self.console.print()
+        self.console.print("   G. Toggle view mode (current/global)")
+        self.console.print("   R. Reload all stats (re-read from proxy)")
+        self.console.print("   S. Switch remote")
+        self.console.print("   M. Manage remotes")
+        self.console.print("   B. Back to main menu")
+        self.console.print()
+        self.console.print("━" * 78)
+        # Get input
+        # NOTE(review): valid_choices is built but never passed to Prompt.ask
+        # or checked below; unrecognised input simply matches no branch.
+        valid_choices = [str(i) for i in range(1, len(provider_list) + 1)]
+        valid_choices.extend(["r", "R", "s", "S", "m", "M", "b", "B", "g", "G"])
+        choice = Prompt.ask("Select option", default="B").strip()
+        if choice.lower() == "b":
+            self.running = False
+        elif choice.lower() == "g":
+            # Toggle view mode
+            self.view_mode = "global" if self.view_mode == "current" else "current"
+        elif choice.lower() == "r":
+            with self.console.status("[bold]Reloading stats...", spinner="dots"):
+                self.post_action("reload", scope="all")
+        elif choice.lower() == "s":
+            self.show_switch_remote_screen()
+        elif choice.lower() == "m":
+            self.show_manage_remotes_screen()
+        elif choice.isdigit() and 1 <= int(choice) <= len(provider_list):
+            # Menu numbers are 1-based; provider_list is 0-based.
+            provider = provider_list[int(choice) - 1]
+            self.show_provider_detail_screen(provider)
+    def show_provider_detail_screen(self, provider: str):
+        """Display detailed stats for a specific provider."""
+        while True:
+            clear_screen()
+            # View mode indicator
+            if self.view_mode == "global":
+                view_label = "[magenta]Global/Lifetime[/magenta]"
+            else:
+                view_label = "[cyan]Current Period[/cyan]"
+            self.console.print("━" * 78)
+            self.console.print(
+                f"[bold cyan]📊 {provider.title()} - Detailed Stats[/bold cyan]  |  {view_label}"
+            )
+            self.console.print("━" * 78)
+            self.console.print()
+            if not self.cached_stats:
+                self.console.print("[yellow]No data available.[/yellow]")
+            else:
+                prov_stats = self.cached_stats.get("providers", {}).get(provider, {})
+                credentials = prov_stats.get("credentials", [])
+                if not credentials:
+                    self.console.print(
+                        "[dim]No credentials configured for this provider.[/dim]"
+                    )
+                else:
+                    for idx, cred in enumerate(credentials, 1):
+                        self._render_credential_panel(idx, cred, provider)
+                        self.console.print()
+            # Menu
+            self.console.print("━" * 78)
+            self.console.print()
+            self.console.print("   G.  Toggle view mode (current/global)")
+            self.console.print("   R.  Reload stats (from proxy cache)")
+            self.console.print("   RA. Reload all stats")
+            # Force refresh options (only for providers that support it)
+            has_quota_groups = bool(
+                self.cached_stats
+                and self.cached_stats.get("providers", {})
+                .get(provider, {})
+                .get("quota_groups")
+            )
+            if has_quota_groups:
+                self.console.print()
+                self.console.print(
+                    f"   F.  [yellow]Force refresh ALL {provider} quotas from API[/yellow]"
+                )
+                credentials = (
+                    self.cached_stats.get("providers", {})
+                    .get(provider, {})
+                    .get("credentials", [])
+                    if self.cached_stats
+                    else []
+                )
+                for idx, cred in enumerate(credentials, 1):
+                    identifier = cred.get("identifier", f"credential {idx}")
+                    email = cred.get("email", identifier)
+                    self.console.print(
+                        f"   F{idx}. Force refresh [{idx}] only ({email})"
+                    )
+            self.console.print()
+            self.console.print("   B.  Back to summary")
+            self.console.print()
+            self.console.print("━" * 78)
+            choice = Prompt.ask("Select option", default="B").strip().upper()
+            if choice == "B":
+                break
+            elif choice == "G":
+                # Toggle view mode
+                self.view_mode = "global" if self.view_mode == "current" else "current"
+            elif choice == "R":
+                with self.console.status(
+                    f"[bold]Reloading {provider} stats...", spinner="dots"
+                ):
+                    self.post_action("reload", scope="provider", provider=provider)
+            elif choice == "RA":
+                with self.console.status(
+                    "[bold]Reloading all stats...", spinner="dots"
+                ):
+                    self.post_action("reload", scope="all")
+            elif choice == "F" and has_quota_groups:
+                with self.console.status(
+                    f"[bold]Fetching live quota for ALL {provider} credentials...",
+                    spinner="dots",
+                ):
+                    result = self.post_action(
+                        "force_refresh", scope="provider", provider=provider
+                    )
+                    if result and result.get("refresh_result"):
+                        rr = result["refresh_result"]
+                        self.console.print(
+                            f"\n[green]Refreshed {rr.get('credentials_refreshed', 0)} credentials "
+                            f"in {rr.get('duration_ms', 0)}ms[/green]"
+                        )
+                        if rr.get("errors"):
+                            for err in rr["errors"]:
+                                self.console.print(f"[red]  Error: {err}[/red]")
+                        Prompt.ask("Press Enter to continue", default="")
+            elif choice.startswith("F") and choice[1:].isdigit() and has_quota_groups:
+                idx = int(choice[1:])
+                credentials = (
+                    self.cached_stats.get("providers", {})
+                    .get(provider, {})
+                    .get("credentials", [])
+                    if self.cached_stats
+                    else []
+                )
+                if 1 <= idx <= len(credentials):
+                    cred = credentials[idx - 1]
+                    cred_id = cred.get("identifier", "")
+                    email = cred.get("email", cred_id)
+                    with self.console.status(
+                        f"[bold]Fetching live quota for {email}...", spinner="dots"
+                    ):
+                        result = self.post_action(
+                            "force_refresh",
+                            scope="credential",
+                            provider=provider,
+                            credential=cred_id,
+                        )
+                        if result and result.get("refresh_result"):
+                            rr = result["refresh_result"]
+                            self.console.print(
+                                f"\n[green]Refreshed in {rr.get('duration_ms', 0)}ms[/green]"
+                            )
+                            if rr.get("errors"):
+                                for err in rr["errors"]:
+                                    self.console.print(f"[red]  Error: {err}[/red]")
+                            Prompt.ask("Press Enter to continue", default="")
+    def _render_credential_panel(self, idx: int, cred: Dict[str, Any], provider: str):
+        """
+        Render a single credential as a panel.
+        The panel title carries the index, the e-mail (falling back to the
+        identifier), the tier and a status badge. The body holds a usage
+        line, any model cooldowns that still have time left, and then either
+        one progress-bar row per quota group or a per-model breakdown.
+        Args:
+            idx: Number shown as "[idx]" in the panel title.
+            cred: Credential stats dict; keys are read with .get() so absent
+                keys fall back to defaults.
+            provider: Provider name; not read by this method.
+        """
+        identifier = cred.get("identifier", f"credential {idx}")
+        email = cred.get("email")
+        tier = cred.get("tier", "")
+        status = cred.get("status", "unknown")
+        # Check for active cooldowns ("or {}" also covers an explicit null value)
+        key_cooldown = cred.get("key_cooldown_remaining")
+        model_cooldowns = cred.get("model_cooldowns") or {}
+        has_cooldown = key_cooldown or model_cooldowns
+        # Status indicator
+        if status == "exhausted":
+            status_icon = "[red]⛔ Exhausted[/red]"
+        elif status == "cooldown" or has_cooldown:
+            if key_cooldown:
+                status_icon = f"[yellow]⚠️ Cooldown ({format_cooldown(int(key_cooldown))})[/yellow]"
+            else:
+                status_icon = "[yellow]⚠️ Cooldown[/yellow]"
+        else:
+            status_icon = "[green]✅ Active[/green]"
+        # Header line
+        display_name = email if email else identifier
+        tier_str = f" ({tier})" if tier else ""
+        header = f"[{idx}] {display_name}{tier_str} {status_icon}"
+        # Use global stats if in global mode (falls back to the credential itself)
+        if self.view_mode == "global":
+            stats_source = cred.get("global", cred)
+        else:
+            stats_source = cred
+        # Stats line
+        last_used = format_time_ago(cred.get("last_used_ts"))  # Always from current
+        requests = stats_source.get("requests", 0)
+        tokens = stats_source.get("tokens", {})
+        input_total = tokens.get("input_cached", 0) + tokens.get("input_uncached", 0)
+        output = tokens.get("output", 0)
+        cost = format_cost(stats_source.get("approx_cost"))
+        stats_line = (
+            f"Last used: {last_used} | Requests: {requests} | "
+            f"Tokens: {format_tokens(input_total)}/{format_tokens(output)}"
+        )
+        if cost != "-":
+            stats_line += f" | Cost: {cost}"
+        # Build panel content
+        content_lines = [
+            f"[dim]{stats_line}[/dim]",
+        ]
+        # Collect the cooldowns that still have time left before printing the
+        # section header, so the header never appears with nothing under it.
+        cooldown_lines = []
+        for model_name, cooldown_info in model_cooldowns.items():
+            # A missing or null "remaining_seconds" is treated as expired
+            remaining = cooldown_info.get("remaining_seconds") or 0
+            if remaining > 0:
+                # Shorten model name for display
+                short_model = model_name.split("/")[-1][:35]
+                cooldown_lines.append(
+                    f"  [yellow]⏱️ {short_model}: {format_cooldown(int(remaining))}[/yellow]"
+                )
+        if cooldown_lines:
+            content_lines.append("")
+            content_lines.append("[yellow]Active Cooldowns:[/yellow]")
+            content_lines.extend(cooldown_lines)
+        # Model groups (for providers with quota tracking)
+        model_groups = cred.get("model_groups", {})
+        if model_groups:
+            content_lines.append("")
+            for group_name, group_stats in model_groups.items():
+                remaining_pct = group_stats.get("remaining_pct")
+                requests_used = group_stats.get("requests_used", 0)
+                is_exhausted = group_stats.get("is_exhausted", False)
+                reset_time = format_reset_time(group_stats.get("reset_time_iso"))
+                confidence = group_stats.get("confidence", "low")
+                # Format display
+                display = group_stats.get("display", f"{requests_used}/?")
+                bar = create_progress_bar(remaining_pct)
+                # Color based on status
+                if is_exhausted:
+                    color = "red"
+                    status_text = "⛔ EXHAUSTED"
+                elif remaining_pct is not None and remaining_pct < 20:
+                    color = "yellow"
+                    status_text = "⚠️ LOW"
+                else:
+                    color = "green"
+                    status_text = f"Resets: {reset_time}"
+                # Confidence indicator
+                conf_indicator = ""
+                if confidence == "low":
+                    conf_indicator = " [dim](~)[/dim]"
+                elif confidence == "medium":
+                    conf_indicator = " [dim](?)[/dim]"
+                pct_str = f"{remaining_pct}%" if remaining_pct is not None else "?%"
+                content_lines.append(
+                    f"  [{color}]{group_name:<18} {display:<10} {pct_str:>4} {bar}[/{color}]  {status_text}{conf_indicator}"
+                )
+        else:
+            # For providers without quota groups, show model breakdown if available
+            models = cred.get("models", {})
+            if models:
+                content_lines.append("")
+                content_lines.append("  [dim]Models used:[/dim]")
+                for model_name, model_stats in models.items():
+                    req_count = model_stats.get("success_count", 0)
+                    model_cost = format_cost(model_stats.get("approx_cost"))
+                    # Shorten model name for display
+                    short_name = model_name.split("/")[-1][:30]
+                    content_lines.append(
+                        f"    {short_name}: {req_count} requests, {model_cost}"
+                    )
+        self.console.print(
+            Panel(
+                "\n".join(content_lines),
+                title=header,
+                title_align="left",
+                border_style="dim",
+                expand=True,
+            )
+        )
+    def show_switch_remote_screen(self):
+        """
+        Show all configured remotes with their connection status and let the
+        user switch to one of them.
+        Selecting a remote makes it current, records it as last used, clears
+        the cached stats and tries to fetch stats from it. If that fails the
+        API key prompt is shown.
+        """
+        rule = "━" * 78
+        clear_screen()
+        self.console.print(rule)
+        self.console.print("[bold cyan]🔄 Switch Remote[/bold cyan]")
+        self.console.print(rule)
+        self.console.print()
+        active_name = self.current_remote.get("name") if self.current_remote else None
+        self.console.print(f"Current: [bold]{active_name}[/bold]")
+        self.console.print()
+        self.console.print("Available remotes:")
+        remotes = self.config.get_remotes()
+        # Probe every remote first so the list can be printed in one go
+        probes: List[Tuple[Dict, bool, str]] = []
+        with self.console.status("[dim]Checking remote status...", spinner="dots"):
+            for entry in remotes:
+                online, message = self.check_connection(entry)
+                probes.append((entry, online, message))
+        for number, (entry, online, message) in enumerate(probes, 1):
+            name = entry.get("name", "Unknown")
+            host = entry.get("host", "")
+            port = entry.get("port", 8000)
+            suffix = " (current)" if name == active_name else ""
+            badge = "[green]✅ Online[/green]" if online else f"[red]⚠️ {message}[/red]"
+            self.console.print(
+                f"   {number}. {name:<20} {host}:{port:<6} {badge}{suffix}"
+            )
+        self.console.print()
+        self.console.print(rule)
+        self.console.print()
+        answer = Prompt.ask(
+            f"Select remote (1-{len(remotes)}) or B to go back", default="B"
+        ).strip()
+        if answer.lower() == "b":
+            return
+        # Anything that is not a valid list number is ignored
+        if not (answer.isdigit() and 1 <= int(answer) <= len(remotes)):
+            return
+        chosen = remotes[int(answer) - 1]
+        self.current_remote = chosen
+        self.config.set_last_used(chosen["name"])
+        self.cached_stats = None  # Stats of the previous remote no longer apply
+        with self.console.status("[bold]Connecting...", spinner="dots"):
+            stats = self.fetch_stats()
+            # For a key-less Local remote, retry once with the key from .env
+            retry_with_env_key = (
+                stats is None
+                and chosen["name"] == "Local"
+                and not chosen.get("api_key")
+            )
+            if retry_with_env_key:
+                env_key = self.config.get_api_key_from_env()
+                if env_key:
+                    self.current_remote["api_key"] = env_key
+                    stats = self.fetch_stats()
+        if stats is None:
+            self.show_api_key_prompt()
+    def show_api_key_prompt(self):
+        """
+        Prompt for API key when authentication fails.
+        Prints the last error, asks for a key and, when one is entered,
+        stores it on the current remote and in the config before fetching
+        stats again. An empty answer cancels.
+        """
+        self.console.print()
+        self.console.print(
+            "[yellow]Authentication required or connection failed.[/yellow]"
+        )
+        self.console.print(f"Error: {self.last_error}")
+        self.console.print()
+        api_key = Prompt.ask(
+            "Enter API key (or press Enter to cancel)", default=""
+        ).strip()
+        if not api_key:
+            self.console.print("[dim]Cancelled.[/dim]")
+            Prompt.ask("Press Enter to continue", default="")
+            return
+        self.current_remote["api_key"] = api_key
+        # Update config with new API key
+        self.config.update_remote(self.current_remote["name"], api_key=api_key)
+        # Try again
+        with self.console.status("[bold]Reconnecting...", spinner="dots"):
+            stats = self.fetch_stats()
+        # Report and pause only after the spinner has stopped; prompting while
+        # the status display is still live lets its refresh overwrite the prompt.
+        if stats is None:
+            self.console.print(f"[red]Still failed: {self.last_error}[/red]")
+            Prompt.ask("Press Enter to continue", default="")
+    def show_manage_remotes_screen(self):
+        """
+        Run the remote-management menu until the user picks Back.
+        Each pass redraws the table of remotes and dispatches the entered
+        option to the add, edit, delete or set-default dialog.
+        """
+        rule = "━" * 78
+        menu_lines = (
+            "   A. Add new remote",
+            "   E. Edit remote (enter number, e.g., E1)",
+            "   D. Delete remote (enter number, e.g., D1)",
+            "   S. Set default remote",
+            "   B. Back",
+        )
+        while True:
+            clear_screen()
+            self.console.print(rule)
+            self.console.print("[bold cyan]⚙️ Manage Remotes[/bold cyan]")
+            self.console.print(rule)
+            self.console.print()
+            remotes = self.config.get_remotes()
+            table = Table(box=None, show_header=True, header_style="bold")
+            table.add_column("#", style="dim", width=3)
+            table.add_column("Name", min_width=16)
+            table.add_column("Host", min_width=24)
+            table.add_column("Port", justify="right", width=6)
+            table.add_column("Default", width=8)
+            for number, entry in enumerate(remotes, 1):
+                table.add_row(
+                    str(number),
+                    entry.get("name", ""),
+                    entry.get("host", ""),
+                    str(entry.get("port", 8000)),
+                    "★" if entry.get("is_default") else "",
+                )
+            self.console.print(table)
+            self.console.print()
+            self.console.print(rule)
+            self.console.print()
+            for menu_line in menu_lines:
+                self.console.print(menu_line)
+            self.console.print()
+            self.console.print(rule)
+            choice = Prompt.ask("Select option", default="B").strip().upper()
+            if choice == "B":
+                break
+            if choice == "A":
+                self._add_remote_dialog()
+                continue
+            if choice == "S":
+                self._set_default_dialog(remotes)
+                continue
+            # Remaining options are a letter followed by a list number (E1, D2, ...)
+            letter, number_text = choice[:1], choice[1:]
+            if letter not in ("E", "D") or not number_text.isdigit():
+                continue
+            position = int(number_text)
+            if not 1 <= position <= len(remotes):
+                continue
+            if letter == "E":
+                self._edit_remote_dialog(remotes[position - 1])
+            else:
+                self._delete_remote_dialog(remotes[position - 1])
+    def _add_remote_dialog(self):
+        """
+        Dialog to add a new remote.
+        Asks for name, host, port and an optional API key. An empty name or
+        host cancels; a port that is not an integer falls back to 8000.
+        """
+        self.console.print()
+        self.console.print("[bold]Add New Remote[/bold]")
+        self.console.print()
+        name = Prompt.ask("Name", default="").strip()
+        if not name:
+            self.console.print("[dim]Cancelled.[/dim]")
+            return
+        host = Prompt.ask("Host", default="").strip()
+        if not host:
+            self.console.print("[dim]Cancelled.[/dim]")
+            return
+        port_str = Prompt.ask("Port", default="8000").strip()
+        try:
+            port = int(port_str)
+        except ValueError:
+            port = 8000
+        api_key = Prompt.ask("API Key (optional)", default="").strip() or None
+        # add_remote() returns False both for a duplicate name and for a failed
+        # save, so the duplicate case is checked separately to report the real cause.
+        if self.config.get_remote_by_name(name):
+            self.console.print(f"[red]Remote '{name}' already exists.[/red]")
+        elif self.config.add_remote(name, host, port, api_key):
+            self.console.print(f"[green]Added remote '{name}'.[/green]")
+        else:
+            self.console.print(f"[red]Failed to save remote '{name}'.[/red]")
+        Prompt.ask("Press Enter to continue", default="")
+    def _edit_remote_dialog(self, remote: Dict[str, Any]):
+        """
+        Dialog to edit an existing remote.
+        Each field defaults to its current value; only fields that changed
+        are sent to the config. A blank name or host keeps the current value
+        and a port that is not an integer keeps the current port.
+        Args:
+            remote: The remote dict to edit; must contain "name".
+        """
+        # Remember the name up front: once the config has applied a rename,
+        # remote["name"] may already hold the new name.
+        original_name = remote["name"]
+        self.console.print()
+        self.console.print(f"[bold]Edit Remote: {original_name}[/bold]")
+        self.console.print("[dim]Press Enter to keep current value[/dim]")
+        self.console.print()
+        current_host = remote.get("host", "")
+        current_port = remote.get("port", 8000)
+        # Whitespace-only input would otherwise store an empty name/host
+        new_name = Prompt.ask("Name", default=original_name).strip() or original_name
+        new_host = Prompt.ask("Host", default=current_host).strip() or current_host
+        new_port_str = Prompt.ask("Port", default=str(current_port)).strip()
+        try:
+            new_port = int(new_port_str)
+        except ValueError:
+            new_port = current_port
+        current_key = remote.get("api_key", "") or ""
+        display_key = f"{current_key[:8]}..." if len(current_key) > 8 else current_key
+        new_key = Prompt.ask(
+            f"API Key (current: {display_key or 'none'})", default=""
+        ).strip()
+        updates = {}
+        if new_name != original_name:
+            updates["new_name"] = new_name
+        if new_host != remote.get("host"):
+            updates["host"] = new_host
+        if new_port != remote.get("port"):
+            updates["port"] = new_port
+        if new_key:
+            updates["api_key"] = new_key
+        if updates:
+            if self.config.update_remote(original_name, **updates):
+                self.console.print("[green]Remote updated.[/green]")
+                # Mirror the change onto current_remote if it was the one being
+                # edited. "new_name" is an update_remote() argument, not a stored
+                # field, so it is translated to "name" instead of being copied.
+                if self.current_remote and (
+                    self.current_remote is remote
+                    or self.current_remote.get("name") == original_name
+                ):
+                    applied = {
+                        key: value
+                        for key, value in updates.items()
+                        if key != "new_name"
+                    }
+                    if "new_name" in updates:
+                        applied["name"] = updates["new_name"]
+                    self.current_remote.update(applied)
+            else:
+                self.console.print("[red]Failed to update remote.[/red]")
+        else:
+            self.console.print("[dim]No changes made.[/dim]")
+        Prompt.ask("Press Enter to continue", default="")
+    def _delete_remote_dialog(self, remote: Dict[str, Any]):
+        """
+        Ask for confirmation and delete the given remote.
+        Only the literal answer "yes" deletes. If the deleted remote was the
+        current one, the viewer switches to the default remote and drops the
+        cached stats.
+        """
+        target_name = remote["name"]
+        self.console.print()
+        self.console.print(f"[yellow]Delete remote '{target_name}'?[/yellow]")
+        answer = Prompt.ask("Type 'yes' to confirm", default="no").strip().lower()
+        if answer != "yes":
+            self.console.print("[dim]Cancelled.[/dim]")
+        elif not self.config.delete_remote(remote["name"]):
+            self.console.print(
+                "[red]Cannot delete. At least one remote must exist.[/red]"
+            )
+        else:
+            self.console.print(f"[green]Deleted remote '{remote['name']}'.[/green]")
+            # The viewer must not keep pointing at a remote that is gone
+            was_current = (
+                self.current_remote
+                and self.current_remote["name"] == remote["name"]
+            )
+            if was_current:
+                self.current_remote = self.config.get_default_remote()
+                self.cached_stats = None
+        Prompt.ask("Press Enter to continue", default="")
+    def _set_default_dialog(self, remotes: List[Dict[str, Any]]):
+        """
+        Ask for a list number and mark that remote as the default.
+        Input that is not a number within 1..len(remotes) returns silently.
+        """
+        self.console.print()
+        answer = Prompt.ask(f"Set default (1-{len(remotes)})", default="").strip()
+        if not (answer.isdigit() and 1 <= int(answer) <= len(remotes)):
+            return
+        chosen = remotes[int(answer) - 1]
+        if self.config.set_default_remote(chosen["name"]):
+            message = f"[green]'{chosen['name']}' is now the default.[/green]"
+        else:
+            message = "[red]Failed to set default.[/red]"
+        self.console.print(message)
+        Prompt.ask("Press Enter to continue", default="")
+    # =========================================================================
+    # MAIN LOOP
+    # =========================================================================
+    def run(self):
+        """
+        Start the viewer: pick the last-used remote, connect, then show the
+        summary screen repeatedly while self.running is true.
+        Returns early when no remote is configured or the first fetch fails.
+        """
+        remote = self.config.get_last_used_remote()
+        self.current_remote = remote
+        if not remote:
+            self.console.print("[red]No remotes configured.[/red]")
+            return
+        # A Local remote without a stored key borrows the key from .env
+        keyless_local = remote["name"] == "Local" and not remote.get("api_key")
+        if keyless_local:
+            env_key = self.config.get_api_key_from_env()
+            if env_key:
+                remote["api_key"] = env_key
+        # First contact with the proxy
+        with self.console.status("[bold]Connecting to proxy...", spinner="dots"):
+            first_stats = self.fetch_stats()
+        if first_stats is None:
+            self.show_connection_error()
+            return
+        while self.running:
+            self.show_summary_screen()
+def run_quota_viewer():
+    """Create a QuotaViewer and run its main loop."""
+    QuotaViewer().run()
+# Start the viewer when this file is executed as a script.
+if __name__ == "__main__":
+    run_quota_viewer()

src/proxy_app/quota_viewer_config.py ADDED Viewed

	@@ -0,0 +1,288 @@

+"""
+Configuration management for the Quota Viewer.
+Handles remote proxy configurations including:
+- Multiple remote proxies (local, VPS, etc.)
+- API key storage per remote
+- Default and last-used remote tracking
+"""
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+class QuotaViewerConfig:
+    """
+    Manages quota viewer configuration including remote proxies.
+    The configuration is a JSON object with a "remotes" list (dicts with
+    "name", "host", "port", "api_key", "is_default") and a "last_used" name.
+    Mutating methods write the file immediately and return whether the
+    write succeeded.
+    """
+    def __init__(self, config_path: Optional[Path] = None):
+        """
+        Initialize the config manager.
+        Args:
+            config_path: Path to config file. Defaults to quota_viewer_config.json
+                        in the current directory or EXE directory.
+        """
+        if config_path is None:
+            config_path = self._base_dir() / "quota_viewer_config.json"
+        self.config_path = config_path
+        self.config = self._load()
+    @staticmethod
+    def _base_dir() -> Path:
+        """Return the EXE directory for frozen builds, else the current directory."""
+        import sys
+        if getattr(sys, "frozen", False):
+            return Path(sys.executable).parent
+        return Path.cwd()
+    @staticmethod
+    def _default_config() -> Dict[str, Any]:
+        """Return a fresh default config containing only the Local remote."""
+        return {
+            "remotes": [
+                {
+                    "name": "Local",
+                    "host": "127.0.0.1",
+                    "port": 8000,
+                    "api_key": None,
+                    "is_default": True,
+                }
+            ],
+            "last_used": "Local",
+        }
+    def _load(self) -> Dict[str, Any]:
+        """
+        Load config from file or return defaults.
+        Defaults are used when the file is missing, unreadable, not valid
+        JSON, or valid JSON that is not an object.
+        """
+        if self.config_path.exists():
+            try:
+                with open(self.config_path, "r", encoding="utf-8") as f:
+                    config = json.load(f)
+            except (ValueError, OSError):
+                # ValueError covers json.JSONDecodeError and UnicodeDecodeError
+                config = None
+            if isinstance(config, dict):
+                # Ensure required fields exist and have a usable type
+                if not isinstance(config.get("remotes"), list):
+                    config["remotes"] = []
+                return config
+        # Return default config with Local remote
+        return self._default_config()
+    def _save(self) -> bool:
+        """Save config to file. Returns True on success."""
+        try:
+            with open(self.config_path, "w", encoding="utf-8") as f:
+                json.dump(self.config, f, indent=2)
+            return True
+        except OSError:
+            return False
+    def get_remotes(self) -> List[Dict[str, Any]]:
+        """Get list of all configured remotes."""
+        return self.config.get("remotes", [])
+    def get_remote_by_name(self, name: str) -> Optional[Dict[str, Any]]:
+        """Get a remote by name, or None when no remote has that name."""
+        for remote in self.config.get("remotes", []):
+            # .get() so an entry without a "name" key is skipped, not fatal
+            if remote.get("name") == name:
+                return remote
+        return None
+    def get_default_remote(self) -> Optional[Dict[str, Any]]:
+        """Get the default remote, falling back to the first remote."""
+        remotes = self.config.get("remotes", [])
+        for remote in remotes:
+            if remote.get("is_default"):
+                return remote
+        # Fallback to first remote
+        return remotes[0] if remotes else None
+    def get_last_used_remote(self) -> Optional[Dict[str, Any]]:
+        """Get the last used remote, or default if not set."""
+        last_used_name = self.config.get("last_used")
+        if last_used_name:
+            remote = self.get_remote_by_name(last_used_name)
+            if remote:
+                return remote
+        return self.get_default_remote()
+    def set_last_used(self, name: str) -> bool:
+        """Set the last used remote name."""
+        self.config["last_used"] = name
+        return self._save()
+    def add_remote(
+        self,
+        name: str,
+        host: str,
+        port: int = 8000,
+        api_key: Optional[str] = None,
+        is_default: bool = False,
+    ) -> bool:
+        """
+        Add a new remote configuration.
+        Args:
+            name: Display name for the remote
+            host: Hostname or IP address
+            port: Port number (default 8000)
+            api_key: Optional API key for authentication
+            is_default: Whether this should be the default remote
+        Returns:
+            True on success, False if name already exists or the file
+            could not be written
+        """
+        # Check for duplicate name
+        if self.get_remote_by_name(name):
+            return False
+        # If setting as default, clear default from others
+        if is_default:
+            for remote in self.config.get("remotes", []):
+                remote["is_default"] = False
+        remote = {
+            "name": name,
+            "host": host,
+            "port": port,
+            "api_key": api_key,
+            "is_default": is_default,
+        }
+        self.config.setdefault("remotes", []).append(remote)
+        return self._save()
+    def update_remote(self, name: str, **kwargs) -> bool:
+        """
+        Update an existing remote configuration.
+        Args:
+            name: Name of the remote to update
+            **kwargs: Fields to update (host, port, api_key, is_default, new_name)
+        Returns:
+            True on success, False if remote not found, the new name is
+            already taken, or the file could not be written
+        """
+        remote = self.get_remote_by_name(name)
+        if not remote:
+            return False
+        # Handle rename
+        if "new_name" in kwargs:
+            new_name = kwargs.pop("new_name")
+            if new_name != name and self.get_remote_by_name(new_name):
+                return False  # New name already exists
+            remote["name"] = new_name
+            # Update last_used if it was this remote
+            if self.config.get("last_used") == name:
+                self.config["last_used"] = new_name
+        # If setting as default, clear default from others
+        if kwargs.get("is_default"):
+            for r in self.config.get("remotes", []):
+                r["is_default"] = False
+        # Update other fields
+        for key in ("host", "port", "api_key", "is_default"):
+            if key in kwargs:
+                remote[key] = kwargs[key]
+        return self._save()
+    def delete_remote(self, name: str) -> bool:
+        """
+        Delete a remote configuration.
+        Args:
+            name: Name of the remote to delete
+        Returns:
+            True on success, False if remote not found, is the only one,
+            or the file could not be written
+        """
+        remotes = self.config.get("remotes", [])
+        if len(remotes) <= 1:
+            return False  # Don't delete the last remote
+        for i, remote in enumerate(remotes):
+            if remote.get("name") == name:
+                remotes.pop(i)
+                # Update last_used if it was this remote
+                if self.config.get("last_used") == name:
+                    self.config["last_used"] = (
+                        remotes[0].get("name") if remotes else None
+                    )
+                return self._save()
+        return False
+    def set_default_remote(self, name: str) -> bool:
+        """Set a remote as the default."""
+        remote = self.get_remote_by_name(name)
+        if not remote:
+            return False
+        # Clear default from all remotes
+        for r in self.config.get("remotes", []):
+            r["is_default"] = False
+        # Set new default
+        remote["is_default"] = True
+        return self._save()
+    def sync_with_launcher_config(self) -> None:
+        """
+        Sync the Local remote with launcher_config.json if it exists.
+        Copies host and port from the launcher config onto the Local remote,
+        creating that remote as the default when it does not exist yet.
+        """
+        launcher_config_path = self._base_dir() / "launcher_config.json"
+        if not launcher_config_path.exists():
+            return
+        try:
+            with open(launcher_config_path, "r", encoding="utf-8") as f:
+                launcher_config = json.load(f)
+        except (ValueError, OSError):
+            return
+        if not isinstance(launcher_config, dict):
+            return
+        host = launcher_config.get("host", "127.0.0.1")
+        port = launcher_config.get("port", 8000)
+        # Update Local remote
+        local_remote = self.get_remote_by_name("Local")
+        if local_remote:
+            local_remote["host"] = host
+            local_remote["port"] = port
+            self._save()
+        else:
+            # Create Local remote if it doesn't exist
+            self.add_remote("Local", host, port, is_default=True)
+    def get_api_key_from_env(self) -> Optional[str]:
+        """
+        Get PROXY_API_KEY from .env file for Local remote.
+        Only the first line starting with "PROXY_API_KEY=" is considered;
+        one pair of matching surrounding quotes is removed from the value.
+        Returns:
+            API key string or None
+        """
+        env_path = self._base_dir() / ".env"
+        if not env_path.exists():
+            return None
+        try:
+            with open(env_path, "r", encoding="utf-8") as f:
+                for line in f:
+                    line = line.strip()
+                    if line.startswith("PROXY_API_KEY="):
+                        value = line.split("=", 1)[1].strip()
+                        # Remove quotes if present
+                        if value and value[0] in ('"', "'") and value[-1] == value[0]:
+                            value = value[1:-1]
+                        return value if value else None
+        except (ValueError, OSError):
+            pass
+        return None

src/rotator_library/client.py CHANGED Viewed

@@ -2612,3 +2612,280 @@ class RotatingClient:
             for models in all_provider_models.values():
                 flat_models.extend(models)
             return flat_models

             for models in all_provider_models.values():
                 flat_models.extend(models)
             return flat_models
+    async def get_quota_stats(
+        self,
+        provider_filter: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Get quota and usage stats for all credentials.
+        Starts from the usage manager's aggregated stats. For every provider
+        whose plugin instance exposes ``_get_effective_quota_groups`` (e.g.
+        Antigravity) it adds a provider-level ``quota_groups`` summary and a
+        per-credential ``model_groups`` entry, plus ``tier`` when the
+        provider's ``project_tier_cache`` holds one for the credential path.
+        Provider instances are created on demand and kept in
+        ``self._provider_instances``.
+        Args:
+            provider_filter: If provided, only return stats for this provider
+        Returns:
+            Complete stats dict ready for the /v1/quota-stats endpoint
+        """
+        from datetime import datetime, timezone
+        def _group_model_stats(models_data, provider_name, group_models):
+            # Stats of the first model of the group that has data; the key is
+            # tried with the provider prefix first, then without it.
+            for model in group_models:
+                model_stats = models_data.get(
+                    f"{provider_name}/{model}"
+                ) or models_data.get(model)
+                if model_stats:
+                    return model_stats
+            return None
+        # Get base stats from usage manager
+        stats = await self.usage_manager.get_stats_for_endpoint(provider_filter)
+        # Enrich with provider-specific quota data
+        for provider, prov_stats in stats.get("providers", {}).items():
+            provider_class = self._provider_plugins.get(provider)
+            if not provider_class:
+                continue
+            # Get or create provider instance
+            if provider not in self._provider_instances:
+                self._provider_instances[provider] = provider_class()
+            provider_instance = self._provider_instances[provider]
+            # Only providers with quota tracking (like Antigravity) are enriched
+            if not hasattr(provider_instance, "_get_effective_quota_groups"):
+                continue
+            quota_groups = provider_instance._get_effective_quota_groups()
+            # Provider-level summary per quota group
+            prov_stats["quota_groups"] = {}
+            for group_name, group_models in quota_groups.items():
+                credentials = prov_stats.get("credentials", [])
+                remaining_pcts = []
+                exhausted = 0
+                for cred in credentials:
+                    model_stats = _group_model_stats(
+                        cred.get("models", {}), provider, group_models
+                    )
+                    if not model_stats:
+                        continue
+                    baseline = model_stats.get("baseline_remaining_fraction")
+                    # Credentials without a baseline count toward the total
+                    # but not toward the average or the exhausted count.
+                    if baseline is None:
+                        continue
+                    remaining_pcts.append(int(baseline * 100))
+                    if baseline <= 0:
+                        exhausted += 1
+                prov_stats["quota_groups"][group_name] = {
+                    "models": group_models,
+                    "credentials_total": len(credentials),
+                    "credentials_exhausted": exhausted,
+                    "avg_remaining_pct": int(
+                        sum(remaining_pcts) / len(remaining_pcts)
+                    )
+                    if remaining_pcts
+                    else 0,
+                }
+            # Also enrich each credential with formatted quota group info
+            for cred in prov_stats.get("credentials", []):
+                cred["model_groups"] = {}
+                models_data = cred.get("models", {})
+                for group_name, group_models in quota_groups.items():
+                    # Representative model for this group (first with data)
+                    model_stats = _group_model_stats(
+                        models_data, provider, group_models
+                    )
+                    if not model_stats:
+                        continue
+                    baseline = model_stats.get("baseline_remaining_fraction")
+                    max_req = model_stats.get("quota_max_requests")
+                    req_count = model_stats.get("request_count", 0)
+                    reset_ts = model_stats.get("quota_reset_ts")
+                    remaining_pct = (
+                        int(baseline * 100) if baseline is not None else None
+                    )
+                    is_exhausted = baseline is not None and baseline <= 0
+                    # Format reset time as an ISO-8601 UTC string
+                    reset_iso = None
+                    if reset_ts:
+                        try:
+                            reset_iso = datetime.fromtimestamp(
+                                reset_ts, tz=timezone.utc
+                            ).isoformat()
+                        except (ValueError, OSError, OverflowError, TypeError):
+                            # A malformed or out-of-range stored timestamp
+                            # must not fail the whole stats response.
+                            pass
+                    cred["model_groups"][group_name] = {
+                        "remaining_pct": remaining_pct,
+                        "requests_used": req_count,
+                        "requests_max": max_req,
+                        "display": f"{req_count}/{max_req}"
+                        if max_req
+                        else f"{req_count}/?",
+                        "is_exhausted": is_exhausted,
+                        "reset_time_iso": reset_iso,
+                        "models": group_models,
+                        "confidence": self._get_baseline_confidence(model_stats),
+                    }
+                # Attach the credential's tier from the provider's cache, if any
+                cred_path = cred.get("full_path", "")
+                if hasattr(provider_instance, "project_tier_cache"):
+                    tier = provider_instance.project_tier_cache.get(cred_path)
+                    if tier:
+                        cred["tier"] = tier
+        return stats
+    def _get_baseline_confidence(self, model_stats: Dict) -> str:
+        """
+        Rate how trustworthy a quota baseline is from how old it is.
+        Args:
+            model_stats: Model statistics dict; its ``baseline_fetched_at``
+                entry is compared against the current ``time.time()``
+        Returns:
+            "high" when fetched under 5 minutes ago, "medium" when under
+            30 minutes ago, otherwise (or when no fetch time is stored) "low"
+        """
+        fetched_at = model_stats.get("baseline_fetched_at")
+        if fetched_at:
+            elapsed = time.time() - fetched_at
+            if elapsed < 300:  # 5 minutes
+                return "high"
+            if elapsed < 1800:  # 30 minutes
+                return "medium"
+        return "low"
+    async def reload_usage_from_disk(self) -> None:
+        """
+        Ask the usage manager to re-read its usage data from disk.
+        Delegates to ``usage_manager.reload_from_disk()``; intended for
+        getting fresh stats without making external API calls.
+        """
+        manager = self.usage_manager
+        await manager.reload_from_disk()
+    async def force_refresh_quota(
+        self,
+        provider: Optional[str] = None,
+        credential: Optional[str] = None,
+    ) -> Dict[str, Any]:
+        """
+        Force refresh quota from external API.
+        Providers whose plugin instance exposes ``fetch_initial_baselines``
+        (e.g. Antigravity) have live quota fetched for the selected
+        credentials; when the instance also exposes
+        ``_store_baselines_to_usage_manager`` the results are stored in the
+        usage manager. Providers without that capability are skipped.
+        Args:
+            provider: If specified, only refresh this provider
+            credential: If specified, only refresh this specific credential.
+                Matched against each known credential as an exact path first,
+                then as an exact file name, then as a path suffix.
+        Returns:
+            Refresh result dict with success/failure info
+        """
+        result = {
+            "action": "force_refresh",
+            "scope": "credential"
+            if credential
+            else ("provider" if provider else "all"),
+            "provider": provider,
+            "credential": credential,
+            "credentials_refreshed": 0,
+            "success_count": 0,
+            "failed_count": 0,
+            "duration_ms": 0,
+            "errors": [],
+        }
+        start_time = time.time()
+        # Determine which providers to refresh
+        if provider:
+            providers_to_refresh = (
+                [provider] if provider in self.all_credentials else []
+            )
+        else:
+            providers_to_refresh = list(self.all_credentials.keys())
+        for prov in providers_to_refresh:
+            provider_class = self._provider_plugins.get(prov)
+            if not provider_class:
+                continue
+            # Get or create provider instance
+            if prov not in self._provider_instances:
+                self._provider_instances[prov] = provider_class()
+            provider_instance = self._provider_instances[prov]
+            # Only providers that support quota refresh (like Antigravity)
+            if not hasattr(provider_instance, "fetch_initial_baselines"):
+                continue
+            # Get credentials to refresh
+            if credential:
+                provider_creds = self.all_credentials.get(prov, [])
+                # Most specific match wins, so "1.json" cannot select
+                # "cred_11.json" just because it appears earlier in the list.
+                match = next((c for c in provider_creds if c == credential), None)
+                if match is None:
+                    match = next(
+                        (c for c in provider_creds if Path(c).name == credential),
+                        None,
+                    )
+                if match is None:
+                    # Fallback: partial trailing path, first hit wins
+                    match = next(
+                        (c for c in provider_creds if c.endswith(credential)),
+                        None,
+                    )
+                creds_to_refresh = [match] if match is not None else []
+            else:
+                creds_to_refresh = self.all_credentials.get(prov, [])
+            if not creds_to_refresh:
+                continue
+            try:
+                # Fetch live quota from API for ALL specified credentials
+                quota_results = await provider_instance.fetch_initial_baselines(
+                    creds_to_refresh
+                )
+                # Store baselines in usage manager
+                if hasattr(provider_instance, "_store_baselines_to_usage_manager"):
+                    stored = await provider_instance._store_baselines_to_usage_manager(
+                        quota_results, self.usage_manager
+                    )
+                    result["success_count"] += stored
+                result["credentials_refreshed"] += len(creds_to_refresh)
+                # Count failures
+                for cred_path, data in quota_results.items():
+                    if data.get("status") != "success":
+                        result["failed_count"] += 1
+                        result["errors"].append(
+                            f"{Path(cred_path).name}: {data.get('error', 'Unknown error')}"
+                        )
+            except Exception as e:
+                # One provider failing must not abort the others; record it
+                # and count every credential of this batch as failed.
+                lib_logger.error(f"Failed to refresh quota for {prov}: {e}")
+                result["errors"].append(f"{prov}: {str(e)}")
+                result["failed_count"] += len(creds_to_refresh)
+        result["duration_ms"] = int((time.time() - start_time) * 1000)
+        return result

src/rotator_library/providers/antigravity_provider.py CHANGED Viewed

@@ -103,7 +103,7 @@ DEFAULT_MAX_OUTPUT_TOKENS = 64000
 # Empty response retry configuration
 # When Antigravity returns an empty response (no content, no tool calls),
 # automatically retry up to this many attempts before giving up (minimum 1)
-EMPTY_RESPONSE_MAX_ATTEMPTS = max(1, _env_int("ANTIGRAVITY_EMPTY_RESPONSE_ATTEMPTS", 4))
 EMPTY_RESPONSE_RETRY_DELAY = _env_int("ANTIGRAVITY_EMPTY_RESPONSE_RETRY_DELAY", 2)
 # Model alias mappings (internal ↔ public)

 # Empty response retry configuration
 # When Antigravity returns an empty response (no content, no tool calls),
 # automatically retry up to this many attempts before giving up (minimum 1)
+EMPTY_RESPONSE_MAX_ATTEMPTS = max(1, _env_int("ANTIGRAVITY_EMPTY_RESPONSE_ATTEMPTS", 6))
 EMPTY_RESPONSE_RETRY_DELAY = _env_int("ANTIGRAVITY_EMPTY_RESPONSE_RETRY_DELAY", 2)
 # Model alias mappings (internal ↔ public)

src/rotator_library/usage_manager.py CHANGED Viewed

@@ -1993,3 +1993,479 @@ class UsageManager:
         """
         # Disabled - see docstring above
         pass

         """
         # Disabled - see docstring above
         pass
+    async def get_stats_for_endpoint(
+        self,
+        provider_filter: Optional[str] = None,
+        include_global: bool = True,
+    ) -> Dict[str, Any]:
+        """
+        Get usage stats formatted for the /v1/quota-stats endpoint.
+        Aggregates data from key_usage.json grouped by provider.
+        Includes both current period stats and global (lifetime) stats.
+        Args:
+            provider_filter: If provided, only return stats for this provider
+            include_global: If True, include global/lifetime stats alongside current
+        Returns:
+            {
+                "providers": {
+                    "provider_name": {
+                        "credential_count": int,
+                        "active_count": int,
+                        "on_cooldown_count": int,
+                        "total_requests": int,
+                        "tokens": {
+                            "input_cached": int,
+                            "input_uncached": int,
+                            "input_cache_pct": float,
+                            "output": int
+                        },
+                        "approx_cost": float | None,
+                        "credentials": [...],
+                        "global": {...}  # If include_global is True
+                    }
+                },
+                "summary": {...},
+                "global_summary": {...},  # If include_global is True
+                "timestamp": float
+            }
+        """
+        await self._lazy_init()
+        now_ts = time.time()
+        providers: Dict[str, Dict[str, Any]] = {}
+        # Track global stats separately
+        global_providers: Dict[str, Dict[str, Any]] = {}
+        async with self._data_lock:
+            if not self._usage_data:
+                return {
+                    "providers": {},
+                    "summary": {
+                        "total_providers": 0,
+                        "total_credentials": 0,
+                        "active_credentials": 0,
+                        "exhausted_credentials": 0,
+                        "total_requests": 0,
+                        "tokens": {
+                            "input_cached": 0,
+                            "input_uncached": 0,
+                            "input_cache_pct": 0,
+                            "output": 0,
+                        },
+                        "approx_total_cost": 0.0,
+                    },
+                    "global_summary": {
+                        "total_providers": 0,
+                        "total_credentials": 0,
+                        "total_requests": 0,
+                        "tokens": {
+                            "input_cached": 0,
+                            "input_uncached": 0,
+                            "input_cache_pct": 0,
+                            "output": 0,
+                        },
+                        "approx_total_cost": 0.0,
+                    },
+                    "data_source": "cache",
+                    "timestamp": now_ts,
+                }
+            for credential, cred_data in self._usage_data.items():
+                # Extract provider from credential path
+                provider = self._get_provider_from_credential(credential)
+                if not provider:
+                    continue
+                # Apply filter if specified
+                if provider_filter and provider != provider_filter:
+                    continue
+                # Initialize provider entry
+                if provider not in providers:
+                    providers[provider] = {
+                        "credential_count": 0,
+                        "active_count": 0,
+                        "on_cooldown_count": 0,
+                        "exhausted_count": 0,
+                        "total_requests": 0,
+                        "tokens": {
+                            "input_cached": 0,
+                            "input_uncached": 0,
+                            "input_cache_pct": 0,
+                            "output": 0,
+                        },
+                        "approx_cost": 0.0,
+                        "credentials": [],
+                    }
+                    global_providers[provider] = {
+                        "total_requests": 0,
+                        "tokens": {
+                            "input_cached": 0,
+                            "input_uncached": 0,
+                            "input_cache_pct": 0,
+                            "output": 0,
+                        },
+                        "approx_cost": 0.0,
+                    }
+                prov_stats = providers[provider]
+                prov_stats["credential_count"] += 1
+                # Determine credential status and cooldowns
+                key_cooldown = cred_data.get("key_cooldown_until", 0) or 0
+                model_cooldowns = cred_data.get("model_cooldowns", {})
+                # Build active cooldowns with remaining time
+                active_cooldowns = {}
+                for model, cooldown_ts in model_cooldowns.items():
+                    if cooldown_ts > now_ts:
+                        remaining_seconds = int(cooldown_ts - now_ts)
+                        active_cooldowns[model] = {
+                            "until_ts": cooldown_ts,
+                            "remaining_seconds": remaining_seconds,
+                        }
+                key_cooldown_remaining = None
+                if key_cooldown > now_ts:
+                    key_cooldown_remaining = int(key_cooldown - now_ts)
+                has_active_cooldown = key_cooldown > now_ts or len(active_cooldowns) > 0
+                # Check if exhausted (all quota groups exhausted for Antigravity)
+                is_exhausted = False
+                models_data = cred_data.get("models", {})
+                if models_data:
+                    # Check if any model has remaining quota
+                    all_exhausted = True
+                    for model_stats in models_data.values():
+                        if isinstance(model_stats, dict):
+                            baseline = model_stats.get("baseline_remaining_fraction")
+                            if baseline is None or baseline > 0:
+                                all_exhausted = False
+                                break
+                    if all_exhausted and len(models_data) > 0:
+                        is_exhausted = True
+                if is_exhausted:
+                    prov_stats["exhausted_count"] += 1
+                    status = "exhausted"
+                elif has_active_cooldown:
+                    prov_stats["on_cooldown_count"] += 1
+                    status = "cooldown"
+                else:
+                    prov_stats["active_count"] += 1
+                    status = "active"
+                # Aggregate token stats (current period)
+                cred_tokens = {
+                    "input_cached": 0,
+                    "input_uncached": 0,
+                    "output": 0,
+                }
+                cred_requests = 0
+                cred_cost = 0.0
+                # Aggregate global token stats
+                cred_global_tokens = {
+                    "input_cached": 0,
+                    "input_uncached": 0,
+                    "output": 0,
+                }
+                cred_global_requests = 0
+                cred_global_cost = 0.0
+                # Handle per-model structure (current period)
+                if models_data:
+                    for model_name, model_stats in models_data.items():
+                        if not isinstance(model_stats, dict):
+                            continue
+                        # Prefer request_count if available and non-zero, else fall back to success+failure
+                        req_count = model_stats.get("request_count", 0)
+                        if req_count > 0:
+                            cred_requests += req_count
+                        else:
+                            cred_requests += model_stats.get("success_count", 0)
+                            cred_requests += model_stats.get("failure_count", 0)
+                        # Token stats - track cached separately
+                        cred_tokens["input_cached"] += model_stats.get(
+                            "prompt_tokens_cached", 0
+                        )
+                        cred_tokens["input_uncached"] += model_stats.get(
+                            "prompt_tokens", 0
+                        )
+                        cred_tokens["output"] += model_stats.get("completion_tokens", 0)
+                        cred_cost += model_stats.get("approx_cost", 0.0)
+                # Handle legacy daily structure
+                daily_data = cred_data.get("daily", {})
+                daily_models = daily_data.get("models", {})
+                for model_name, model_stats in daily_models.items():
+                    if not isinstance(model_stats, dict):
+                        continue
+                    cred_requests += model_stats.get("success_count", 0)
+                    cred_tokens["input_cached"] += model_stats.get(
+                        "prompt_tokens_cached", 0
+                    )
+                    cred_tokens["input_uncached"] += model_stats.get("prompt_tokens", 0)
+                    cred_tokens["output"] += model_stats.get("completion_tokens", 0)
+                    cred_cost += model_stats.get("approx_cost", 0.0)
+                # Handle global stats
+                global_data = cred_data.get("global", {})
+                global_models = global_data.get("models", {})
+                for model_name, model_stats in global_models.items():
+                    if not isinstance(model_stats, dict):
+                        continue
+                    cred_global_requests += model_stats.get("success_count", 0)
+                    cred_global_tokens["input_cached"] += model_stats.get(
+                        "prompt_tokens_cached", 0
+                    )
+                    cred_global_tokens["input_uncached"] += model_stats.get(
+                        "prompt_tokens", 0
+                    )
+                    cred_global_tokens["output"] += model_stats.get(
+                        "completion_tokens", 0
+                    )
+                    cred_global_cost += model_stats.get("approx_cost", 0.0)
+                # Add current period stats to global totals
+                cred_global_requests += cred_requests
+                cred_global_tokens["input_cached"] += cred_tokens["input_cached"]
+                cred_global_tokens["input_uncached"] += cred_tokens["input_uncached"]
+                cred_global_tokens["output"] += cred_tokens["output"]
+                cred_global_cost += cred_cost
+                # Build credential entry
+                # Mask credential identifier for display
+                if credential.startswith("env://"):
+                    identifier = credential
+                else:
+                    identifier = Path(credential).name
+                cred_entry = {
+                    "identifier": identifier,
+                    "full_path": credential,
+                    "status": status,
+                    "last_used_ts": cred_data.get("last_used_ts"),
+                    "requests": cred_requests,
+                    "tokens": cred_tokens,
+                    "approx_cost": cred_cost if cred_cost > 0 else None,
+                }
+                # Add cooldown info
+                if key_cooldown_remaining is not None:
+                    cred_entry["key_cooldown_remaining"] = key_cooldown_remaining
+                if active_cooldowns:
+                    cred_entry["model_cooldowns"] = active_cooldowns
+                # Add global stats for this credential
+                if include_global:
+                    # Calculate global cache percentage
+                    global_total_input = (
+                        cred_global_tokens["input_cached"]
+                        + cred_global_tokens["input_uncached"]
+                    )
+                    global_cache_pct = (
+                        round(
+                            cred_global_tokens["input_cached"]
+                            / global_total_input
+                            * 100,
+                            1,
+                        )
+                        if global_total_input > 0
+                        else 0
+                    )
+                    cred_entry["global"] = {
+                        "requests": cred_global_requests,
+                        "tokens": {
+                            "input_cached": cred_global_tokens["input_cached"],
+                            "input_uncached": cred_global_tokens["input_uncached"],
+                            "input_cache_pct": global_cache_pct,
+                            "output": cred_global_tokens["output"],
+                        },
+                        "approx_cost": cred_global_cost
+                        if cred_global_cost > 0
+                        else None,
+                    }
+                # Add model-specific data for providers with per-model tracking
+                if models_data:
+                    cred_entry["models"] = {}
+                    for model_name, model_stats in models_data.items():
+                        if not isinstance(model_stats, dict):
+                            continue
+                        cred_entry["models"][model_name] = {
+                            "requests": model_stats.get("success_count", 0)
+                            + model_stats.get("failure_count", 0),
+                            "request_count": model_stats.get("request_count", 0),
+                            "success_count": model_stats.get("success_count", 0),
+                            "failure_count": model_stats.get("failure_count", 0),
+                            "prompt_tokens": model_stats.get("prompt_tokens", 0),
+                            "prompt_tokens_cached": model_stats.get(
+                                "prompt_tokens_cached", 0
+                            ),
+                            "completion_tokens": model_stats.get(
+                                "completion_tokens", 0
+                            ),
+                            "approx_cost": model_stats.get("approx_cost", 0.0),
+                            "window_start_ts": model_stats.get("window_start_ts"),
+                            "quota_reset_ts": model_stats.get("quota_reset_ts"),
+                            # Quota baseline fields (Antigravity-specific)
+                            "baseline_remaining_fraction": model_stats.get(
+                                "baseline_remaining_fraction"
+                            ),
+                            "baseline_fetched_at": model_stats.get(
+                                "baseline_fetched_at"
+                            ),
+                            "quota_max_requests": model_stats.get("quota_max_requests"),
+                            "quota_display": model_stats.get("quota_display"),
+                        }
+                prov_stats["credentials"].append(cred_entry)
+                # Aggregate to provider totals (current period)
+                prov_stats["total_requests"] += cred_requests
+                prov_stats["tokens"]["input_cached"] += cred_tokens["input_cached"]
+                prov_stats["tokens"]["input_uncached"] += cred_tokens["input_uncached"]
+                prov_stats["tokens"]["output"] += cred_tokens["output"]
+                if cred_cost > 0:
+                    prov_stats["approx_cost"] += cred_cost
+                # Aggregate to global provider totals
+                global_providers[provider]["total_requests"] += cred_global_requests
+                global_providers[provider]["tokens"]["input_cached"] += (
+                    cred_global_tokens["input_cached"]
+                )
+                global_providers[provider]["tokens"]["input_uncached"] += (
+                    cred_global_tokens["input_uncached"]
+                )
+                global_providers[provider]["tokens"]["output"] += cred_global_tokens[
+                    "output"
+                ]
+                global_providers[provider]["approx_cost"] += cred_global_cost
+        # Calculate cache percentages for each provider
+        for provider, prov_stats in providers.items():
+            total_input = (
+                prov_stats["tokens"]["input_cached"]
+                + prov_stats["tokens"]["input_uncached"]
+            )
+            if total_input > 0:
+                prov_stats["tokens"]["input_cache_pct"] = round(
+                    prov_stats["tokens"]["input_cached"] / total_input * 100, 1
+                )
+            # Set cost to None if 0
+            if prov_stats["approx_cost"] == 0:
+                prov_stats["approx_cost"] = None
+            # Calculate global cache percentages
+            if include_global and provider in global_providers:
+                gp = global_providers[provider]
+                global_total = (
+                    gp["tokens"]["input_cached"] + gp["tokens"]["input_uncached"]
+                )
+                if global_total > 0:
+                    gp["tokens"]["input_cache_pct"] = round(
+                        gp["tokens"]["input_cached"] / global_total * 100, 1
+                    )
+                if gp["approx_cost"] == 0:
+                    gp["approx_cost"] = None
+                prov_stats["global"] = gp
+        # Build summary (current period)
+        total_creds = sum(p["credential_count"] for p in providers.values())
+        active_creds = sum(p["active_count"] for p in providers.values())
+        exhausted_creds = sum(p["exhausted_count"] for p in providers.values())
+        total_requests = sum(p["total_requests"] for p in providers.values())
+        total_input_cached = sum(
+            p["tokens"]["input_cached"] for p in providers.values()
+        )
+        total_input_uncached = sum(
+            p["tokens"]["input_uncached"] for p in providers.values()
+        )
+        total_output = sum(p["tokens"]["output"] for p in providers.values())
+        total_cost = sum(p["approx_cost"] or 0 for p in providers.values())
+        total_input = total_input_cached + total_input_uncached
+        input_cache_pct = (
+            round(total_input_cached / total_input * 100, 1) if total_input > 0 else 0
+        )
+        result = {
+            "providers": providers,
+            "summary": {
+                "total_providers": len(providers),
+                "total_credentials": total_creds,
+                "active_credentials": active_creds,
+                "exhausted_credentials": exhausted_creds,
+                "total_requests": total_requests,
+                "tokens": {
+                    "input_cached": total_input_cached,
+                    "input_uncached": total_input_uncached,
+                    "input_cache_pct": input_cache_pct,
+                    "output": total_output,
+                },
+                "approx_total_cost": total_cost if total_cost > 0 else None,
+            },
+            "data_source": "cache",
+            "timestamp": now_ts,
+        }
+        # Build global summary
+        if include_global:
+            global_total_requests = sum(
+                gp["total_requests"] for gp in global_providers.values()
+            )
+            global_total_input_cached = sum(
+                gp["tokens"]["input_cached"] for gp in global_providers.values()
+            )
+            global_total_input_uncached = sum(
+                gp["tokens"]["input_uncached"] for gp in global_providers.values()
+            )
+            global_total_output = sum(
+                gp["tokens"]["output"] for gp in global_providers.values()
+            )
+            global_total_cost = sum(
+                gp["approx_cost"] or 0 for gp in global_providers.values()
+            )
+            global_total_input = global_total_input_cached + global_total_input_uncached
+            global_input_cache_pct = (
+                round(global_total_input_cached / global_total_input * 100, 1)
+                if global_total_input > 0
+                else 0
+            )
+            result["global_summary"] = {
+                "total_providers": len(global_providers),
+                "total_credentials": total_creds,
+                "total_requests": global_total_requests,
+                "tokens": {
+                    "input_cached": global_total_input_cached,
+                    "input_uncached": global_total_input_uncached,
+                    "input_cache_pct": global_input_cache_pct,
+                    "output": global_total_output,
+                },
+                "approx_total_cost": global_total_cost
+                if global_total_cost > 0
+                else None,
+            }
+        return result
+    async def reload_from_disk(self) -> None:
+        """
+        Force reload usage data from disk.
+        Useful when another process may have updated the file.
+        The whole reload runs while holding ``self._init_lock``: the
+        ``_initialized`` flag is cleared, ``_load_usage()`` and then
+        ``_reset_daily_stats_if_needed()`` are awaited, and the flag is
+        set again once both have completed.
+        Returns:
+            None. The method only awaits the two helpers and toggles
+            ``_initialized``.
+        """
+        # Hold the init lock for the entire clear -> load -> reset -> set sequence.
+        async with self._init_lock:
+            # Flag is cleared for the duration of the reload.
+            # NOTE(review): presumably an asyncio.Event that other methods
+            # check or wait on before using the data - confirm against callers.
+            self._initialized.clear()
+            await self._load_usage()
+            # Runs after the load and before the flag is set again.
+            await self._reset_daily_stats_if_needed()
+            # Only reached when both awaits above return normally; if either
+            # raises, the flag stays cleared when this method exits.
+            self._initialized.set()