Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Dec 16, 2025

Commit

06b3f7d

1 Parent(s): 1af1879

feat(quota): restructure quota aggregation with cumulative counts and tier registry

Remove averaged percentage calculations in favor of summing actual request counts across all credentials in a group. Introduce per-tier credential registries that distinguish active from exhausted allocations. Simplify viewer presentation to show absolute usage totals alongside compact tier status markers, deprecating the previous exhaustion counting approach.

Files changed (2) hide show

src/proxy_app/quota_viewer.py +42 -16
src/rotator_library/client.py +60 -9

src/proxy_app/quota_viewer.py CHANGED Viewed

@@ -417,25 +417,51 @@ class QuotaViewer:
                 if quota_groups:
                     quota_lines = []
                     for group_name, group_stats in quota_groups.items():
-                        avg_pct = group_stats.get("avg_remaining_pct", 0)
-                        exhausted = group_stats.get("credentials_exhausted", 0)
-                        total = group_stats.get("credentials_total", 0)
-                        # Determine color based on remaining
-                        if exhausted > 0:
-                            color = "red"
-                            status = f"({exhausted}/{total} exhausted)"
-                        elif avg_pct < 20:
-                            color = "yellow"
-                            status = ""
                         else:
-                            color = "green"
-                            status = ""
-                        bar = create_progress_bar(avg_pct)
-                        display_name = group_name[:10]
                         quota_lines.append(
-                            f"[{color}]{display_name}: {avg_pct}% {bar}[/{color}] {status}"
                         )
                     # First line goes in the main row

                 if quota_groups:
                     quota_lines = []
                     for group_name, group_stats in quota_groups.items():
+                        # Use total requests for global view
+                        total_used = group_stats.get("total_requests_used", 0)
+                        total_max = group_stats.get("total_requests_max", 0)
+                        total_pct = group_stats.get("total_remaining_pct")
+                        tiers = group_stats.get("tiers", {})
+                        # Format tier info: "5(15)f/2s" = 5 active out of 15 free, 2 standard all active
+                        tier_parts = []
+                        for tier_name, tier_info in sorted(tiers.items()):
+                            if tier_name == "unknown":
+                                continue  # Skip unknown tiers in display
+                            total_t = tier_info.get("total", 0)
+                            active_t = tier_info.get("active", 0)
+                            # Use first letter: standard-tier -> s, free-tier -> f
+                            short = tier_name.replace("-tier", "")[0]
+                            if active_t < total_t:
+                                # Some exhausted - show active(total)
+                                tier_parts.append(f"{active_t}({total_t}){short}")
+                            else:
+                                # All active - just show total
+                                tier_parts.append(f"{total_t}{short}")
+                        tier_str = "/".join(tier_parts) if tier_parts else ""
+                        # Determine color based purely on remaining percentage
+                        if total_pct is not None:
+                            if total_pct <= 10:
+                                color = "red"
+                            elif total_pct < 30:
+                                color = "yellow"
+                            else:
+                                color = "green"
                         else:
+                            color = "dim"
+                        bar = create_progress_bar(total_pct)
+                        display_name = group_name[:11]
+                        pct_str = f"{total_pct}%" if total_pct is not None else "?"
+                        # Build status suffix (just tiers now, no outer parens)
+                        status = tier_str
+                        # Compact format: "claude: 1228/1625 24% ████░░░░░░ 5(15)f/2s"
                         quota_lines.append(
+                            f"[{color}]{display_name}: {total_used}/{total_max} {pct_str} {bar}[/{color}] {status}"
                         )
                     # First line goes in the main row

src/rotator_library/client.py CHANGED Viewed

@@ -2657,6 +2657,11 @@ class RotatingClient:
                         "credentials_exhausted": 0,
                         "avg_remaining_pct": 0,
                         "total_remaining_pcts": [],
                     }
                     # Calculate per-credential quota for this group
@@ -2664,17 +2669,44 @@ class RotatingClient:
                         models_data = cred.get("models", {})
                         group_stats["credentials_total"] += 1
-                        # Find any model from this group (try all with alias fallback)
                         model_stats = None
                         for model in group_models:
-                            model_stats = self._find_model_stats_in_data(
                                 models_data, model, provider, provider_instance
                             )
-                            if model_stats:
-                                break
                         if model_stats:
                             baseline = model_stats.get("baseline_remaining_fraction")
                             if baseline is not None:
                                 remaining_pct = int(baseline * 100)
                                 group_stats["total_remaining_pcts"].append(
@@ -2682,8 +2714,11 @@ class RotatingClient:
                                 )
                                 if baseline <= 0:
                                     group_stats["credentials_exhausted"] += 1
-                    # Calculate average remaining percentage
                     if group_stats["total_remaining_pcts"]:
                         group_stats["avg_remaining_pct"] = int(
                             sum(group_stats["total_remaining_pcts"])
@@ -2691,6 +2726,16 @@ class RotatingClient:
                         )
                     del group_stats["total_remaining_pcts"]
                     prov_stats["quota_groups"][group_name] = group_stats
                 # Also enrich each credential with formatted quota group info
@@ -2699,14 +2744,20 @@ class RotatingClient:
                     models_data = cred.get("models", {})
                     for group_name, group_models in quota_groups.items():
-                        # Find representative model from this group (try all with alias fallback)
                         model_stats = None
                         for model in group_models:
-                            model_stats = self._find_model_stats_in_data(
                                 models_data, model, provider, provider_instance
                             )
-                            if model_stats:
-                                break
                         if model_stats:
                             baseline = model_stats.get("baseline_remaining_fraction")

                         "credentials_exhausted": 0,
                         "avg_remaining_pct": 0,
                         "total_remaining_pcts": [],
+                        # Total requests tracking across all credentials
+                        "total_requests_used": 0,
+                        "total_requests_max": 0,
+                        # Tier breakdown: tier_name -> {"total": N, "active": M}
+                        "tiers": {},
                     }
                     # Calculate per-credential quota for this group
                         models_data = cred.get("models", {})
                         group_stats["credentials_total"] += 1
+                        # Track tier - prefer cred["tier"]; fall back to the provider's tier cache when it is not set yet
+                        tier = cred.get("tier")
+                        if not tier and hasattr(
+                            provider_instance, "project_tier_cache"
+                        ):
+                            cred_path = cred.get("full_path", "")
+                            tier = provider_instance.project_tier_cache.get(cred_path)
+                        tier = tier or "unknown"
+                        # Initialize tier entry if needed
+                        if tier not in group_stats["tiers"]:
+                            group_stats["tiers"][tier] = {"total": 0, "active": 0}
+                        group_stats["tiers"][tier]["total"] += 1
+                        # Find model with VALID baseline (not just any model with stats)
                         model_stats = None
                         for model in group_models:
+                            candidate = self._find_model_stats_in_data(
                                 models_data, model, provider, provider_instance
                             )
+                            if candidate:
+                                baseline = candidate.get("baseline_remaining_fraction")
+                                if baseline is not None:
+                                    model_stats = candidate
+                                    break
+                                # Keep first found as fallback (for request counts)
+                                if model_stats is None:
+                                    model_stats = candidate
                         if model_stats:
                             baseline = model_stats.get("baseline_remaining_fraction")
+                            req_count = model_stats.get("request_count", 0)
+                            max_req = model_stats.get("quota_max_requests") or 0
+                            # Accumulate totals (one model per group per credential)
+                            group_stats["total_requests_used"] += req_count
+                            group_stats["total_requests_max"] += max_req
                             if baseline is not None:
                                 remaining_pct = int(baseline * 100)
                                 group_stats["total_remaining_pcts"].append(
                                 )
                                 if baseline <= 0:
                                     group_stats["credentials_exhausted"] += 1
+                                else:
+                                    # Credential is active (has quota remaining)
+                                    group_stats["tiers"][tier]["active"] += 1
+                    # Calculate average remaining percentage (per-credential average)
                     if group_stats["total_remaining_pcts"]:
                         group_stats["avg_remaining_pct"] = int(
                             sum(group_stats["total_remaining_pcts"])
                         )
                     del group_stats["total_remaining_pcts"]
+                    # Calculate total remaining percentage (global)
+                    if group_stats["total_requests_max"] > 0:
+                        used = group_stats["total_requests_used"]
+                        max_r = group_stats["total_requests_max"]
+                        group_stats["total_remaining_pct"] = max(
+                            0, int((1 - used / max_r) * 100)
+                        )
+                    else:
+                        group_stats["total_remaining_pct"] = None
                     prov_stats["quota_groups"][group_name] = group_stats
                 # Also enrich each credential with formatted quota group info
                     models_data = cred.get("models", {})
                     for group_name, group_models in quota_groups.items():
+                        # Find model with VALID baseline (prefer over any model with stats)
                         model_stats = None
                         for model in group_models:
+                            candidate = self._find_model_stats_in_data(
                                 models_data, model, provider, provider_instance
                             )
+                            if candidate:
+                                baseline = candidate.get("baseline_remaining_fraction")
+                                if baseline is not None:
+                                    model_stats = candidate
+                                    break
+                                # Keep first found as fallback
+                                if model_stats is None:
+                                    model_stats = candidate
                         if model_stats:
                             baseline = model_stats.get("baseline_remaining_fraction")