Mirrowel committed on
Commit
aefb706
·
1 Parent(s): 136eb6c

feat(concurrency): ✨ add priority-based concurrency multipliers for credential tiers

Browse files

This commit introduces a flexible priority-based concurrency multiplier system that allows higher-priority credentials (e.g., paid tiers) to handle more concurrent requests than lower-priority credentials, regardless of rotation mode.

Key changes:
- Added `default_priority_multipliers` and `default_sequential_fallback_multiplier` to `ProviderInterface` for provider-level configuration
- Implemented multiplier lookup with mode-specific overrides via environment variables (format: `CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>[_<MODE>]=<multiplier>`)
- Modified `UsageManager` to calculate effective concurrency limits by applying multipliers to base `MAX_CONCURRENT_REQUESTS_PER_KEY` values
- Added `PriorityMultiplierManager` to `settings_tool.py` for runtime configuration and display of multipliers
- Configured default multipliers for Antigravity (P1: 5x, P2: 3x, sequential fallback: 2x) and Gemini CLI (P1: 5x, P2: 3x)
- Introduced `model_usage_weights` to account for models with different quota consumption rates (e.g., Opus counts 2x vs Sonnet)
- Implemented `_get_grouped_usage_count()` for weighted usage calculation across quota groups
- Refactored `_sort_sequential()` to return sorted lists instead of single selection, allowing multipliers to enable multiple concurrent requests in sequential mode
- Enhanced logging to display effective concurrency limits and priority tiers during credential acquisition
- Added comprehensive documentation in `.env.example` explaining the multiplier system and configuration options

The multiplier system preserves existing rotation behavior while allowing paid credentials to maximize throughput. In sequential mode, multipliers enable controlled concurrency while maintaining cache-preserving stickiness. In balanced mode, multipliers provide fair load distribution with tier-appropriate capacity.

.env.example CHANGED
@@ -185,6 +185,37 @@ MAX_CONCURRENT_REQUESTS_PER_KEY_IFLOW=1
185
  # ROTATION_MODE_GEMINI=balanced
186
  # ROTATION_MODE_ANTIGRAVITY=sequential
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  # --- Model Quota Groups ---
189
  # Models that share quota/cooldown timing. When one model in a group hits
190
  # quota exhausted (429), all models in the group receive the same cooldown timestamp.
 
185
  # ROTATION_MODE_GEMINI=balanced
186
  # ROTATION_MODE_ANTIGRAVITY=sequential
187
 
188
+ # --- Priority-Based Concurrency Multipliers ---
189
+ # Credentials can be assigned to priority tiers (1=highest, 2, 3, etc.).
190
+ # Each tier can have a concurrency multiplier that increases the effective
191
+ # concurrent request limit for credentials in that tier.
192
+ #
193
+ # How it works:
194
+ # effective_concurrent_limit = MAX_CONCURRENT_REQUESTS_PER_KEY * tier_multiplier
195
+ #
196
+ # This allows paid/premium credentials to handle more concurrent requests than
197
+ # free tier credentials, regardless of rotation mode.
198
+ #
199
+ # Provider Defaults (built into provider classes):
200
+ # Antigravity:
201
+ # Priority 1: 5x (paid ultra tier)
202
+ # Priority 2: 3x (standard paid tier)
203
+ # Priority 3+: 2x (sequential mode) or 1x (balanced mode)
204
+ # Gemini CLI:
205
+ # Priority 1: 5x
206
+ # Priority 2: 3x
207
+ # Others: 1x (all modes)
208
+ #
209
+ # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
210
+ #
211
+ # Mode-specific overrides (optional):
212
+ # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
213
+ #
214
+ # Examples:
215
+ # CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_1=10 # Override P1 to 10x
216
+ # CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_3=1 # Override P3 to 1x
217
+ # CONCURRENCY_MULTIPLIER_ANTIGRAVITY_PRIORITY_2_BALANCED=1 # P2 = 1x in balanced mode only
218
+
219
  # --- Model Quota Groups ---
220
  # Models that share quota/cooldown timing. When one model in a group hits
221
  # quota exhausted (429), all models in the group receive the same cooldown timestamp.
src/proxy_app/settings_tool.py CHANGED
@@ -234,6 +234,94 @@ class RotationModeManager:
234
  self.settings.remove(key)
235
 
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  # =============================================================================
238
  # PROVIDER-SPECIFIC SETTINGS DEFINITIONS
239
  # =============================================================================
@@ -424,6 +512,7 @@ class SettingsTool:
424
  self.model_mgr = ModelDefinitionManager(self.settings)
425
  self.concurrency_mgr = ConcurrencyManager(self.settings)
426
  self.rotation_mgr = RotationModeManager(self.settings)
 
427
  self.provider_settings_mgr = ProviderSettingsManager(self.settings)
428
  self.running = True
429
 
@@ -1268,14 +1357,15 @@ class SettingsTool:
1268
  self.console.print()
1269
  self.console.print(" 1. ➕ Set Rotation Mode for Provider")
1270
  self.console.print(" 2. 🗑️ Reset to Provider Default")
1271
- self.console.print(" 3. ↩️ Back to Settings Menu")
 
1272
 
1273
  self.console.print()
1274
  self.console.print("━" * 70)
1275
  self.console.print()
1276
 
1277
  choice = Prompt.ask(
1278
- "Select option", choices=["1", "2", "3"], show_choices=False
1279
  )
1280
 
1281
  if choice == "1":
@@ -1368,8 +1458,170 @@ class SettingsTool:
1368
  input("\nPress Enter to continue...")
1369
 
1370
  elif choice == "3":
 
 
 
1371
  break
1372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1373
  def manage_concurrency_limits(self):
1374
  """Manage concurrency limits"""
1375
  while True:
 
234
  self.settings.remove(key)
235
 
236
 
237
+ class PriorityMultiplierManager:
238
+ """Manages CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N> settings"""
239
+
240
+ def __init__(self, settings: AdvancedSettings):
241
+ self.settings = settings
242
+
243
+ def get_provider_defaults(self, provider: str) -> Dict[int, int]:
244
+ """Get default priority multipliers from provider class"""
245
+ try:
246
+ from rotator_library.providers import PROVIDER_PLUGINS
247
+
248
+ provider_class = PROVIDER_PLUGINS.get(provider.lower())
249
+ if provider_class and hasattr(
250
+ provider_class, "default_priority_multipliers"
251
+ ):
252
+ return dict(provider_class.default_priority_multipliers)
253
+ except ImportError:
254
+ pass
255
+ return {}
256
+
257
+ def get_sequential_fallback(self, provider: str) -> int:
258
+ """Get sequential fallback multiplier from provider class"""
259
+ try:
260
+ from rotator_library.providers import PROVIDER_PLUGINS
261
+
262
+ provider_class = PROVIDER_PLUGINS.get(provider.lower())
263
+ if provider_class and hasattr(
264
+ provider_class, "default_sequential_fallback_multiplier"
265
+ ):
266
+ return provider_class.default_sequential_fallback_multiplier
267
+ except ImportError:
268
+ pass
269
+ return 1
270
+
271
+ def get_current_multipliers(self) -> Dict[str, Dict[int, int]]:
272
+ """Get currently configured priority multipliers from env vars"""
273
+ multipliers: Dict[str, Dict[int, int]] = {}
274
+ for key, value in os.environ.items():
275
+ if key.startswith("CONCURRENCY_MULTIPLIER_") and "_PRIORITY_" in key:
276
+ try:
277
+ # Parse: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>
278
+ parts = key.split("_PRIORITY_")
279
+ provider = parts[0].replace("CONCURRENCY_MULTIPLIER_", "").lower()
280
+ remainder = parts[1]
281
+
282
+ # Check if mode-specific (has _SEQUENTIAL or _BALANCED suffix)
283
+ if "_" in remainder:
284
+ continue # Skip mode-specific for now (show in separate view)
285
+
286
+ priority = int(remainder)
287
+ multiplier = int(value)
288
+
289
+ if provider not in multipliers:
290
+ multipliers[provider] = {}
291
+ multipliers[provider][priority] = multiplier
292
+ except (ValueError, IndexError):
293
+ pass
294
+ return multipliers
295
+
296
+ def get_effective_multiplier(self, provider: str, priority: int) -> int:
297
+ """Get effective multiplier (configured, provider default, or 1)"""
298
+ # Check env var override
299
+ current = self.get_current_multipliers()
300
+ if provider.lower() in current:
301
+ if priority in current[provider.lower()]:
302
+ return current[provider.lower()][priority]
303
+
304
+ # Check provider defaults
305
+ defaults = self.get_provider_defaults(provider)
306
+ if priority in defaults:
307
+ return defaults[priority]
308
+
309
+ # Return 1 (no multiplier)
310
+ return 1
311
+
312
+ def set_multiplier(self, provider: str, priority: int, multiplier: int):
313
+ """Set priority multiplier for a provider"""
314
+ if multiplier < 1:
315
+ raise ValueError("Multiplier must be >= 1")
316
+ key = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
317
+ self.settings.set(key, str(multiplier))
318
+
319
+ def remove_multiplier(self, provider: str, priority: int):
320
+ """Remove multiplier (reset to provider default)"""
321
+ key = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_{priority}"
322
+ self.settings.remove(key)
323
+
324
+
325
  # =============================================================================
326
  # PROVIDER-SPECIFIC SETTINGS DEFINITIONS
327
  # =============================================================================
 
512
  self.model_mgr = ModelDefinitionManager(self.settings)
513
  self.concurrency_mgr = ConcurrencyManager(self.settings)
514
  self.rotation_mgr = RotationModeManager(self.settings)
515
+ self.priority_multiplier_mgr = PriorityMultiplierManager(self.settings)
516
  self.provider_settings_mgr = ProviderSettingsManager(self.settings)
517
  self.running = True
518
 
 
1357
  self.console.print()
1358
  self.console.print(" 1. ➕ Set Rotation Mode for Provider")
1359
  self.console.print(" 2. 🗑️ Reset to Provider Default")
1360
+ self.console.print(" 3. Configure Priority Concurrency Multipliers")
1361
+ self.console.print(" 4. ↩️ Back to Settings Menu")
1362
 
1363
  self.console.print()
1364
  self.console.print("━" * 70)
1365
  self.console.print()
1366
 
1367
  choice = Prompt.ask(
1368
+ "Select option", choices=["1", "2", "3", "4"], show_choices=False
1369
  )
1370
 
1371
  if choice == "1":
 
1458
  input("\nPress Enter to continue...")
1459
 
1460
  elif choice == "3":
1461
+ self.manage_priority_multipliers()
1462
+
1463
+ elif choice == "4":
1464
  break
1465
 
1466
+ def manage_priority_multipliers(self):
1467
+ """Manage priority-based concurrency multipliers per provider"""
1468
+ clear_screen()
1469
+
1470
+ current_multipliers = self.priority_multiplier_mgr.get_current_multipliers()
1471
+ available_providers = self.get_available_providers()
1472
+
1473
+ self.console.print(
1474
+ Panel.fit(
1475
+ "[bold cyan]⚡ Priority Concurrency Multipliers[/bold cyan]",
1476
+ border_style="cyan",
1477
+ )
1478
+ )
1479
+
1480
+ self.console.print()
1481
+ self.console.print("[bold]📋 Current Priority Multiplier Settings[/bold]")
1482
+ self.console.print("━" * 70)
1483
+
1484
+ # Show all providers with their priority multipliers
1485
+ has_settings = False
1486
+ for provider in available_providers:
1487
+ defaults = self.priority_multiplier_mgr.get_provider_defaults(provider)
1488
+ overrides = current_multipliers.get(provider, {})
1489
+ seq_fallback = self.priority_multiplier_mgr.get_sequential_fallback(
1490
+ provider
1491
+ )
1492
+ rotation_mode = self.rotation_mgr.get_effective_mode(provider)
1493
+
1494
+ if defaults or overrides or seq_fallback != 1:
1495
+ has_settings = True
1496
+ self.console.print(
1497
+ f"\n [bold]{provider}[/bold] ({rotation_mode} mode)"
1498
+ )
1499
+
1500
+ # Combine and display priorities
1501
+ all_priorities = set(defaults.keys()) | set(overrides.keys())
1502
+ for priority in sorted(all_priorities):
1503
+ default_val = defaults.get(priority, 1)
1504
+ override_val = overrides.get(priority)
1505
+
1506
+ if override_val is not None:
1507
+ self.console.print(
1508
+ f" Priority {priority}: [cyan]{override_val}x[/cyan] (override, default: {default_val}x)"
1509
+ )
1510
+ else:
1511
+ self.console.print(
1512
+ f" Priority {priority}: {default_val}x [dim](default)[/dim]"
1513
+ )
1514
+
1515
+ # Show sequential fallback if applicable
1516
+ if rotation_mode == "sequential" and seq_fallback != 1:
1517
+ self.console.print(
1518
+ f" Others (seq): {seq_fallback}x [dim](fallback)[/dim]"
1519
+ )
1520
+
1521
+ if not has_settings:
1522
+ self.console.print(" [dim]No priority multipliers configured[/dim]")
1523
+
1524
+ self.console.print()
1525
+ self.console.print("[bold]ℹ️ About Priority Multipliers:[/bold]")
1526
+ self.console.print(
1527
+ " Higher priority tiers (lower numbers) can have higher multipliers."
1528
+ )
1529
+ self.console.print(" Example: Priority 1 = 5x, Priority 2 = 3x, Others = 1x")
1530
+ self.console.print()
1531
+ self.console.print("━" * 70)
1532
+ self.console.print()
1533
+ self.console.print(" 1. ✏️ Set Priority Multiplier")
1534
+ self.console.print(" 2. 🔄 Reset to Provider Default")
1535
+ self.console.print(" 3. ↩️ Back")
1536
+
1537
+ choice = Prompt.ask(
1538
+ "Select option", choices=["1", "2", "3"], show_choices=False
1539
+ )
1540
+
1541
+ if choice == "1":
1542
+ if not available_providers:
1543
+ self.console.print("\n[yellow]No providers available[/yellow]")
1544
+ input("\nPress Enter to continue...")
1545
+ return
1546
+
1547
+ # Select provider
1548
+ self.console.print("\n[bold]Select provider:[/bold]")
1549
+ for idx, prov in enumerate(available_providers, 1):
1550
+ self.console.print(f" {idx}. {prov}")
1551
+
1552
+ prov_idx = IntPrompt.ask(
1553
+ "Provider",
1554
+ choices=[str(i) for i in range(1, len(available_providers) + 1)],
1555
+ )
1556
+ provider = available_providers[prov_idx - 1]
1557
+
1558
+ # Get priority level
1559
+ priority = IntPrompt.ask("Priority level (e.g., 1, 2, 3)")
1560
+
1561
+ # Get current value
1562
+ current = self.priority_multiplier_mgr.get_effective_multiplier(
1563
+ provider, priority
1564
+ )
1565
+ self.console.print(
1566
+ f"\nCurrent multiplier for priority {priority}: {current}x"
1567
+ )
1568
+
1569
+ multiplier = IntPrompt.ask("New multiplier (1-10)", default=current)
1570
+ if 1 <= multiplier <= 10:
1571
+ self.priority_multiplier_mgr.set_multiplier(
1572
+ provider, priority, multiplier
1573
+ )
1574
+ self.console.print(
1575
+ f"\n[green]✅ Priority {priority} multiplier for '{provider}' set to {multiplier}x[/green]"
1576
+ )
1577
+ else:
1578
+ self.console.print(
1579
+ "\n[yellow]Multiplier must be between 1 and 10[/yellow]"
1580
+ )
1581
+ input("\nPress Enter to continue...")
1582
+
1583
+ elif choice == "2":
1584
+ # Find providers with overrides
1585
+ providers_with_overrides = [
1586
+ p for p in available_providers if p in current_multipliers
1587
+ ]
1588
+ if not providers_with_overrides:
1589
+ self.console.print("\n[yellow]No custom multipliers to reset[/yellow]")
1590
+ input("\nPress Enter to continue...")
1591
+ return
1592
+
1593
+ self.console.print("\n[bold]Select provider to reset:[/bold]")
1594
+ for idx, prov in enumerate(providers_with_overrides, 1):
1595
+ self.console.print(f" {idx}. {prov}")
1596
+
1597
+ prov_idx = IntPrompt.ask(
1598
+ "Provider",
1599
+ choices=[str(i) for i in range(1, len(providers_with_overrides) + 1)],
1600
+ )
1601
+ provider = providers_with_overrides[prov_idx - 1]
1602
+
1603
+ # Get priority to reset
1604
+ overrides = current_multipliers.get(provider, {})
1605
+ if len(overrides) == 1:
1606
+ priority = list(overrides.keys())[0]
1607
+ else:
1608
+ self.console.print(f"\nOverrides for {provider}: {overrides}")
1609
+ priority = IntPrompt.ask("Priority level to reset")
1610
+
1611
+ if priority in overrides:
1612
+ self.priority_multiplier_mgr.remove_multiplier(provider, priority)
1613
+ default = self.priority_multiplier_mgr.get_effective_multiplier(
1614
+ provider, priority
1615
+ )
1616
+ self.console.print(
1617
+ f"\n[green]✅ Reset priority {priority} for '{provider}' to default ({default}x)[/green]"
1618
+ )
1619
+ else:
1620
+ self.console.print(
1621
+ f"\n[yellow]No override for priority {priority}[/yellow]"
1622
+ )
1623
+ input("\nPress Enter to continue...")
1624
+
1625
  def manage_concurrency_limits(self):
1626
  """Manage concurrency limits"""
1627
  while True:
src/rotator_library/client.py CHANGED
@@ -161,11 +161,95 @@ class RotatingClient:
161
  if mode != "balanced":
162
  lib_logger.info(f"Provider '{provider}' using rotation mode: {mode}")
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  self.usage_manager = UsageManager(
165
  file_path=usage_file_path,
166
  rotation_tolerance=rotation_tolerance,
167
  provider_rotation_modes=provider_rotation_modes,
168
  provider_plugins=PROVIDER_PLUGINS,
 
 
 
169
  )
170
  self._model_list_cache = {}
171
  self.http_client = httpx.AsyncClient()
 
161
  if mode != "balanced":
162
  lib_logger.info(f"Provider '{provider}' using rotation mode: {mode}")
163
 
164
+ # Build priority-based concurrency multiplier maps
165
+ # These are universal multipliers based on credential tier/priority
166
+ priority_multipliers: Dict[str, Dict[int, int]] = {}
167
+ priority_multipliers_by_mode: Dict[str, Dict[str, Dict[int, int]]] = {}
168
+ sequential_fallback_multipliers: Dict[str, int] = {}
169
+
170
+ for provider in self.all_credentials.keys():
171
+ provider_class = self._provider_plugins.get(provider)
172
+
173
+ # Start with provider class defaults
174
+ if provider_class:
175
+ # Get default priority multipliers from provider class
176
+ if hasattr(provider_class, "default_priority_multipliers"):
177
+ default_multipliers = provider_class.default_priority_multipliers
178
+ if default_multipliers:
179
+ priority_multipliers[provider] = dict(default_multipliers)
180
+
181
+ # Get sequential fallback from provider class
182
+ if hasattr(provider_class, "default_sequential_fallback_multiplier"):
183
+ fallback = provider_class.default_sequential_fallback_multiplier
184
+ if fallback != 1: # Only store if different from global default
185
+ sequential_fallback_multipliers[provider] = fallback
186
+
187
+ # Override with environment variables
188
+ # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>=<multiplier>
189
+ # Format: CONCURRENCY_MULTIPLIER_<PROVIDER>_PRIORITY_<N>_<MODE>=<multiplier>
190
+ for key, value in os.environ.items():
191
+ prefix = f"CONCURRENCY_MULTIPLIER_{provider.upper()}_PRIORITY_"
192
+ if key.startswith(prefix):
193
+ remainder = key[len(prefix) :]
194
+ try:
195
+ multiplier = int(value)
196
+ if multiplier < 1:
197
+ lib_logger.warning(f"Invalid {key}: {value}. Must be >= 1.")
198
+ continue
199
+
200
+ # Check if mode-specific (e.g., _PRIORITY_1_SEQUENTIAL)
201
+ if "_" in remainder:
202
+ parts = remainder.rsplit("_", 1)
203
+ priority = int(parts[0])
204
+ mode = parts[1].lower()
205
+ if mode in ("sequential", "balanced"):
206
+ # Mode-specific override
207
+ if provider not in priority_multipliers_by_mode:
208
+ priority_multipliers_by_mode[provider] = {}
209
+ if mode not in priority_multipliers_by_mode[provider]:
210
+ priority_multipliers_by_mode[provider][mode] = {}
211
+ priority_multipliers_by_mode[provider][mode][
212
+ priority
213
+ ] = multiplier
214
+ lib_logger.info(
215
+ f"Provider '{provider}' priority {priority} ({mode} mode) multiplier: {multiplier}x"
216
+ )
217
+ else:
218
+ # Assume it's part of the priority number (unlikely but handle gracefully)
219
+ lib_logger.warning(f"Unknown mode in {key}: {mode}")
220
+ else:
221
+ # Universal priority multiplier
222
+ priority = int(remainder)
223
+ if provider not in priority_multipliers:
224
+ priority_multipliers[provider] = {}
225
+ priority_multipliers[provider][priority] = multiplier
226
+ lib_logger.info(
227
+ f"Provider '{provider}' priority {priority} multiplier: {multiplier}x"
228
+ )
229
+ except ValueError:
230
+ lib_logger.warning(
231
+ f"Invalid {key}: {value}. Could not parse priority or multiplier."
232
+ )
233
+
234
+ # Log configured multipliers
235
+ for provider, multipliers in priority_multipliers.items():
236
+ if multipliers:
237
+ lib_logger.info(
238
+ f"Provider '{provider}' priority multipliers: {multipliers}"
239
+ )
240
+ for provider, fallback in sequential_fallback_multipliers.items():
241
+ lib_logger.info(
242
+ f"Provider '{provider}' sequential fallback multiplier: {fallback}x"
243
+ )
244
+
245
  self.usage_manager = UsageManager(
246
  file_path=usage_file_path,
247
  rotation_tolerance=rotation_tolerance,
248
  provider_rotation_modes=provider_rotation_modes,
249
  provider_plugins=PROVIDER_PLUGINS,
250
+ priority_multipliers=priority_multipliers,
251
+ priority_multipliers_by_mode=priority_multipliers_by_mode,
252
+ sequential_fallback_multipliers=sequential_fallback_multipliers,
253
  )
254
  self._model_list_cache = {}
255
  self.http_client = httpx.AsyncClient()
src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -539,10 +539,29 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
539
  }
540
 
541
  # Model quota groups (can be overridden via QUOTA_GROUPS_ANTIGRAVITY_CLAUDE)
 
542
  model_quota_groups: QuotaGroupMap = {
543
- # "claude": ["claude-sonnet-4-5", "claude-opus-4-5"], - commented out for later use if needed
544
  }
545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
  @staticmethod
547
  def parse_quota_error(
548
  error: Exception, error_body: Optional[str] = None
 
539
  }
540
 
541
  # Model quota groups (can be overridden via QUOTA_GROUPS_ANTIGRAVITY_CLAUDE)
542
+ # Models in the same group share quota - when one is exhausted, all are
543
  model_quota_groups: QuotaGroupMap = {
544
+ #"claude": ["claude-sonnet-4-5", "claude-opus-4-5"], - commented out for later use if needed
545
  }
546
 
547
+ # Model usage weights for grouped usage calculation
548
+ # Opus consumes more quota per request, so its usage counts 2x when
549
+ # comparing credentials for selection
550
+ model_usage_weights = {
551
+ "claude-opus-4-5": 2,
552
+ }
553
+
554
+ # Priority-based concurrency multipliers
555
+ # Higher priority credentials (lower number) get higher multipliers
556
+ # Priority 1 (paid ultra): 5x concurrent requests
557
+ # Priority 2 (standard paid): 3x concurrent requests
558
+ # Others: Use sequential fallback (2x) or balanced default (1x)
559
+ default_priority_multipliers = {1: 5, 2: 3}
560
+
561
+ # For sequential mode, lower priority tiers still get 2x to maintain stickiness
562
+ # For balanced mode, this doesn't apply (falls back to 1x)
563
+ default_sequential_fallback_multiplier = 2
564
+
565
  @staticmethod
566
  def parse_quota_error(
567
  error: Exception, error_body: Optional[str] = None
src/rotator_library/providers/gemini_cli_provider.py CHANGED
@@ -219,6 +219,16 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
219
  # No quota groups defined for Gemini CLI
220
  # (Models don't share quotas)
221
 
 
 
 
 
 
 
 
 
 
 
222
  @staticmethod
223
  def parse_quota_error(
224
  error: Exception, error_body: Optional[str] = None
 
219
  # No quota groups defined for Gemini CLI
220
  # (Models don't share quotas)
221
 
222
+ # Priority-based concurrency multipliers
223
+ # Same structure as Antigravity (by coincidence, tiers share naming)
224
+ # Priority 1 (paid ultra): 5x concurrent requests
225
+ # Priority 2 (standard paid): 3x concurrent requests
226
+ # Others: 1x (no sequential fallback, uses global default)
227
+ default_priority_multipliers = {1: 5, 2: 3}
228
+
229
+ # No sequential fallback for Gemini CLI (uses balanced mode default)
230
+ # default_sequential_fallback_multiplier = 1 (inherited from ProviderInterface)
231
+
232
  @staticmethod
233
  def parse_quota_error(
234
  error: Exception, error_body: Optional[str] = None
src/rotator_library/providers/provider_interface.py CHANGED
@@ -88,6 +88,30 @@ class ProviderInterface(ABC):
88
  # Can be overridden via env: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
89
  model_quota_groups: QuotaGroupMap = {}
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  @abstractmethod
92
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
93
  """
@@ -505,3 +529,20 @@ class ProviderInterface(ABC):
505
  Empty list if group doesn't exist.
506
  """
507
  return self._get_quota_group_models(group)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  # Can be overridden via env: QUOTA_GROUPS_{PROVIDER}_{GROUP}="model1,model2"
89
  model_quota_groups: QuotaGroupMap = {}
90
 
91
+ # Model usage weights for grouped usage calculation
92
+ # When calculating combined usage for quota groups, each model's usage
93
+ # is multiplied by its weight. This accounts for models that consume
94
+ # more quota per request (e.g., Opus uses more than Sonnet).
95
+ # Models not in the map default to weight 1.
96
+ # Example: {"claude-opus-4-5": 2} means Opus usage counts 2x
97
+ model_usage_weights: Dict[str, int] = {}
98
+
99
+ # =========================================================================
100
+ # PRIORITY CONCURRENCY MULTIPLIERS - Override in subclass
101
+ # =========================================================================
102
+
103
+ # Priority-based concurrency multipliers (universal, applies to all modes)
104
+ # Maps priority level -> multiplier
105
+ # Higher priority credentials (lower number) can have higher multipliers
106
+ # to allow more concurrent requests
107
+ # Example: {1: 5, 2: 3} means Priority 1 gets 5x, Priority 2 gets 3x
108
+ default_priority_multipliers: Dict[int, int] = {}
109
+
110
+ # Fallback multiplier for sequential mode when priority not in default_priority_multipliers
111
+ # This is used for lower-priority tiers in sequential mode to maintain some stickiness
112
+ # Default: 1 (no multiplier effect)
113
+ default_sequential_fallback_multiplier: int = 1
114
+
115
  @abstractmethod
116
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
117
  """
 
529
  Empty list if group doesn't exist.
530
  """
531
  return self._get_quota_group_models(group)
532
+
533
+ def get_model_usage_weight(self, model: str) -> int:
534
+ """
535
+ Returns the usage weight for a model when calculating grouped usage.
536
+
537
+ Models with higher weights contribute more to the combined group usage.
538
+ This accounts for models that consume more quota per request.
539
+
540
+ Args:
541
+ model: Model name (with or without provider prefix)
542
+
543
+ Returns:
544
+ Weight multiplier (default 1 if not configured)
545
+ """
546
+ # Strip provider prefix if present
547
+ clean_model = model.split("/")[-1] if "/" in model else model
548
+ return self.model_usage_weights.get(clean_model, 1)
src/rotator_library/usage_manager.py CHANGED
@@ -55,6 +55,11 @@ class UsageManager:
55
  rotation_tolerance: float = 0.0,
56
  provider_rotation_modes: Optional[Dict[str, str]] = None,
57
  provider_plugins: Optional[Dict[str, Any]] = None,
 
 
 
 
 
58
  ):
59
  """
60
  Initialize the UsageManager.
@@ -71,11 +76,22 @@ class UsageManager:
71
  - "sequential": Use one credential until exhausted (preserves caching)
72
  provider_plugins: Dict mapping provider names to provider plugin instances.
73
  Used for per-provider usage reset configuration (window durations, field names).
 
 
 
 
 
 
 
 
74
  """
75
  self.file_path = file_path
76
  self.rotation_tolerance = rotation_tolerance
77
  self.provider_rotation_modes = provider_rotation_modes or {}
78
  self.provider_plugins = provider_plugins or PROVIDER_PLUGINS
 
 
 
79
  self._provider_instances: Dict[str, Any] = {} # Cache for provider instances
80
  self.key_states: Dict[str, Dict[str, Any]] = {}
81
 
@@ -107,6 +123,48 @@ class UsageManager:
107
  """
108
  return self.provider_rotation_modes.get(provider, "balanced")
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def _get_provider_from_credential(self, credential: str) -> Optional[str]:
111
  """
112
  Extract provider name from credential path or identifier.
@@ -238,6 +296,60 @@ class UsageManager:
238
 
239
  return []
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  def _get_usage_field_name(self, credential: str) -> str:
242
  """
243
  Get the usage tracking field name for a credential.
@@ -360,59 +472,64 @@ class UsageManager:
360
 
361
  return data
362
 
363
- def _select_sequential(
364
  self,
365
  candidates: List[Tuple[str, int]],
366
  credential_priorities: Optional[Dict[str, int]] = None,
367
- ) -> str:
368
  """
369
- Select credential in strict sequential order for cache-preserving rotation.
370
 
371
- This method ensures the same credential is reused until it hits a cooldown,
372
- which preserves provider-side caching (e.g., thinking signature caches).
373
 
374
- Selection logic:
375
- 1. Sort by priority (lowest number = highest priority)
376
- 2. Within same priority, sort by last_used_ts (most recent first = sticky)
377
- 3. Return the first candidate
 
378
 
379
  Args:
380
  candidates: List of (credential_id, usage_count) tuples
381
  credential_priorities: Optional dict mapping credentials to priority levels
382
 
383
  Returns:
384
- Selected credential ID
385
  """
386
  if not candidates:
387
- raise ValueError("Cannot select from empty candidate list")
388
 
389
  if len(candidates) == 1:
390
- return candidates[0][0]
391
 
392
- def sort_key(item: Tuple[str, int]) -> Tuple[int, float]:
393
- cred, _ = item
394
- # Priority: lower is better (1 = highest priority)
395
  priority = (
396
  credential_priorities.get(cred, 999) if credential_priorities else 999
397
  )
398
- # Last used: higher (more recent) is better for stickiness
399
  last_used = (
400
  self._usage_data.get(cred, {}).get("last_used_ts", 0)
401
  if self._usage_data
402
  else 0
403
  )
404
- # Negative last_used so most recent sorts first
405
- return (priority, -last_used)
 
 
 
 
406
 
407
  sorted_candidates = sorted(candidates, key=sort_key)
408
- selected = sorted_candidates[0][0]
409
 
410
- lib_logger.debug(
411
- f"Sequential selection: chose {mask_credential(selected)} "
412
- f"(priority={credential_priorities.get(selected, 999) if credential_priorities else 'N/A'})"
413
- )
 
 
 
414
 
415
- return selected
416
 
417
  async def _lazy_init(self):
418
  """Initializes the usage data by loading it from the file asynchronously."""
@@ -966,7 +1083,8 @@ class UsageManager:
966
  priority = credential_priorities.get(key, 999)
967
 
968
  # Get usage count for load balancing within priority groups
969
- usage_count = self._get_usage_count(key, model)
 
970
 
971
  # Group by priority
972
  if priority not in priority_groups:
@@ -979,6 +1097,16 @@ class UsageManager:
979
  for priority_level in sorted_priorities:
980
  keys_in_priority = priority_groups[priority_level]
981
 
 
 
 
 
 
 
 
 
 
 
982
  # Within each priority group, use existing tier1/tier2 logic
983
  tier1_keys, tier2_keys = [], []
984
  for key, usage_count in keys_in_priority:
@@ -988,30 +1116,24 @@ class UsageManager:
988
  if not key_state["models_in_use"]:
989
  tier1_keys.append((key, usage_count))
990
  # Tier 2: Keys that can accept more concurrent requests
991
- elif key_state["models_in_use"].get(model, 0) < max_concurrent:
 
 
 
992
  tier2_keys.append((key, usage_count))
993
 
994
- # Determine selection method based on provider's rotation mode
995
- provider = model.split("/")[0] if "/" in model else ""
996
- rotation_mode = self._get_rotation_mode(provider)
997
-
998
  if rotation_mode == "sequential":
999
- # Sequential mode: stick with same credential until exhausted
 
1000
  selection_method = "sequential"
1001
  if tier1_keys:
1002
- selected_key = self._select_sequential(
1003
  tier1_keys, credential_priorities
1004
  )
1005
- tier1_keys = [
1006
- (k, u) for k, u in tier1_keys if k == selected_key
1007
- ]
1008
  if tier2_keys:
1009
- selected_key = self._select_sequential(
1010
  tier2_keys, credential_priorities
1011
  )
1012
- tier2_keys = [
1013
- (k, u) for k, u in tier2_keys if k == selected_key
1014
- ]
1015
  elif self.rotation_tolerance > 0:
1016
  # Balanced mode with weighted randomness
1017
  selection_method = "weighted-random"
@@ -1057,7 +1179,7 @@ class UsageManager:
1057
  state = self.key_states[key]
1058
  async with state["lock"]:
1059
  current_count = state["models_in_use"].get(model, 0)
1060
- if current_count < max_concurrent:
1061
  state["models_in_use"][model] = current_count + 1
1062
  tier_name = (
1063
  credential_tier_names.get(key, "unknown")
@@ -1066,7 +1188,7 @@ class UsageManager:
1066
  )
1067
  lib_logger.info(
1068
  f"Acquired key {mask_credential(key)} for model {model} "
1069
- f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
1070
  )
1071
  return key
1072
 
@@ -1095,6 +1217,19 @@ class UsageManager:
1095
 
1096
  else:
1097
  # Original logic when no priorities specified
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  tier1_keys, tier2_keys = [], []
1099
 
1100
  # First, filter the list of available keys to exclude any on cooldown.
@@ -1108,37 +1243,32 @@ class UsageManager:
1108
  continue
1109
 
1110
  # Prioritize keys based on their current usage to ensure load balancing.
1111
- usage_count = self._get_usage_count(key, model)
 
1112
  key_state = self.key_states[key]
1113
 
1114
  # Tier 1: Completely idle keys (preferred).
1115
  if not key_state["models_in_use"]:
1116
  tier1_keys.append((key, usage_count))
1117
  # Tier 2: Keys that can accept more concurrent requests for this model.
1118
- elif key_state["models_in_use"].get(model, 0) < max_concurrent:
 
 
 
1119
  tier2_keys.append((key, usage_count))
1120
 
1121
- # Determine selection method based on provider's rotation mode
1122
- provider = model.split("/")[0] if "/" in model else ""
1123
- rotation_mode = self._get_rotation_mode(provider)
1124
-
1125
  if rotation_mode == "sequential":
1126
- # Sequential mode: stick with same credential until exhausted
 
1127
  selection_method = "sequential"
1128
  if tier1_keys:
1129
- selected_key = self._select_sequential(
1130
  tier1_keys, credential_priorities
1131
  )
1132
- tier1_keys = [
1133
- (k, u) for k, u in tier1_keys if k == selected_key
1134
- ]
1135
  if tier2_keys:
1136
- selected_key = self._select_sequential(
1137
  tier2_keys, credential_priorities
1138
  )
1139
- tier2_keys = [
1140
- (k, u) for k, u in tier2_keys if k == selected_key
1141
- ]
1142
  elif self.rotation_tolerance > 0:
1143
  # Balanced mode with weighted randomness
1144
  selection_method = "weighted-random"
@@ -1185,7 +1315,7 @@ class UsageManager:
1185
  state = self.key_states[key]
1186
  async with state["lock"]:
1187
  current_count = state["models_in_use"].get(model, 0)
1188
- if current_count < max_concurrent:
1189
  state["models_in_use"][model] = current_count + 1
1190
  tier_name = (
1191
  credential_tier_names.get(key)
@@ -1195,7 +1325,7 @@ class UsageManager:
1195
  tier_info = f"tier: {tier_name}, " if tier_name else ""
1196
  lib_logger.info(
1197
  f"Acquired key {mask_credential(key)} for model {model} "
1198
- f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
1199
  )
1200
  return key
1201
 
 
55
  rotation_tolerance: float = 0.0,
56
  provider_rotation_modes: Optional[Dict[str, str]] = None,
57
  provider_plugins: Optional[Dict[str, Any]] = None,
58
+ priority_multipliers: Optional[Dict[str, Dict[int, int]]] = None,
59
+ priority_multipliers_by_mode: Optional[
60
+ Dict[str, Dict[str, Dict[int, int]]]
61
+ ] = None,
62
+ sequential_fallback_multipliers: Optional[Dict[str, int]] = None,
63
  ):
64
  """
65
  Initialize the UsageManager.
 
76
  - "sequential": Use one credential until exhausted (preserves caching)
77
  provider_plugins: Dict mapping provider names to provider plugin instances.
78
  Used for per-provider usage reset configuration (window durations, field names).
79
+ priority_multipliers: Dict mapping provider -> priority -> multiplier.
80
+ Universal multipliers that apply regardless of rotation mode.
81
+ Example: {"antigravity": {1: 5, 2: 3}}
82
+ priority_multipliers_by_mode: Dict mapping provider -> mode -> priority -> multiplier.
83
+ Mode-specific overrides. Example: {"antigravity": {"balanced": {3: 1}}}
84
+ sequential_fallback_multipliers: Dict mapping provider -> fallback multiplier.
85
+ Used in sequential mode when priority not in priority_multipliers.
86
+ Example: {"antigravity": 2}
87
  """
88
  self.file_path = file_path
89
  self.rotation_tolerance = rotation_tolerance
90
  self.provider_rotation_modes = provider_rotation_modes or {}
91
  self.provider_plugins = provider_plugins or PROVIDER_PLUGINS
92
+ self.priority_multipliers = priority_multipliers or {}
93
+ self.priority_multipliers_by_mode = priority_multipliers_by_mode or {}
94
+ self.sequential_fallback_multipliers = sequential_fallback_multipliers or {}
95
  self._provider_instances: Dict[str, Any] = {} # Cache for provider instances
96
  self.key_states: Dict[str, Dict[str, Any]] = {}
97
 
 
123
  """
124
  return self.provider_rotation_modes.get(provider, "balanced")
125
 
126
+ def _get_priority_multiplier(
127
+ self, provider: str, priority: int, rotation_mode: str
128
+ ) -> int:
129
+ """
130
+ Get the concurrency multiplier for a provider/priority/mode combination.
131
+
132
+ Lookup order:
133
+ 1. Mode-specific tier override: priority_multipliers_by_mode[provider][mode][priority]
134
+ 2. Universal tier multiplier: priority_multipliers[provider][priority]
135
+ 3. Sequential fallback (if mode is sequential): sequential_fallback_multipliers[provider]
136
+ 4. Global default: 1 (no multiplier effect)
137
+
138
+ Args:
139
+ provider: Provider name (e.g., "antigravity")
140
+ priority: Priority level (1 = highest priority)
141
+ rotation_mode: Current rotation mode ("sequential" or "balanced")
142
+
143
+ Returns:
144
+ Multiplier value
145
+ """
146
+ provider_lower = provider.lower()
147
+
148
+ # 1. Check mode-specific override
149
+ if provider_lower in self.priority_multipliers_by_mode:
150
+ mode_multipliers = self.priority_multipliers_by_mode[provider_lower]
151
+ if rotation_mode in mode_multipliers:
152
+ if priority in mode_multipliers[rotation_mode]:
153
+ return mode_multipliers[rotation_mode][priority]
154
+
155
+ # 2. Check universal tier multiplier
156
+ if provider_lower in self.priority_multipliers:
157
+ if priority in self.priority_multipliers[provider_lower]:
158
+ return self.priority_multipliers[provider_lower][priority]
159
+
160
+ # 3. Sequential fallback (only for sequential mode)
161
+ if rotation_mode == "sequential":
162
+ if provider_lower in self.sequential_fallback_multipliers:
163
+ return self.sequential_fallback_multipliers[provider_lower]
164
+
165
+ # 4. Global default
166
+ return 1
167
+
168
  def _get_provider_from_credential(self, credential: str) -> Optional[str]:
169
  """
170
  Extract provider name from credential path or identifier.
 
296
 
297
  return []
298
 
299
+ def _get_model_usage_weight(self, credential: str, model: str) -> int:
300
+ """
301
+ Get the usage weight for a model when calculating grouped usage.
302
+
303
+ Args:
304
+ credential: The credential identifier
305
+ model: Model name (with or without provider prefix)
306
+
307
+ Returns:
308
+ Weight multiplier (default 1 if not configured)
309
+ """
310
+ provider = self._get_provider_from_credential(credential)
311
+ plugin_instance = self._get_provider_instance(provider)
312
+
313
+ if plugin_instance and hasattr(plugin_instance, "get_model_usage_weight"):
314
+ return plugin_instance.get_model_usage_weight(model)
315
+
316
+ return 1
317
+
318
+ def _get_grouped_usage_count(self, key: str, model: str) -> int:
319
+ """
320
+ Get usage count for credential selection, considering quota groups.
321
+
322
+ If the model belongs to a quota group, returns the weighted combined usage
323
+ across all models in the group. Otherwise returns individual model usage.
324
+
325
+ Weights are applied per-model to account for models that consume more quota
326
+ per request (e.g., Opus might count 2x compared to Sonnet).
327
+
328
+ Args:
329
+ key: Credential identifier
330
+ model: Model name (with provider prefix, e.g., "antigravity/claude-sonnet-4-5")
331
+
332
+ Returns:
333
+ Weighted combined usage if grouped, otherwise individual model usage
334
+ """
335
+ # Check if model is in a quota group
336
+ group = self._get_model_quota_group(key, model)
337
+
338
+ if group:
339
+ # Get all models in the group
340
+ grouped_models = self._get_grouped_models(key, group)
341
+
342
+ # Sum weighted usage across all models in the group
343
+ total_weighted_usage = 0
344
+ for grouped_model in grouped_models:
345
+ usage = self._get_usage_count(key, grouped_model)
346
+ weight = self._get_model_usage_weight(key, grouped_model)
347
+ total_weighted_usage += usage * weight
348
+ return total_weighted_usage
349
+
350
+ # Not grouped - return individual model usage (no weight applied)
351
+ return self._get_usage_count(key, model)
352
+
353
  def _get_usage_field_name(self, credential: str) -> str:
354
  """
355
  Get the usage tracking field name for a credential.
 
472
 
473
  return data
474
 
475
+ def _sort_sequential(
476
  self,
477
  candidates: List[Tuple[str, int]],
478
  credential_priorities: Optional[Dict[str, int]] = None,
479
+ ) -> List[Tuple[str, int]]:
480
  """
481
+ Sort credentials for sequential mode with position retention.
482
 
483
+ Credentials maintain their position based on established usage patterns,
484
+ ensuring that actively-used credentials remain primary until exhausted.
485
 
486
+ Sorting order (within each sort key, lower value = higher priority):
487
+ 1. Priority tier (lower number = higher priority)
488
+ 2. Usage count (higher = more established in rotation, maintains position)
489
+ 3. Last used timestamp (higher = more recent, tiebreaker for stickiness)
490
+ 4. Credential ID (alphabetical, stable ordering)
491
 
492
  Args:
493
  candidates: List of (credential_id, usage_count) tuples
494
  credential_priorities: Optional dict mapping credentials to priority levels
495
 
496
  Returns:
497
+ Sorted list of candidates (same format as input)
498
  """
499
  if not candidates:
500
+ return []
501
 
502
  if len(candidates) == 1:
503
+ return candidates
504
 
505
+ def sort_key(item: Tuple[str, int]) -> Tuple[int, int, float, str]:
506
+ cred, usage_count = item
 
507
  priority = (
508
  credential_priorities.get(cred, 999) if credential_priorities else 999
509
  )
 
510
  last_used = (
511
  self._usage_data.get(cred, {}).get("last_used_ts", 0)
512
  if self._usage_data
513
  else 0
514
  )
515
+ return (
516
+ priority, # ASC: lower priority number = higher priority
517
+ -usage_count, # DESC: higher usage = more established
518
+ -last_used, # DESC: more recent = preferred for ties
519
+ cred, # ASC: stable alphabetical ordering
520
+ )
521
 
522
  sorted_candidates = sorted(candidates, key=sort_key)
 
523
 
524
+ # Debug logging - show top 3 credentials in ordering
525
+ if lib_logger.isEnabledFor(logging.DEBUG):
526
+ order_info = [
527
+ f"{mask_credential(c)}(p={credential_priorities.get(c, 999) if credential_priorities else 'N/A'}, u={u})"
528
+ for c, u in sorted_candidates[:3]
529
+ ]
530
+ lib_logger.debug(f"Sequential ordering: {' → '.join(order_info)}")
531
 
532
+ return sorted_candidates
533
 
534
  async def _lazy_init(self):
535
  """Initializes the usage data by loading it from the file asynchronously."""
 
1083
  priority = credential_priorities.get(key, 999)
1084
 
1085
  # Get usage count for load balancing within priority groups
1086
+ # Uses grouped usage if model is in a quota group
1087
+ usage_count = self._get_grouped_usage_count(key, model)
1088
 
1089
  # Group by priority
1090
  if priority not in priority_groups:
 
1097
  for priority_level in sorted_priorities:
1098
  keys_in_priority = priority_groups[priority_level]
1099
 
1100
+ # Determine selection method based on provider's rotation mode
1101
+ provider = model.split("/")[0] if "/" in model else ""
1102
+ rotation_mode = self._get_rotation_mode(provider)
1103
+
1104
+ # Calculate effective concurrency based on priority tier
1105
+ multiplier = self._get_priority_multiplier(
1106
+ provider, priority_level, rotation_mode
1107
+ )
1108
+ effective_max_concurrent = max_concurrent * multiplier
1109
+
1110
  # Within each priority group, use existing tier1/tier2 logic
1111
  tier1_keys, tier2_keys = [], []
1112
  for key, usage_count in keys_in_priority:
 
1116
  if not key_state["models_in_use"]:
1117
  tier1_keys.append((key, usage_count))
1118
  # Tier 2: Keys that can accept more concurrent requests
1119
+ elif (
1120
+ key_state["models_in_use"].get(model, 0)
1121
+ < effective_max_concurrent
1122
+ ):
1123
  tier2_keys.append((key, usage_count))
1124
 
 
 
 
 
1125
  if rotation_mode == "sequential":
1126
+ # Sequential mode: sort credentials by priority, usage, recency
1127
+ # Keep all candidates in sorted order (no filtering to single key)
1128
  selection_method = "sequential"
1129
  if tier1_keys:
1130
+ tier1_keys = self._sort_sequential(
1131
  tier1_keys, credential_priorities
1132
  )
 
 
 
1133
  if tier2_keys:
1134
+ tier2_keys = self._sort_sequential(
1135
  tier2_keys, credential_priorities
1136
  )
 
 
 
1137
  elif self.rotation_tolerance > 0:
1138
  # Balanced mode with weighted randomness
1139
  selection_method = "weighted-random"
 
1179
  state = self.key_states[key]
1180
  async with state["lock"]:
1181
  current_count = state["models_in_use"].get(model, 0)
1182
+ if current_count < effective_max_concurrent:
1183
  state["models_in_use"][model] = current_count + 1
1184
  tier_name = (
1185
  credential_tier_names.get(key, "unknown")
 
1188
  )
1189
  lib_logger.info(
1190
  f"Acquired key {mask_credential(key)} for model {model} "
1191
+ f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
1192
  )
1193
  return key
1194
 
 
1217
 
1218
  else:
1219
  # Original logic when no priorities specified
1220
+
1221
+ # Determine selection method based on provider's rotation mode
1222
+ provider = model.split("/")[0] if "/" in model else ""
1223
+ rotation_mode = self._get_rotation_mode(provider)
1224
+
1225
+ # Calculate effective concurrency for default priority (999)
1226
+ # When no priorities are specified, all credentials get default priority
1227
+ default_priority = 999
1228
+ multiplier = self._get_priority_multiplier(
1229
+ provider, default_priority, rotation_mode
1230
+ )
1231
+ effective_max_concurrent = max_concurrent * multiplier
1232
+
1233
  tier1_keys, tier2_keys = [], []
1234
 
1235
  # First, filter the list of available keys to exclude any on cooldown.
 
1243
  continue
1244
 
1245
  # Prioritize keys based on their current usage to ensure load balancing.
1246
+ # Uses grouped usage if model is in a quota group
1247
+ usage_count = self._get_grouped_usage_count(key, model)
1248
  key_state = self.key_states[key]
1249
 
1250
  # Tier 1: Completely idle keys (preferred).
1251
  if not key_state["models_in_use"]:
1252
  tier1_keys.append((key, usage_count))
1253
  # Tier 2: Keys that can accept more concurrent requests for this model.
1254
+ elif (
1255
+ key_state["models_in_use"].get(model, 0)
1256
+ < effective_max_concurrent
1257
+ ):
1258
  tier2_keys.append((key, usage_count))
1259
 
 
 
 
 
1260
  if rotation_mode == "sequential":
1261
+ # Sequential mode: sort credentials by priority, usage, recency
1262
+ # Keep all candidates in sorted order (no filtering to single key)
1263
  selection_method = "sequential"
1264
  if tier1_keys:
1265
+ tier1_keys = self._sort_sequential(
1266
  tier1_keys, credential_priorities
1267
  )
 
 
 
1268
  if tier2_keys:
1269
+ tier2_keys = self._sort_sequential(
1270
  tier2_keys, credential_priorities
1271
  )
 
 
 
1272
  elif self.rotation_tolerance > 0:
1273
  # Balanced mode with weighted randomness
1274
  selection_method = "weighted-random"
 
1315
  state = self.key_states[key]
1316
  async with state["lock"]:
1317
  current_count = state["models_in_use"].get(model, 0)
1318
+ if current_count < effective_max_concurrent:
1319
  state["models_in_use"][model] = current_count + 1
1320
  tier_name = (
1321
  credential_tier_names.get(key)
 
1325
  tier_info = f"tier: {tier_name}, " if tier_name else ""
1326
  lib_logger.info(
1327
  f"Acquired key {mask_credential(key)} for model {model} "
1328
+ f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{effective_max_concurrent}, usage: {usage})"
1329
  )
1330
  return key
1331