Spaces:
Paused
feat(client): ✨ add credential prioritization system for tier-based model access
Implements a comprehensive credential prioritization system that enables providers to enforce tier-based access controls and optimize credential selection based on account types.
Key changes:
- Added `get_credential_priority()` and `get_model_tier_requirement()` methods to ProviderInterface, allowing providers to define credential tiers and model restrictions
- Enhanced UsageManager.acquire_key() to respect credential priorities, always attempting highest-priority credentials first before falling back to lower tiers
- Implemented Gemini-specific tier detection in GeminiCliProvider, mapping paid-tier credentials to priority 1, free tier to priority 2, and legacy/unknown tiers to priority 10 (credentials whose tier has not yet been discovered report no priority)
- Added model-based filtering in RotatingClient to exclude incompatible credentials before acquisition (e.g., Gemini 3 models require paid-tier credentials)
- Improved logging to show priority-aware credential selection and tier compatibility warnings
The system gracefully handles unknown credential tiers by treating them as potentially compatible until their actual tier is discovered on first use. Within each priority level, load balancing by usage count is preserved.
|
@@ -672,6 +672,73 @@ class RotatingClient:
|
|
| 672 |
lib_logger.info(f"Resolved model '{model}' to '{resolved_model}'")
|
| 673 |
model = resolved_model
|
| 674 |
kwargs["model"] = model # Ensure kwargs has the resolved model for litellm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
|
| 676 |
while (
|
| 677 |
len(tried_creds) < len(credentials_for_provider) and time.time() < deadline
|
|
@@ -710,7 +777,8 @@ class RotatingClient:
|
|
| 710 |
max_concurrent = self.max_concurrent_requests_per_key.get(provider, 1)
|
| 711 |
current_cred = await self.usage_manager.acquire_key(
|
| 712 |
available_keys=creds_to_try, model=model, deadline=deadline,
|
| 713 |
-
max_concurrent=max_concurrent
|
|
|
|
| 714 |
)
|
| 715 |
key_acquired = True
|
| 716 |
tried_creds.add(current_cred)
|
|
@@ -1047,6 +1115,73 @@ class RotatingClient:
|
|
| 1047 |
lib_logger.info(f"Resolved model '{model}' to '{resolved_model}'")
|
| 1048 |
model = resolved_model
|
| 1049 |
kwargs["model"] = model # Ensure kwargs has the resolved model for litellm
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1050 |
|
| 1051 |
try:
|
| 1052 |
while (
|
|
@@ -1086,7 +1221,8 @@ class RotatingClient:
|
|
| 1086 |
max_concurrent = self.max_concurrent_requests_per_key.get(provider, 1)
|
| 1087 |
current_cred = await self.usage_manager.acquire_key(
|
| 1088 |
available_keys=creds_to_try, model=model, deadline=deadline,
|
| 1089 |
-
max_concurrent=max_concurrent
|
|
|
|
| 1090 |
)
|
| 1091 |
key_acquired = True
|
| 1092 |
tried_creds.add(current_cred)
|
|
|
|
| 672 |
lib_logger.info(f"Resolved model '{model}' to '{resolved_model}'")
|
| 673 |
model = resolved_model
|
| 674 |
kwargs["model"] = model # Ensure kwargs has the resolved model for litellm
|
| 675 |
+
|
| 676 |
+
# [NEW] Filter by model tier requirement and build priority map
|
| 677 |
+
credential_priorities = None
|
| 678 |
+
if provider_plugin and hasattr(provider_plugin, 'get_model_tier_requirement'):
|
| 679 |
+
required_tier = provider_plugin.get_model_tier_requirement(model)
|
| 680 |
+
if required_tier is not None:
|
| 681 |
+
# Filter OUT only credentials we KNOW are too low priority
|
| 682 |
+
# Keep credentials with unknown priority (None) - they might be high priority
|
| 683 |
+
incompatible_creds = []
|
| 684 |
+
compatible_creds = []
|
| 685 |
+
unknown_creds = []
|
| 686 |
+
|
| 687 |
+
for cred in credentials_for_provider:
|
| 688 |
+
if hasattr(provider_plugin, 'get_credential_priority'):
|
| 689 |
+
priority = provider_plugin.get_credential_priority(cred)
|
| 690 |
+
if priority is None:
|
| 691 |
+
# Unknown priority - keep it, will be discovered on first use
|
| 692 |
+
unknown_creds.append(cred)
|
| 693 |
+
elif priority <= required_tier:
|
| 694 |
+
# Known compatible priority
|
| 695 |
+
compatible_creds.append(cred)
|
| 696 |
+
else:
|
| 697 |
+
# Known incompatible priority (too low)
|
| 698 |
+
incompatible_creds.append(cred)
|
| 699 |
+
else:
|
| 700 |
+
# Provider doesn't support priorities - keep all
|
| 701 |
+
unknown_creds.append(cred)
|
| 702 |
+
|
| 703 |
+
# If we have any known-compatible or unknown credentials, use them
|
| 704 |
+
tier_compatible_creds = compatible_creds + unknown_creds
|
| 705 |
+
if tier_compatible_creds:
|
| 706 |
+
credentials_for_provider = tier_compatible_creds
|
| 707 |
+
if compatible_creds and unknown_creds:
|
| 708 |
+
lib_logger.info(
|
| 709 |
+
f"Model {model} requires priority <= {required_tier}. "
|
| 710 |
+
f"Using {len(compatible_creds)} known-compatible + {len(unknown_creds)} unknown-tier credentials."
|
| 711 |
+
)
|
| 712 |
+
elif compatible_creds:
|
| 713 |
+
lib_logger.info(
|
| 714 |
+
f"Model {model} requires priority <= {required_tier}. "
|
| 715 |
+
f"Using {len(compatible_creds)} known-compatible credentials."
|
| 716 |
+
)
|
| 717 |
+
else:
|
| 718 |
+
lib_logger.info(
|
| 719 |
+
f"Model {model} requires priority <= {required_tier}. "
|
| 720 |
+
f"Using {len(unknown_creds)} unknown-tier credentials (will discover on use)."
|
| 721 |
+
)
|
| 722 |
+
elif incompatible_creds:
|
| 723 |
+
# Only known-incompatible credentials remain
|
| 724 |
+
lib_logger.warning(
|
| 725 |
+
f"Model {model} requires priority <= {required_tier} credentials, "
|
| 726 |
+
f"but all {len(incompatible_creds)} known credentials have priority > {required_tier}. "
|
| 727 |
+
f"Request will likely fail."
|
| 728 |
+
)
|
| 729 |
+
|
| 730 |
+
# Build priority map for usage_manager
|
| 731 |
+
if provider_plugin and hasattr(provider_plugin, 'get_credential_priority'):
|
| 732 |
+
credential_priorities = {}
|
| 733 |
+
for cred in credentials_for_provider:
|
| 734 |
+
priority = provider_plugin.get_credential_priority(cred)
|
| 735 |
+
if priority is not None:
|
| 736 |
+
credential_priorities[cred] = priority
|
| 737 |
+
|
| 738 |
+
if credential_priorities:
|
| 739 |
+
lib_logger.debug(
|
| 740 |
+
f"Credential priorities for {provider}: {', '.join(f'P{p}={len([c for c in credentials_for_provider if credential_priorities.get(c)==p])}' for p in sorted(set(credential_priorities.values())))}"
|
| 741 |
+
)
|
| 742 |
|
| 743 |
while (
|
| 744 |
len(tried_creds) < len(credentials_for_provider) and time.time() < deadline
|
|
|
|
| 777 |
max_concurrent = self.max_concurrent_requests_per_key.get(provider, 1)
|
| 778 |
current_cred = await self.usage_manager.acquire_key(
|
| 779 |
available_keys=creds_to_try, model=model, deadline=deadline,
|
| 780 |
+
max_concurrent=max_concurrent,
|
| 781 |
+
credential_priorities=credential_priorities
|
| 782 |
)
|
| 783 |
key_acquired = True
|
| 784 |
tried_creds.add(current_cred)
|
|
|
|
| 1115 |
lib_logger.info(f"Resolved model '{model}' to '{resolved_model}'")
|
| 1116 |
model = resolved_model
|
| 1117 |
kwargs["model"] = model # Ensure kwargs has the resolved model for litellm
|
| 1118 |
+
|
| 1119 |
+
# [NEW] Filter by model tier requirement and build priority map
|
| 1120 |
+
credential_priorities = None
|
| 1121 |
+
if provider_plugin and hasattr(provider_plugin, 'get_model_tier_requirement'):
|
| 1122 |
+
required_tier = provider_plugin.get_model_tier_requirement(model)
|
| 1123 |
+
if required_tier is not None:
|
| 1124 |
+
# Filter OUT only credentials we KNOW are too low priority
|
| 1125 |
+
# Keep credentials with unknown priority (None) - they might be high priority
|
| 1126 |
+
incompatible_creds = []
|
| 1127 |
+
compatible_creds = []
|
| 1128 |
+
unknown_creds = []
|
| 1129 |
+
|
| 1130 |
+
for cred in credentials_for_provider:
|
| 1131 |
+
if hasattr(provider_plugin, 'get_credential_priority'):
|
| 1132 |
+
priority = provider_plugin.get_credential_priority(cred)
|
| 1133 |
+
if priority is None:
|
| 1134 |
+
# Unknown priority - keep it, will be discovered on first use
|
| 1135 |
+
unknown_creds.append(cred)
|
| 1136 |
+
elif priority <= required_tier:
|
| 1137 |
+
# Known compatible priority
|
| 1138 |
+
compatible_creds.append(cred)
|
| 1139 |
+
else:
|
| 1140 |
+
# Known incompatible priority (too low)
|
| 1141 |
+
incompatible_creds.append(cred)
|
| 1142 |
+
else:
|
| 1143 |
+
# Provider doesn't support priorities - keep all
|
| 1144 |
+
unknown_creds.append(cred)
|
| 1145 |
+
|
| 1146 |
+
# If we have any known-compatible or unknown credentials, use them
|
| 1147 |
+
tier_compatible_creds = compatible_creds + unknown_creds
|
| 1148 |
+
if tier_compatible_creds:
|
| 1149 |
+
credentials_for_provider = tier_compatible_creds
|
| 1150 |
+
if compatible_creds and unknown_creds:
|
| 1151 |
+
lib_logger.info(
|
| 1152 |
+
f"Model {model} requires priority <= {required_tier}. "
|
| 1153 |
+
f"Using {len(compatible_creds)} known-compatible + {len(unknown_creds)} unknown-tier credentials."
|
| 1154 |
+
)
|
| 1155 |
+
elif compatible_creds:
|
| 1156 |
+
lib_logger.info(
|
| 1157 |
+
f"Model {model} requires priority <= {required_tier}. "
|
| 1158 |
+
f"Using {len(compatible_creds)} known-compatible credentials."
|
| 1159 |
+
)
|
| 1160 |
+
else:
|
| 1161 |
+
lib_logger.info(
|
| 1162 |
+
f"Model {model} requires priority <= {required_tier}. "
|
| 1163 |
+
f"Using {len(unknown_creds)} unknown-tier credentials (will discover on use)."
|
| 1164 |
+
)
|
| 1165 |
+
elif incompatible_creds:
|
| 1166 |
+
# Only known-incompatible credentials remain
|
| 1167 |
+
lib_logger.warning(
|
| 1168 |
+
f"Model {model} requires priority <= {required_tier} credentials, "
|
| 1169 |
+
f"but all {len(incompatible_creds)} known credentials have priority > {required_tier}. "
|
| 1170 |
+
f"Request will likely fail."
|
| 1171 |
+
)
|
| 1172 |
+
|
| 1173 |
+
# Build priority map for usage_manager
|
| 1174 |
+
if provider_plugin and hasattr(provider_plugin, 'get_credential_priority'):
|
| 1175 |
+
credential_priorities = {}
|
| 1176 |
+
for cred in credentials_for_provider:
|
| 1177 |
+
priority = provider_plugin.get_credential_priority(cred)
|
| 1178 |
+
if priority is not None:
|
| 1179 |
+
credential_priorities[cred] = priority
|
| 1180 |
+
|
| 1181 |
+
if credential_priorities:
|
| 1182 |
+
lib_logger.debug(
|
| 1183 |
+
f"Credential priorities for {provider}: {', '.join(f'P{p}={len([c for c in credentials_for_provider if credential_priorities.get(c)==p])}' for p in sorted(set(credential_priorities.values())))}"
|
| 1184 |
+
)
|
| 1185 |
|
| 1186 |
try:
|
| 1187 |
while (
|
|
|
|
| 1221 |
max_concurrent = self.max_concurrent_requests_per_key.get(provider, 1)
|
| 1222 |
current_cred = await self.usage_manager.acquire_key(
|
| 1223 |
available_keys=creds_to_try, model=model, deadline=deadline,
|
| 1224 |
+
max_concurrent=max_concurrent,
|
| 1225 |
+
credential_priorities=credential_priorities
|
| 1226 |
)
|
| 1227 |
key_acquired = True
|
| 1228 |
tried_creds.add(current_cred)
|
|
@@ -165,6 +165,59 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 165 |
f"cache={self._enable_signature_cache}, gemini3_fix={self._enable_gemini3_tool_fix}"
|
| 166 |
)
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
# =========================================================================
|
| 169 |
# MODEL UTILITIES
|
| 170 |
# =========================================================================
|
|
|
|
| 165 |
f"cache={self._enable_signature_cache}, gemini3_fix={self._enable_gemini3_tool_fix}"
|
| 166 |
)
|
| 167 |
|
| 168 |
+
# =========================================================================
|
| 169 |
+
# CREDENTIAL PRIORITIZATION
|
| 170 |
+
# =========================================================================
|
| 171 |
+
|
| 172 |
+
def get_credential_priority(self, credential: str) -> Optional[int]:
    """
    Return the priority level for a Gemini credential based on its cached tier.

    Priority mapping (lower number = higher priority):
        - Paid tiers (any tier other than free/legacy/unknown): priority 1
        - Free tier ('free-tier'): priority 2
        - Legacy or explicitly-unknown tiers: priority 10

    Args:
        credential: The credential path, used as the tier-cache key.

    Returns:
        Priority level (1-10), or None if the tier has not yet been
        discovered (i.e. the credential has never been used).
    """
    tier = self.project_tier_cache.get(credential)
    if not tier:
        # Tier not yet discovered; callers treat None as
        # "unknown, potentially compatible".
        return None

    # Any tier outside the known low-value set is assumed to be paid.
    if tier not in ('free-tier', 'legacy-tier', 'unknown'):
        return 1

    # Free tier gets lower priority.
    if tier == 'free-tier':
        return 2

    # Legacy and explicitly-unknown tiers get the lowest priority.
    return 10
|
| 199 |
+
|
| 200 |
+
def get_model_tier_requirement(self, model: str) -> Optional[int]:
    """
    Return the minimum credential priority required to use a model.

    Gemini 3 models are restricted to paid-tier credentials (priority 1);
    all other models carry no restriction.

    Args:
        model: The model name, with or without a provider prefix and with
            or without a ':thinking' suffix.

    Returns:
        Minimum required priority level, or None if the model has no
        tier restriction.
    """
    # Drop any provider prefix, then strip a trailing ':thinking' marker.
    # Suffix-only strip: a ':thinking' substring elsewhere in the name
    # must not be removed (the previous str.replace removed it anywhere).
    model_name = model.split('/')[-1]
    if model_name.endswith(':thinking'):
        model_name = model_name[:-len(':thinking')]

    # Gemini 3 requires paid-tier (priority 1) credentials.
    if model_name.startswith("gemini-3-"):
        return 1

    return None  # All other models have no restrictions.
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
|
| 221 |
# =========================================================================
|
| 222 |
# MODEL UTILITIES
|
| 223 |
# =========================================================================
|
|
@@ -66,4 +66,49 @@ class ProviderInterface(ABC):
|
|
| 66 |
"""
|
| 67 |
Proactively refreshes a token if it's nearing expiry.
|
| 68 |
"""
|
| 69 |
-
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
"""
|
| 67 |
Proactively refreshes a token if it's nearing expiry.
|
| 68 |
"""
|
| 69 |
+
pass
|
| 70 |
+
|
| 71 |
+
# [NEW] Credential Prioritization System
|
| 72 |
+
def get_credential_priority(self, credential: str) -> Optional[int]:
    """
    Report the priority level assigned to a credential.

    Lower numbers mean higher priority (1 is the highest). The base
    implementation returns None, meaning the provider has no priority
    system; subclasses that can distinguish credential tiers (for
    example paid vs. free accounts) override this so that higher-tier
    credentials are always attempted first.

    Args:
        credential: The credential identifier (API key or file path).

    Returns:
        Priority level (1-10), or None when priorities are unsupported.

    Example:
        For Gemini CLI:
        - Paid-tier credentials: priority 1 (highest)
        - Free-tier credentials: priority 2
        - Unknown tier: priority 10 (lowest)
    """
    return None
|
| 94 |
+
|
| 95 |
+
def get_model_tier_requirement(self, model: str) -> Optional[int]:
    """
    Report the minimum credential priority a model demands.

    When a model requires priority N, only credentials whose priority is
    <= N may serve it. The base implementation returns None (no
    restriction); subclasses override this to lock certain models to
    specific credential tiers — for example, Gemini 3 models to
    paid-tier credentials.

    Args:
        model: The model name (with or without a provider prefix).

    Returns:
        Minimum required priority level, or None when the model is
        unrestricted.

    Example:
        For Gemini CLI:
        - gemini-3-*: requires priority 1 (paid tier only)
        - gemini-2.5-*: no restriction (None)
    """
    return None
|
|
@@ -162,11 +162,31 @@ class UsageManager:
|
|
| 162 |
|
| 163 |
async def acquire_key(
|
| 164 |
self, available_keys: List[str], model: str, deadline: float,
|
| 165 |
-
max_concurrent: int = 1
|
|
|
|
| 166 |
) -> str:
|
| 167 |
"""
|
| 168 |
Acquires the best available key using a tiered, model-aware locking strategy,
|
| 169 |
-
respecting a global deadline.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
"""
|
| 171 |
await self._lazy_init()
|
| 172 |
await self._reset_daily_stats_if_needed()
|
|
@@ -174,78 +194,180 @@ class UsageManager:
|
|
| 174 |
|
| 175 |
# This loop continues as long as the global deadline has not been met.
|
| 176 |
while time.time() < deadline:
|
| 177 |
-
tier1_keys, tier2_keys = [], []
|
| 178 |
now = time.time()
|
| 179 |
|
| 180 |
-
#
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
key_data.get(
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
.get(
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
elif key_state["models_in_use"].get(model, 0) < max_concurrent:
|
| 204 |
-
tier2_keys.append((key, usage_count))
|
| 205 |
-
|
| 206 |
-
tier1_keys.sort(key=lambda x: x[1])
|
| 207 |
-
tier2_keys.sort(key=lambda x: x[1])
|
| 208 |
-
|
| 209 |
-
# Attempt to acquire a key from Tier 1 first.
|
| 210 |
-
for key, _ in tier1_keys:
|
| 211 |
-
state = self.key_states[key]
|
| 212 |
-
async with state["lock"]:
|
| 213 |
-
if not state["models_in_use"]:
|
| 214 |
-
state["models_in_use"][model] = 1
|
| 215 |
-
lib_logger.info(
|
| 216 |
-
f"Acquired Tier 1 key ...{key[-6:]} for model {model}"
|
| 217 |
)
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
)
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
"No keys are eligible (all on cooldown). Waiting before re-evaluating."
|
| 242 |
)
|
| 243 |
-
await asyncio.sleep(1)
|
| 244 |
-
continue
|
| 245 |
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
try:
|
| 251 |
async with wait_condition:
|
|
@@ -266,6 +388,8 @@ class UsageManager:
|
|
| 266 |
f"Could not acquire a key for model {model} within the global time budget."
|
| 267 |
)
|
| 268 |
|
|
|
|
|
|
|
| 269 |
async def release_key(self, key: str, model: str):
|
| 270 |
"""Releases a key's lock for a specific model and notifies waiting tasks."""
|
| 271 |
if key not in self.key_states:
|
|
|
|
| 162 |
|
| 163 |
async def acquire_key(
|
| 164 |
self, available_keys: List[str], model: str, deadline: float,
|
| 165 |
+
max_concurrent: int = 1,
|
| 166 |
+
credential_priorities: Optional[Dict[str, int]] = None
|
| 167 |
) -> str:
|
| 168 |
"""
|
| 169 |
Acquires the best available key using a tiered, model-aware locking strategy,
|
| 170 |
+
respecting a global deadline and credential priorities.
|
| 171 |
+
|
| 172 |
+
Priority Logic:
|
| 173 |
+
- Groups credentials by priority level (1=highest, 2=lower, etc.)
|
| 174 |
+
- Always tries highest priority (lowest number) first
|
| 175 |
+
- Within same priority, sorts by usage count (load balancing)
|
| 176 |
+
- Only moves to next priority if all higher-priority keys exhausted/busy
|
| 177 |
+
|
| 178 |
+
Args:
|
| 179 |
+
available_keys: List of credential identifiers to choose from
|
| 180 |
+
model: Model name being requested
|
| 181 |
+
deadline: Timestamp after which to stop trying
|
| 182 |
+
max_concurrent: Maximum concurrent requests allowed per credential
|
| 183 |
+
credential_priorities: Optional dict mapping credentials to priority levels (1=highest)
|
| 184 |
+
|
| 185 |
+
Returns:
|
| 186 |
+
Selected credential identifier
|
| 187 |
+
|
| 188 |
+
Raises:
|
| 189 |
+
NoAvailableKeysError: If no key could be acquired within the deadline
|
| 190 |
"""
|
| 191 |
await self._lazy_init()
|
| 192 |
await self._reset_daily_stats_if_needed()
|
|
|
|
| 194 |
|
| 195 |
# This loop continues as long as the global deadline has not been met.
|
| 196 |
while time.time() < deadline:
|
|
|
|
| 197 |
now = time.time()
|
| 198 |
|
| 199 |
+
# Group credentials by priority level (if priorities provided)
|
| 200 |
+
if credential_priorities:
|
| 201 |
+
# Group keys by priority level
|
| 202 |
+
priority_groups = {}
|
| 203 |
+
async with self._data_lock:
|
| 204 |
+
for key in available_keys:
|
| 205 |
+
key_data = self._usage_data.get(key, {})
|
| 206 |
+
|
| 207 |
+
# Skip keys on cooldown
|
| 208 |
+
if (key_data.get("key_cooldown_until") or 0) > now or (
|
| 209 |
+
key_data.get("model_cooldowns", {}).get(model) or 0
|
| 210 |
+
) > now:
|
| 211 |
+
continue
|
| 212 |
+
|
| 213 |
+
# Get priority for this key (default to 999 if not specified)
|
| 214 |
+
priority = credential_priorities.get(key, 999)
|
| 215 |
+
|
| 216 |
+
# Get usage count for load balancing within priority groups
|
| 217 |
+
usage_count = (
|
| 218 |
+
key_data.get("daily", {})
|
| 219 |
+
.get("models", {})
|
| 220 |
+
.get(model, {})
|
| 221 |
+
.get("success_count", 0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
)
|
| 223 |
+
|
| 224 |
+
# Group by priority
|
| 225 |
+
if priority not in priority_groups:
|
| 226 |
+
priority_groups[priority] = []
|
| 227 |
+
priority_groups[priority].append((key, usage_count))
|
| 228 |
+
|
| 229 |
+
# Try priority groups in order (1, 2, 3, ...)
|
| 230 |
+
sorted_priorities = sorted(priority_groups.keys())
|
| 231 |
+
|
| 232 |
+
for priority_level in sorted_priorities:
|
| 233 |
+
keys_in_priority = priority_groups[priority_level]
|
| 234 |
+
|
| 235 |
+
# Within each priority group, use existing tier1/tier2 logic
|
| 236 |
+
tier1_keys, tier2_keys = [], []
|
| 237 |
+
for key, usage_count in keys_in_priority:
|
| 238 |
+
key_state = self.key_states[key]
|
| 239 |
+
|
| 240 |
+
# Tier 1: Completely idle keys (preferred)
|
| 241 |
+
if not key_state["models_in_use"]:
|
| 242 |
+
tier1_keys.append((key, usage_count))
|
| 243 |
+
# Tier 2: Keys that can accept more concurrent requests
|
| 244 |
+
elif key_state["models_in_use"].get(model, 0) < max_concurrent:
|
| 245 |
+
tier2_keys.append((key, usage_count))
|
| 246 |
+
|
| 247 |
+
# Sort by usage within each tier
|
| 248 |
+
tier1_keys.sort(key=lambda x: x[1])
|
| 249 |
+
tier2_keys.sort(key=lambda x: x[1])
|
| 250 |
+
|
| 251 |
+
# Try to acquire from Tier 1 first
|
| 252 |
+
for key, usage in tier1_keys:
|
| 253 |
+
state = self.key_states[key]
|
| 254 |
+
async with state["lock"]:
|
| 255 |
+
if not state["models_in_use"]:
|
| 256 |
+
state["models_in_use"][model] = 1
|
| 257 |
+
lib_logger.info(
|
| 258 |
+
f"Acquired Priority-{priority_level} Tier-1 key ...{key[-6:]} for model {model} (usage: {usage})"
|
| 259 |
+
)
|
| 260 |
+
return key
|
| 261 |
+
|
| 262 |
+
# Then try Tier 2
|
| 263 |
+
for key, usage in tier2_keys:
|
| 264 |
+
state = self.key_states[key]
|
| 265 |
+
async with state["lock"]:
|
| 266 |
+
current_count = state["models_in_use"].get(model, 0)
|
| 267 |
+
if current_count < max_concurrent:
|
| 268 |
+
state["models_in_use"][model] = current_count + 1
|
| 269 |
+
lib_logger.info(
|
| 270 |
+
f"Acquired Priority-{priority_level} Tier-2 key ...{key[-6:]} for model {model} "
|
| 271 |
+
f"(concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
|
| 272 |
+
)
|
| 273 |
+
return key
|
| 274 |
+
|
| 275 |
+
# If we get here, all priority groups were exhausted but keys might become available
|
| 276 |
+
# Collect all keys across all priorities for waiting
|
| 277 |
+
all_potential_keys = []
|
| 278 |
+
for keys_list in priority_groups.values():
|
| 279 |
+
all_potential_keys.extend(keys_list)
|
| 280 |
+
|
| 281 |
+
if not all_potential_keys:
|
| 282 |
+
lib_logger.warning(
|
| 283 |
+
"No keys are eligible (all on cooldown or filtered out). Waiting before re-evaluating."
|
| 284 |
+
)
|
| 285 |
+
await asyncio.sleep(1)
|
| 286 |
+
continue
|
| 287 |
+
|
| 288 |
+
# Wait for the highest priority key with lowest usage
|
| 289 |
+
best_priority = min(priority_groups.keys())
|
| 290 |
+
best_priority_keys = priority_groups[best_priority]
|
| 291 |
+
best_wait_key = min(best_priority_keys, key=lambda x: x[1])[0]
|
| 292 |
+
wait_condition = self.key_states[best_wait_key]["condition"]
|
| 293 |
+
|
| 294 |
+
lib_logger.info(
|
| 295 |
+
f"All Priority-{best_priority} keys are busy. Waiting for highest priority credential to become available..."
|
| 296 |
+
)
|
| 297 |
+
|
| 298 |
+
else:
|
| 299 |
+
# Original logic when no priorities specified
|
| 300 |
+
tier1_keys, tier2_keys = [], []
|
| 301 |
+
|
| 302 |
+
# First, filter the list of available keys to exclude any on cooldown.
|
| 303 |
+
async with self._data_lock:
|
| 304 |
+
for key in available_keys:
|
| 305 |
+
key_data = self._usage_data.get(key, {})
|
| 306 |
+
|
| 307 |
+
if (key_data.get("key_cooldown_until") or 0) > now or (
|
| 308 |
+
key_data.get("model_cooldowns", {}).get(model) or 0
|
| 309 |
+
) > now:
|
| 310 |
+
continue
|
| 311 |
+
|
| 312 |
+
# Prioritize keys based on their current usage to ensure load balancing.
|
| 313 |
+
usage_count = (
|
| 314 |
+
key_data.get("daily", {})
|
| 315 |
+
.get("models", {})
|
| 316 |
+
.get(model, {})
|
| 317 |
+
.get("success_count", 0)
|
| 318 |
)
|
| 319 |
+
key_state = self.key_states[key]
|
| 320 |
+
|
| 321 |
+
# Tier 1: Completely idle keys (preferred).
|
| 322 |
+
if not key_state["models_in_use"]:
|
| 323 |
+
tier1_keys.append((key, usage_count))
|
| 324 |
+
# Tier 2: Keys that can accept more concurrent requests for this model.
|
| 325 |
+
elif key_state["models_in_use"].get(model, 0) < max_concurrent:
|
| 326 |
+
tier2_keys.append((key, usage_count))
|
| 327 |
+
|
| 328 |
+
tier1_keys.sort(key=lambda x: x[1])
|
| 329 |
+
tier2_keys.sort(key=lambda x: x[1])
|
| 330 |
+
|
| 331 |
+
# Attempt to acquire a key from Tier 1 first.
|
| 332 |
+
for key, _ in tier1_keys:
|
| 333 |
+
state = self.key_states[key]
|
| 334 |
+
async with state["lock"]:
|
| 335 |
+
if not state["models_in_use"]:
|
| 336 |
+
state["models_in_use"][model] = 1
|
| 337 |
+
lib_logger.info(
|
| 338 |
+
f"Acquired Tier 1 key ...{key[-6:]} for model {model}"
|
| 339 |
+
)
|
| 340 |
+
return key
|
| 341 |
+
|
| 342 |
+
# If no Tier 1 keys are available, try Tier 2.
|
| 343 |
+
for key, _ in tier2_keys:
|
| 344 |
+
state = self.key_states[key]
|
| 345 |
+
async with state["lock"]:
|
| 346 |
+
current_count = state["models_in_use"].get(model, 0)
|
| 347 |
+
if current_count < max_concurrent:
|
| 348 |
+
state["models_in_use"][model] = current_count + 1
|
| 349 |
+
lib_logger.info(
|
| 350 |
+
f"Acquired Tier 2 key ...{key[-6:]} for model {model} "
|
| 351 |
+
f"(concurrent: {state['models_in_use'][model]}/{max_concurrent})"
|
| 352 |
+
)
|
| 353 |
+
return key
|
| 354 |
|
| 355 |
+
# If all eligible keys are locked, wait for a key to be released.
|
| 356 |
+
lib_logger.info(
|
| 357 |
+
"All eligible keys are currently locked for this model. Waiting..."
|
|
|
|
| 358 |
)
|
|
|
|
|
|
|
| 359 |
|
| 360 |
+
all_potential_keys = tier1_keys + tier2_keys
|
| 361 |
+
if not all_potential_keys:
|
| 362 |
+
lib_logger.warning(
|
| 363 |
+
"No keys are eligible (all on cooldown). Waiting before re-evaluating."
|
| 364 |
+
)
|
| 365 |
+
await asyncio.sleep(1)
|
| 366 |
+
continue
|
| 367 |
+
|
| 368 |
+
# Wait on the condition of the key with the lowest current usage.
|
| 369 |
+
best_wait_key = min(all_potential_keys, key=lambda x: x[1])[0]
|
| 370 |
+
wait_condition = self.key_states[best_wait_key]["condition"]
|
| 371 |
|
| 372 |
try:
|
| 373 |
async with wait_condition:
|
|
|
|
| 388 |
f"Could not acquire a key for model {model} within the global time budget."
|
| 389 |
)
|
| 390 |
|
| 391 |
+
|
| 392 |
+
|
| 393 |
async def release_key(self, key: str, model: str):
|
| 394 |
"""Releases a key's lock for a specific model and notifies waiting tasks."""
|
| 395 |
if key not in self.key_states:
|