Mirrowel committed on
Commit
4dfb828
·
1 Parent(s): f03c448

feat(providers): ✨ implement credential tier initialization and persistence system

Browse files

This commit introduces a comprehensive credential tier management system across the library, enabling automatic tier detection, persistence, and intelligent credential prioritization at startup.

- Add `initialize_credentials()` method to `ProviderInterface` for startup credential loading
- Add `get_credential_tier_name()` method to expose human-readable tier names for logging
- Implement tier persistence in credential files via `_proxy_metadata` field
- Add lazy-loading fallback for tier data when not in memory cache
- Introduce `BackgroundRefresher._initialize_credentials()` to pre-load all provider tiers before refresh loop
- Pass `credential_tier_names` map through client to usage_manager for enhanced logging
- Update `UsageManager.acquire_key()` to display tier information in acquisition logs
- Make `ModelDefinitions` a singleton to prevent duplicate loading across providers
- Add comprehensive 3-line startup summary showing provider counts, credentials, and tier breakdown
- Implement tier-aware logging in Antigravity and GeminiCli providers with disk persistence
- Fix provider instance lookup for OAuth providers by handling `_oauth` suffix correctly

This ensures all credential priorities are known before any API calls, preventing unknown credentials from getting priority 999 and improving load balancing from the first request.

src/rotator_library/background_refresher.py CHANGED
@@ -8,28 +8,35 @@ from typing import TYPE_CHECKING, Optional
8
  if TYPE_CHECKING:
9
  from .client import RotatingClient
10
 
11
- lib_logger = logging.getLogger('rotator_library')
 
12
 
13
  class BackgroundRefresher:
14
  """
15
  A background task that periodically checks and refreshes OAuth tokens
16
  to ensure they remain valid.
17
  """
18
- def __init__(self, client: 'RotatingClient'):
 
19
  try:
20
  interval_str = os.getenv("OAUTH_REFRESH_INTERVAL", "600")
21
  self._interval = int(interval_str)
22
  except ValueError:
23
- lib_logger.warning(f"Invalid OAUTH_REFRESH_INTERVAL '{interval_str}'. Falling back to 600s.")
 
 
24
  self._interval = 600
25
  self._client = client
26
  self._task: Optional[asyncio.Task] = None
 
27
 
28
  def start(self):
29
  """Starts the background refresh task."""
30
  if self._task is None:
31
  self._task = asyncio.create_task(self._run())
32
- lib_logger.info(f"Background token refresher started. Check interval: {self._interval} seconds.")
 
 
33
  # [NEW] Log if custom interval is set
34
 
35
  async def stop(self):
@@ -42,23 +49,107 @@ class BackgroundRefresher:
42
  pass
43
  lib_logger.info("Background token refresher stopped.")
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  async def _run(self):
46
  """The main loop for the background task."""
 
 
 
47
  while True:
48
  try:
49
- #lib_logger.info("Running proactive token refresh check...")
50
 
51
  oauth_configs = self._client.get_oauth_credentials()
52
  for provider, paths in oauth_configs.items():
53
- provider_plugin = self._client._get_provider_instance(f"{provider}_oauth")
54
- if provider_plugin and hasattr(provider_plugin, 'proactively_refresh'):
 
 
55
  for path in paths:
56
  try:
57
  await provider_plugin.proactively_refresh(path)
58
  except Exception as e:
59
- lib_logger.error(f"Error during proactive refresh for '{path}': {e}")
 
 
60
  await asyncio.sleep(self._interval)
61
  except asyncio.CancelledError:
62
  break
63
  except Exception as e:
64
- lib_logger.error(f"Unexpected error in background refresher loop: {e}")
 
8
  if TYPE_CHECKING:
9
  from .client import RotatingClient
10
 
11
+ lib_logger = logging.getLogger("rotator_library")
12
+
13
 
14
  class BackgroundRefresher:
15
  """
16
  A background task that periodically checks and refreshes OAuth tokens
17
  to ensure they remain valid.
18
  """
19
+
20
+ def __init__(self, client: "RotatingClient"):
21
  try:
22
  interval_str = os.getenv("OAUTH_REFRESH_INTERVAL", "600")
23
  self._interval = int(interval_str)
24
  except ValueError:
25
+ lib_logger.warning(
26
+ f"Invalid OAUTH_REFRESH_INTERVAL '{interval_str}'. Falling back to 600s."
27
+ )
28
  self._interval = 600
29
  self._client = client
30
  self._task: Optional[asyncio.Task] = None
31
+ self._initialized = False
32
 
33
  def start(self):
34
  """Starts the background refresh task."""
35
  if self._task is None:
36
  self._task = asyncio.create_task(self._run())
37
+ lib_logger.info(
38
+ f"Background token refresher started. Check interval: {self._interval} seconds."
39
+ )
40
  # [NEW] Log if custom interval is set
41
 
42
  async def stop(self):
 
49
  pass
50
  lib_logger.info("Background token refresher stopped.")
51
 
52
+ async def _initialize_credentials(self):
53
+ """
54
+ Initialize all providers by loading credentials and persisted tier data.
55
+ Called once before the main refresh loop starts.
56
+ """
57
+ if self._initialized:
58
+ return
59
+
60
+ api_summary = {} # provider -> count
61
+ oauth_summary = {} # provider -> {"count": N, "tiers": {tier: count}}
62
+
63
+ all_credentials = self._client.all_credentials
64
+ oauth_providers = self._client.oauth_providers
65
+
66
+ for provider, credentials in all_credentials.items():
67
+ if not credentials:
68
+ continue
69
+
70
+ provider_plugin = self._client._get_provider_instance(provider)
71
+
72
+ # Call initialize_credentials if provider supports it
73
+ if provider_plugin and hasattr(provider_plugin, "initialize_credentials"):
74
+ try:
75
+ await provider_plugin.initialize_credentials(credentials)
76
+ except Exception as e:
77
+ lib_logger.error(
78
+ f"Error initializing credentials for provider '{provider}': {e}"
79
+ )
80
+
81
+ # Build summary based on provider type
82
+ if provider in oauth_providers:
83
+ tier_breakdown = {}
84
+ if provider_plugin and hasattr(
85
+ provider_plugin, "get_credential_tier_name"
86
+ ):
87
+ for cred in credentials:
88
+ tier = provider_plugin.get_credential_tier_name(cred)
89
+ if tier:
90
+ tier_breakdown[tier] = tier_breakdown.get(tier, 0) + 1
91
+ oauth_summary[provider] = {
92
+ "count": len(credentials),
93
+ "tiers": tier_breakdown,
94
+ }
95
+ else:
96
+ api_summary[provider] = len(credentials)
97
+
98
+ # Log 3-line summary
99
+ total_providers = len(api_summary) + len(oauth_summary)
100
+ total_credentials = sum(api_summary.values()) + sum(
101
+ d["count"] for d in oauth_summary.values()
102
+ )
103
+
104
+ if total_providers > 0:
105
+ lib_logger.info(
106
+ f"Providers initialized: {total_providers} providers, {total_credentials} credentials"
107
+ )
108
+
109
+ # API providers line
110
+ if api_summary:
111
+ api_parts = [f"{p}:{c}" for p, c in sorted(api_summary.items())]
112
+ lib_logger.info(f" API: {', '.join(api_parts)}")
113
+
114
+ # OAuth providers line with tier breakdown
115
+ if oauth_summary:
116
+ oauth_parts = []
117
+ for provider, data in sorted(oauth_summary.items()):
118
+ if data["tiers"]:
119
+ tier_str = ", ".join(
120
+ f"{t}:{c}" for t, c in sorted(data["tiers"].items())
121
+ )
122
+ oauth_parts.append(f"{provider}:{data['count']} ({tier_str})")
123
+ else:
124
+ oauth_parts.append(f"{provider}:{data['count']}")
125
+ lib_logger.info(f" OAuth: {', '.join(oauth_parts)}")
126
+
127
+ self._initialized = True
128
+
129
  async def _run(self):
130
  """The main loop for the background task."""
131
+ # Initialize credentials (load persisted tiers) before starting the refresh loop
132
+ await self._initialize_credentials()
133
+
134
  while True:
135
  try:
136
+ # lib_logger.info("Running proactive token refresh check...")
137
 
138
  oauth_configs = self._client.get_oauth_credentials()
139
  for provider, paths in oauth_configs.items():
140
+ provider_plugin = self._client._get_provider_instance(provider)
141
+ if provider_plugin and hasattr(
142
+ provider_plugin, "proactively_refresh"
143
+ ):
144
  for path in paths:
145
  try:
146
  await provider_plugin.proactively_refresh(path)
147
  except Exception as e:
148
+ lib_logger.error(
149
+ f"Error during proactive refresh for '{path}': {e}"
150
+ )
151
  await asyncio.sleep(self._interval)
152
  except asyncio.CancelledError:
153
  break
154
  except Exception as e:
155
+ lib_logger.error(f"Unexpected error in background refresher loop: {e}")
src/rotator_library/client.py CHANGED
@@ -447,12 +447,23 @@ class RotatingClient:
447
 
448
  Args:
449
  provider_name: The name of the provider to get an instance for.
 
 
 
450
 
451
  Returns:
452
  Provider instance if credentials exist, None otherwise.
453
  """
 
 
 
 
 
 
 
 
454
  # Only initialize providers for which we have credentials
455
- if provider_name not in self.all_credentials:
456
  lib_logger.debug(
457
  f"Skipping provider '{provider_name}' initialization: no credentials configured"
458
  )
@@ -824,13 +835,20 @@ class RotatingClient:
824
  f"Request will likely fail."
825
  )
826
 
827
- # Build priority map for usage_manager
 
828
  if provider_plugin and hasattr(provider_plugin, "get_credential_priority"):
829
  credential_priorities = {}
 
830
  for cred in credentials_for_provider:
831
  priority = provider_plugin.get_credential_priority(cred)
832
  if priority is not None:
833
  credential_priorities[cred] = priority
 
 
 
 
 
834
 
835
  if credential_priorities:
836
  lib_logger.debug(
@@ -883,6 +901,7 @@ class RotatingClient:
883
  deadline=deadline,
884
  max_concurrent=max_concurrent,
885
  credential_priorities=credential_priorities,
 
886
  )
887
  key_acquired = True
888
  tried_creds.add(current_cred)
@@ -1371,13 +1390,20 @@ class RotatingClient:
1371
  f"Request will likely fail."
1372
  )
1373
 
1374
- # Build priority map for usage_manager
 
1375
  if provider_plugin and hasattr(provider_plugin, "get_credential_priority"):
1376
  credential_priorities = {}
 
1377
  for cred in credentials_for_provider:
1378
  priority = provider_plugin.get_credential_priority(cred)
1379
  if priority is not None:
1380
  credential_priorities[cred] = priority
 
 
 
 
 
1381
 
1382
  if credential_priorities:
1383
  lib_logger.debug(
@@ -1433,6 +1459,7 @@ class RotatingClient:
1433
  deadline=deadline,
1434
  max_concurrent=max_concurrent,
1435
  credential_priorities=credential_priorities,
 
1436
  )
1437
  key_acquired = True
1438
  tried_creds.add(current_cred)
 
447
 
448
  Args:
449
  provider_name: The name of the provider to get an instance for.
450
+ For OAuth providers, this may include "_oauth" suffix
451
+ (e.g., "antigravity_oauth"), but credentials are stored
452
+ under the base name (e.g., "antigravity").
453
 
454
  Returns:
455
  Provider instance if credentials exist, None otherwise.
456
  """
457
+ # For OAuth providers, credentials are stored under base name (without _oauth suffix)
458
+ # e.g., "antigravity_oauth" plugin → credentials under "antigravity"
459
+ credential_key = provider_name
460
+ if provider_name.endswith("_oauth"):
461
+ base_name = provider_name[:-6] # Remove "_oauth"
462
+ if base_name in self.oauth_providers:
463
+ credential_key = base_name
464
+
465
  # Only initialize providers for which we have credentials
466
+ if credential_key not in self.all_credentials:
467
  lib_logger.debug(
468
  f"Skipping provider '{provider_name}' initialization: no credentials configured"
469
  )
 
835
  f"Request will likely fail."
836
  )
837
 
838
+ # Build priority map and tier names map for usage_manager
839
+ credential_tier_names = None
840
  if provider_plugin and hasattr(provider_plugin, "get_credential_priority"):
841
  credential_priorities = {}
842
+ credential_tier_names = {}
843
  for cred in credentials_for_provider:
844
  priority = provider_plugin.get_credential_priority(cred)
845
  if priority is not None:
846
  credential_priorities[cred] = priority
847
+ # Also get tier name for logging
848
+ if hasattr(provider_plugin, "get_credential_tier_name"):
849
+ tier_name = provider_plugin.get_credential_tier_name(cred)
850
+ if tier_name:
851
+ credential_tier_names[cred] = tier_name
852
 
853
  if credential_priorities:
854
  lib_logger.debug(
 
901
  deadline=deadline,
902
  max_concurrent=max_concurrent,
903
  credential_priorities=credential_priorities,
904
+ credential_tier_names=credential_tier_names,
905
  )
906
  key_acquired = True
907
  tried_creds.add(current_cred)
 
1390
  f"Request will likely fail."
1391
  )
1392
 
1393
+ # Build priority map and tier names map for usage_manager
1394
+ credential_tier_names = None
1395
  if provider_plugin and hasattr(provider_plugin, "get_credential_priority"):
1396
  credential_priorities = {}
1397
+ credential_tier_names = {}
1398
  for cred in credentials_for_provider:
1399
  priority = provider_plugin.get_credential_priority(cred)
1400
  if priority is not None:
1401
  credential_priorities[cred] = priority
1402
+ # Also get tier name for logging
1403
+ if hasattr(provider_plugin, "get_credential_tier_name"):
1404
+ tier_name = provider_plugin.get_credential_tier_name(cred)
1405
+ if tier_name:
1406
+ credential_tier_names[cred] = tier_name
1407
 
1408
  if credential_priorities:
1409
  lib_logger.debug(
 
1459
  deadline=deadline,
1460
  max_concurrent=max_concurrent,
1461
  credential_priorities=credential_priorities,
1462
+ credential_tier_names=credential_tier_names,
1463
  )
1464
  key_acquired = True
1465
  tried_creds.add(current_cred)
src/rotator_library/model_definitions.py CHANGED
@@ -24,10 +24,23 @@ class ModelDefinitions:
24
  - IFLOW_MODELS='{"glm-4.6": {}}' - dict format, uses "glm-4.6" as both name and ID
25
  - IFLOW_MODELS='{"custom-name": {"id": "actual-id"}}' - dict format with custom ID
26
  - IFLOW_MODELS='{"model": {"id": "id", "options": {"temperature": 0.7}}}' - with options
 
 
27
  """
28
 
 
 
 
 
 
 
 
 
29
  def __init__(self, config_path: Optional[str] = None):
30
- """Initialize model definitions loader."""
 
 
 
31
  self.config_path = config_path
32
  self.definitions = {}
33
  self._load_definitions()
@@ -49,7 +62,11 @@ class ModelDefinitions:
49
  # Handle array format: ["model-1", "model-2", "model-3"]
50
  elif isinstance(models_json, list):
51
  # Convert array to dict format with empty definitions
52
- models_dict = {model_name: {} for model_name in models_json if isinstance(model_name, str)}
 
 
 
 
53
  self.definitions[provider_name] = models_dict
54
  lib_logger.info(
55
  f"Loaded {len(models_dict)} models for provider: {provider_name} (array format)"
 
24
  - IFLOW_MODELS='{"glm-4.6": {}}' - dict format, uses "glm-4.6" as both name and ID
25
  - IFLOW_MODELS='{"custom-name": {"id": "actual-id"}}' - dict format with custom ID
26
  - IFLOW_MODELS='{"model": {"id": "id", "options": {"temperature": 0.7}}}' - with options
27
+
28
+ This class is a singleton - instantiated once and shared across all providers.
29
  """
30
 
31
+ _instance: Optional["ModelDefinitions"] = None
32
+ _initialized: bool = False
33
+
34
+ def __new__(cls, config_path: Optional[str] = None):
35
+ if cls._instance is None:
36
+ cls._instance = super().__new__(cls)
37
+ return cls._instance
38
+
39
  def __init__(self, config_path: Optional[str] = None):
40
+ """Initialize model definitions loader (only runs once due to singleton)."""
41
+ if ModelDefinitions._initialized:
42
+ return
43
+ ModelDefinitions._initialized = True
44
  self.config_path = config_path
45
  self.definitions = {}
46
  self._load_definitions()
 
62
  # Handle array format: ["model-1", "model-2", "model-3"]
63
  elif isinstance(models_json, list):
64
  # Convert array to dict format with empty definitions
65
+ models_dict = {
66
+ model_name: {}
67
+ for model_name in models_json
68
+ if isinstance(model_name, str)
69
+ }
70
  self.definitions[provider_name] = models_dict
71
  lib_logger.info(
72
  f"Loaded {len(models_dict)} models for provider: {provider_name} (array format)"
src/rotator_library/providers/antigravity_provider.py CHANGED
@@ -595,6 +595,11 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
595
  Priority level (1-10) or None if tier not yet discovered
596
  """
597
  tier = self.project_tier_cache.get(credential)
 
 
 
 
 
598
  if not tier:
599
  return None # Not yet discovered
600
 
@@ -609,6 +614,60 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
609
  # Legacy and unknown get even lower
610
  return 10
611
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
612
  def get_model_tier_requirement(self, model: str) -> Optional[int]:
613
  """
614
  Returns the minimum priority tier required for a model.
@@ -622,6 +681,72 @@ class AntigravityProvider(AntigravityAuthBase, ProviderInterface):
622
  """
623
  return None
624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
625
  # =========================================================================
626
  # MODEL UTILITIES
627
  # =========================================================================
 
595
  Priority level (1-10) or None if tier not yet discovered
596
  """
597
  tier = self.project_tier_cache.get(credential)
598
+
599
+ # Lazy load from file if not in cache
600
+ if not tier:
601
+ tier = self._load_tier_from_file(credential)
602
+
603
  if not tier:
604
  return None # Not yet discovered
605
 
 
614
  # Legacy and unknown get even lower
615
  return 10
616
 
617
+ def _load_tier_from_file(self, credential_path: str) -> Optional[str]:
618
+ """
619
+ Load tier from credential file's _proxy_metadata and cache it.
620
+
621
+ This is used as a fallback when the tier isn't in the memory cache,
622
+ typically on first access before initialize_credentials() has run.
623
+
624
+ Args:
625
+ credential_path: Path to the credential file
626
+
627
+ Returns:
628
+ Tier string if found, None otherwise
629
+ """
630
+ # Skip env:// paths (environment-based credentials)
631
+ if self._parse_env_credential_path(credential_path) is not None:
632
+ return None
633
+
634
+ try:
635
+ with open(credential_path, "r") as f:
636
+ creds = json.load(f)
637
+
638
+ metadata = creds.get("_proxy_metadata", {})
639
+ tier = metadata.get("tier")
640
+ project_id = metadata.get("project_id")
641
+
642
+ if tier:
643
+ self.project_tier_cache[credential_path] = tier
644
+ lib_logger.debug(
645
+ f"Lazy-loaded tier '{tier}' for credential: {Path(credential_path).name}"
646
+ )
647
+
648
+ if project_id and credential_path not in self.project_id_cache:
649
+ self.project_id_cache[credential_path] = project_id
650
+
651
+ return tier
652
+ except (FileNotFoundError, json.JSONDecodeError, KeyError) as e:
653
+ lib_logger.debug(f"Could not lazy-load tier from {credential_path}: {e}")
654
+ return None
655
+
656
+ def get_credential_tier_name(self, credential: str) -> Optional[str]:
657
+ """
658
+ Returns the human-readable tier name for a credential.
659
+
660
+ Args:
661
+ credential: The credential path
662
+
663
+ Returns:
664
+ Tier name string (e.g., "free-tier") or None if unknown
665
+ """
666
+ tier = self.project_tier_cache.get(credential)
667
+ if not tier:
668
+ tier = self._load_tier_from_file(credential)
669
+ return tier
670
+
671
  def get_model_tier_requirement(self, model: str) -> Optional[int]:
672
  """
673
  Returns the minimum priority tier required for a model.
 
681
  """
682
  return None
683
 
684
+ async def initialize_credentials(self, credential_paths: List[str]) -> None:
685
+ """
686
+ Load persisted tier information from credential files at startup.
687
+
688
+ This ensures all credential priorities are known before any API calls,
689
+ preventing unknown credentials from getting priority 999.
690
+ """
691
+ await self._load_persisted_tiers(credential_paths)
692
+
693
+ async def _load_persisted_tiers(
694
+ self, credential_paths: List[str]
695
+ ) -> Dict[str, str]:
696
+ """
697
+ Load persisted tier information from credential files into memory cache.
698
+
699
+ Args:
700
+ credential_paths: List of credential file paths
701
+
702
+ Returns:
703
+ Dict mapping credential path to tier name for logging purposes
704
+ """
705
+ loaded = {}
706
+ for path in credential_paths:
707
+ # Skip env:// paths (environment-based credentials)
708
+ if self._parse_env_credential_path(path) is not None:
709
+ continue
710
+
711
+ # Skip if already in cache
712
+ if path in self.project_tier_cache:
713
+ continue
714
+
715
+ try:
716
+ with open(path, "r") as f:
717
+ creds = json.load(f)
718
+
719
+ metadata = creds.get("_proxy_metadata", {})
720
+ tier = metadata.get("tier")
721
+ project_id = metadata.get("project_id")
722
+
723
+ if tier:
724
+ self.project_tier_cache[path] = tier
725
+ loaded[path] = tier
726
+ lib_logger.debug(
727
+ f"Loaded persisted tier '{tier}' for credential: {Path(path).name}"
728
+ )
729
+
730
+ if project_id:
731
+ self.project_id_cache[path] = project_id
732
+
733
+ except (FileNotFoundError, json.JSONDecodeError, KeyError) as e:
734
+ lib_logger.debug(f"Could not load persisted tier from {path}: {e}")
735
+
736
+ if loaded:
737
+ # Log summary at debug level
738
+ tier_counts: Dict[str, int] = {}
739
+ for tier in loaded.values():
740
+ tier_counts[tier] = tier_counts.get(tier, 0) + 1
741
+ lib_logger.debug(
742
+ f"Antigravity: Loaded {len(loaded)} credential tiers from disk: "
743
+ + ", ".join(
744
+ f"{tier}={count}" for tier, count in sorted(tier_counts.items())
745
+ )
746
+ )
747
+
748
+ return loaded
749
+
750
  # =========================================================================
751
  # MODEL UTILITIES
752
  # =========================================================================
src/rotator_library/providers/gemini_cli_provider.py CHANGED
The diff for this file is too large to render. See raw diff
 
src/rotator_library/providers/provider_interface.py CHANGED
@@ -3,13 +3,15 @@ from typing import List, Dict, Any, Optional, AsyncGenerator, Union
3
  import httpx
4
  import litellm
5
 
 
6
  class ProviderInterface(ABC):
7
  """
8
  An interface for API provider-specific functionality, including model
9
  discovery and custom API call handling for non-standard providers.
10
  """
 
11
  skip_cost_calculation: bool = False
12
-
13
  @abstractmethod
14
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
15
  """
@@ -32,28 +34,38 @@ class ProviderInterface(ABC):
32
  """
33
  return False
34
 
35
- async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
 
 
36
  """
37
  Handles the entire completion call for non-standard providers.
38
  """
39
- raise NotImplementedError(f"{self.__class__.__name__} does not implement custom acompletion.")
 
 
40
 
41
- async def aembedding(self, client: httpx.AsyncClient, **kwargs) -> litellm.EmbeddingResponse:
 
 
42
  """Handles the entire embedding call for non-standard providers."""
43
- raise NotImplementedError(f"{self.__class__.__name__} does not implement custom aembedding.")
44
-
45
- def convert_safety_settings(self, settings: Dict[str, str]) -> Optional[List[Dict[str, Any]]]:
 
 
 
 
46
  """
47
  Converts a generic safety settings dictionary to the provider-specific format.
48
-
49
  Args:
50
  settings: A dictionary with generic harm categories and thresholds.
51
-
52
  Returns:
53
  A list of provider-specific safety setting objects or None.
54
  """
55
  return None
56
-
57
  # [NEW] Add new methods for OAuth providers
58
  async def get_auth_header(self, credential_identifier: str) -> Dict[str, str]:
59
  """
@@ -67,23 +79,23 @@ class ProviderInterface(ABC):
67
  Proactively refreshes a token if it's nearing expiry.
68
  """
69
  pass
70
-
71
  # [NEW] Credential Prioritization System
72
  def get_credential_priority(self, credential: str) -> Optional[int]:
73
  """
74
  Returns the priority level for a credential.
75
  Lower numbers = higher priority (1 is highest).
76
  Returns None if provider doesn't use priorities.
77
-
78
  This allows providers to auto-detect credential tiers (e.g., paid vs free)
79
  and ensure higher-tier credentials are always tried first.
80
-
81
  Args:
82
  credential: The credential identifier (API key or path)
83
-
84
  Returns:
85
  Priority level (1-10) or None if no priority system
86
-
87
  Example:
88
  For Gemini CLI:
89
  - Paid tier credentials: priority 1 (highest)
@@ -91,24 +103,53 @@ class ProviderInterface(ABC):
91
  - Unknown tier: priority 10 (lowest)
92
  """
93
  return None
94
-
95
  def get_model_tier_requirement(self, model: str) -> Optional[int]:
96
  """
97
  Returns the minimum priority tier required for a model.
98
  If a model requires priority 1, only credentials with priority <= 1 can use it.
99
-
100
  This allows providers to restrict certain models to specific credential tiers.
101
  For example, Gemini 3 models require paid-tier credentials.
102
-
103
  Args:
104
  model: The model name (with or without provider prefix)
105
-
106
  Returns:
107
  Minimum required priority level or None if no restrictions
108
-
109
  Example:
110
  For Gemini CLI:
111
  - gemini-3-*: requires priority 1 (paid tier only)
112
  - gemini-2.5-*: no restriction (None)
113
  """
114
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import httpx
4
  import litellm
5
 
6
+
7
  class ProviderInterface(ABC):
8
  """
9
  An interface for API provider-specific functionality, including model
10
  discovery and custom API call handling for non-standard providers.
11
  """
12
+
13
  skip_cost_calculation: bool = False
14
+
15
  @abstractmethod
16
  async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
17
  """
 
34
  """
35
  return False
36
 
37
+ async def acompletion(
38
+ self, client: httpx.AsyncClient, **kwargs
39
+ ) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
40
  """
41
  Handles the entire completion call for non-standard providers.
42
  """
43
+ raise NotImplementedError(
44
+ f"{self.__class__.__name__} does not implement custom acompletion."
45
+ )
46
 
47
+ async def aembedding(
48
+ self, client: httpx.AsyncClient, **kwargs
49
+ ) -> litellm.EmbeddingResponse:
50
  """Handles the entire embedding call for non-standard providers."""
51
+ raise NotImplementedError(
52
+ f"{self.__class__.__name__} does not implement custom aembedding."
53
+ )
54
+
55
+ def convert_safety_settings(
56
+ self, settings: Dict[str, str]
57
+ ) -> Optional[List[Dict[str, Any]]]:
58
  """
59
  Converts a generic safety settings dictionary to the provider-specific format.
60
+
61
  Args:
62
  settings: A dictionary with generic harm categories and thresholds.
63
+
64
  Returns:
65
  A list of provider-specific safety setting objects or None.
66
  """
67
  return None
68
+
69
  # [NEW] Add new methods for OAuth providers
70
  async def get_auth_header(self, credential_identifier: str) -> Dict[str, str]:
71
  """
 
79
  Proactively refreshes a token if it's nearing expiry.
80
  """
81
  pass
82
+
83
  # [NEW] Credential Prioritization System
84
  def get_credential_priority(self, credential: str) -> Optional[int]:
85
  """
86
  Returns the priority level for a credential.
87
  Lower numbers = higher priority (1 is highest).
88
  Returns None if provider doesn't use priorities.
89
+
90
  This allows providers to auto-detect credential tiers (e.g., paid vs free)
91
  and ensure higher-tier credentials are always tried first.
92
+
93
  Args:
94
  credential: The credential identifier (API key or path)
95
+
96
  Returns:
97
  Priority level (1-10) or None if no priority system
98
+
99
  Example:
100
  For Gemini CLI:
101
  - Paid tier credentials: priority 1 (highest)
 
103
  - Unknown tier: priority 10 (lowest)
104
  """
105
  return None
106
+
107
  def get_model_tier_requirement(self, model: str) -> Optional[int]:
108
  """
109
  Returns the minimum priority tier required for a model.
110
  If a model requires priority 1, only credentials with priority <= 1 can use it.
111
+
112
  This allows providers to restrict certain models to specific credential tiers.
113
  For example, Gemini 3 models require paid-tier credentials.
114
+
115
  Args:
116
  model: The model name (with or without provider prefix)
117
+
118
  Returns:
119
  Minimum required priority level or None if no restrictions
120
+
121
  Example:
122
  For Gemini CLI:
123
  - gemini-3-*: requires priority 1 (paid tier only)
124
  - gemini-2.5-*: no restriction (None)
125
  """
126
+ return None
127
+
128
+ async def initialize_credentials(self, credential_paths: List[str]) -> None:
129
+ """
130
+ Called at startup to initialize provider with all available credentials.
131
+
132
+ Providers can override this to load cached tier data, discover priorities,
133
+ or perform any other initialization needed before the first API request.
134
+
135
+ This is called once during startup by the BackgroundRefresher before
136
+ the main refresh loop begins.
137
+
138
+ Args:
139
+ credential_paths: List of credential file paths for this provider
140
+ """
141
+ pass
142
+
143
+ def get_credential_tier_name(self, credential: str) -> Optional[str]:
144
+ """
145
+ Returns the human-readable tier name for a credential.
146
+
147
+ This is used for logging purposes to show which plan tier a credential belongs to.
148
+
149
+ Args:
150
+ credential: The credential identifier (API key or path)
151
+
152
+ Returns:
153
+ Tier name string (e.g., "free-tier", "paid-tier") or None if unknown
154
+ """
155
+ return None
src/rotator_library/usage_manager.py CHANGED
@@ -22,24 +22,24 @@ class UsageManager:
22
  """
23
  Manages usage statistics and cooldowns for API keys with asyncio-safe locking,
24
  asynchronous file I/O, lazy-loading mechanism, and weighted random credential rotation.
25
-
26
  The credential rotation strategy can be configured via the `rotation_tolerance` parameter:
27
-
28
  - **tolerance = 0.0**: Deterministic least-used selection. The credential with
29
  the lowest usage count is always selected. This provides predictable, perfectly balanced
30
  load distribution but may be vulnerable to fingerprinting.
31
-
32
  - **tolerance = 2.0 - 4.0 (default, recommended)**: Balanced weighted randomness. Credentials are selected
33
  randomly with weights biased toward less-used ones. Credentials within 2 uses of the
34
  maximum can still be selected with reasonable probability. This provides security through
35
  unpredictability while maintaining good load balance.
36
-
37
  - **tolerance = 5.0+**: High randomness. Even heavily-used credentials have significant
38
  selection probability. Useful for stress testing or maximum unpredictability, but may
39
  result in less balanced load distribution.
40
-
41
  The weight formula is: `weight = (max_usage - credential_usage) + tolerance + 1`
42
-
43
  This ensures lower-usage credentials are preferred while tolerance controls how much
44
  randomness is introduced into the selection process.
45
  """
@@ -52,7 +52,7 @@ class UsageManager:
52
  ):
53
  """
54
  Initialize the UsageManager.
55
-
56
  Args:
57
  file_path: Path to the usage data JSON file
58
  daily_reset_time_utc: Time in UTC when daily stats should reset (HH:MM format)
@@ -139,7 +139,9 @@ class UsageManager:
139
  last_reset_dt is None
140
  or last_reset_dt < reset_threshold_today <= now_utc
141
  ):
142
- lib_logger.debug(f"Performing daily reset for key {mask_credential(key)}")
 
 
143
  needs_saving = True
144
 
145
  # Reset cooldowns
@@ -194,24 +196,20 @@ class UsageManager:
194
  "models_in_use": {}, # Dict[model_name, concurrent_count]
195
  }
196
 
197
- def _select_weighted_random(
198
- self,
199
- candidates: List[tuple],
200
- tolerance: float
201
- ) -> str:
202
  """
203
  Selects a credential using weighted random selection based on usage counts.
204
-
205
  Args:
206
  candidates: List of (credential_id, usage_count) tuples
207
  tolerance: Tolerance value for weight calculation
208
-
209
  Returns:
210
  Selected credential ID
211
-
212
  Formula:
213
  weight = (max_usage - credential_usage) + tolerance + 1
214
-
215
  This formula ensures:
216
  - Lower usage = higher weight = higher selection probability
217
  - Tolerance adds variability: higher tolerance means more randomness
@@ -219,63 +217,66 @@ class UsageManager:
219
  """
220
  if not candidates:
221
  raise ValueError("Cannot select from empty candidate list")
222
-
223
  if len(candidates) == 1:
224
  return candidates[0][0]
225
-
226
  # Extract usage counts
227
  usage_counts = [usage for _, usage in candidates]
228
  max_usage = max(usage_counts)
229
-
230
  # Calculate weights using the formula: (max - current) + tolerance + 1
231
  weights = []
232
  for credential, usage in candidates:
233
  weight = (max_usage - usage) + tolerance + 1
234
  weights.append(weight)
235
-
236
  # Log weight distribution for debugging
237
  if lib_logger.isEnabledFor(logging.DEBUG):
238
  total_weight = sum(weights)
239
  weight_info = ", ".join(
240
- f"{mask_credential(cred)}: w={w:.1f} ({w/total_weight*100:.1f}%)"
241
  for (cred, _), w in zip(candidates, weights)
242
  )
243
- #lib_logger.debug(f"Weighted selection candidates: {weight_info}")
244
-
245
  # Random selection with weights
246
  selected_credential = random.choices(
247
- [cred for cred, _ in candidates],
248
- weights=weights,
249
- k=1
250
  )[0]
251
-
252
  return selected_credential
253
 
254
  async def acquire_key(
255
- self, available_keys: List[str], model: str, deadline: float,
 
 
 
256
  max_concurrent: int = 1,
257
- credential_priorities: Optional[Dict[str, int]] = None
 
258
  ) -> str:
259
  """
260
  Acquires the best available key using a tiered, model-aware locking strategy,
261
  respecting a global deadline and credential priorities.
262
-
263
  Priority Logic:
264
  - Groups credentials by priority level (1=highest, 2=lower, etc.)
265
  - Always tries highest priority (lowest number) first
266
  - Within same priority, sorts by usage count (load balancing)
267
  - Only moves to next priority if all higher-priority keys exhausted/busy
268
-
269
  Args:
270
  available_keys: List of credential identifiers to choose from
271
  model: Model name being requested
272
  deadline: Timestamp after which to stop trying
273
  max_concurrent: Maximum concurrent requests allowed per credential
274
  credential_priorities: Optional dict mapping credentials to priority levels (1=highest)
275
-
 
276
  Returns:
277
  Selected credential identifier
278
-
279
  Raises:
280
  NoAvailableKeysError: If no key could be acquired within the deadline
281
  """
@@ -294,16 +295,16 @@ class UsageManager:
294
  async with self._data_lock:
295
  for key in available_keys:
296
  key_data = self._usage_data.get(key, {})
297
-
298
  # Skip keys on cooldown
299
  if (key_data.get("key_cooldown_until") or 0) > now or (
300
  key_data.get("model_cooldowns", {}).get(model) or 0
301
  ) > now:
302
  continue
303
-
304
  # Get priority for this key (default to 999 if not specified)
305
  priority = credential_priorities.get(key, 999)
306
-
307
  # Get usage count for load balancing within priority groups
308
  usage_count = (
309
  key_data.get("daily", {})
@@ -311,58 +312,75 @@ class UsageManager:
311
  .get(model, {})
312
  .get("success_count", 0)
313
  )
314
-
315
  # Group by priority
316
  if priority not in priority_groups:
317
  priority_groups[priority] = []
318
  priority_groups[priority].append((key, usage_count))
319
-
320
  # Try priority groups in order (1, 2, 3, ...)
321
  sorted_priorities = sorted(priority_groups.keys())
322
-
323
  for priority_level in sorted_priorities:
324
  keys_in_priority = priority_groups[priority_level]
325
-
326
  # Within each priority group, use existing tier1/tier2 logic
327
  tier1_keys, tier2_keys = [], []
328
  for key, usage_count in keys_in_priority:
329
  key_state = self.key_states[key]
330
-
331
  # Tier 1: Completely idle keys (preferred)
332
  if not key_state["models_in_use"]:
333
  tier1_keys.append((key, usage_count))
334
  # Tier 2: Keys that can accept more concurrent requests
335
  elif key_state["models_in_use"].get(model, 0) < max_concurrent:
336
  tier2_keys.append((key, usage_count))
337
-
338
  # Apply weighted random selection or deterministic sorting
339
- selection_method = "weighted-random" if self.rotation_tolerance > 0 else "least-used"
340
-
 
 
 
 
341
  if self.rotation_tolerance > 0:
342
  # Weighted random selection within each tier
343
  if tier1_keys:
344
- selected_key = self._select_weighted_random(tier1_keys, self.rotation_tolerance)
345
- tier1_keys = [(k, u) for k, u in tier1_keys if k == selected_key]
 
 
 
 
346
  if tier2_keys:
347
- selected_key = self._select_weighted_random(tier2_keys, self.rotation_tolerance)
348
- tier2_keys = [(k, u) for k, u in tier2_keys if k == selected_key]
 
 
 
 
349
  else:
350
  # Deterministic: sort by usage within each tier
351
  tier1_keys.sort(key=lambda x: x[1])
352
  tier2_keys.sort(key=lambda x: x[1])
353
-
354
  # Try to acquire from Tier 1 first
355
  for key, usage in tier1_keys:
356
  state = self.key_states[key]
357
  async with state["lock"]:
358
  if not state["models_in_use"]:
359
  state["models_in_use"][model] = 1
 
 
 
 
 
360
  lib_logger.info(
361
- f"Acquired Priority-{priority_level} Tier-1 key {mask_credential(key)} for model {model} "
362
- f"(selection: {selection_method}, usage: {usage})"
363
  )
364
  return key
365
-
366
  # Then try Tier 2
367
  for key, usage in tier2_keys:
368
  state = self.key_states[key]
@@ -370,35 +388,40 @@ class UsageManager:
370
  current_count = state["models_in_use"].get(model, 0)
371
  if current_count < max_concurrent:
372
  state["models_in_use"][model] = current_count + 1
 
 
 
 
 
373
  lib_logger.info(
374
- f"Acquired Priority-{priority_level} Tier-2 key {mask_credential(key)} for model {model} "
375
- f"(selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
376
  )
377
  return key
378
-
379
  # If we get here, all priority groups were exhausted but keys might become available
380
  # Collect all keys across all priorities for waiting
381
  all_potential_keys = []
382
  for keys_list in priority_groups.values():
383
  all_potential_keys.extend(keys_list)
384
-
385
  if not all_potential_keys:
386
  lib_logger.warning(
387
  "No keys are eligible (all on cooldown or filtered out). Waiting before re-evaluating."
388
  )
389
  await asyncio.sleep(1)
390
  continue
391
-
392
  # Wait for the highest priority key with lowest usage
393
  best_priority = min(priority_groups.keys())
394
  best_priority_keys = priority_groups[best_priority]
395
  best_wait_key = min(best_priority_keys, key=lambda x: x[1])[0]
396
  wait_condition = self.key_states[best_wait_key]["condition"]
397
-
398
  lib_logger.info(
399
  f"All Priority-{best_priority} keys are busy. Waiting for highest priority credential to become available..."
400
  )
401
-
402
  else:
403
  # Original logic when no priorities specified
404
  tier1_keys, tier2_keys = [], []
@@ -430,16 +453,26 @@ class UsageManager:
430
  tier2_keys.append((key, usage_count))
431
 
432
  # Apply weighted random selection or deterministic sorting
433
- selection_method = "weighted-random" if self.rotation_tolerance > 0 else "least-used"
434
-
 
 
435
  if self.rotation_tolerance > 0:
436
  # Weighted random selection within each tier
437
  if tier1_keys:
438
- selected_key = self._select_weighted_random(tier1_keys, self.rotation_tolerance)
439
- tier1_keys = [(k, u) for k, u in tier1_keys if k == selected_key]
 
 
 
 
440
  if tier2_keys:
441
- selected_key = self._select_weighted_random(tier2_keys, self.rotation_tolerance)
442
- tier2_keys = [(k, u) for k, u in tier2_keys if k == selected_key]
 
 
 
 
443
  else:
444
  # Deterministic: sort by usage within each tier
445
  tier1_keys.sort(key=lambda x: x[1])
@@ -451,9 +484,15 @@ class UsageManager:
451
  async with state["lock"]:
452
  if not state["models_in_use"]:
453
  state["models_in_use"][model] = 1
 
 
 
 
 
 
454
  lib_logger.info(
455
- f"Acquired Tier 1 key {mask_credential(key)} for model {model} "
456
- f"(selection: {selection_method}, usage: {usage})"
457
  )
458
  return key
459
 
@@ -464,9 +503,15 @@ class UsageManager:
464
  current_count = state["models_in_use"].get(model, 0)
465
  if current_count < max_concurrent:
466
  state["models_in_use"][model] = current_count + 1
 
 
 
 
 
 
467
  lib_logger.info(
468
- f"Acquired Tier 2 key {mask_credential(key)} for model {model} "
469
- f"(selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
470
  )
471
  return key
472
 
@@ -506,8 +551,6 @@ class UsageManager:
506
  f"Could not acquire a key for model {model} within the global time budget."
507
  )
508
 
509
-
510
-
511
  async def release_key(self, key: str, model: str):
512
  """Releases a key's lock for a specific model and notifies waiting tasks."""
513
  if key not in self.key_states:
@@ -640,8 +683,11 @@ class UsageManager:
640
  await self._save_usage()
641
 
642
  async def record_failure(
643
- self, key: str, model: str, classified_error: ClassifiedError,
644
- increment_consecutive_failures: bool = True
 
 
 
645
  ):
646
  """Records a failure and applies cooldowns based on an escalating backoff strategy.
647
 
@@ -705,7 +751,9 @@ class UsageManager:
705
  # If cooldown wasn't set by specific error type, use escalating backoff
706
  if cooldown_seconds is None:
707
  backoff_tiers = {1: 10, 2: 30, 3: 60, 4: 120}
708
- cooldown_seconds = backoff_tiers.get(count, 7200) # Default to 2 hours for "spent" keys
 
 
709
  lib_logger.warning(
710
  f"Failure #{count} for key {mask_credential(key)} with model {model}. "
711
  f"Error type: {classified_error.error_type}"
 
22
  """
23
  Manages usage statistics and cooldowns for API keys with asyncio-safe locking,
24
  asynchronous file I/O, lazy-loading mechanism, and weighted random credential rotation.
25
+
26
  The credential rotation strategy can be configured via the `rotation_tolerance` parameter:
27
+
28
  - **tolerance = 0.0**: Deterministic least-used selection. The credential with
29
  the lowest usage count is always selected. This provides predictable, perfectly balanced
30
  load distribution but may be vulnerable to fingerprinting.
31
+
32
  - **tolerance = 2.0 - 4.0 (default, recommended)**: Balanced weighted randomness. Credentials are selected
33
  randomly with weights biased toward less-used ones. Credentials within 2 uses of the
34
  maximum can still be selected with reasonable probability. This provides security through
35
  unpredictability while maintaining good load balance.
36
+
37
  - **tolerance = 5.0+**: High randomness. Even heavily-used credentials have significant
38
  selection probability. Useful for stress testing or maximum unpredictability, but may
39
  result in less balanced load distribution.
40
+
41
  The weight formula is: `weight = (max_usage - credential_usage) + tolerance + 1`
42
+
43
  This ensures lower-usage credentials are preferred while tolerance controls how much
44
  randomness is introduced into the selection process.
45
  """
 
52
  ):
53
  """
54
  Initialize the UsageManager.
55
+
56
  Args:
57
  file_path: Path to the usage data JSON file
58
  daily_reset_time_utc: Time in UTC when daily stats should reset (HH:MM format)
 
139
  last_reset_dt is None
140
  or last_reset_dt < reset_threshold_today <= now_utc
141
  ):
142
+ lib_logger.debug(
143
+ f"Performing daily reset for key {mask_credential(key)}"
144
+ )
145
  needs_saving = True
146
 
147
  # Reset cooldowns
 
196
  "models_in_use": {}, # Dict[model_name, concurrent_count]
197
  }
198
 
199
+ def _select_weighted_random(self, candidates: List[tuple], tolerance: float) -> str:
 
 
 
 
200
  """
201
  Selects a credential using weighted random selection based on usage counts.
202
+
203
  Args:
204
  candidates: List of (credential_id, usage_count) tuples
205
  tolerance: Tolerance value for weight calculation
206
+
207
  Returns:
208
  Selected credential ID
209
+
210
  Formula:
211
  weight = (max_usage - credential_usage) + tolerance + 1
212
+
213
  This formula ensures:
214
  - Lower usage = higher weight = higher selection probability
215
  - Tolerance adds variability: higher tolerance means more randomness
 
217
  """
218
  if not candidates:
219
  raise ValueError("Cannot select from empty candidate list")
220
+
221
  if len(candidates) == 1:
222
  return candidates[0][0]
223
+
224
  # Extract usage counts
225
  usage_counts = [usage for _, usage in candidates]
226
  max_usage = max(usage_counts)
227
+
228
  # Calculate weights using the formula: (max - current) + tolerance + 1
229
  weights = []
230
  for credential, usage in candidates:
231
  weight = (max_usage - usage) + tolerance + 1
232
  weights.append(weight)
233
+
234
  # Log weight distribution for debugging
235
  if lib_logger.isEnabledFor(logging.DEBUG):
236
  total_weight = sum(weights)
237
  weight_info = ", ".join(
238
+ f"{mask_credential(cred)}: w={w:.1f} ({w / total_weight * 100:.1f}%)"
239
  for (cred, _), w in zip(candidates, weights)
240
  )
241
+ # lib_logger.debug(f"Weighted selection candidates: {weight_info}")
242
+
243
  # Random selection with weights
244
  selected_credential = random.choices(
245
+ [cred for cred, _ in candidates], weights=weights, k=1
 
 
246
  )[0]
247
+
248
  return selected_credential
249
 
250
  async def acquire_key(
251
+ self,
252
+ available_keys: List[str],
253
+ model: str,
254
+ deadline: float,
255
  max_concurrent: int = 1,
256
+ credential_priorities: Optional[Dict[str, int]] = None,
257
+ credential_tier_names: Optional[Dict[str, str]] = None,
258
  ) -> str:
259
  """
260
  Acquires the best available key using a tiered, model-aware locking strategy,
261
  respecting a global deadline and credential priorities.
262
+
263
  Priority Logic:
264
  - Groups credentials by priority level (1=highest, 2=lower, etc.)
265
  - Always tries highest priority (lowest number) first
266
  - Within same priority, sorts by usage count (load balancing)
267
  - Only moves to next priority if all higher-priority keys exhausted/busy
268
+
269
  Args:
270
  available_keys: List of credential identifiers to choose from
271
  model: Model name being requested
272
  deadline: Timestamp after which to stop trying
273
  max_concurrent: Maximum concurrent requests allowed per credential
274
  credential_priorities: Optional dict mapping credentials to priority levels (1=highest)
275
+ credential_tier_names: Optional dict mapping credentials to tier names (for logging)
276
+
277
  Returns:
278
  Selected credential identifier
279
+
280
  Raises:
281
  NoAvailableKeysError: If no key could be acquired within the deadline
282
  """
 
295
  async with self._data_lock:
296
  for key in available_keys:
297
  key_data = self._usage_data.get(key, {})
298
+
299
  # Skip keys on cooldown
300
  if (key_data.get("key_cooldown_until") or 0) > now or (
301
  key_data.get("model_cooldowns", {}).get(model) or 0
302
  ) > now:
303
  continue
304
+
305
  # Get priority for this key (default to 999 if not specified)
306
  priority = credential_priorities.get(key, 999)
307
+
308
  # Get usage count for load balancing within priority groups
309
  usage_count = (
310
  key_data.get("daily", {})
 
312
  .get(model, {})
313
  .get("success_count", 0)
314
  )
315
+
316
  # Group by priority
317
  if priority not in priority_groups:
318
  priority_groups[priority] = []
319
  priority_groups[priority].append((key, usage_count))
320
+
321
  # Try priority groups in order (1, 2, 3, ...)
322
  sorted_priorities = sorted(priority_groups.keys())
323
+
324
  for priority_level in sorted_priorities:
325
  keys_in_priority = priority_groups[priority_level]
326
+
327
  # Within each priority group, use existing tier1/tier2 logic
328
  tier1_keys, tier2_keys = [], []
329
  for key, usage_count in keys_in_priority:
330
  key_state = self.key_states[key]
331
+
332
  # Tier 1: Completely idle keys (preferred)
333
  if not key_state["models_in_use"]:
334
  tier1_keys.append((key, usage_count))
335
  # Tier 2: Keys that can accept more concurrent requests
336
  elif key_state["models_in_use"].get(model, 0) < max_concurrent:
337
  tier2_keys.append((key, usage_count))
338
+
339
  # Apply weighted random selection or deterministic sorting
340
+ selection_method = (
341
+ "weighted-random"
342
+ if self.rotation_tolerance > 0
343
+ else "least-used"
344
+ )
345
+
346
  if self.rotation_tolerance > 0:
347
  # Weighted random selection within each tier
348
  if tier1_keys:
349
+ selected_key = self._select_weighted_random(
350
+ tier1_keys, self.rotation_tolerance
351
+ )
352
+ tier1_keys = [
353
+ (k, u) for k, u in tier1_keys if k == selected_key
354
+ ]
355
  if tier2_keys:
356
+ selected_key = self._select_weighted_random(
357
+ tier2_keys, self.rotation_tolerance
358
+ )
359
+ tier2_keys = [
360
+ (k, u) for k, u in tier2_keys if k == selected_key
361
+ ]
362
  else:
363
  # Deterministic: sort by usage within each tier
364
  tier1_keys.sort(key=lambda x: x[1])
365
  tier2_keys.sort(key=lambda x: x[1])
366
+
367
  # Try to acquire from Tier 1 first
368
  for key, usage in tier1_keys:
369
  state = self.key_states[key]
370
  async with state["lock"]:
371
  if not state["models_in_use"]:
372
  state["models_in_use"][model] = 1
373
+ tier_name = (
374
+ credential_tier_names.get(key, "unknown")
375
+ if credential_tier_names
376
+ else "unknown"
377
+ )
378
  lib_logger.info(
379
+ f"Acquired key {mask_credential(key)} for model {model} "
380
+ f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, usage: {usage})"
381
  )
382
  return key
383
+
384
  # Then try Tier 2
385
  for key, usage in tier2_keys:
386
  state = self.key_states[key]
 
388
  current_count = state["models_in_use"].get(model, 0)
389
  if current_count < max_concurrent:
390
  state["models_in_use"][model] = current_count + 1
391
+ tier_name = (
392
+ credential_tier_names.get(key, "unknown")
393
+ if credential_tier_names
394
+ else "unknown"
395
+ )
396
  lib_logger.info(
397
+ f"Acquired key {mask_credential(key)} for model {model} "
398
+ f"(tier: {tier_name}, priority: {priority_level}, selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
399
  )
400
  return key
401
+
402
  # If we get here, all priority groups were exhausted but keys might become available
403
  # Collect all keys across all priorities for waiting
404
  all_potential_keys = []
405
  for keys_list in priority_groups.values():
406
  all_potential_keys.extend(keys_list)
407
+
408
  if not all_potential_keys:
409
  lib_logger.warning(
410
  "No keys are eligible (all on cooldown or filtered out). Waiting before re-evaluating."
411
  )
412
  await asyncio.sleep(1)
413
  continue
414
+
415
  # Wait for the highest priority key with lowest usage
416
  best_priority = min(priority_groups.keys())
417
  best_priority_keys = priority_groups[best_priority]
418
  best_wait_key = min(best_priority_keys, key=lambda x: x[1])[0]
419
  wait_condition = self.key_states[best_wait_key]["condition"]
420
+
421
  lib_logger.info(
422
  f"All Priority-{best_priority} keys are busy. Waiting for highest priority credential to become available..."
423
  )
424
+
425
  else:
426
  # Original logic when no priorities specified
427
  tier1_keys, tier2_keys = [], []
 
453
  tier2_keys.append((key, usage_count))
454
 
455
  # Apply weighted random selection or deterministic sorting
456
+ selection_method = (
457
+ "weighted-random" if self.rotation_tolerance > 0 else "least-used"
458
+ )
459
+
460
  if self.rotation_tolerance > 0:
461
  # Weighted random selection within each tier
462
  if tier1_keys:
463
+ selected_key = self._select_weighted_random(
464
+ tier1_keys, self.rotation_tolerance
465
+ )
466
+ tier1_keys = [
467
+ (k, u) for k, u in tier1_keys if k == selected_key
468
+ ]
469
  if tier2_keys:
470
+ selected_key = self._select_weighted_random(
471
+ tier2_keys, self.rotation_tolerance
472
+ )
473
+ tier2_keys = [
474
+ (k, u) for k, u in tier2_keys if k == selected_key
475
+ ]
476
  else:
477
  # Deterministic: sort by usage within each tier
478
  tier1_keys.sort(key=lambda x: x[1])
 
484
  async with state["lock"]:
485
  if not state["models_in_use"]:
486
  state["models_in_use"][model] = 1
487
+ tier_name = (
488
+ credential_tier_names.get(key)
489
+ if credential_tier_names
490
+ else None
491
+ )
492
+ tier_info = f"tier: {tier_name}, " if tier_name else ""
493
  lib_logger.info(
494
+ f"Acquired key {mask_credential(key)} for model {model} "
495
+ f"({tier_info}selection: {selection_method}, usage: {usage})"
496
  )
497
  return key
498
 
 
503
  current_count = state["models_in_use"].get(model, 0)
504
  if current_count < max_concurrent:
505
  state["models_in_use"][model] = current_count + 1
506
+ tier_name = (
507
+ credential_tier_names.get(key)
508
+ if credential_tier_names
509
+ else None
510
+ )
511
+ tier_info = f"tier: {tier_name}, " if tier_name else ""
512
  lib_logger.info(
513
+ f"Acquired key {mask_credential(key)} for model {model} "
514
+ f"({tier_info}selection: {selection_method}, concurrent: {state['models_in_use'][model]}/{max_concurrent}, usage: {usage})"
515
  )
516
  return key
517
 
 
551
  f"Could not acquire a key for model {model} within the global time budget."
552
  )
553
 
 
 
554
  async def release_key(self, key: str, model: str):
555
  """Releases a key's lock for a specific model and notifies waiting tasks."""
556
  if key not in self.key_states:
 
683
  await self._save_usage()
684
 
685
  async def record_failure(
686
+ self,
687
+ key: str,
688
+ model: str,
689
+ classified_error: ClassifiedError,
690
+ increment_consecutive_failures: bool = True,
691
  ):
692
  """Records a failure and applies cooldowns based on an escalating backoff strategy.
693
 
 
751
  # If cooldown wasn't set by specific error type, use escalating backoff
752
  if cooldown_seconds is None:
753
  backoff_tiers = {1: 10, 2: 30, 3: 60, 4: 120}
754
+ cooldown_seconds = backoff_tiers.get(
755
+ count, 7200
756
+ ) # Default to 2 hours for "spent" keys
757
  lib_logger.warning(
758
  f"Failure #{count} for key {mask_credential(key)} with model {model}. "
759
  f"Error type: {classified_error.error_type}"