Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Nov 14, 2025

Commit

39e01ca

1 Parent(s): 225c46e

feat(core): enable static model configuration and LiteLLM compatibility for custom providers

Introduce static model definitions and runtime parameter conversion for custom endpoints.

This change significantly improves compatibility with self-hosted or dynamically configured OpenAI-compatible APIs:

- **Model Definitions:** Adds a new `ModelDefinitions` utility to load static model configurations (IDs, options like `reasoning_effort`) from environment variables (e.g., `PROVIDER_MODELS`).
- **Dynamic Providers:** Extends provider discovery to dynamically register new providers whenever an `_API_BASE` environment variable is detected.
- **Client Conversion:** Implements `_convert_model_params_for_litellm` in the RotatingClient to rewrite the model argument (to `openai/{model_id}`) and inject the necessary `api_base` and `custom_llm_provider` kwargs right before calling LiteLLM.
- **Option Application:** Ensures that model options loaded from the static definitions are merged into the LiteLLM request arguments.
- **Cost Management:** Configures custom providers to skip cost calculation in the `UsageManager`.

Files changed (6) hide show

src/rotator_library/client.py +63 -2
src/rotator_library/error_handler.py +1 -1
src/rotator_library/model_definitions.py +68 -0
src/rotator_library/providers/__init__.py +99 -3
src/rotator_library/providers/openai_compatible_provider.py +56 -9
src/rotator_library/usage_manager.py +188 -74

src/rotator_library/client.py CHANGED Viewed

@@ -297,6 +297,32 @@ class RotatingClient:
         return kwargs
     def get_oauth_credentials(self) -> Dict[str, List[str]]:
         return self.oauth_credentials
@@ -566,6 +592,18 @@ class RotatingClient:
                     }
                 provider_plugin = self._get_provider_instance(provider)
                 if provider_plugin and provider_plugin.has_custom_logic():
                     lib_logger.debug(
                         f"Provider '{provider}' has custom logic. Delegating call."
@@ -666,8 +704,13 @@ class RotatingClient:
                                             f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}"
                                         )
                             response = await api_call(
-                                **litellm_kwargs,
                                 logger_fn=self._litellm_logger_callback,
                             )
@@ -912,6 +955,19 @@ class RotatingClient:
                         }
                     provider_plugin = self._get_provider_instance(provider)
                     if provider_plugin and provider_plugin.has_custom_logic():
                         lib_logger.debug(
                             f"Provider '{provider}' has custom logic. Delegating call."
@@ -1121,8 +1177,13 @@ class RotatingClient:
                                         )
                             # lib_logger.info(f"DEBUG: litellm.acompletion kwargs: {litellm_kwargs}")
                             response = await litellm.acompletion(
-                                **litellm_kwargs,
                                 logger_fn=self._litellm_logger_callback,
                             )

         return kwargs
+    def _convert_model_params_for_litellm(self, **kwargs) -> Dict[str, Any]:
+        """
+        Rewrite request kwargs for custom OpenAI-compatible providers.
+
+        Called right before the LiteLLM call. When the model is given as
+        ``<provider>/<model_id>`` and a non-empty ``<PROVIDER>_API_BASE``
+        environment variable exists, the request is redirected to LiteLLM's
+        ``openai`` provider at that base URL.
+
+        Args:
+            **kwargs: Keyword arguments destined for LiteLLM.
+
+        Returns:
+            The kwargs unchanged when no conversion applies; otherwise a copy
+            with ``model`` set to ``openai/<model_id>``, ``api_base`` set to
+            the environment value (trailing slashes stripped) and
+            ``custom_llm_provider`` set to ``"openai"``.
+        """
+        import os
+
+        model = kwargs.get("model")
+        if not model:
+            return kwargs
+        # partition() never raises: a model without a "<provider>/" prefix
+        # yields an empty model_id and is passed through untouched instead of
+        # failing with IndexError.
+        provider, _, model_id = model.partition("/")
+        if not model_id:
+            return kwargs
+        # Read the variable once so the check and the value cannot diverge.
+        api_base = os.getenv(f"{provider.upper()}_API_BASE")
+        if not api_base:
+            return kwargs
+        # Work on a copy so the original model name stays available to callers.
+        converted = dict(kwargs)
+        converted["model"] = f"openai/{model_id}"
+        converted["api_base"] = api_base.rstrip("/")
+        converted["custom_llm_provider"] = "openai"
+        return converted
     def get_oauth_credentials(self) -> Dict[str, List[str]]:
         return self.oauth_credentials
                     }
                 provider_plugin = self._get_provider_instance(provider)
+                # Apply model-specific options for custom providers
+                if provider_plugin and hasattr(provider_plugin, "get_model_options"):
+                    model_options = provider_plugin.get_model_options(model)
+                    if model_options:
+                        # Merge model options into litellm_kwargs
+                        for key, value in model_options.items():
+                            if key == "reasoning_effort":
+                                litellm_kwargs["reasoning_effort"] = value
+                            elif key not in litellm_kwargs:
+                                litellm_kwargs[key] = value
                 if provider_plugin and provider_plugin.has_custom_logic():
                     lib_logger.debug(
                         f"Provider '{provider}' has custom logic. Delegating call."
                                             f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}"
                                         )
+                            # Convert model parameters for custom providers right before LiteLLM call
+                            final_kwargs = self._convert_model_params_for_litellm(
+                                **litellm_kwargs
+                            )
                             response = await api_call(
+                                **final_kwargs,
                                 logger_fn=self._litellm_logger_callback,
                             )
                         }
                     provider_plugin = self._get_provider_instance(provider)
+                    # Apply model-specific options for custom providers
+                    if provider_plugin and hasattr(
+                        provider_plugin, "get_model_options"
+                    ):
+                        model_options = provider_plugin.get_model_options(model)
+                        if model_options:
+                            # Merge model options into litellm_kwargs
+                            for key, value in model_options.items():
+                                if key == "reasoning_effort":
+                                    litellm_kwargs["reasoning_effort"] = value
+                                elif key not in litellm_kwargs:
+                                    litellm_kwargs[key] = value
                     if provider_plugin and provider_plugin.has_custom_logic():
                         lib_logger.debug(
                             f"Provider '{provider}' has custom logic. Delegating call."
                                         )
                             # lib_logger.info(f"DEBUG: litellm.acompletion kwargs: {litellm_kwargs}")
+                            # Convert model parameters for custom providers right before LiteLLM call
+                            final_kwargs = self._convert_model_params_for_litellm(
+                                **litellm_kwargs
+                            )
                             response = await litellm.acompletion(
+                                **final_kwargs,
                                 logger_fn=self._litellm_logger_callback,
                             )

src/rotator_library/error_handler.py CHANGED Viewed

@@ -269,7 +269,7 @@ class AllProviders:
                 api_base = os.getenv(env_var)
                 if api_base:
                     self.providers[provider_name] = {
-                        "api_base": api_base.rstrip("/") if api_base else None,
                         "model_prefix": None,  # No prefix for custom providers
                     }

                 api_base = os.getenv(env_var)
                 if api_base:
                     self.providers[provider_name] = {
+                        "api_base": api_base.rstrip("/") if api_base else "",
                         "model_prefix": None,  # No prefix for custom providers
                     }

src/rotator_library/model_definitions.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import json
+import os
+import logging
+from typing import Dict, Any, Optional
+# Library-wide logger. Propagation is switched off, so records reach only the
+# handlers attached to this logger itself.
+lib_logger = logging.getLogger("rotator_library")
+lib_logger.propagate = False
+if not lib_logger.handlers:
+    # A NullHandler keeps the library silent until the host application
+    # attaches a handler of its own.
+    lib_logger.addHandler(logging.NullHandler())
+
+
+class ModelDefinitions:
+    """
+    Simple model definitions loader from environment variables.
+
+    Every environment variable whose name ends in ``_MODELS`` is parsed as
+    JSON; the part of the name before the suffix, lower-cased, becomes the
+    provider name.
+
+    Format: PROVIDER_MODELS={"model1": {"id": "id1"}, "model2": {"id": "id2", "options": {"reasoning_effort": "high"}}}
+    """
+
+    _ENV_SUFFIX = "_MODELS"
+
+    def __init__(self, config_path: Optional[str] = None):
+        """
+        Initialize the loader and read the environment immediately.
+
+        Args:
+            config_path: Stored on the instance; the loader itself does not
+                consult it.
+        """
+        self.config_path = config_path
+        self.definitions: Dict[str, Dict[str, Any]] = {}
+        self._load_definitions()
+
+    def _load_definitions(self):
+        """Load model definitions from environment variables."""
+        for env_var, env_value in os.environ.items():
+            if not env_var.endswith(self._ENV_SUFFIX):
+                continue
+            provider_name = env_var[: -len(self._ENV_SUFFIX)].lower()
+            if not provider_name:
+                # A variable literally named "_MODELS" names no provider.
+                continue
+            try:
+                models_json = json.loads(env_value)
+            except (json.JSONDecodeError, TypeError) as e:
+                lib_logger.warning(f"Invalid JSON in {env_var}: {e}")
+                continue
+            if not isinstance(models_json, dict):
+                # Valid JSON that is not an object cannot map names to
+                # definitions; it is skipped, as before, but no longer
+                # without a trace.
+                lib_logger.debug(f"Ignoring {env_var}: expected a JSON object")
+                continue
+            self.definitions[provider_name] = models_json
+            lib_logger.info(
+                f"Loaded {len(models_json)} models for provider: {provider_name}"
+            )
+
+    def get_provider_models(self, provider_name: str) -> Dict[str, Any]:
+        """Get all models for a provider (empty dict when unknown)."""
+        return self.definitions.get(provider_name, {})
+
+    def get_model_definition(
+        self, provider_name: str, model_name: str
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get a specific model definition.
+
+        Returns:
+            The definition dict, or None when the model is unknown or its
+            entry is not a JSON object.
+        """
+        definition = self.get_provider_models(provider_name).get(model_name)
+        # Entries such as {"model": "some-id"} are not usable definitions;
+        # treating them as missing keeps the accessors below from raising
+        # AttributeError.
+        return definition if isinstance(definition, dict) else None
+
+    def get_model_options(self, provider_name: str, model_name: str) -> Dict[str, Any]:
+        """
+        Get options for a specific model.
+
+        Returns:
+            The ``options`` object of the definition, or an empty dict when
+            the model is unknown or ``options`` is absent or not an object.
+        """
+        model_def = self.get_model_definition(provider_name, model_name)
+        options = model_def.get("options") if model_def else None
+        return options if isinstance(options, dict) else {}
+
+    def get_model_id(self, provider_name: str, model_name: str) -> Optional[str]:
+        """Get model ID for a specific model, or None when unavailable."""
+        model_def = self.get_model_definition(provider_name, model_name)
+        return model_def.get("id") if model_def else None
+
+    def get_all_provider_models(self, provider_name: str) -> list:
+        """Get all model names with provider prefix."""
+        return [
+            f"{provider_name}/{model}"
+            for model in self.get_provider_models(provider_name)
+        ]
+
+    def reload_definitions(self):
+        """Reload model definitions from environment variables."""
+        self.definitions.clear()
+        self._load_definitions()

src/rotator_library/providers/__init__.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import importlib
 import pkgutil
 from typing import Dict, Type
 from .provider_interface import ProviderInterface
@@ -8,31 +9,126 @@ from .provider_interface import ProviderInterface
 # Dictionary to hold discovered provider classes, mapping provider name to class
 PROVIDER_PLUGINS: Dict[str, Type[ProviderInterface]] = {}
 def _register_providers():
     """
     Dynamically discovers and imports provider plugins from this directory.
     """
     package_path = __path__
     package_name = __name__
     for _, module_name, _ in pkgutil.iter_modules(package_path):
         # Construct the full module path
         full_module_path = f"{package_name}.{module_name}"
         # Import the module
         module = importlib.import_module(full_module_path)
         # Look for a class that inherits from ProviderInterface
         for attribute_name in dir(module):
             attribute = getattr(module, attribute_name)
-            if isinstance(attribute, type) and issubclass(attribute, ProviderInterface) and attribute is not ProviderInterface:
                 # Derives 'gemini_cli' from 'gemini_cli_provider.py'
                 # Remap 'nvidia' to 'nvidia_nim' to align with litellm's provider name
                 provider_name = module_name.replace("_provider", "")
                 if provider_name == "nvidia":
                     provider_name = "nvidia_nim"
                 PROVIDER_PLUGINS[provider_name] = attribute
-                #print(f"Registered provider: {provider_name}")
 # Discover and register providers when the package is imported
 _register_providers()

 import importlib
 import pkgutil
+import os
 from typing import Dict, Type
 from .provider_interface import ProviderInterface
 # Dictionary to hold discovered provider classes, mapping provider name to class
 PROVIDER_PLUGINS: Dict[str, Type[ProviderInterface]] = {}
+class DynamicOpenAICompatibleProvider:
+    """
+    Runtime provider for a custom OpenAI-compatible endpoint.
+
+    An instance keeps the provider name, the base URL taken from the
+    ``<PROVIDER>_API_BASE`` environment variable, and a loader for the
+    statically configured models.
+    """
+
+    def __init__(self, provider_name: str):
+        """
+        Bind the provider to its ``<PROVIDER>_API_BASE`` endpoint.
+
+        Raises:
+            ValueError: If the environment variable is unset or empty.
+        """
+        base_url = os.getenv(f"{provider_name.upper()}_API_BASE")
+        self.provider_name = provider_name
+        self.api_base = base_url
+        if not base_url:
+            raise ValueError(
+                f"Environment variable {provider_name.upper()}_API_BASE is required for OpenAI-compatible provider"
+            )
+        # Imported at call time rather than at module import time.
+        from ..model_definitions import ModelDefinitions
+
+        self.model_definitions = ModelDefinitions()
+
+    def skip_cost_calculation(self) -> bool:
+        """Report that cost calculation is skipped for custom providers."""
+        return True
+
+    def get_models(self, api_key: str, client):
+        """
+        Delegate model listing to ``OpenAICompatibleProvider``.
+
+        A throwaway ``OpenAICompatibleProvider`` for the same provider name is
+        built on every call, and whatever its ``get_models`` returns is handed
+        back unchanged.
+        """
+        from .openai_compatible_provider import OpenAICompatibleProvider
+
+        # NOTE(review): the delegate's get_models is declared ``async``, so
+        # this hands back an awaitable - confirm that callers await it.
+        return OpenAICompatibleProvider(self.provider_name).get_models(api_key, client)
+
+    # NOTE(review): ``any`` below is the builtin function, not ``typing.Any``.
+    def get_model_options(self, model_name: str) -> Dict[str, any]:
+        """Look up static options for a model, ignoring any ``prefix/`` parts."""
+        # Only the text after the last "/" is used as the lookup key.
+        bare_name = model_name.rsplit("/", 1)[-1]
+        return self.model_definitions.get_model_options(self.provider_name, bare_name)
+
+    def has_custom_logic(self) -> bool:
+        """Return False so requests follow the standard litellm flow."""
+        return False
+
+    def get_auth_header(self, credential_identifier: str) -> Dict[str, str]:
+        """Build the standard Bearer-token ``Authorization`` header."""
+        token = credential_identifier
+        return {"Authorization": f"Bearer {token}"}
 def _register_providers():
     """
     Dynamically discovers and imports provider plugins from this directory.
+    Also creates dynamic plugins for custom OpenAI-compatible providers.
+
+    Results are stored in the module-level PROVIDER_PLUGINS mapping, keyed by
+    provider name; nothing is returned.
     """
     package_path = __path__
     package_name = __name__
+    # First, register file-based providers
     for _, module_name, _ in pkgutil.iter_modules(package_path):
         # Construct the full module path
         full_module_path = f"{package_name}.{module_name}"
         # Import the module
         module = importlib.import_module(full_module_path)
         # Look for a class that inherits from ProviderInterface
         for attribute_name in dir(module):
             attribute = getattr(module, attribute_name)
+            if (
+                isinstance(attribute, type)
+                and issubclass(attribute, ProviderInterface)
+                and attribute is not ProviderInterface
+            ):
                 # Derives 'gemini_cli' from 'gemini_cli_provider.py'
                 # Remap 'nvidia' to 'nvidia_nim' to align with litellm's provider name
                 provider_name = module_name.replace("_provider", "")
                 if provider_name == "nvidia":
                     provider_name = "nvidia_nim"
                 PROVIDER_PLUGINS[provider_name] = attribute
+                # print(f"Registered provider: {provider_name}")
+    # Then, create dynamic plugins for custom OpenAI-compatible providers
+    # Load environment variables to find custom providers
+    from dotenv import load_dotenv
+    load_dotenv()
+    # Every variable named <NAME>_API_BASE yields a provider called <name>
+    # (the prefix, lower-cased).
+    for env_var in os.environ:
+        if env_var.endswith("_API_BASE"):
+            provider_name = env_var[:-9].lower()  # Remove '_API_BASE' suffix
+            # Skip known providers that already have file-based plugins
+            # NOTE(review): this list is hard-coded rather than derived from
+            # PROVIDER_PLUGINS; a file-based plugin whose name is not listed
+            # here is replaced by the dynamic class registered below.
+            if provider_name in [
+                "openai",
+                "anthropic",
+                "google",
+                "gemini",
+                "nvidia",
+                "mistral",
+                "cohere",
+                "groq",
+                "openrouter",
+                "chutes",
+            ]:
+                continue
+            # Create a dynamic plugin class
+            # The factory gives each DynamicPlugin its own ``name`` binding
+            # instead of closing over the loop variable.
+            def create_plugin_class(name):
+                class DynamicPlugin(DynamicOpenAICompatibleProvider):
+                    def __init__(self):
+                        super().__init__(name)
+                return DynamicPlugin
+            # Create and register the plugin class
+            plugin_class = create_plugin_class(provider_name)
+            PROVIDER_PLUGINS[provider_name] = plugin_class
+            # print(f"Registered dynamic provider: {provider_name}")
 # Discover and register providers when the package is imported
 _register_providers()

src/rotator_library/providers/openai_compatible_provider.py CHANGED Viewed

@@ -3,6 +3,7 @@ import httpx
 import logging
 from typing import List, Dict, Any, Optional
 from .provider_interface import ProviderInterface
 lib_logger = logging.getLogger("rotator_library")
 lib_logger.propagate = False
@@ -15,7 +16,11 @@ class OpenAICompatibleProvider(ProviderInterface):
     Generic provider implementation for any OpenAI-compatible API.
     This provider can be configured via environment variables to support
     custom OpenAI-compatible endpoints without requiring code changes.
     """
     def __init__(self, provider_name: str):
         self.provider_name = provider_name
@@ -26,28 +31,70 @@ class OpenAICompatibleProvider(ProviderInterface):
                 f"Environment variable {provider_name.upper()}_API_BASE is required for OpenAI-compatible provider"
             )
     async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
         """
         Fetches the list of available models from the OpenAI-compatible API.
         """
         try:
             models_url = f"{self.api_base.rstrip('/')}/models"
             response = await client.get(
                 models_url, headers={"Authorization": f"Bearer {api_key}"}
             )
             response.raise_for_status()
-            return [
                 f"{self.provider_name}/{model['id']}"
                 for model in response.json().get("data", [])
             ]
-        except httpx.RequestError as e:
-            lib_logger.error(f"Failed to fetch models for {self.provider_name}: {e}")
-            return []
-        except Exception as e:
-            lib_logger.error(
-                f"Unexpected error fetching models for {self.provider_name}: {e}"
-            )
-            return []
     def has_custom_logic(self) -> bool:
         """

 import logging
 from typing import List, Dict, Any, Optional
 from .provider_interface import ProviderInterface
+from ..model_definitions import ModelDefinitions
 lib_logger = logging.getLogger("rotator_library")
 lib_logger.propagate = False
     Generic provider implementation for any OpenAI-compatible API.
     This provider can be configured via environment variables to support
     custom OpenAI-compatible endpoints without requiring code changes.
+    Supports both dynamic model discovery and static model definitions.
     """
+    skip_cost_calculation: bool = True  # Skip cost calculation for custom providers
     def __init__(self, provider_name: str):
         self.provider_name = provider_name
                 f"Environment variable {provider_name.upper()}_API_BASE is required for OpenAI-compatible provider"
             )
+        # Initialize model definitions loader
+        self.model_definitions = ModelDefinitions()
     async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
         """
         Fetches the list of available models from the OpenAI-compatible API.
+        Combines dynamic discovery with static model definitions.
+
+        Args:
+            api_key: Sent as a Bearer token to the ``/models`` endpoint.
+            client: HTTP client used for the discovery request.
+
+        Returns:
+            Model names prefixed with ``<provider>/``: the static definitions
+            first, followed by discovered models not already defined
+            statically. Discovery is best-effort; if it fails, only the static
+            models are returned.
         """
+        models = []
+        # First, try to get static model definitions
+        static_models = self.model_definitions.get_all_provider_models(
+            self.provider_name
+        )
+        if static_models:
+            models.extend(static_models)
+            lib_logger.info(
+                f"Loaded {len(static_models)} static models for {self.provider_name}"
+            )
+        # Built once, outside the comprehension. Only the leading
+        # "<provider>/" is stripped, so names that themselves contain "/"
+        # (e.g. "org/model") are compared in full against discovered ids.
+        static_ids = {name.split("/", 1)[-1] for name in static_models}
+        # Then, try dynamic discovery to get additional models
         try:
             models_url = f"{self.api_base.rstrip('/')}/models"
             response = await client.get(
                 models_url, headers={"Authorization": f"Bearer {api_key}"}
             )
             response.raise_for_status()
+            dynamic_models = [
                 f"{self.provider_name}/{model['id']}"
                 for model in response.json().get("data", [])
+                if model["id"] not in static_ids
             ]
+            if dynamic_models:
+                models.extend(dynamic_models)
+                lib_logger.debug(
+                    f"Discovered {len(dynamic_models)} additional models for {self.provider_name}"
+                )
+        except Exception as e:
+            # Discovery stays best-effort (httpx.RequestError is covered by
+            # this clause too), but the failure is now visible at debug level.
+            lib_logger.debug(
+                f"Dynamic model discovery failed for {self.provider_name}: {e}"
+            )
+        return models
+    def get_model_options(self, model_name: str) -> Dict[str, Any]:
+        """
+        Look up the statically defined options for a model.
+
+        Args:
+            model_name: Model name; any ``prefix/`` parts are dropped and only
+                the text after the last ``/`` is used for the lookup.
+
+        Returns:
+            Whatever the model definitions loader returns for this provider
+            and the bare model name.
+        """
+        bare_name = model_name.rsplit("/", 1)[-1]
+        provider = self.provider_name
+        return self.model_definitions.get_model_options(provider, bare_name)
     def has_custom_logic(self) -> bool:
         """

src/rotator_library/usage_manager.py CHANGED Viewed

@@ -11,20 +11,26 @@ import litellm
 from .error_handler import ClassifiedError, NoAvailableKeysError
 from .providers import PROVIDER_PLUGINS
-lib_logger = logging.getLogger('rotator_library')
 lib_logger.propagate = False
 if not lib_logger.handlers:
     lib_logger.addHandler(logging.NullHandler())
 class UsageManager:
     """
     Manages usage statistics and cooldowns for API keys with asyncio-safe locking,
     asynchronous file I/O, and a lazy-loading mechanism for usage data.
     """
-    def __init__(self, file_path: str = "key_usage.json", daily_reset_time_utc: Optional[str] = "03:00"):
         self.file_path = file_path
         self.key_states: Dict[str, Dict[str, Any]] = {}
         self._data_lock = asyncio.Lock()
         self._usage_data: Optional[Dict] = None
         self._initialized = asyncio.Event()
@@ -34,8 +40,10 @@ class UsageManager:
         self._claimed_on_timeout: Set[str] = set()
         if daily_reset_time_utc:
-            hour, minute = map(int, daily_reset_time_utc.split(':'))
-            self.daily_reset_time_utc = dt_time(hour=hour, minute=minute, tzinfo=timezone.utc)
         else:
             self.daily_reset_time_utc = None
@@ -54,7 +62,7 @@ class UsageManager:
                 self._usage_data = {}
                 return
             try:
-                async with aiofiles.open(self.file_path, 'r') as f:
                     content = await f.read()
                     self._usage_data = json.loads(content)
             except (json.JSONDecodeError, IOError, FileNotFoundError):
@@ -65,7 +73,7 @@ class UsageManager:
         if self._usage_data is None:
             return
         async with self._data_lock:
-            async with aiofiles.open(self.file_path, 'w') as f:
                 await f.write(json.dumps(self._usage_data, indent=2))
     async def _reset_daily_stats_if_needed(self):
@@ -79,24 +87,31 @@ class UsageManager:
         for key, data in self._usage_data.items():
             last_reset_str = data.get("last_daily_reset", "")
             if last_reset_str != today_str:
                 last_reset_dt = None
                 if last_reset_str:
                     # Ensure the parsed datetime is timezone-aware (UTC)
-                    last_reset_dt = datetime.fromisoformat(last_reset_str).replace(tzinfo=timezone.utc)
                 # Determine the reset threshold for today
-                reset_threshold_today = datetime.combine(now_utc.date(), self.daily_reset_time_utc)
-                if last_reset_dt is None or last_reset_dt < reset_threshold_today <= now_utc:
                     lib_logger.info(f"Performing daily reset for key ...{key[-6:]}")
                     needs_saving = True
                     # Reset cooldowns
                     data["model_cooldowns"] = {}
                     data["key_cooldown_until"] = None
                     # Reset consecutive failures
                     if "failures" in data:
                         data["failures"] = {}
@@ -106,12 +121,28 @@ class UsageManager:
                     if daily_data:
                         global_data = data.setdefault("global", {"models": {}})
                         for model, stats in daily_data.get("models", {}).items():
-                            global_model_stats = global_data["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0, "approx_cost": 0.0})
-                            global_model_stats["success_count"] += stats.get("success_count", 0)
-                            global_model_stats["prompt_tokens"] += stats.get("prompt_tokens", 0)
-                            global_model_stats["completion_tokens"] += stats.get("completion_tokens", 0)
-                            global_model_stats["approx_cost"] += stats.get("approx_cost", 0.0)
                     # Reset daily stats
                     data["daily"] = {"date": today_str, "models": {}}
                     data["last_daily_reset"] = today_str
@@ -126,10 +157,12 @@ class UsageManager:
                 self.key_states[key] = {
                     "lock": asyncio.Lock(),
                     "condition": asyncio.Condition(),
-                    "models_in_use": set()
                 }
-    async def acquire_key(self, available_keys: List[str], model: str, deadline: float) -> str:
         """
         Acquires the best available key using a tiered, model-aware locking strategy,
         respecting a global deadline.
@@ -142,18 +175,24 @@ class UsageManager:
         while time.time() < deadline:
             tier1_keys, tier2_keys = [], []
             now = time.time()
             # First, filter the list of available keys to exclude any on cooldown.
             async with self._data_lock:
                 for key in available_keys:
                     key_data = self._usage_data.get(key, {})
-                    if (key_data.get("key_cooldown_until") or 0) > now or \
-                       (key_data.get("model_cooldowns", {}).get(model) or 0) > now:
                         continue
                     # Prioritize keys based on their current usage to ensure load balancing.
-                    usage_count = key_data.get("daily", {}).get("models", {}).get(model, {}).get("success_count", 0)
                     key_state = self.key_states[key]
                     # Tier 1: Completely idle keys (preferred).
@@ -172,7 +211,9 @@ class UsageManager:
                 async with state["lock"]:
                     if not state["models_in_use"]:
                         state["models_in_use"].add(model)
-                        lib_logger.info(f"Acquired Tier 1 key ...{key[-6:]} for model {model}")
                         return key
             # If no Tier 1 keys are available, try Tier 2.
@@ -181,37 +222,46 @@ class UsageManager:
                 async with state["lock"]:
                     if model not in state["models_in_use"]:
                         state["models_in_use"].add(model)
-                        lib_logger.info(f"Acquired Tier 2 key ...{key[-6:]} for model {model}")
                         return key
             # If all eligible keys are locked, wait for a key to be released.
-            lib_logger.info("All eligible keys are currently locked for this model. Waiting...")
             all_potential_keys = tier1_keys + tier2_keys
             if not all_potential_keys:
-                lib_logger.warning("No keys are eligible (all on cooldown). Waiting before re-evaluating.")
                 await asyncio.sleep(1)
                 continue
             # Wait on the condition of the key with the lowest current usage.
             best_wait_key = min(all_potential_keys, key=lambda x: x[1])[0]
             wait_condition = self.key_states[best_wait_key]["condition"]
             try:
                 async with wait_condition:
                     remaining_budget = deadline - time.time()
                     if remaining_budget <= 0:
-                        break # Exit if the budget has already been exceeded.
                     # Wait for a notification, but no longer than the remaining budget or 1 second.
-                    await asyncio.wait_for(wait_condition.wait(), timeout=min(1, remaining_budget))
                 lib_logger.info("Notified that a key was released. Re-evaluating...")
             except asyncio.TimeoutError:
                 # This is not an error, just a timeout for the wait. The main loop will re-evaluate.
                 lib_logger.info("Wait timed out. Re-evaluating for any available key.")
-        # If the loop exits, it means the deadline was exceeded.
-        raise NoAvailableKeysError(f"Could not acquire a key for model {model} within the global time budget.")
     async def release_key(self, key: str, model: str):
         """Releases a key's lock for a specific model and notifies waiting tasks."""
@@ -224,13 +274,20 @@ class UsageManager:
                 state["models_in_use"].remove(model)
                 lib_logger.info(f"Released credential ...{key[-6:]} from model {model}")
             else:
-                lib_logger.warning(f"Attempted to release credential ...{key[-6:]} for model {model}, but it was not in use.")
         # Notify all tasks waiting on this key's condition
         async with state["condition"]:
             state["condition"].notify_all()
-    async def record_success(self, key: str, model: str, completion_response: Optional[litellm.ModelResponse] = None):
         """
         Records a successful API call, resetting failure counters.
         It safely handles cases where token usage data is not available.
@@ -238,33 +295,59 @@ class UsageManager:
         await self._lazy_init()
         async with self._data_lock:
             today_utc_str = datetime.now(timezone.utc).date().isoformat()
-            key_data = self._usage_data.setdefault(key, {"daily": {"date": today_utc_str, "models": {}}, "global": {"models": {}}, "model_cooldowns": {}, "failures": {}})
             # If the key is new, ensure its reset date is initialized to prevent an immediate reset.
             if "last_daily_reset" not in key_data:
                 key_data["last_daily_reset"] = today_utc_str
             # Always record a success and reset failures
             model_failures = key_data.setdefault("failures", {}).setdefault(model, {})
             model_failures["consecutive_failures"] = 0
             if model in key_data.get("model_cooldowns", {}):
                 del key_data["model_cooldowns"][model]
-            daily_model_data = key_data["daily"]["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0, "approx_cost": 0.0})
             daily_model_data["success_count"] += 1
             # Safely attempt to record token and cost usage
-            if completion_response and hasattr(completion_response, 'usage') and completion_response.usage:
                 usage = completion_response.usage
                 daily_model_data["prompt_tokens"] += usage.prompt_tokens
-                daily_model_data["completion_tokens"] += getattr(usage, 'completion_tokens', 0) # Not present in embedding responses
-                lib_logger.info(f"Recorded usage from response object for key ...{key[-6:]}")
                 try:
-                    provider_name = model.split('/')[0]
                     provider_plugin = PROVIDER_PLUGINS.get(provider_name)
-                    if provider_plugin and provider_plugin.skip_cost_calculation:
-                        lib_logger.debug(f"Skipping cost calculation for provider '{provider_name}' as per its configuration.")
                     else:
                         # Differentiate cost calculation based on response type
                         if isinstance(completion_response, litellm.EmbeddingResponse):
@@ -272,56 +355,85 @@ class UsageManager:
                             model_info = litellm.get_model_info(model)
                             input_cost = model_info.get("input_cost_per_token")
                             if input_cost:
-                                cost = completion_response.usage.prompt_tokens * input_cost
                             else:
                                 cost = None
                         else:
-                            cost = litellm.completion_cost(completion_response=completion_response, model=model)
                         if cost is not None:
                             daily_model_data["approx_cost"] += cost
                 except Exception as e:
-                    lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
-            elif isinstance(completion_response, asyncio.Future) or hasattr(completion_response, '__aiter__'):
                 # This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
                 pass
             else:
-                lib_logger.warning(f"No usage data found in completion response for model {model}. Recording success without token count.")
             key_data["last_used_ts"] = time.time()
         await self._save_usage()
-    async def record_failure(self, key: str, model: str, classified_error: ClassifiedError):
         """Records a failure and applies cooldowns based on an escalating backoff strategy."""
         await self._lazy_init()
         async with self._data_lock:
             today_utc_str = datetime.now(timezone.utc).date().isoformat()
-            key_data = self._usage_data.setdefault(key, {"daily": {"date": today_utc_str, "models": {}}, "global": {"models": {}}, "model_cooldowns": {}, "failures": {}})
             # Handle specific error types first
-            if classified_error.error_type == 'rate_limit' and classified_error.retry_after:
                 cooldown_seconds = classified_error.retry_after
-            elif classified_error.error_type == 'authentication':
                 # Apply a 5-minute key-level lockout for auth errors
                 key_data["key_cooldown_until"] = time.time() + 300
-                lib_logger.warning(f"Authentication error on key ...{key[-6:]}. Applying 5-minute key-level lockout.")
                 await self._save_usage()
-                return # No further backoff logic needed
             else:
                 # General backoff logic for other errors
                 failures_data = key_data.setdefault("failures", {})
-                model_failures = failures_data.setdefault(model, {"consecutive_failures": 0})
                 model_failures["consecutive_failures"] += 1
                 count = model_failures["consecutive_failures"]
                 backoff_tiers = {1: 10, 2: 30, 3: 60, 4: 120}
-                cooldown_seconds = backoff_tiers.get(count, 7200) # Default to 2 hours
             # Apply the cooldown
             model_cooldowns = key_data.setdefault("model_cooldowns", {})
             model_cooldowns[model] = time.time() + cooldown_seconds
-            lib_logger.warning(f"Failure recorded for key ...{key[-6:]} with model {model}. Applying {cooldown_seconds}s cooldown.")
             # Check for key-level lockout condition
             await self._check_key_lockout(key, key_data)
@@ -329,20 +441,22 @@ class UsageManager:
             key_data["last_failure"] = {
                 "timestamp": time.time(),
                 "model": model,
-                "error": str(classified_error.original_exception)
             }
         await self._save_usage()
     async def _check_key_lockout(self, key: str, key_data: Dict):
         """Checks if a key should be locked out due to multiple model failures."""
         long_term_lockout_models = 0
         now = time.time()
         for model, cooldown_end in key_data.get("model_cooldowns", {}).items():
-            if cooldown_end - now >= 7200: # Check for 2-hour lockouts
                 long_term_lockout_models += 1
         if long_term_lockout_models >= 3:
-            key_data["key_cooldown_until"] = now + 300 # 5-minute key lockout
-            lib_logger.error(f"Key ...{key[-6:]} has {long_term_lockout_models} models in long-term lockout. Applying 5-minute key-level lockout.")

 from .error_handler import ClassifiedError, NoAvailableKeysError
 from .providers import PROVIDER_PLUGINS
+lib_logger = logging.getLogger("rotator_library")
 lib_logger.propagate = False
 if not lib_logger.handlers:
     lib_logger.addHandler(logging.NullHandler())
 class UsageManager:
     """
     Manages usage statistics and cooldowns for API keys with asyncio-safe locking,
     asynchronous file I/O, and a lazy-loading mechanism for usage data.
     """
+    def __init__(
+        self,
+        file_path: str = "key_usage.json",
+        daily_reset_time_utc: Optional[str] = "03:00",
+    ):
         """
         Sets up the in-memory state; no file I/O happens in the statements shown here.

         Args:
             file_path: Path of the JSON usage file that the load/save code
                 opens through aiofiles.
             daily_reset_time_utc: Daily reset time as an "HH:MM" string,
                 interpreted as UTC. A falsy value (None or "") stores None
                 in ``self.daily_reset_time_utc`` instead of a time.

         Raises:
             ValueError: If ``daily_reset_time_utc`` is truthy but does not
                 split on ":" into exactly two integers that form a valid time.
         """
         self.file_path = file_path
         # Per-key runtime state; entries are dicts holding "lock", "condition"
         # and "models_in_use" (populated elsewhere in the class).
         self.key_states: Dict[str, Dict[str, Any]] = {}
         # Taken by the methods that read or mutate self._usage_data.
         self._data_lock = asyncio.Lock()
         # Stays None until the usage data has been loaded.
         self._usage_data: Optional[Dict] = None
         self._initialized = asyncio.Event()
         # NOTE(review): no reader or writer of this set is visible in this
         # chunk - confirm its purpose against the rest of the class.
         self._claimed_on_timeout: Set[str] = set()
         if daily_reset_time_utc:
             # Parse "HH:MM" into a timezone-aware (UTC) time-of-day.
+            hour, minute = map(int, daily_reset_time_utc.split(":"))
+            self.daily_reset_time_utc = dt_time(
+                hour=hour, minute=minute, tzinfo=timezone.utc
+            )
         else:
             self.daily_reset_time_utc = None
                 self._usage_data = {}
                 return
             try:
+                async with aiofiles.open(self.file_path, "r") as f:
                     content = await f.read()
                     self._usage_data = json.loads(content)
             except (json.JSONDecodeError, IOError, FileNotFoundError):
         if self._usage_data is None:
             return
         async with self._data_lock:
+            async with aiofiles.open(self.file_path, "w") as f:
                 await f.write(json.dumps(self._usage_data, indent=2))
     async def _reset_daily_stats_if_needed(self):
         for key, data in self._usage_data.items():
             last_reset_str = data.get("last_daily_reset", "")
             if last_reset_str != today_str:
                 last_reset_dt = None
                 if last_reset_str:
                     # Ensure the parsed datetime is timezone-aware (UTC)
+                    last_reset_dt = datetime.fromisoformat(last_reset_str).replace(
+                        tzinfo=timezone.utc
+                    )
                 # Determine the reset threshold for today
+                reset_threshold_today = datetime.combine(
+                    now_utc.date(), self.daily_reset_time_utc
+                )
+                if (
+                    last_reset_dt is None
+                    or last_reset_dt < reset_threshold_today <= now_utc
+                ):
                     lib_logger.info(f"Performing daily reset for key ...{key[-6:]}")
                     needs_saving = True
                     # Reset cooldowns
                     data["model_cooldowns"] = {}
                     data["key_cooldown_until"] = None
                     # Reset consecutive failures
                     if "failures" in data:
                         data["failures"] = {}
                     if daily_data:
                         global_data = data.setdefault("global", {"models": {}})
                         for model, stats in daily_data.get("models", {}).items():
+                            global_model_stats = global_data["models"].setdefault(
+                                model,
+                                {
+                                    "success_count": 0,
+                                    "prompt_tokens": 0,
+                                    "completion_tokens": 0,
+                                    "approx_cost": 0.0,
+                                },
+                            )
+                            global_model_stats["success_count"] += stats.get(
+                                "success_count", 0
+                            )
+                            global_model_stats["prompt_tokens"] += stats.get(
+                                "prompt_tokens", 0
+                            )
+                            global_model_stats["completion_tokens"] += stats.get(
+                                "completion_tokens", 0
+                            )
+                            global_model_stats["approx_cost"] += stats.get(
+                                "approx_cost", 0.0
+                            )
                     # Reset daily stats
                     data["daily"] = {"date": today_str, "models": {}}
                     data["last_daily_reset"] = today_str
                 self.key_states[key] = {
                     "lock": asyncio.Lock(),
                     "condition": asyncio.Condition(),
+                    "models_in_use": set(),
                 }
+    async def acquire_key(
+        self, available_keys: List[str], model: str, deadline: float
+    ) -> str:
         """
         Acquires the best available key using a tiered, model-aware locking strategy,
         respecting a global deadline.
         while time.time() < deadline:
             tier1_keys, tier2_keys = [], []
             now = time.time()
             # First, filter the list of available keys to exclude any on cooldown.
             async with self._data_lock:
                 for key in available_keys:
                     key_data = self._usage_data.get(key, {})
+                    if (key_data.get("key_cooldown_until") or 0) > now or (
+                        key_data.get("model_cooldowns", {}).get(model) or 0
+                    ) > now:
                         continue
                     # Prioritize keys based on their current usage to ensure load balancing.
+                    usage_count = (
+                        key_data.get("daily", {})
+                        .get("models", {})
+                        .get(model, {})
+                        .get("success_count", 0)
+                    )
                     key_state = self.key_states[key]
                     # Tier 1: Completely idle keys (preferred).
                 async with state["lock"]:
                     if not state["models_in_use"]:
                         state["models_in_use"].add(model)
+                        lib_logger.info(
+                            f"Acquired Tier 1 key ...{key[-6:]} for model {model}"
+                        )
                         return key
             # If no Tier 1 keys are available, try Tier 2.
                 async with state["lock"]:
                     if model not in state["models_in_use"]:
                         state["models_in_use"].add(model)
+                        lib_logger.info(
+                            f"Acquired Tier 2 key ...{key[-6:]} for model {model}"
+                        )
                         return key
             # If all eligible keys are locked, wait for a key to be released.
+            lib_logger.info(
+                "All eligible keys are currently locked for this model. Waiting..."
+            )
             all_potential_keys = tier1_keys + tier2_keys
             if not all_potential_keys:
+                lib_logger.warning(
+                    "No keys are eligible (all on cooldown). Waiting before re-evaluating."
+                )
                 await asyncio.sleep(1)
                 continue
             # Wait on the condition of the key with the lowest current usage.
             best_wait_key = min(all_potential_keys, key=lambda x: x[1])[0]
             wait_condition = self.key_states[best_wait_key]["condition"]
             try:
                 async with wait_condition:
                     remaining_budget = deadline - time.time()
                     if remaining_budget <= 0:
+                        break  # Exit if the budget has already been exceeded.
                     # Wait for a notification, but no longer than the remaining budget or 1 second.
+                    await asyncio.wait_for(
+                        wait_condition.wait(), timeout=min(1, remaining_budget)
+                    )
                 lib_logger.info("Notified that a key was released. Re-evaluating...")
             except asyncio.TimeoutError:
                 # This is not an error, just a timeout for the wait. The main loop will re-evaluate.
                 lib_logger.info("Wait timed out. Re-evaluating for any available key.")
+        # If the loop exits, it means the deadline was exceeded.
+        raise NoAvailableKeysError(
+            f"Could not acquire a key for model {model} within the global time budget."
+        )
     async def release_key(self, key: str, model: str):
         """Releases a key's lock for a specific model and notifies waiting tasks."""
                 state["models_in_use"].remove(model)
                 lib_logger.info(f"Released credential ...{key[-6:]} from model {model}")
             else:
+                lib_logger.warning(
+                    f"Attempted to release credential ...{key[-6:]} for model {model}, but it was not in use."
+                )
         # Notify all tasks waiting on this key's condition
         async with state["condition"]:
             state["condition"].notify_all()
+    async def record_success(
+        self,
+        key: str,
+        model: str,
+        completion_response: Optional[litellm.ModelResponse] = None,
+    ):
         """
         Records a successful API call, resetting failure counters.
         It safely handles cases where token usage data is not available.
         await self._lazy_init()
         async with self._data_lock:
             today_utc_str = datetime.now(timezone.utc).date().isoformat()
+            key_data = self._usage_data.setdefault(
+                key,
+                {
+                    "daily": {"date": today_utc_str, "models": {}},
+                    "global": {"models": {}},
+                    "model_cooldowns": {},
+                    "failures": {},
+                },
+            )
             # If the key is new, ensure its reset date is initialized to prevent an immediate reset.
             if "last_daily_reset" not in key_data:
                 key_data["last_daily_reset"] = today_utc_str
             # Always record a success and reset failures
             model_failures = key_data.setdefault("failures", {}).setdefault(model, {})
             model_failures["consecutive_failures"] = 0
             if model in key_data.get("model_cooldowns", {}):
                 del key_data["model_cooldowns"][model]
+            daily_model_data = key_data["daily"]["models"].setdefault(
+                model,
+                {
+                    "success_count": 0,
+                    "prompt_tokens": 0,
+                    "completion_tokens": 0,
+                    "approx_cost": 0.0,
+                },
+            )
             daily_model_data["success_count"] += 1
             # Safely attempt to record token and cost usage
+            if (
+                completion_response
+                and hasattr(completion_response, "usage")
+                and completion_response.usage
+            ):
                 usage = completion_response.usage
                 daily_model_data["prompt_tokens"] += usage.prompt_tokens
+                daily_model_data["completion_tokens"] += getattr(
+                    usage, "completion_tokens", 0
+                )  # Not present in embedding responses
+                lib_logger.info(
+                    f"Recorded usage from response object for key ...{key[-6:]}"
+                )
                 try:
+                    provider_name = model.split("/")[0]
                     provider_plugin = PROVIDER_PLUGINS.get(provider_name)
+                    if provider_plugin and provider_plugin.skip_cost_calculation():
+                        lib_logger.debug(
+                            f"Skipping cost calculation for provider '{provider_name}' (custom provider)."
+                        )
                     else:
                         # Differentiate cost calculation based on response type
                         if isinstance(completion_response, litellm.EmbeddingResponse):
                             model_info = litellm.get_model_info(model)
                             input_cost = model_info.get("input_cost_per_token")
                             if input_cost:
+                                cost = (
+                                    completion_response.usage.prompt_tokens * input_cost
+                                )
                             else:
                                 cost = None
                         else:
+                            cost = litellm.completion_cost(
+                                completion_response=completion_response, model=model
+                            )
                         if cost is not None:
                             daily_model_data["approx_cost"] += cost
                 except Exception as e:
+                    lib_logger.warning(
+                        f"Could not calculate cost for model {model}: {e}"
+                    )
+            elif isinstance(completion_response, asyncio.Future) or hasattr(
+                completion_response, "__aiter__"
+            ):
                 # This is an unconsumed stream object. Do not log a warning, as usage will be recorded from the chunks.
                 pass
             else:
+                lib_logger.warning(
+                    f"No usage data found in completion response for model {model}. Recording success without token count."
+                )
             key_data["last_used_ts"] = time.time()
         await self._save_usage()
+    async def record_failure(
+        self, key: str, model: str, classified_error: ClassifiedError
+    ):
         """Records a failure and applies cooldowns based on an escalating backoff strategy."""
         await self._lazy_init()
         async with self._data_lock:
             today_utc_str = datetime.now(timezone.utc).date().isoformat()
+            key_data = self._usage_data.setdefault(
+                key,
+                {
+                    "daily": {"date": today_utc_str, "models": {}},
+                    "global": {"models": {}},
+                    "model_cooldowns": {},
+                    "failures": {},
+                },
+            )
             # Handle specific error types first
+            if (
+                classified_error.error_type == "rate_limit"
+                and classified_error.retry_after
+            ):
                 cooldown_seconds = classified_error.retry_after
+            elif classified_error.error_type == "authentication":
                 # Apply a 5-minute key-level lockout for auth errors
                 key_data["key_cooldown_until"] = time.time() + 300
+                lib_logger.warning(
+                    f"Authentication error on key ...{key[-6:]}. Applying 5-minute key-level lockout."
+                )
                 await self._save_usage()
+                return  # No further backoff logic needed
             else:
                 # General backoff logic for other errors
                 failures_data = key_data.setdefault("failures", {})
+                model_failures = failures_data.setdefault(
+                    model, {"consecutive_failures": 0}
+                )
                 model_failures["consecutive_failures"] += 1
                 count = model_failures["consecutive_failures"]
                 backoff_tiers = {1: 10, 2: 30, 3: 60, 4: 120}
+                cooldown_seconds = backoff_tiers.get(count, 7200)  # Default to 2 hours
             # Apply the cooldown
             model_cooldowns = key_data.setdefault("model_cooldowns", {})
             model_cooldowns[model] = time.time() + cooldown_seconds
+            lib_logger.warning(
+                f"Failure recorded for key ...{key[-6:]} with model {model}. Applying {cooldown_seconds}s cooldown."
+            )
             # Check for key-level lockout condition
             await self._check_key_lockout(key, key_data)
             key_data["last_failure"] = {
                 "timestamp": time.time(),
                 "model": model,
+                "error": str(classified_error.original_exception),
             }
         await self._save_usage()
     async def _check_key_lockout(self, key: str, key_data: Dict):
         """
         Checks if a key should be locked out due to multiple model failures.

         A model counts as being in long-term lockout when its cooldown is
         still active and either:

         * at least 2 hours of the cooldown remain, or
         * its consecutive-failure count has gone past the short backoff
           tiers (counts 1-4), which is when the 2-hour default cooldown is
           assigned.

         The second rule is needed because cooldowns are stored as absolute
         end timestamps. By the time this check samples the clock, a 2-hour
         cooldown already has slightly less than 7200 seconds left, so the
         remaining-time test alone would never match it.

         When three or more models qualify, the key gets a 5-minute key-level
         cooldown, written to ``key_data["key_cooldown_until"]``.

         Args:
             key: Credential being evaluated (only its last 6 characters are logged).
             key_data: The usage record for ``key``; mutated in place.
         """
         long_term_seconds = 7200
         # Failure counts 1-4 map to short cooldowns; anything above gets 2 hours.
         short_backoff_tiers = 4
         failures = key_data.get("failures", {})
         long_term_lockout_models = 0
         now = time.time()
         for model, cooldown_end in key_data.get("model_cooldowns", {}).items():
             remaining = cooldown_end - now
             if remaining <= 0:
                 # Cooldown already expired; the model is usable again.
                 continue
             consecutive = failures.get(model, {}).get("consecutive_failures", 0)
             if remaining >= long_term_seconds or consecutive > short_backoff_tiers:
                 long_term_lockout_models += 1
         if long_term_lockout_models >= 3:
             key_data["key_cooldown_until"] = now + 300  # 5-minute key lockout
             lib_logger.error(
                 f"Key ...{key[-6:]} has {long_term_lockout_models} models in long-term lockout. Applying 5-minute key-level lockout."
             )