Spaces:

elmerzole
/

llm-api-proxy

Paused

App Files Files Community

Mirrowel committed on Jul 2, 2025

Commit

4bbfff4

1 Parent(s): aa8035e

feat: Enhance asynchronous handling in RotatingClient and UsageManager for improved error management and usage tracking

Browse files

Files changed (4) hide show

requirements.txt +4 -2
src/rotator_library/client.py +23 -11
src/rotator_library/error_handler.py +3 -7
src/rotator_library/usage_manager.py +144 -132

requirements.txt CHANGED Viewed

@@ -1,9 +1,7 @@
 # FastAPI framework for building the proxy server
 fastapi
 # ASGI server for running the FastAPI application
 uvicorn
 # For loading environment variables from a .env file
 python-dotenv
@@ -12,3 +10,7 @@ python-dotenv
 # A library for calling LLM APIs with a consistent format
 litellm

 # FastAPI framework for building the proxy server
 fastapi
 # ASGI server for running the FastAPI application
 uvicorn
 # For loading environment variables from a .env file
 python-dotenv
 # A library for calling LLM APIs with a consistent format
 litellm
+filelock
+httpx
+aiofiles

src/rotator_library/client.py CHANGED Viewed

@@ -58,7 +58,7 @@ class RotatingClient:
                 # Safely check for usage data in the chunk
                 if hasattr(chunk, 'usage') and chunk.usage:
                     lib_logger.info(f"Usage found in chunk for key ...{key[-4:]}: {chunk.usage}")
-                    self.usage_manager.record_success(key, model, chunk)
         finally:
             # Signal the end of the stream
@@ -110,23 +110,35 @@ class RotatingClient:
                         if is_streaming:
                             return self._streaming_wrapper(response, current_key, model)
                         else:
-                            self.usage_manager.record_success(current_key, model, response)
                             return response
                     except Exception as e:
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
-                        if is_server_error(e) or (is_rate_limit_error(e) and attempt < self.max_retries - 1):
-                            lib_logger.warning(f"Key ...{current_key[-4:]} failed with retriable error. Retrying...")
-                            await asyncio.sleep(1 * (attempt + 1))
-                            continue
                         if is_unrecoverable_error(e):
                             raise e
-                        lib_logger.error(f"Key ...{current_key[-4:]} failed permanently. Rotating...")
-                        self.usage_manager.record_rotation_error(current_key, model, e)
-                        break # Break from retry loop to acquire a new key
                 # If we exit the retry loop due to failure, release the key and try to get a new one.
                 await self.usage_manager.release_key(current_key)

                 # Safely check for usage data in the chunk
                 if hasattr(chunk, 'usage') and chunk.usage:
                     lib_logger.info(f"Usage found in chunk for key ...{key[-4:]}: {chunk.usage}")
+                    await self.usage_manager.record_success(key, model, chunk)
         finally:
             # Signal the end of the stream
                         if is_streaming:
                             return self._streaming_wrapper(response, current_key, model)
                         else:
+                            await self.usage_manager.record_success(current_key, model, response)
                             return response
                     except Exception as e:
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         if is_unrecoverable_error(e):
+                            lib_logger.error(f"Key ...{current_key[-4:]} failed with unrecoverable error: {e}. Raising exception.")
                             raise e
+                        if is_rate_limit_error(e):
+                            lib_logger.warning(f"Key ...{current_key[-4:]} hit a rate limit for model {model}. Rotating key and setting cooldown.")
+                            await self.usage_manager.record_rotation_error(current_key, model, e)
+                            break  # Break from retries to get a new key
+                        if is_server_error(e):
+                            if attempt < self.max_retries - 1:
+                                lib_logger.warning(f"Key ...{current_key[-4:]} encountered a server error. Retrying (attempt {attempt + 2}/{self.max_retries})...")
+                                await asyncio.sleep(1.5 * (attempt + 1))
+                                continue
+                            else:
+                                lib_logger.error(f"Key ...{current_key[-4:]} failed after max retries on a server error. Rotating key.")
+                                await self.usage_manager.record_rotation_error(current_key, model, e)
+                                break
+                        # Fallback for any other unexpected errors
+                        lib_logger.error(f"Key ...{current_key[-4:]} failed with an unexpected error: {e}. Rotating key.")
+                        await self.usage_manager.record_rotation_error(current_key, model, e)
+                        break
                 # If we exit the retry loop due to failure, release the key and try to get a new one.
                 await self.usage_manager.release_key(current_key)

src/rotator_library/error_handler.py CHANGED Viewed

@@ -1,9 +1,5 @@
 from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError, AuthenticationError, InvalidRequestError
-def is_authentication_error(e: Exception) -> bool:
-    """Checks if the exception is related to authentication."""
-    return isinstance(e, AuthenticationError)
 def is_rate_limit_error(e: Exception) -> bool:
     """Checks if the exception is a rate limit error."""
     return isinstance(e, RateLimitError)
@@ -14,7 +10,7 @@ def is_server_error(e: Exception) -> bool:
 def is_unrecoverable_error(e: Exception) -> bool:
     """
-    Checks if the exception is a non-retriable client-side error
-    (that is not an authentication error).
     """
-    return isinstance(e, InvalidRequestError)

 from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError, AuthenticationError, InvalidRequestError
 def is_rate_limit_error(e: Exception) -> bool:
     """Checks if the exception is a rate limit error."""
     return isinstance(e, RateLimitError)
 def is_unrecoverable_error(e: Exception) -> bool:
     """
+    Checks if the exception is a non-retriable client-side error.
+    These are errors that will not resolve on their own.
     """
+    return isinstance(e, (InvalidRequestError, AuthenticationError))

src/rotator_library/usage_manager.py CHANGED Viewed

@@ -3,20 +3,22 @@ import os
 import time
 import logging
 import asyncio
-from datetime import date, datetime
-from typing import Dict, List, Optional, Any
 from filelock import FileLock
 import litellm
 import re
 lib_logger = logging.getLogger('rotator_library')
-lib_logger.propagate = False # Ensure this logger doesn't propagate to root
 if not lib_logger.handlers:
     lib_logger.addHandler(logging.NullHandler())
 class UsageManager:
     """
-    Manages usage statistics and cooldowns for API keys with asyncio-safe locking.
     """
     def __init__(self, file_path: str = "key_usage.json", wait_timeout: int = 5):
         self.file_path = file_path
@@ -24,34 +26,62 @@ class UsageManager:
         self.key_locks: Dict[str, asyncio.Lock] = {}
         self.condition = asyncio.Condition()
         self.wait_timeout = wait_timeout
-        self.usage_data = self._load_usage()
-        self._reset_daily_stats_if_needed()
-    def _load_usage(self) -> Dict:
-        with self.file_lock:
             if not os.path.exists(self.file_path):
-                return {}
             try:
-                with open(self.file_path, 'r') as f:
-                    return json.load(f)
-            except (json.JSONDecodeError, IOError):
-                return {}
-    def _save_usage(self):
-        with self.file_lock:
-            with open(self.file_path, 'w') as f:
-                json.dump(self.usage_data, f, indent=2)
-    def _reset_daily_stats_if_needed(self):
-        """Checks if daily stats need to be reset for any key."""
         today_str = date.today().isoformat()
         needs_saving = False
-        for key, data in self.usage_data.items():
             daily_data = data.get("daily", {})
-            last_date_str = daily_data.get("date")
-            if last_date_str != today_str:
                 needs_saving = True
-                # Add yesterday's daily stats to global stats
                 global_data = data.setdefault("global", {"models": {}})
                 for model, stats in daily_data.get("models", {}).items():
                     global_model_stats = global_data["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0, "approx_cost": 0.0})
@@ -59,12 +89,10 @@ class UsageManager:
                     global_model_stats["prompt_tokens"] += stats.get("prompt_tokens", 0)
                     global_model_stats["completion_tokens"] += stats.get("completion_tokens", 0)
                     global_model_stats["approx_cost"] += stats.get("approx_cost", 0.0)
-                # Reset daily stats
                 data["daily"] = {"date": today_str, "models": {}}
         if needs_saving:
-            self._save_usage()
     def _initialize_locks(self, keys: List[str]):
         """Initializes asyncio locks for all provided keys if not already present."""
@@ -74,31 +102,29 @@ class UsageManager:
     async def acquire_key(self, available_keys: List[str], model: str) -> str:
         """
-        Acquires the best available key. If all are locked, waits for one to be
-        released or times out and returns the best-ranked key anyway.
         """
         self._initialize_locks(available_keys)
         async with self.condition:
             while True:
-                # Rank all keys that are not on cooldown
                 eligible_keys = []
-                for key in available_keys:
-                    key_data = self.usage_data.get(key, {})
-                    cooldown_until = key_data.get("model_cooldowns", {}).get(model)
-                    if not cooldown_until or time.time() > cooldown_until:
-                        usage_count = key_data.get("daily", {}).get("models", {}).get(model, {}).get("success_count", 0)
-                        eligible_keys.append((key, usage_count))
                 if not eligible_keys:
                     lib_logger.warning("All keys are on cooldown. Waiting...")
                     await asyncio.sleep(5)
                     continue
-                # Sort by usage count (ascending)
                 eligible_keys.sort(key=lambda x: x[1])
-                # Try to acquire the lock for the first unlocked key in the ranked list
                 for key, _ in eligible_keys:
                     lock = self.key_locks[key]
                     if not lock.locked():
@@ -106,112 +132,98 @@ class UsageManager:
                         lib_logger.info(f"Acquired lock for available key: ...{key[-4:]}")
                         return key
-                # If all eligible keys are locked, wait for a notification or timeout
-                best_locked_key = eligible_keys[0][0]
-                lib_logger.info(f"All eligible keys are locked. Waiting for a key to be released. Best candidate: ...{best_locked_key[-4:]}")
                 try:
                     await asyncio.wait_for(self.condition.wait(), timeout=self.wait_timeout)
-                    # If wait() returns, it means we were notified, so we re-run the loop
                     lib_logger.info("Notified that a key was released. Re-evaluating...")
                     continue
                 except asyncio.TimeoutError:
-                    # If we time out, we take the best-ranked key, even if it's locked
-                    lib_logger.warning(f"Wait timed out. Proceeding with best-ranked locked key: ...{best_locked_key[-4:]}")
-                    return best_locked_key
     async def release_key(self, key: str):
         """Releases the lock for a given key and notifies waiting tasks."""
         async with self.condition:
             if key in self.key_locks and self.key_locks[key].locked():
                 self.key_locks[key].release()
                 lib_logger.info(f"Released lock for key ...{key[-4:]}")
-                self.condition.notify() # Notify one waiting task
-    def record_success(self, key: str, model: str, completion_response: litellm.ModelResponse):
-        key_data = self.usage_data.setdefault(key, {"daily": {"date": date.today().isoformat(), "models": {}}, "global": {"models": {}}, "model_cooldowns": {}})
-        # Clear any cooldown for this specific model on success
-        if model in key_data.get("model_cooldowns", {}):
-            del key_data["model_cooldowns"][model]
-        # Ensure daily stats are for today
-        if key_data["daily"].get("date") != date.today().isoformat():
-            self._reset_daily_stats_if_needed()
-            key_data = self.usage_data[key]
-        daily_model_data = key_data["daily"]["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0, "approx_cost": 0.0})
-        usage = completion_response.usage
-        daily_model_data["success_count"] += 1
-        daily_model_data["prompt_tokens"] += usage.prompt_tokens
-        daily_model_data["completion_tokens"] += usage.completion_tokens
-        try:
-            cost = litellm.completion_cost(completion_response=completion_response)
-            daily_model_data["approx_cost"] += cost
-        except Exception as e:
-            lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
-        key_data["last_used_ts"] = time.time()
-        self._save_usage()
-    def record_rotation_error(self, key: str, model: str, error: Exception):
-        key_data = self.usage_data.setdefault(key, {"daily": {"date": date.today().isoformat(), "models": {}}, "global": {"models": {}}, "model_cooldowns": {}})
-        cooldown_seconds = 86400  # Default cooldown of 24 hours
-        error_str = str(error).lower()
-        if "retry_delay" in error_str:
-            try:
-                # Try multiple patterns to extract delay from error message
-                delay_str = None
-                # Pattern 1: retry_delay...seconds format
-                if "retry_delay" in error_str and "seconds:" in error_str:
                     try:
-                        delay_str = error_str.split("retry_delay")[1].split("seconds:")[1].strip().split("}")[0]
-                    except (IndexError, AttributeError):
-                        pass
-                # Pattern 2: retryDelay with 's' suffix (Gemini format)
-                if not delay_str and "retrydelay" in error_str:
-                    try:
-                        match = re.search(r'"retrydelay":\s*"(\d+)s"', error_str)
-                        if match:
-                            delay_str = match.group(1)
-                    except Exception:
-                        pass
-                # Pattern 3: Generic numeric extraction for retry/delay contexts
-                if not delay_str:
-                    try:
-                        # Look for numbers followed by 's' or 'seconds' in retry/delay context
-                        patterns = [
-                            r'retry.*?(\d+)s',
-                            r'delay.*?(\d+)s',
-                            r'wait.*?(\d+)\s*seconds?'
-                        ]
-                        for pattern in patterns:
-                            match = re.search(pattern, error_str, re.IGNORECASE)
-                            if match:
-                                delay_str = match.group(1)
-                                break
-                    except Exception:
-                        pass
-                if delay_str:
-                    cooldown_seconds = int(delay_str)
-                cooldown_seconds = int(delay_str)
-            except (IndexError, ValueError):
-                pass
-        model_cooldowns = key_data.setdefault("model_cooldowns", {})
-        model_cooldowns[model] = time.time() + cooldown_seconds
-        key_data["last_rotation_error"] = {
-            "timestamp": time.time(),
-            "model": model,
-            "error": str(error)
-        }
-        self._save_usage()

 import time
 import logging
 import asyncio
+from datetime import date
+from typing import Dict, List, Optional, Set
 from filelock import FileLock
+import aiofiles
 import litellm
 import re
 lib_logger = logging.getLogger('rotator_library')
+lib_logger.propagate = False
 if not lib_logger.handlers:
     lib_logger.addHandler(logging.NullHandler())
 class UsageManager:
     """
+    Manages usage statistics and cooldowns for API keys with asyncio-safe locking,
+    asynchronous file I/O, and a lazy-loading mechanism for usage data.
     """
     def __init__(self, file_path: str = "key_usage.json", wait_timeout: int = 5):
         self.file_path = file_path
         self.key_locks: Dict[str, asyncio.Lock] = {}
         self.condition = asyncio.Condition()
         self.wait_timeout = wait_timeout
+        # Data-related locks and state
+        self._data_lock = asyncio.Lock()
+        self._usage_data: Optional[Dict] = None
+        self._initialized = asyncio.Event()
+        self._init_lock = asyncio.Lock()
+        # For "fair timeout" logic
+        self._timeout_lock = asyncio.Lock()
+        self._claimed_on_timeout: Set[str] = set()
+    async def _lazy_init(self):
+        """
+        Initializes the usage data by loading it from the file asynchronously.
+        This method is called on the first access to ensure data is loaded
+        before any operations are performed.
+        """
+        async with self._init_lock:
+            if not self._initialized.is_set():
+                await self._load_usage()
+                await self._reset_daily_stats_if_needed()
+                self._initialized.set()
+    async def _load_usage(self):
+        """Loads usage data from the JSON file asynchronously."""
+        async with self._data_lock:
             if not os.path.exists(self.file_path):
+                self._usage_data = {}
+                return
             try:
+                async with aiofiles.open(self.file_path, 'r') as f:
+                    content = await f.read()
+                    self._usage_data = json.loads(content)
+            except (json.JSONDecodeError, IOError, FileNotFoundError):
+                self._usage_data = {}
+    async def _save_usage(self):
+        """Saves the current usage data to the JSON file asynchronously."""
+        if self._usage_data is None:
+            return
+        async with self._data_lock:
+            with self.file_lock: # Use filelock to prevent multi-process race conditions
+                async with aiofiles.open(self.file_path, 'w') as f:
+                    await f.write(json.dumps(self._usage_data, indent=2))
+    async def _reset_daily_stats_if_needed(self):
+        """Checks if daily stats need to be reset for any key (async version)."""
+        if self._usage_data is None:
+            return
         today_str = date.today().isoformat()
         needs_saving = False
+        for key, data in self._usage_data.items():
             daily_data = data.get("daily", {})
+            if daily_data.get("date") != today_str:
                 needs_saving = True
                 global_data = data.setdefault("global", {"models": {}})
                 for model, stats in daily_data.get("models", {}).items():
                     global_model_stats = global_data["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0, "approx_cost": 0.0})
                     global_model_stats["prompt_tokens"] += stats.get("prompt_tokens", 0)
                     global_model_stats["completion_tokens"] += stats.get("completion_tokens", 0)
                     global_model_stats["approx_cost"] += stats.get("approx_cost", 0.0)
                 data["daily"] = {"date": today_str, "models": {}}
         if needs_saving:
+            await self._save_usage()
     def _initialize_locks(self, keys: List[str]):
         """Initializes asyncio locks for all provided keys if not already present."""
     async def acquire_key(self, available_keys: List[str], model: str) -> str:
         """
+        Acquires the best available key with robust locking and a fair timeout mechanism.
         """
+        await self._lazy_init()
         self._initialize_locks(available_keys)
         async with self.condition:
             while True:
                 eligible_keys = []
+                async with self._data_lock:
+                    for key in available_keys:
+                        key_data = self._usage_data.get(key, {})
+                        cooldown_until = key_data.get("model_cooldowns", {}).get(model)
+                        if not cooldown_until or time.time() > cooldown_until:
+                            usage_count = key_data.get("daily", {}).get("models", {}).get(model, {}).get("success_count", 0)
+                            eligible_keys.append((key, usage_count))
                 if not eligible_keys:
                     lib_logger.warning("All keys are on cooldown. Waiting...")
                     await asyncio.sleep(5)
                     continue
                 eligible_keys.sort(key=lambda x: x[1])
                 for key, _ in eligible_keys:
                     lock = self.key_locks[key]
                     if not lock.locked():
                         lib_logger.info(f"Acquired lock for available key: ...{key[-4:]}")
                         return key
+                lib_logger.info("All eligible keys are locked. Waiting for a key to be released.")
                 try:
                     await asyncio.wait_for(self.condition.wait(), timeout=self.wait_timeout)
                     lib_logger.info("Notified that a key was released. Re-evaluating...")
                     continue
                 except asyncio.TimeoutError:
+                    lib_logger.warning("Wait timed out. Attempting to acquire a key via fair timeout logic.")
+                    async with self._timeout_lock:
+                        for key, _ in eligible_keys:
+                            if key not in self._claimed_on_timeout:
+                                self._claimed_on_timeout.add(key)
+                                lib_logger.info(f"Acquired key ...{key[-4:]} via timeout claim.")
+                                return key
+                    lib_logger.error("Timeout occurred, but all eligible keys were already claimed by other timed-out tasks.")
+                    # Fallback to waiting again if all keys were claimed
+                    await asyncio.sleep(1)
     async def release_key(self, key: str):
         """Releases the lock for a given key and notifies waiting tasks."""
         async with self.condition:
+            # Also release from timeout claim set if it's there
+            async with self._timeout_lock:
+                if key in self._claimed_on_timeout:
+                    self._claimed_on_timeout.remove(key)
             if key in self.key_locks and self.key_locks[key].locked():
                 self.key_locks[key].release()
                 lib_logger.info(f"Released lock for key ...{key[-4:]}")
+                self.condition.notify()
+    async def record_success(self, key: str, model: str, completion_response: litellm.ModelResponse):
+        """Records a successful API call asynchronously."""
+        await self._lazy_init()
+        async with self._data_lock:
+            key_data = self._usage_data.setdefault(key, {"daily": {"date": date.today().isoformat(), "models": {}}, "global": {"models": {}}, "model_cooldowns": {}})
+            if model in key_data.get("model_cooldowns", {}):
+                del key_data["model_cooldowns"][model]
+            if key_data["daily"].get("date") != date.today().isoformat():
+                # This is a simplified reset for the current key. A full reset is done in _lazy_init.
+                key_data["daily"] = {"date": date.today().isoformat(), "models": {}}
+            daily_model_data = key_data["daily"]["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0, "approx_cost": 0.0})
+            usage = completion_response.usage
+            daily_model_data["success_count"] += 1
+            daily_model_data["prompt_tokens"] += usage.prompt_tokens
+            daily_model_data["completion_tokens"] += usage.completion_tokens
+            try:
+                cost = litellm.completion_cost(completion_response=completion_response)
+                daily_model_data["approx_cost"] += cost
+            except Exception as e:
+                lib_logger.warning(f"Could not calculate cost for model {model}: {e}")
+            key_data["last_used_ts"] = time.time()
+        await self._save_usage()
+    async def record_rotation_error(self, key: str, model: str, error: Exception):
+        """Records a rotation error and sets a cooldown asynchronously."""
+        await self._lazy_init()
+        async with self._data_lock:
+            key_data = self._usage_data.setdefault(key, {"daily": {"date": date.today().isoformat(), "models": {}}, "global": {"models": {}}, "model_cooldowns": {}})
+            cooldown_seconds = 86400
+            error_str = str(error).lower()
+            patterns = [
+                r'retry_delay.*?(\d+)',
+                r'retrydelay.*?(\d+)s',
+                r'wait.*?(\d+)\s*seconds?'
+            ]
+            for pattern in patterns:
+                match = re.search(pattern, error_str, re.IGNORECASE)
+                if match:
                     try:
+                        cooldown_seconds = int(match.group(1))
+                        break
+                    except (ValueError, IndexError):
+                        continue
+            model_cooldowns = key_data.setdefault("model_cooldowns", {})
+            model_cooldowns[model] = time.time() + cooldown_seconds
+            key_data["last_rotation_error"] = {
+                "timestamp": time.time(),
+                "model": model,
+                "error": str(error)
+            }
+        await self._save_usage()