Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Jul 7, 2025

Commit

00b549c

1 Parent(s): 80dbe0b

feat: Implement global provider cooldown for IP rate limits

Add a `CooldownManager` to handle IP-based rate limiting (HTTP 429 errors).
When a provider returns a 429 `RateLimitError`, it is put into a global cooldown state for a duration specified by `retry_after` or a default.
Subsequent requests to that provider will pause until the cooldown expires.

This prevents continuously hitting rate-limited IPs, improving reliability and reducing unnecessary retries.

Files changed (2) hide show

src/rotator_library/client.py +15 -3
src/rotator_library/cooldown_manager.py +37 -0

src/rotator_library/client.py CHANGED Viewed

@@ -20,6 +20,7 @@ from .failure_logger import log_failure
 from .error_handler import classify_error, AllProviders
 from .providers import PROVIDER_PLUGINS
 from .request_sanitizer import sanitize_request_payload
 class StreamedAPIError(Exception):
     """Custom exception to signal an API error received over a stream."""
@@ -46,6 +47,7 @@ class RotatingClient:
         self._provider_instances = {}
         self.http_client = httpx.AsyncClient()
         self.all_providers = AllProviders()
     async def __aenter__(self):
         return self
@@ -190,6 +192,11 @@ class RotatingClient:
             current_key = None
             key_acquired = False
             try:
                 keys_to_try = [k for k in keys_for_provider if k not in tried_keys]
                 if not keys_to_try:
                     break
@@ -236,14 +243,19 @@ class RotatingClient:
                             key_acquired = False
                             return response
-                    except (StreamedAPIError, APIConnectionError) as e:
-                        # These errors are caught to allow retrying with the next key.
                         last_exception = e
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         classified_error = classify_error(e)
                         await self.usage_manager.record_failure(current_key, model, classified_error)
                         lib_logger.warning(f"Key ...{current_key[-4:]} encountered '{classified_error.error_type}'. Trying next key.")
-                        break # Break from retry loop, try next key
                     except Exception as e:
                         last_exception = e

 from .error_handler import classify_error, AllProviders
 from .providers import PROVIDER_PLUGINS
 from .request_sanitizer import sanitize_request_payload
+from .cooldown_manager import CooldownManager
 class StreamedAPIError(Exception):
     """Custom exception to signal an API error received over a stream."""
         self._provider_instances = {}
         self.http_client = httpx.AsyncClient()
         self.all_providers = AllProviders()
+        self.cooldown_manager = CooldownManager()
     async def __aenter__(self):
         return self
             current_key = None
             key_acquired = False
             try:
+                if await self.cooldown_manager.is_cooling_down(provider):
+                    remaining_time = await self.cooldown_manager.get_cooldown_remaining(provider)
+                    lib_logger.warning(f"Provider {provider} is in cooldown. Waiting for {remaining_time:.2f} seconds.")
+                    await asyncio.sleep(remaining_time)
                 keys_to_try = [k for k in keys_for_provider if k not in tried_keys]
                 if not keys_to_try:
                     break
                             key_acquired = False
                             return response
+                    except (StreamedAPIError, APIConnectionError, litellm.RateLimitError) as e:
                         last_exception = e
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         classified_error = classify_error(e)
+                        if classified_error.error_type == 'rate_limit' and classified_error.status_code == 429:
+                            cooldown_duration = classified_error.retry_after or 60
+                            await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
+                            lib_logger.error(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
                         await self.usage_manager.record_failure(current_key, model, classified_error)
                         lib_logger.warning(f"Key ...{current_key[-4:]} encountered '{classified_error.error_type}'. Trying next key.")
+                        break
                     except Exception as e:
                         last_exception = e

src/rotator_library/cooldown_manager.py ADDED Viewed

	@@ -0,0 +1,37 @@

+import asyncio
+import time
+from typing import Dict
class CooldownManager:
    """Track global, per-provider cooldown windows for IP-based rate limits.

    When a provider answers with HTTP 429, the caller registers a cooldown
    for that provider; until the cooldown deadline passes, callers can query
    this manager and pause before sending further requests to that provider.

    Deadlines are stored on the monotonic clock so that wall-clock
    adjustments (NTP corrections, manual changes) cannot lengthen or cancel
    a cooldown that is in progress.
    """

    def __init__(self):
        # provider name -> time.monotonic() value at which its cooldown ends.
        self._cooldowns: Dict[str, float] = {}
        # Serializes access to _cooldowns across coroutines.
        self._lock = asyncio.Lock()

    async def is_cooling_down(self, provider: str) -> bool:
        """Return True if *provider* currently has an unexpired cooldown."""
        async with self._lock:
            deadline = self._cooldowns.get(provider)
            return deadline is not None and time.monotonic() < deadline

    async def start_cooldown(self, provider: str, duration: float):
        """Initiate or extend the cooldown for *provider*.

        The cooldown deadline becomes "now + duration" seconds, unless the
        provider already has a later deadline. An existing cooldown is never
        shortened: a concurrent failure that carries a smaller (or default)
        retry interval must not cut short a longer one already in force.

        Args:
            provider: Name of the provider to place in cooldown.
            duration: Cooldown length in seconds (int or float).
        """
        async with self._lock:
            new_deadline = time.monotonic() + duration
            current_deadline = self._cooldowns.get(provider)
            if current_deadline is None or new_deadline > current_deadline:
                self._cooldowns[provider] = new_deadline

    async def get_cooldown_remaining(self, provider: str) -> float:
        """Return the seconds left in *provider*'s cooldown.

        Returns 0.0 when the provider has no cooldown or it has already
        expired; the result is never negative.
        """
        async with self._lock:
            deadline = self._cooldowns.get(provider)
            if deadline is None:
                return 0.0
            return max(0.0, deadline - time.monotonic())