Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Jul 7, 2025

Commit

a20e9d0

1 Parent(s): c350819

refactor: Enhance error handling and retry logic

This commit improves the robustness of the client by implementing more granular error handling and retry strategies.

The changes introduce three distinct behaviors based on the classified error type:

1. **Fail Fast:** For unrecoverable errors like `InvalidRequestError` and the newly classified `ContextWindowExceededError`, the request now fails immediately. This prevents wasting time and keys on a request that cannot be fixed by rotation.

2. **Retry with Backoff:** For temporary issues like `server_error` and `api_connection`, the client will retry with the same key using exponential backoff. `APIConnectionError` and `Timeout` are now grouped under the `api_connection` error type and use a longer initial backoff period (a 5-second base instead of 1 second) to better handle network instability.

3. **Rotate Key:** For key-specific issues like `rate_limit` or `authentication`, the client records the failure and immediately tries the next available key.

Files changed (2) hide show

src/rotator_library/client.py +15 -12
src/rotator_library/error_handler.py +16 -2

src/rotator_library/client.py CHANGED Viewed

@@ -193,27 +193,30 @@ class RotatingClient:
                         classified_error = classify_error(e)
-                        if classified_error.error_type == 'server_error':
-                            # This is a temporary error, so record the failure and retry.
                             await self.usage_manager.record_failure(current_key, model, classified_error)
-                            # If it's the last attempt for this key, we break to try the next key.
                             if attempt >= self.max_retries - 1:
-                                lib_logger.warning(f"Key ...{current_key[-4:]} failed on final retry for server_error. Trying next key.")
                                 break
-                            # Otherwise, wait and retry with the same key.
-                            wait_time = classified_error.retry_after or (2 ** attempt) + random.uniform(0, 1)
-                            lib_logger.warning(f"Key ...{current_key[-4:]} encountered a server_error. Retrying in {wait_time:.2f} seconds...")
                             await asyncio.sleep(wait_time)
                             continue
-                        # For rate limits or other permanent errors, record the failure and break to try the next key.
                         await self.usage_manager.record_failure(current_key, model, classified_error)
-                        if classified_error.error_type == 'rate_limit':
-                            lib_logger.warning(f"Key ...{current_key[-4:]} rate limited. Trying next key.")
-                        else:
-                            lib_logger.warning(f"Key ...{current_key[-4:]} encountered a permanent {classified_error.error_type}. Trying next key.")
                         break
             finally:
                 # This block ensures the key is always released if it was acquired but not passed to the wrapper.

                         classified_error = classify_error(e)
+                        if classified_error.error_type in ['invalid_request', 'context_window_exceeded']:
+                            # These errors are not recoverable by rotating keys, so fail fast.
+                            lib_logger.error(f"Unrecoverable error '{classified_error.error_type}' with key ...{current_key[-4:]}. Failing request.")
+                            raise last_exception
+                        if classified_error.error_type in ['server_error', 'api_connection']:
+                            # These are temporary, so record the failure and retry with backoff.
                             await self.usage_manager.record_failure(current_key, model, classified_error)
                             if attempt >= self.max_retries - 1:
+                                lib_logger.warning(f"Key ...{current_key[-4:]} failed on final retry for {classified_error.error_type}. Trying next key.")
                                 break
+                            # Use a longer cooldown for API connection errors
+                            base_wait = 5 if classified_error.error_type == 'api_connection' else 1
+                            wait_time = classified_error.retry_after or (base_wait * (2 ** attempt)) + random.uniform(0, 1)
+                            lib_logger.warning(f"Key ...{current_key[-4:]} encountered a {classified_error.error_type}. Retrying in {wait_time:.2f} seconds...")
                             await asyncio.sleep(wait_time)
                             continue
+                        # For other errors (rate_limit, authentication, unknown), record failure and try the next key.
                         await self.usage_manager.record_failure(current_key, model, classified_error)
+                        lib_logger.warning(f"Key ...{current_key[-4:]} encountered '{classified_error.error_type}'. Trying next key.")
                         break
             finally:
                 # This block ensures the key is always released if it was acquired but not passed to the wrapper.

src/rotator_library/error_handler.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import re
 from typing import Optional, Dict, Any
-from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError, AuthenticationError, InvalidRequestError, BadRequestError, OpenAIError, InternalServerError
 class ClassifiedError:
     """A structured representation of a classified error."""
@@ -97,8 +97,22 @@ def classify_error(e: Exception) -> ClassifiedError:
             original_exception=e,
             status_code=status_code or 400
         )
-    if isinstance(e, (ServiceUnavailableError, APIConnectionError, OpenAIError, InternalServerError)):
         # These are often temporary server-side issues
         return ClassifiedError(
             error_type='server_error',

 import re
 from typing import Optional, Dict, Any
+from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError, AuthenticationError, InvalidRequestError, BadRequestError, OpenAIError, InternalServerError, Timeout, ContextWindowExceededError
 class ClassifiedError:
     """A structured representation of a classified error."""
             original_exception=e,
             status_code=status_code or 400
         )
+    if isinstance(e, ContextWindowExceededError):
+        return ClassifiedError(
+            error_type='context_window_exceeded',
+            original_exception=e,
+            status_code=status_code or 400
+        )
+    if isinstance(e, (APIConnectionError, Timeout)):
+        return ClassifiedError(
+            error_type='api_connection',
+            original_exception=e,
+            status_code=status_code or 503 # Treat like a server error
+        )
+    if isinstance(e, (ServiceUnavailableError, InternalServerError, OpenAIError)):
         # These are often temporary server-side issues
         return ClassifiedError(
             error_type='server_error',