Spaces:
Paused
refactor: Enhance error handling and retry logic
Browse files
This commit improves the robustness of the client by implementing more granular error handling and retry strategies.
The changes introduce three distinct behaviors based on the classified error type:
1. **Fail Fast:** For unrecoverable errors like `InvalidRequestError` and the newly classified `ContextWindowExceededError`, the request now fails immediately. This prevents wasting time and keys on a request that cannot be fixed by rotation.
2. **Retry with Backoff:** For temporary issues classified as `server_error` or `api_connection`, the client retries with the same key using exponential backoff; once the retries for that key are exhausted, it moves on to the next key. `APIConnectionError` and `Timeout` are now both classified as `api_connection`, which uses a longer initial backoff period to better handle network instability.
3. **Rotate Key:** For all other errors, including key-specific issues like `rate_limit` or `authentication` as well as unknown errors, the client records the failure and immediately tries the next available key.
|
@@ -193,27 +193,30 @@ class RotatingClient:
|
|
| 193 |
|
| 194 |
classified_error = classify_error(e)
|
| 195 |
|
| 196 |
-
if classified_error.error_type
|
| 197 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
await self.usage_manager.record_failure(current_key, model, classified_error)
|
| 199 |
|
| 200 |
-
# If it's the last attempt for this key, we break to try the next key.
|
| 201 |
if attempt >= self.max_retries - 1:
|
| 202 |
-
lib_logger.warning(f"Key ...{current_key[-4:]} failed on final retry for
|
| 203 |
break
|
| 204 |
|
| 205 |
-
#
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
| 208 |
await asyncio.sleep(wait_time)
|
| 209 |
continue
|
| 210 |
|
| 211 |
-
# For
|
| 212 |
await self.usage_manager.record_failure(current_key, model, classified_error)
|
| 213 |
-
|
| 214 |
-
lib_logger.warning(f"Key ...{current_key[-4:]} rate limited. Trying next key.")
|
| 215 |
-
else:
|
| 216 |
-
lib_logger.warning(f"Key ...{current_key[-4:]} encountered a permanent {classified_error.error_type}. Trying next key.")
|
| 217 |
break
|
| 218 |
finally:
|
| 219 |
# This block ensures the key is always released if it was acquired but not passed to the wrapper.
|
|
|
|
| 193 |
|
| 194 |
classified_error = classify_error(e)
|
| 195 |
|
| 196 |
+
if classified_error.error_type in ['invalid_request', 'context_window_exceeded']:
|
| 197 |
+
# These errors are not recoverable by rotating keys, so fail fast.
|
| 198 |
+
lib_logger.error(f"Unrecoverable error '{classified_error.error_type}' with key ...{current_key[-4:]}. Failing request.")
|
| 199 |
+
raise last_exception
|
| 200 |
+
|
| 201 |
+
if classified_error.error_type in ['server_error', 'api_connection']:
|
| 202 |
+
# These are temporary, so record the failure and retry with backoff.
|
| 203 |
await self.usage_manager.record_failure(current_key, model, classified_error)
|
| 204 |
|
|
|
|
| 205 |
if attempt >= self.max_retries - 1:
|
| 206 |
+
lib_logger.warning(f"Key ...{current_key[-4:]} failed on final retry for {classified_error.error_type}. Trying next key.")
|
| 207 |
break
|
| 208 |
|
| 209 |
+
# Use a longer cooldown for API connection errors
|
| 210 |
+
base_wait = 5 if classified_error.error_type == 'api_connection' else 1
|
| 211 |
+
wait_time = classified_error.retry_after or (base_wait * (2 ** attempt)) + random.uniform(0, 1)
|
| 212 |
+
|
| 213 |
+
lib_logger.warning(f"Key ...{current_key[-4:]} encountered a {classified_error.error_type}. Retrying in {wait_time:.2f} seconds...")
|
| 214 |
await asyncio.sleep(wait_time)
|
| 215 |
continue
|
| 216 |
|
| 217 |
+
# For other errors (rate_limit, authentication, unknown), record failure and try the next key.
|
| 218 |
await self.usage_manager.record_failure(current_key, model, classified_error)
|
| 219 |
+
lib_logger.warning(f"Key ...{current_key[-4:]} encountered '{classified_error.error_type}'. Trying next key.")
|
|
|
|
|
|
|
|
|
|
| 220 |
break
|
| 221 |
finally:
|
| 222 |
# This block ensures the key is always released if it was acquired but not passed to the wrapper.
|
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import re
|
| 2 |
from typing import Optional, Dict, Any
|
| 3 |
|
| 4 |
-
from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError, AuthenticationError, InvalidRequestError, BadRequestError, OpenAIError, InternalServerError
|
| 5 |
|
| 6 |
class ClassifiedError:
|
| 7 |
"""A structured representation of a classified error."""
|
|
@@ -97,8 +97,22 @@ def classify_error(e: Exception) -> ClassifiedError:
|
|
| 97 |
original_exception=e,
|
| 98 |
status_code=status_code or 400
|
| 99 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
if isinstance(e, (ServiceUnavailableError,
|
| 102 |
# These are often temporary server-side issues
|
| 103 |
return ClassifiedError(
|
| 104 |
error_type='server_error',
|
|
|
|
| 1 |
import re
|
| 2 |
from typing import Optional, Dict, Any
|
| 3 |
|
| 4 |
+
from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError, AuthenticationError, InvalidRequestError, BadRequestError, OpenAIError, InternalServerError, Timeout, ContextWindowExceededError
|
| 5 |
|
| 6 |
class ClassifiedError:
|
| 7 |
"""A structured representation of a classified error."""
|
|
|
|
| 97 |
original_exception=e,
|
| 98 |
status_code=status_code or 400
|
| 99 |
)
|
| 100 |
+
|
| 101 |
+
if isinstance(e, ContextWindowExceededError):
|
| 102 |
+
return ClassifiedError(
|
| 103 |
+
error_type='context_window_exceeded',
|
| 104 |
+
original_exception=e,
|
| 105 |
+
status_code=status_code or 400
|
| 106 |
+
)
|
| 107 |
+
|
| 108 |
+
if isinstance(e, (APIConnectionError, Timeout)):
|
| 109 |
+
return ClassifiedError(
|
| 110 |
+
error_type='api_connection',
|
| 111 |
+
original_exception=e,
|
| 112 |
+
status_code=status_code or 503 # Treat like a server error
|
| 113 |
+
)
|
| 114 |
|
| 115 |
+
if isinstance(e, (ServiceUnavailableError, InternalServerError, OpenAIError)):
|
| 116 |
# These are often temporary server-side issues
|
| 117 |
return ClassifiedError(
|
| 118 |
error_type='server_error',
|