Spaces:
Paused
Paused
Mirrowel committed on
Commit ·
b0569d9
1
Parent(s): 4bbfff4
feat: Add pre-request callback support and exponential backoff for server error retries in acompletion method
Browse files
src/rotator_library/client.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import os
|
|
|
|
| 4 |
import httpx
|
| 5 |
import litellm
|
| 6 |
from litellm.litellm_core_utils.token_counter import token_counter
|
|
@@ -66,7 +67,7 @@ class RotatingClient:
|
|
| 66 |
lib_logger.info("STREAM FINISHED and [DONE] signal sent.")
|
| 67 |
|
| 68 |
|
| 69 |
-
async def acompletion(self, **kwargs) -> Any:
|
| 70 |
"""
|
| 71 |
Performs a completion call with smart key rotation and retry logic.
|
| 72 |
Handles both streaming and non-streaming requests with thread-safe key acquisition.
|
|
@@ -104,6 +105,9 @@ class RotatingClient:
|
|
| 104 |
if provider == "chutes":
|
| 105 |
litellm_kwargs["model"] = f"openai/{model.split('/', 1)[1]}"
|
| 106 |
litellm_kwargs["api_base"] = "https://llm.chutes.ai/v1"
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
response = await litellm.acompletion(api_key=current_key, **litellm_kwargs)
|
| 109 |
|
|
@@ -127,8 +131,9 @@ class RotatingClient:
|
|
| 127 |
|
| 128 |
if is_server_error(e):
|
| 129 |
if attempt < self.max_retries - 1:
|
| 130 |
-
|
| 131 |
-
|
|
|
|
| 132 |
continue
|
| 133 |
else:
|
| 134 |
lib_logger.error(f"Key ...{current_key[-4:]} failed after max retries on a server error. Rotating key.")
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import os
|
| 4 |
+
import random
|
| 5 |
import httpx
|
| 6 |
import litellm
|
| 7 |
from litellm.litellm_core_utils.token_counter import token_counter
|
|
|
|
| 67 |
lib_logger.info("STREAM FINISHED and [DONE] signal sent.")
|
| 68 |
|
| 69 |
|
| 70 |
+
async def acompletion(self, pre_request_callback: callable = None, **kwargs) -> Any:
|
| 71 |
"""
|
| 72 |
Performs a completion call with smart key rotation and retry logic.
|
| 73 |
Handles both streaming and non-streaming requests with thread-safe key acquisition.
|
|
|
|
| 105 |
if provider == "chutes":
|
| 106 |
litellm_kwargs["model"] = f"openai/{model.split('/', 1)[1]}"
|
| 107 |
litellm_kwargs["api_base"] = "https://llm.chutes.ai/v1"
|
| 108 |
+
|
| 109 |
+
if pre_request_callback:
|
| 110 |
+
await pre_request_callback()
|
| 111 |
|
| 112 |
response = await litellm.acompletion(api_key=current_key, **litellm_kwargs)
|
| 113 |
|
|
|
|
| 131 |
|
| 132 |
if is_server_error(e):
|
| 133 |
if attempt < self.max_retries - 1:
|
| 134 |
+
wait_time = (2 ** attempt) + random.uniform(0, 1)
|
| 135 |
+
lib_logger.warning(f"Key ...{current_key[-4:]} encountered a server error. Retrying in {wait_time:.2f} seconds...")
|
| 136 |
+
await asyncio.sleep(wait_time)
|
| 137 |
continue
|
| 138 |
else:
|
| 139 |
lib_logger.error(f"Key ...{current_key[-4:]} failed after max retries on a server error. Rotating key.")
|