Spaces:
Paused
feat(provider): introduce OAuth credential management and custom provider handling
Browse files- Implemented a new `CredentialManager` to discover and manage OAuth credential files from standard paths (`~/.gemini`, `~/.qwen`).
- Added a `BackgroundRefresher` to proactively refresh OAuth tokens before they expire, ensuring continuous service.
- Refactored `RotatingClient` to support both API keys and OAuth credentials for provider authentication.
- Integrated `litellm_provider_params` in `RotatingClient` to allow specific LiteLLM configurations per provider (e.g., Google Cloud project ID for Gemini CLI).
- Introduced a `has_custom_logic` flag and `acompletion` method in `ProviderInterface` to enable custom handling for providers like Gemini CLI and Qwen Code, which require specific request formats, authentication, or stream parsing not fully supported by LiteLLM's standard interface.
- Updated `proxy_app/main.py` to utilize the new OAuth credential loading, provider-specific LiteLLM parameters, and the background token refresher.
- Enhanced `error_handler.py` to classify `httpx` exceptions, improving error reporting and retry logic for network and HTTP errors.
- Added `.env.example` entries for configuring Gemini CLI project ID and Qwen/Gemini OAuth credential paths.
BREAKING CHANGE: The constructor for `RotatingClient` has been updated. It now requires an `oauth_credentials` dictionary (can be empty) and accepts an optional `litellm_provider_params` dictionary. Direct instantiations of `RotatingClient` must be updated to include these new arguments.
- .env.example +12 -0
- src/proxy_app/main.py +32 -33
- src/rotator_library/background_refresher.py +57 -0
- src/rotator_library/client.py +234 -146
- src/rotator_library/credential_manager.py +70 -0
- src/rotator_library/error_handler.py +17 -2
- src/rotator_library/providers/__init__.py +2 -2
- src/rotator_library/providers/gemini_auth_base.py +102 -0
- src/rotator_library/providers/gemini_cli_provider.py +171 -0
- src/rotator_library/providers/provider_interface.py +38 -5
- src/rotator_library/providers/qwen_auth_base.py +101 -0
- src/rotator_library/providers/qwen_code_provider.py +71 -0
|
@@ -11,3 +11,15 @@ NVIDIA_NIM_API_KEY_2="YOUR_NVIDIA_NIM_API_KEY_2"
|
|
| 11 |
|
| 12 |
# A secret key for your proxy server to authenticate requests (can be anything; used for compatibility)
|
| 13 |
PROXY_API_KEY="YOUR_PROXY_API_KEY"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# A secret key for your proxy server to authenticate requests (can be anything; used for compatibility)
|
| 13 |
PROXY_API_KEY="YOUR_PROXY_API_KEY"
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# --- OAuth Accounts ---
|
| 17 |
+
# The system will automatically discover standard paths if left blank.
|
| 18 |
+
|
| 19 |
+
# For Gemini CLI (uses a custom API)
|
| 20 |
+
GEMINI_CLI_OAUTH_1=
|
| 21 |
+
# Required for Gemini CLI: Your Google Cloud Project ID
|
| 22 |
+
GEMINI_CLI_PROJECT_ID="gen-lang-client-..."
|
| 23 |
+
|
| 24 |
+
# For Qwen Code (OpenAI Compatible)
|
| 25 |
+
QWEN_CODE_OAUTH_1=
|
|
@@ -52,6 +52,8 @@ args, _ = parser.parse_known_args()
|
|
| 52 |
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
| 53 |
|
| 54 |
from rotator_library import RotatingClient, PROVIDER_PLUGINS
|
|
|
|
|
|
|
| 55 |
from proxy_app.request_logger import log_request_to_console
|
| 56 |
from proxy_app.batch_manager import EmbeddingBatcher
|
| 57 |
from proxy_app.detailed_logger import DetailedLogger
|
|
@@ -125,19 +127,28 @@ PROXY_API_KEY = os.getenv("PROXY_API_KEY")
|
|
| 125 |
if not PROXY_API_KEY:
|
| 126 |
raise ValueError("PROXY_API_KEY environment variable not set.")
|
| 127 |
|
| 128 |
-
#
|
| 129 |
api_keys = {}
|
|
|
|
| 130 |
for key, value in os.environ.items():
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
if provider not in api_keys:
|
| 136 |
api_keys[provider] = []
|
| 137 |
api_keys[provider].append(value)
|
| 138 |
|
| 139 |
-
if not api_keys:
|
| 140 |
-
raise ValueError("No provider API keys found in environment variables.")
|
| 141 |
|
| 142 |
# Load model ignore lists from environment variables
|
| 143 |
ignore_models = {}
|
|
@@ -152,8 +163,20 @@ for key, value in os.environ.items():
|
|
| 152 |
@asynccontextmanager
|
| 153 |
async def lifespan(app: FastAPI):
|
| 154 |
"""Manage the RotatingClient's lifecycle with the app's lifespan."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
# The client now uses the root logger configuration
|
| 156 |
-
client = RotatingClient(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
app.state.rotating_client = client
|
| 158 |
os.environ["LITELLM_LOG"] = "ERROR"
|
| 159 |
litellm.set_verbose = False
|
|
@@ -168,6 +191,7 @@ async def lifespan(app: FastAPI):
|
|
| 168 |
|
| 169 |
yield
|
| 170 |
|
|
|
|
| 171 |
if app.state.embedding_batcher:
|
| 172 |
await app.state.embedding_batcher.stop()
|
| 173 |
await client.close()
|
|
@@ -477,20 +501,6 @@ async def embeddings(
|
|
| 477 |
|
| 478 |
response = await client.aembedding(request=request, **request_data)
|
| 479 |
|
| 480 |
-
if ENABLE_REQUEST_LOGGING:
|
| 481 |
-
response_summary = {
|
| 482 |
-
"model": response.model,
|
| 483 |
-
"object": response.object,
|
| 484 |
-
"usage": response.usage.model_dump(),
|
| 485 |
-
"data_count": len(response.data),
|
| 486 |
-
"embedding_dimensions": len(response.data[0].embedding) if response.data else 0
|
| 487 |
-
}
|
| 488 |
-
log_request_response(
|
| 489 |
-
request_data=body.model_dump(exclude_none=True),
|
| 490 |
-
response_data=response_summary,
|
| 491 |
-
is_streaming=False,
|
| 492 |
-
log_type="embedding"
|
| 493 |
-
)
|
| 494 |
return response
|
| 495 |
|
| 496 |
except HTTPException as e:
|
|
@@ -510,17 +520,6 @@ async def embeddings(
|
|
| 510 |
raise HTTPException(status_code=502, detail=f"Bad Gateway: {str(e)}")
|
| 511 |
except Exception as e:
|
| 512 |
logging.error(f"Embedding request failed: {e}")
|
| 513 |
-
if ENABLE_REQUEST_LOGGING:
|
| 514 |
-
try:
|
| 515 |
-
request_data = await request.json()
|
| 516 |
-
except json.JSONDecodeError:
|
| 517 |
-
request_data = {"error": "Could not parse request body"}
|
| 518 |
-
log_request_response(
|
| 519 |
-
request_data=request_data,
|
| 520 |
-
response_data={"error": str(e)},
|
| 521 |
-
is_streaming=False,
|
| 522 |
-
log_type="embedding"
|
| 523 |
-
)
|
| 524 |
raise HTTPException(status_code=500, detail=str(e))
|
| 525 |
|
| 526 |
@app.get("/")
|
|
|
|
| 52 |
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
| 53 |
|
| 54 |
from rotator_library import RotatingClient, PROVIDER_PLUGINS
|
| 55 |
+
from rotator_library.credential_manager import CredentialManager
|
| 56 |
+
from rotator_library.background_refresher import BackgroundRefresher
|
| 57 |
from proxy_app.request_logger import log_request_to_console
|
| 58 |
from proxy_app.batch_manager import EmbeddingBatcher
|
| 59 |
from proxy_app.detailed_logger import DetailedLogger
|
|
|
|
| 127 |
if not PROXY_API_KEY:
|
| 128 |
raise ValueError("PROXY_API_KEY environment variable not set.")
|
| 129 |
|
| 130 |
+
# Split API keys and OAuth config loading
|
| 131 |
api_keys = {}
|
| 132 |
+
oauth_credentials = {}
|
| 133 |
for key, value in os.environ.items():
|
| 134 |
+
if key == "PROXY_API_KEY":
|
| 135 |
+
continue
|
| 136 |
+
|
| 137 |
+
# Handles GEMINI_CLI_OAUTH_1, QWEN_CODE_OAUTH_1, etc.
|
| 138 |
+
if "_OAUTH_" in key:
|
| 139 |
+
provider = key.split("_OAUTH_")[0].lower()
|
| 140 |
+
if provider not in oauth_credentials:
|
| 141 |
+
oauth_credentials[provider] = []
|
| 142 |
+
oauth_credentials[provider].append(value)
|
| 143 |
+
# Handles GEMINI_API_KEY_1, etc.
|
| 144 |
+
elif "_API_KEY" in key:
|
| 145 |
+
provider = key.split("_API_KEY")[0].lower()
|
| 146 |
if provider not in api_keys:
|
| 147 |
api_keys[provider] = []
|
| 148 |
api_keys[provider].append(value)
|
| 149 |
|
| 150 |
+
if not api_keys and not oauth_credentials:
|
| 151 |
+
raise ValueError("No provider API keys or OAuth credentials found in environment variables.")
|
| 152 |
|
| 153 |
# Load model ignore lists from environment variables
|
| 154 |
ignore_models = {}
|
|
|
|
| 163 |
@asynccontextmanager
|
| 164 |
async def lifespan(app: FastAPI):
|
| 165 |
"""Manage the RotatingClient's lifecycle with the app's lifespan."""
|
| 166 |
+
# [NEW] Load provider-specific params
|
| 167 |
+
litellm_provider_params = {
|
| 168 |
+
"gemini_cli": {"project_id": os.getenv("GEMINI_CLI_PROJECT_ID")}
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
# The client now uses the root logger configuration
|
| 172 |
+
client = RotatingClient(
|
| 173 |
+
api_keys=api_keys,
|
| 174 |
+
oauth_credentials=oauth_credentials, # Pass OAuth config
|
| 175 |
+
configure_logging=True,
|
| 176 |
+
litellm_provider_params=litellm_provider_params, # [NEW]
|
| 177 |
+
ignore_models=ignore_models
|
| 178 |
+
)
|
| 179 |
+
client.background_refresher.start() # Start the background task
|
| 180 |
app.state.rotating_client = client
|
| 181 |
os.environ["LITELLM_LOG"] = "ERROR"
|
| 182 |
litellm.set_verbose = False
|
|
|
|
| 191 |
|
| 192 |
yield
|
| 193 |
|
| 194 |
+
await client.background_refresher.stop() # Stop the background task on shutdown
|
| 195 |
if app.state.embedding_batcher:
|
| 196 |
await app.state.embedding_batcher.stop()
|
| 197 |
await client.close()
|
|
|
|
| 501 |
|
| 502 |
response = await client.aembedding(request=request, **request_data)
|
| 503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
return response
|
| 505 |
|
| 506 |
except HTTPException as e:
|
|
|
|
| 520 |
raise HTTPException(status_code=502, detail=f"Bad Gateway: {str(e)}")
|
| 521 |
except Exception as e:
|
| 522 |
logging.error(f"Embedding request failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 523 |
raise HTTPException(status_code=500, detail=str(e))
|
| 524 |
|
| 525 |
@app.get("/")
|
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# src/rotator_library/background_refresher.py

import asyncio
import logging
from typing import TYPE_CHECKING, Optional  # Optional was used below but never imported

if TYPE_CHECKING:
    from .client import RotatingClient

lib_logger = logging.getLogger('rotator_library')


class BackgroundRefresher:
    """
    A background task that periodically checks and refreshes OAuth tokens
    to ensure they remain valid.
    """

    def __init__(self, client: 'RotatingClient', interval_seconds: int = 300):
        # Owning client; used to look up OAuth credential paths and provider plugins.
        self._client = client
        # Seconds between proactive refresh checks.
        self._interval = interval_seconds
        # Handle to the running refresh loop; None while stopped.
        self._task: Optional[asyncio.Task] = None

    def start(self):
        """Starts the background refresh task. Idempotent while a task is running."""
        if self._task is None:
            self._task = asyncio.create_task(self._run())
            lib_logger.info(f"Background token refresher started. Check interval: {self._interval} seconds.")

    async def stop(self):
        """Stops the background refresh task and waits for it to exit."""
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            # Reset so start() can be called again; without this the refresher
            # could never be restarted after a stop.
            self._task = None
            lib_logger.info("Background token refresher stopped.")

    async def _run(self):
        """The main loop for the background task.

        Sleeps for the configured interval, then asks each OAuth provider
        plugin that supports it to proactively refresh every credential path.
        Individual refresh failures are logged and do not stop the loop.
        """
        while True:
            try:
                await asyncio.sleep(self._interval)
                lib_logger.info("Running proactive token refresh check...")

                oauth_configs = self._client.get_oauth_credentials()
                for provider, paths in oauth_configs.items():
                    # Plugins are registered under the "<provider>_oauth" name.
                    provider_plugin = self._client._get_provider_instance(f"{provider}_oauth")
                    if provider_plugin and hasattr(provider_plugin, 'proactively_refresh'):
                        for path in paths:
                            try:
                                await provider_plugin.proactively_refresh(path)
                            except Exception as e:
                                lib_logger.error(f"Error during proactive refresh for '{path}': {e}")
            except asyncio.CancelledError:
                # Normal shutdown path triggered by stop().
                break
            except Exception as e:
                # Never let an unexpected error kill the refresh loop.
                lib_logger.error(f"Unexpected error in background refresher loop: {e}")
|
|
@@ -24,6 +24,8 @@ from .error_handler import PreRequestCallbackError, classify_error, AllProviders
|
|
| 24 |
from .providers import PROVIDER_PLUGINS
|
| 25 |
from .request_sanitizer import sanitize_request_payload
|
| 26 |
from .cooldown_manager import CooldownManager
|
|
|
|
|
|
|
| 27 |
|
| 28 |
class StreamedAPIError(Exception):
|
| 29 |
"""Custom exception to signal an API error received over a stream."""
|
|
@@ -39,11 +41,13 @@ class RotatingClient:
|
|
| 39 |
def __init__(
|
| 40 |
self,
|
| 41 |
api_keys: Dict[str, List[str]],
|
|
|
|
| 42 |
max_retries: int = 2,
|
| 43 |
usage_file_path: str = "key_usage.json",
|
| 44 |
configure_logging: bool = True,
|
| 45 |
global_timeout: int = 30,
|
| 46 |
abort_on_callback_error: bool = True,
|
|
|
|
| 47 |
ignore_models: Optional[Dict[str, List[str]]] = None
|
| 48 |
):
|
| 49 |
os.environ["LITELLM_LOG"] = "ERROR"
|
|
@@ -63,6 +67,18 @@ class RotatingClient:
|
|
| 63 |
if not api_keys:
|
| 64 |
raise ValueError("API keys dictionary cannot be empty.")
|
| 65 |
self.api_keys = api_keys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
self.max_retries = max_retries
|
| 67 |
self.global_timeout = global_timeout
|
| 68 |
self.abort_on_callback_error = abort_on_callback_error
|
|
@@ -73,6 +89,7 @@ class RotatingClient:
|
|
| 73 |
self.http_client = httpx.AsyncClient()
|
| 74 |
self.all_providers = AllProviders()
|
| 75 |
self.cooldown_manager = CooldownManager()
|
|
|
|
| 76 |
self.ignore_models = ignore_models or {}
|
| 77 |
|
| 78 |
def _is_model_ignored(self, provider: str, model_id: str) -> bool:
|
|
@@ -191,6 +208,9 @@ class RotatingClient:
|
|
| 191 |
|
| 192 |
return kwargs
|
| 193 |
|
|
|
|
|
|
|
|
|
|
| 194 |
def _get_provider_instance(self, provider_name: str):
|
| 195 |
"""Lazily initializes and returns a provider instance."""
|
| 196 |
if provider_name not in self._provider_instances:
|
|
@@ -338,8 +358,8 @@ class RotatingClient:
|
|
| 338 |
raise ValueError("'model' is a required parameter.")
|
| 339 |
|
| 340 |
provider = model.split('/')[0]
|
| 341 |
-
if provider not in self.
|
| 342 |
-
raise ValueError(f"No API keys configured for provider: {provider}")
|
| 343 |
|
| 344 |
# Establish a global deadline for the entire request lifecycle.
|
| 345 |
deadline = time.time() + self.global_timeout
|
|
@@ -347,16 +367,16 @@ class RotatingClient:
|
|
| 347 |
# Create a mutable copy of the keys and shuffle it to ensure
|
| 348 |
# that the key selection is randomized, which is crucial when
|
| 349 |
# multiple keys have the same usage stats.
|
| 350 |
-
|
| 351 |
-
random.shuffle(
|
| 352 |
|
| 353 |
-
|
| 354 |
last_exception = None
|
| 355 |
kwargs = self._convert_model_params(**kwargs)
|
| 356 |
-
|
| 357 |
-
# The main rotation loop. It continues as long as there are untried
|
| 358 |
-
while len(
|
| 359 |
-
|
| 360 |
key_acquired = False
|
| 361 |
try:
|
| 362 |
# Check for a provider-wide cooldown first.
|
|
@@ -372,129 +392,167 @@ class RotatingClient:
|
|
| 372 |
lib_logger.warning(f"Provider {provider} is in cooldown. Waiting for {remaining_cooldown:.2f} seconds.")
|
| 373 |
await asyncio.sleep(remaining_cooldown)
|
| 374 |
|
| 375 |
-
|
| 376 |
-
if not
|
| 377 |
break
|
| 378 |
|
| 379 |
-
lib_logger.info(f"Acquiring key for model {model}. Tried keys: {len(
|
| 380 |
-
|
| 381 |
-
available_keys=
|
| 382 |
model=model,
|
| 383 |
deadline=deadline
|
| 384 |
)
|
| 385 |
key_acquired = True
|
| 386 |
-
|
| 387 |
|
| 388 |
litellm_kwargs = self.all_providers.get_provider_kwargs(**kwargs.copy())
|
| 389 |
-
provider_instance = self._get_provider_instance(provider)
|
| 390 |
-
if provider_instance:
|
| 391 |
-
if "safety_settings" in litellm_kwargs:
|
| 392 |
-
converted_settings = provider_instance.convert_safety_settings(litellm_kwargs["safety_settings"])
|
| 393 |
-
if converted_settings is not None:
|
| 394 |
-
litellm_kwargs["safety_settings"] = converted_settings
|
| 395 |
-
else:
|
| 396 |
-
del litellm_kwargs["safety_settings"]
|
| 397 |
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
|
|
|
| 403 |
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
lib_logger.warning(f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}")
|
| 418 |
-
|
| 419 |
-
response = await api_call(
|
| 420 |
-
api_key=current_key,
|
| 421 |
-
**litellm_kwargs,
|
| 422 |
-
logger_fn=self._litellm_logger_callback
|
| 423 |
-
)
|
| 424 |
-
|
| 425 |
-
await self.usage_manager.record_success(current_key, model, response)
|
| 426 |
-
await self.usage_manager.release_key(current_key, model)
|
| 427 |
key_acquired = False
|
| 428 |
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 429 |
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
error_message = str(e).split('\n')[0]
|
| 456 |
-
lib_logger.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 457 |
break # Move to the next key
|
| 458 |
-
|
| 459 |
-
# For temporary errors, wait before retrying with the same key.
|
| 460 |
-
wait_time = classified_error.retry_after or (1 * (2 ** attempt)) + random.uniform(0, 1)
|
| 461 |
-
remaining_budget = deadline - time.time()
|
| 462 |
-
|
| 463 |
-
# If the required wait time exceeds the budget, don't wait; rotate to the next key immediately.
|
| 464 |
-
if wait_time > remaining_budget:
|
| 465 |
-
lib_logger.warning(f"Required retry wait time ({wait_time:.2f}s) exceeds remaining budget ({remaining_budget:.2f}s). Rotating key early.")
|
| 466 |
-
break
|
| 467 |
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
|
|
|
| 472 |
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
#
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
finally:
|
| 496 |
-
if key_acquired and
|
| 497 |
-
await self.usage_manager.release_key(
|
| 498 |
|
| 499 |
if last_exception:
|
| 500 |
# Log the final error but do not raise it, as per the new requirement.
|
|
@@ -510,19 +568,19 @@ class RotatingClient:
|
|
| 510 |
provider = model.split('/')[0]
|
| 511 |
|
| 512 |
# Create a mutable copy of the keys and shuffle it.
|
| 513 |
-
|
| 514 |
-
random.shuffle(
|
| 515 |
|
| 516 |
deadline = time.time() + self.global_timeout
|
| 517 |
-
|
| 518 |
last_exception = None
|
| 519 |
kwargs = self._convert_model_params(**kwargs)
|
| 520 |
|
| 521 |
consecutive_quota_failures = 0
|
| 522 |
|
| 523 |
try:
|
| 524 |
-
while len(
|
| 525 |
-
|
| 526 |
key_acquired = False
|
| 527 |
try:
|
| 528 |
if await self.cooldown_manager.is_cooling_down(provider):
|
|
@@ -534,21 +592,52 @@ class RotatingClient:
|
|
| 534 |
lib_logger.warning(f"Provider {provider} is in a global cooldown. All requests to this provider will be paused for {remaining_cooldown:.2f} seconds.")
|
| 535 |
await asyncio.sleep(remaining_cooldown)
|
| 536 |
|
| 537 |
-
|
| 538 |
-
if not
|
| 539 |
-
lib_logger.warning(f"All
|
| 540 |
break
|
| 541 |
|
| 542 |
-
lib_logger.info(f"Acquiring
|
| 543 |
-
|
| 544 |
-
available_keys=
|
| 545 |
model=model,
|
| 546 |
deadline=deadline
|
| 547 |
)
|
| 548 |
key_acquired = True
|
| 549 |
-
|
| 550 |
|
| 551 |
litellm_kwargs = self.all_providers.get_provider_kwargs(**kwargs.copy())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 552 |
provider_instance = self._get_provider_instance(provider)
|
| 553 |
if provider_instance:
|
| 554 |
if "safety_settings" in litellm_kwargs:
|
|
@@ -568,7 +657,7 @@ class RotatingClient:
|
|
| 568 |
|
| 569 |
for attempt in range(self.max_retries):
|
| 570 |
try:
|
| 571 |
-
lib_logger.info(f"Attempting stream with
|
| 572 |
|
| 573 |
if pre_request_callback:
|
| 574 |
try:
|
|
@@ -580,15 +669,14 @@ class RotatingClient:
|
|
| 580 |
lib_logger.warning(f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}")
|
| 581 |
|
| 582 |
response = await litellm.acompletion(
|
| 583 |
-
api_key=current_key,
|
| 584 |
**litellm_kwargs,
|
| 585 |
logger_fn=self._litellm_logger_callback
|
| 586 |
)
|
| 587 |
|
| 588 |
-
lib_logger.info(f"Stream connection established for
|
| 589 |
|
| 590 |
key_acquired = False
|
| 591 |
-
stream_generator = self._safe_streaming_wrapper(response,
|
| 592 |
|
| 593 |
async for chunk in stream_generator:
|
| 594 |
yield chunk
|
|
@@ -618,7 +706,7 @@ class RotatingClient:
|
|
| 618 |
|
| 619 |
# Now, log the failure with the extracted raw response.
|
| 620 |
log_failure(
|
| 621 |
-
api_key=
|
| 622 |
model=model,
|
| 623 |
attempt=attempt + 1,
|
| 624 |
error=e,
|
|
@@ -633,7 +721,7 @@ class RotatingClient:
|
|
| 633 |
|
| 634 |
if "quota" in error_message_text.lower() or "resource_exhausted" in error_status.lower():
|
| 635 |
consecutive_quota_failures += 1
|
| 636 |
-
lib_logger.warning(f"
|
| 637 |
|
| 638 |
quota_value = "N/A"
|
| 639 |
quota_id = "N/A"
|
|
@@ -648,11 +736,11 @@ class RotatingClient:
|
|
| 648 |
if quota_value != "N/A" and quota_id != "N/A":
|
| 649 |
break
|
| 650 |
|
| 651 |
-
await self.usage_manager.record_failure(
|
| 652 |
|
| 653 |
if consecutive_quota_failures >= 3:
|
| 654 |
console_log_message = (
|
| 655 |
-
f"Terminating stream for
|
| 656 |
f"This is now considered a fatal input data error. ID: {quota_id}, Limit: {quota_value}."
|
| 657 |
)
|
| 658 |
client_error_message = (
|
|
@@ -668,31 +756,31 @@ class RotatingClient:
|
|
| 668 |
|
| 669 |
else:
|
| 670 |
# [MODIFIED] Do not yield to the client. Just log and break to rotate the key.
|
| 671 |
-
lib_logger.warning(f"Quota error on
|
| 672 |
break
|
| 673 |
|
| 674 |
else:
|
| 675 |
consecutive_quota_failures = 0
|
| 676 |
# [MODIFIED] Do not yield to the client. Just log and break to rotate the key.
|
| 677 |
-
lib_logger.warning(f"
|
| 678 |
|
| 679 |
if classified_error.error_type == 'rate_limit' and classified_error.status_code == 429:
|
| 680 |
cooldown_duration = classified_error.retry_after or 60
|
| 681 |
await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
|
| 682 |
lib_logger.warning(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
|
| 683 |
|
| 684 |
-
await self.usage_manager.record_failure(
|
| 685 |
break
|
| 686 |
|
| 687 |
except (APIConnectionError, litellm.InternalServerError, litellm.ServiceUnavailableError) as e:
|
| 688 |
consecutive_quota_failures = 0
|
| 689 |
last_exception = e
|
| 690 |
-
log_failure(api_key=
|
| 691 |
classified_error = classify_error(e)
|
| 692 |
-
await self.usage_manager.record_failure(
|
| 693 |
|
| 694 |
if attempt >= self.max_retries - 1:
|
| 695 |
-
lib_logger.warning(f"
|
| 696 |
# [MODIFIED] Do not yield to the client here.
|
| 697 |
break
|
| 698 |
|
|
@@ -703,17 +791,17 @@ class RotatingClient:
|
|
| 703 |
break
|
| 704 |
|
| 705 |
error_message = str(e).split('\n')[0]
|
| 706 |
-
lib_logger.warning(f"
|
| 707 |
await asyncio.sleep(wait_time)
|
| 708 |
continue
|
| 709 |
|
| 710 |
except Exception as e:
|
| 711 |
consecutive_quota_failures = 0
|
| 712 |
last_exception = e
|
| 713 |
-
log_failure(api_key=
|
| 714 |
classified_error = classify_error(e)
|
| 715 |
|
| 716 |
-
lib_logger.warning(f"
|
| 717 |
|
| 718 |
if classified_error.status_code == 429:
|
| 719 |
cooldown_duration = classified_error.retry_after or 60
|
|
@@ -724,12 +812,12 @@ class RotatingClient:
|
|
| 724 |
raise last_exception
|
| 725 |
|
| 726 |
# [MODIFIED] Do not yield to the client here.
|
| 727 |
-
await self.usage_manager.record_failure(
|
| 728 |
break
|
| 729 |
|
| 730 |
finally:
|
| 731 |
-
if key_acquired and
|
| 732 |
-
await self.usage_manager.release_key(
|
| 733 |
|
| 734 |
final_error_message = "Failed to complete the streaming request: No available API keys after rotation or global timeout exceeded."
|
| 735 |
if last_exception:
|
|
|
|
| 24 |
from .providers import PROVIDER_PLUGINS
|
| 25 |
from .request_sanitizer import sanitize_request_payload
|
| 26 |
from .cooldown_manager import CooldownManager
|
| 27 |
+
from .credential_manager import CredentialManager
|
| 28 |
+
from .background_refresher import BackgroundRefresher
|
| 29 |
|
| 30 |
class StreamedAPIError(Exception):
|
| 31 |
"""Custom exception to signal an API error received over a stream."""
|
|
|
|
| 41 |
def __init__(
|
| 42 |
self,
|
| 43 |
api_keys: Dict[str, List[str]],
|
| 44 |
+
oauth_credentials: Dict[str, List[str]],
|
| 45 |
max_retries: int = 2,
|
| 46 |
usage_file_path: str = "key_usage.json",
|
| 47 |
configure_logging: bool = True,
|
| 48 |
global_timeout: int = 30,
|
| 49 |
abort_on_callback_error: bool = True,
|
| 50 |
+
litellm_provider_params: Optional[Dict[str, Any]] = None, # [NEW]
|
| 51 |
ignore_models: Optional[Dict[str, List[str]]] = None
|
| 52 |
):
|
| 53 |
os.environ["LITELLM_LOG"] = "ERROR"
|
|
|
|
| 67 |
if not api_keys:
|
| 68 |
raise ValueError("API keys dictionary cannot be empty.")
|
| 69 |
self.api_keys = api_keys
|
| 70 |
+
self.credential_manager = CredentialManager(oauth_credentials)
|
| 71 |
+
self.oauth_credentials = self.credential_manager.discover_and_prepare()
|
| 72 |
+
self.background_refresher = BackgroundRefresher(self)
|
| 73 |
+
self.oauth_providers = set(self.oauth_credentials.keys())
|
| 74 |
+
|
| 75 |
+
all_credentials = {}
|
| 76 |
+
for provider, keys in api_keys.items():
|
| 77 |
+
all_credentials.setdefault(provider, []).extend(keys)
|
| 78 |
+
for provider, paths in self.oauth_credentials.items():
|
| 79 |
+
all_credentials.setdefault(provider, []).extend(paths)
|
| 80 |
+
self.all_credentials = all_credentials
|
| 81 |
+
|
| 82 |
self.max_retries = max_retries
|
| 83 |
self.global_timeout = global_timeout
|
| 84 |
self.abort_on_callback_error = abort_on_callback_error
|
|
|
|
| 89 |
self.http_client = httpx.AsyncClient()
|
| 90 |
self.all_providers = AllProviders()
|
| 91 |
self.cooldown_manager = CooldownManager()
|
| 92 |
+
self.litellm_provider_params = litellm_provider_params or {}
|
| 93 |
self.ignore_models = ignore_models or {}
|
| 94 |
|
| 95 |
def _is_model_ignored(self, provider: str, model_id: str) -> bool:
|
|
|
|
| 208 |
|
| 209 |
return kwargs
|
| 210 |
|
| 211 |
+
def get_oauth_credentials(self) -> Dict[str, List[str]]:
|
| 212 |
+
return self.oauth_credentials
|
| 213 |
+
|
| 214 |
def _get_provider_instance(self, provider_name: str):
|
| 215 |
"""Lazily initializes and returns a provider instance."""
|
| 216 |
if provider_name not in self._provider_instances:
|
|
|
|
| 358 |
raise ValueError("'model' is a required parameter.")
|
| 359 |
|
| 360 |
provider = model.split('/')[0]
|
| 361 |
+
if provider not in self.all_credentials:
|
| 362 |
+
raise ValueError(f"No API keys or OAuth credentials configured for provider: {provider}")
|
| 363 |
|
| 364 |
# Establish a global deadline for the entire request lifecycle.
|
| 365 |
deadline = time.time() + self.global_timeout
|
|
|
|
| 367 |
# Create a mutable copy of the keys and shuffle it to ensure
|
| 368 |
# that the key selection is randomized, which is crucial when
|
| 369 |
# multiple keys have the same usage stats.
|
| 370 |
+
credentials_for_provider = list(self.all_credentials[provider])
|
| 371 |
+
random.shuffle(credentials_for_provider)
|
| 372 |
|
| 373 |
+
tried_creds = set()
|
| 374 |
last_exception = None
|
| 375 |
kwargs = self._convert_model_params(**kwargs)
|
| 376 |
+
|
| 377 |
+
# The main rotation loop. It continues as long as there are untried credentials and the global deadline has not been exceeded.
|
| 378 |
+
while len(tried_creds) < len(credentials_for_provider) and time.time() < deadline:
|
| 379 |
+
current_cred = None
|
| 380 |
key_acquired = False
|
| 381 |
try:
|
| 382 |
# Check for a provider-wide cooldown first.
|
|
|
|
| 392 |
lib_logger.warning(f"Provider {provider} is in cooldown. Waiting for {remaining_cooldown:.2f} seconds.")
|
| 393 |
await asyncio.sleep(remaining_cooldown)
|
| 394 |
|
| 395 |
+
creds_to_try = [c for c in credentials_for_provider if c not in tried_creds]
|
| 396 |
+
if not creds_to_try:
|
| 397 |
break
|
| 398 |
|
| 399 |
+
lib_logger.info(f"Acquiring key for model {model}. Tried keys: {len(tried_creds)}/{len(credentials_for_provider)}")
|
| 400 |
+
current_cred = await self.usage_manager.acquire_key(
|
| 401 |
+
available_keys=creds_to_try,
|
| 402 |
model=model,
|
| 403 |
deadline=deadline
|
| 404 |
)
|
| 405 |
key_acquired = True
|
| 406 |
+
tried_creds.add(current_cred)
|
| 407 |
|
| 408 |
litellm_kwargs = self.all_providers.get_provider_kwargs(**kwargs.copy())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
+
# [NEW] Merge provider-specific params
|
| 411 |
+
if provider in self.litellm_provider_params:
|
| 412 |
+
litellm_kwargs["litellm_params"] = {
|
| 413 |
+
**self.litellm_provider_params[provider],
|
| 414 |
+
**litellm_kwargs.get("litellm_params", {})
|
| 415 |
+
}
|
| 416 |
|
| 417 |
+
provider_plugin = self._get_provider_instance(provider)
|
| 418 |
+
if provider_plugin and provider_plugin.has_custom_logic():
|
| 419 |
+
lib_logger.debug(f"Provider '{provider}' has custom logic. Delegating call.")
|
| 420 |
+
litellm_kwargs["credential_identifier"] = current_cred
|
| 421 |
+
|
| 422 |
+
# The plugin handles the entire call, including retries on 401, etc.
|
| 423 |
+
# The main retry loop here is for key rotation on other errors.
|
| 424 |
+
response = await provider_plugin.acompletion(self.http_client, **litellm_kwargs)
|
| 425 |
+
|
| 426 |
+
# For non-streaming, success is immediate
|
| 427 |
+
if not kwargs.get("stream"):
|
| 428 |
+
await self.usage_manager.record_success(current_cred, model, response)
|
| 429 |
+
await self.usage_manager.release_key(current_cred, model)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
key_acquired = False
|
| 431 |
return response
|
| 432 |
+
else:
|
| 433 |
+
# For streaming, wrap the response and return
|
| 434 |
+
key_acquired = False
|
| 435 |
+
stream_generator = self._safe_streaming_wrapper(response, current_cred, model, request)
|
| 436 |
+
async for chunk in stream_generator:
|
| 437 |
+
yield chunk
|
| 438 |
+
return
|
| 439 |
+
|
| 440 |
+
else: # This is the standard API Key / litellm-handled provider logic
|
| 441 |
+
is_oauth = provider in self.oauth_providers
|
| 442 |
+
if is_oauth: # Standard OAuth provider (not custom)
|
| 443 |
+
# ... (logic to set headers) ...
|
| 444 |
+
pass
|
| 445 |
+
else: # API Key
|
| 446 |
+
litellm_kwargs["api_key"] = current_cred
|
| 447 |
+
|
| 448 |
+
provider_instance = self._get_provider_instance(provider)
|
| 449 |
+
if provider_instance:
|
| 450 |
+
if "safety_settings" in litellm_kwargs:
|
| 451 |
+
converted_settings = provider_instance.convert_safety_settings(litellm_kwargs["safety_settings"])
|
| 452 |
+
if converted_settings is not None:
|
| 453 |
+
litellm_kwargs["safety_settings"] = converted_settings
|
| 454 |
+
else:
|
| 455 |
+
del litellm_kwargs["safety_settings"]
|
| 456 |
+
|
| 457 |
+
if provider == "gemini" and provider_instance:
|
| 458 |
+
provider_instance.handle_thinking_parameter(litellm_kwargs, model)
|
| 459 |
|
| 460 |
+
if "gemma-3" in model and "messages" in litellm_kwargs:
|
| 461 |
+
litellm_kwargs["messages"] = [{"role": "user", "content": m["content"]} if m.get("role") == "system" else m for m in litellm_kwargs["messages"]]
|
| 462 |
+
|
| 463 |
+
litellm_kwargs = sanitize_request_payload(litellm_kwargs, model)
|
| 464 |
+
|
| 465 |
+
for attempt in range(self.max_retries):
|
| 466 |
+
try:
|
| 467 |
+
lib_logger.info(f"Attempting call with credential ...{current_cred[-6:]} (Attempt {attempt + 1}/{self.max_retries})")
|
| 468 |
+
|
| 469 |
+
if pre_request_callback:
|
| 470 |
+
try:
|
| 471 |
+
await pre_request_callback(request, litellm_kwargs)
|
| 472 |
+
except Exception as e:
|
| 473 |
+
if self.abort_on_callback_error:
|
| 474 |
+
raise PreRequestCallbackError(f"Pre-request callback failed: {e}") from e
|
| 475 |
+
else:
|
| 476 |
+
lib_logger.warning(f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}")
|
| 477 |
+
|
| 478 |
+
response = await api_call(
|
| 479 |
+
**litellm_kwargs,
|
| 480 |
+
logger_fn=self._litellm_logger_callback
|
| 481 |
+
)
|
| 482 |
+
|
| 483 |
+
await self.usage_manager.record_success(current_cred, model, response)
|
| 484 |
+
await self.usage_manager.release_key(current_cred, model)
|
| 485 |
+
key_acquired = False
|
| 486 |
+
return response
|
| 487 |
+
|
| 488 |
+
except litellm.RateLimitError as e:
|
| 489 |
+
last_exception = e
|
| 490 |
+
log_failure(api_key=current_cred, model=model, attempt=attempt + 1, error=e, request_headers=dict(request.headers) if request else {})
|
| 491 |
+
classified_error = classify_error(e)
|
| 492 |
+
|
| 493 |
+
# Extract a clean error message for the user-facing log
|
| 494 |
error_message = str(e).split('\n')[0]
|
| 495 |
+
lib_logger.info(f"Key ...{current_cred[-6:]} hit rate limit for model {model}. Reason: '{error_message}'. Rotating key.")
|
| 496 |
+
|
| 497 |
+
if classified_error.status_code == 429:
|
| 498 |
+
cooldown_duration = classified_error.retry_after or 60
|
| 499 |
+
await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
|
| 500 |
+
lib_logger.warning(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
|
| 501 |
+
|
| 502 |
+
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 503 |
+
lib_logger.warning(f"Key ...{current_cred[-6:]} encountered a rate limit. Trying next key.")
|
| 504 |
break # Move to the next key
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
|
| 506 |
+
except (APIConnectionError, litellm.InternalServerError, litellm.ServiceUnavailableError) as e:
|
| 507 |
+
last_exception = e
|
| 508 |
+
log_failure(api_key=current_cred, model=model, attempt=attempt + 1, error=e, request_headers=dict(request.headers) if request else {})
|
| 509 |
+
classified_error = classify_error(e)
|
| 510 |
+
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 511 |
|
| 512 |
+
if attempt >= self.max_retries - 1:
|
| 513 |
+
error_message = str(e).split('\n')[0]
|
| 514 |
+
lib_logger.warning(f"Key ...{current_cred[-6:]} failed after max retries for model {model} due to a server error. Reason: '{error_message}'. Rotating key.")
|
| 515 |
+
break # Move to the next key
|
| 516 |
+
|
| 517 |
+
# For temporary errors, wait before retrying with the same key.
|
| 518 |
+
wait_time = classified_error.retry_after or (1 * (2 ** attempt)) + random.uniform(0, 1)
|
| 519 |
+
remaining_budget = deadline - time.time()
|
| 520 |
+
|
| 521 |
+
# If the required wait time exceeds the budget, don't wait; rotate to the next key immediately.
|
| 522 |
+
if wait_time > remaining_budget:
|
| 523 |
+
lib_logger.warning(f"Required retry wait time ({wait_time:.2f}s) exceeds remaining budget ({remaining_budget:.2f}s). Rotating key early.")
|
| 524 |
+
break
|
| 525 |
+
|
| 526 |
+
error_message = str(e).split('\n')[0]
|
| 527 |
+
lib_logger.warning(f"Key ...{current_cred[-6:]} encountered a server error for model {model}. Reason: '{error_message}'. Retrying in {wait_time:.2f}s.")
|
| 528 |
+
await asyncio.sleep(wait_time)
|
| 529 |
+
continue # Retry with the same key
|
| 530 |
+
|
| 531 |
+
except Exception as e:
|
| 532 |
+
last_exception = e
|
| 533 |
+
log_failure(api_key=current_cred, model=model, attempt=attempt + 1, error=e, request_headers=dict(request.headers) if request else {})
|
| 534 |
+
|
| 535 |
+
if request and await request.is_disconnected():
|
| 536 |
+
lib_logger.warning(f"Client disconnected. Aborting retries for key ...{current_cred[-6:]}.")
|
| 537 |
+
raise last_exception
|
| 538 |
+
|
| 539 |
+
classified_error = classify_error(e)
|
| 540 |
+
error_message = str(e).split('\n')[0]
|
| 541 |
+
lib_logger.warning(f"Key ...{current_cred[-6:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
|
| 542 |
+
if classified_error.status_code == 429:
|
| 543 |
+
cooldown_duration = classified_error.retry_after or 60
|
| 544 |
+
await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
|
| 545 |
+
lib_logger.warning(f"IP-based rate limit detected for {provider} from generic exception. Starting a {cooldown_duration}-second global cooldown.")
|
| 546 |
+
|
| 547 |
+
if classified_error.error_type in ['invalid_request', 'context_window_exceeded', 'authentication']:
|
| 548 |
+
# For these errors, we should not retry with other keys.
|
| 549 |
+
raise last_exception
|
| 550 |
+
|
| 551 |
+
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 552 |
+
break # Try next key for other errors
|
| 553 |
finally:
|
| 554 |
+
if key_acquired and current_cred:
|
| 555 |
+
await self.usage_manager.release_key(current_cred, model)
|
| 556 |
|
| 557 |
if last_exception:
|
| 558 |
# Log the final error but do not raise it, as per the new requirement.
|
|
|
|
| 568 |
provider = model.split('/')[0]
|
| 569 |
|
| 570 |
# Create a mutable copy of the keys and shuffle it.
|
| 571 |
+
credentials_for_provider = list(self.all_credentials[provider])
|
| 572 |
+
random.shuffle(credentials_for_provider)
|
| 573 |
|
| 574 |
deadline = time.time() + self.global_timeout
|
| 575 |
+
tried_creds = set()
|
| 576 |
last_exception = None
|
| 577 |
kwargs = self._convert_model_params(**kwargs)
|
| 578 |
|
| 579 |
consecutive_quota_failures = 0
|
| 580 |
|
| 581 |
try:
|
| 582 |
+
while len(tried_creds) < len(credentials_for_provider) and time.time() < deadline:
|
| 583 |
+
current_cred = None
|
| 584 |
key_acquired = False
|
| 585 |
try:
|
| 586 |
if await self.cooldown_manager.is_cooling_down(provider):
|
|
|
|
| 592 |
lib_logger.warning(f"Provider {provider} is in a global cooldown. All requests to this provider will be paused for {remaining_cooldown:.2f} seconds.")
|
| 593 |
await asyncio.sleep(remaining_cooldown)
|
| 594 |
|
| 595 |
+
creds_to_try = [c for c in credentials_for_provider if c not in tried_creds]
|
| 596 |
+
if not creds_to_try:
|
| 597 |
+
lib_logger.warning(f"All credentials for provider {provider} have been tried. No more credentials to rotate to.")
|
| 598 |
break
|
| 599 |
|
| 600 |
+
lib_logger.info(f"Acquiring credential for model {model}. Tried credentials: {len(tried_creds)}/{len(credentials_for_provider)}")
|
| 601 |
+
current_cred = await self.usage_manager.acquire_key(
|
| 602 |
+
available_keys=creds_to_try,
|
| 603 |
model=model,
|
| 604 |
deadline=deadline
|
| 605 |
)
|
| 606 |
key_acquired = True
|
| 607 |
+
tried_creds.add(current_cred)
|
| 608 |
|
| 609 |
litellm_kwargs = self.all_providers.get_provider_kwargs(**kwargs.copy())
|
| 610 |
+
|
| 611 |
+
# [NEW] Merge provider-specific params
|
| 612 |
+
if provider in self.litellm_provider_params:
|
| 613 |
+
litellm_kwargs["litellm_params"] = {
|
| 614 |
+
**self.litellm_provider_params[provider],
|
| 615 |
+
**litellm_kwargs.get("litellm_params", {})
|
| 616 |
+
}
|
| 617 |
+
|
| 618 |
+
provider_plugin = self._get_provider_instance(provider)
|
| 619 |
+
if provider_plugin and provider_plugin.has_custom_logic():
|
| 620 |
+
lib_logger.debug(f"Provider '{provider}' has custom logic. Delegating call.")
|
| 621 |
+
litellm_kwargs["credential_identifier"] = current_cred
|
| 622 |
+
|
| 623 |
+
# The plugin handles the entire call, including retries on 401, etc.
|
| 624 |
+
# The main retry loop here is for key rotation on other errors.
|
| 625 |
+
response = await provider_plugin.acompletion(self.http_client, **litellm_kwargs)
|
| 626 |
+
|
| 627 |
+
key_acquired = False
|
| 628 |
+
stream_generator = self._safe_streaming_wrapper(response, current_cred, model, request)
|
| 629 |
+
async for chunk in stream_generator:
|
| 630 |
+
yield chunk
|
| 631 |
+
return
|
| 632 |
+
|
| 633 |
+
else: # This is the standard API Key / litellm-handled provider logic
|
| 634 |
+
is_oauth = provider in self.oauth_providers
|
| 635 |
+
if is_oauth: # Standard OAuth provider (not custom)
|
| 636 |
+
# ... (logic to set headers) ...
|
| 637 |
+
pass
|
| 638 |
+
else: # API Key
|
| 639 |
+
litellm_kwargs["api_key"] = current_cred
|
| 640 |
+
|
| 641 |
provider_instance = self._get_provider_instance(provider)
|
| 642 |
if provider_instance:
|
| 643 |
if "safety_settings" in litellm_kwargs:
|
|
|
|
| 657 |
|
| 658 |
for attempt in range(self.max_retries):
|
| 659 |
try:
|
| 660 |
+
lib_logger.info(f"Attempting stream with credential ...{current_cred[-6:]} (Attempt {attempt + 1}/{self.max_retries})")
|
| 661 |
|
| 662 |
if pre_request_callback:
|
| 663 |
try:
|
|
|
|
| 669 |
lib_logger.warning(f"Pre-request callback failed but abort_on_callback_error is False. Proceeding with request. Error: {e}")
|
| 670 |
|
| 671 |
response = await litellm.acompletion(
|
|
|
|
| 672 |
**litellm_kwargs,
|
| 673 |
logger_fn=self._litellm_logger_callback
|
| 674 |
)
|
| 675 |
|
| 676 |
+
lib_logger.info(f"Stream connection established for credential ...{current_cred[-6:]}. Processing response.")
|
| 677 |
|
| 678 |
key_acquired = False
|
| 679 |
+
stream_generator = self._safe_streaming_wrapper(response, current_cred, model, request)
|
| 680 |
|
| 681 |
async for chunk in stream_generator:
|
| 682 |
yield chunk
|
|
|
|
| 706 |
|
| 707 |
# Now, log the failure with the extracted raw response.
|
| 708 |
log_failure(
|
| 709 |
+
api_key=current_cred,
|
| 710 |
model=model,
|
| 711 |
attempt=attempt + 1,
|
| 712 |
error=e,
|
|
|
|
| 721 |
|
| 722 |
if "quota" in error_message_text.lower() or "resource_exhausted" in error_status.lower():
|
| 723 |
consecutive_quota_failures += 1
|
| 724 |
+
lib_logger.warning(f"Credential ...{current_cred[-6:]} hit a quota limit. This is consecutive failure #{consecutive_quota_failures} for this request.")
|
| 725 |
|
| 726 |
quota_value = "N/A"
|
| 727 |
quota_id = "N/A"
|
|
|
|
| 736 |
if quota_value != "N/A" and quota_id != "N/A":
|
| 737 |
break
|
| 738 |
|
| 739 |
+
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 740 |
|
| 741 |
if consecutive_quota_failures >= 3:
|
| 742 |
console_log_message = (
|
| 743 |
+
f"Terminating stream for credential ...{current_cred[-6:]} due to 3rd consecutive quota error. "
|
| 744 |
f"This is now considered a fatal input data error. ID: {quota_id}, Limit: {quota_value}."
|
| 745 |
)
|
| 746 |
client_error_message = (
|
|
|
|
| 756 |
|
| 757 |
else:
|
| 758 |
# [MODIFIED] Do not yield to the client. Just log and break to rotate the key.
|
| 759 |
+
lib_logger.warning(f"Quota error on credential ...{current_cred[-6:]} (failure {consecutive_quota_failures}/3). Rotating key silently.")
|
| 760 |
break
|
| 761 |
|
| 762 |
else:
|
| 763 |
consecutive_quota_failures = 0
|
| 764 |
# [MODIFIED] Do not yield to the client. Just log and break to rotate the key.
|
| 765 |
+
lib_logger.warning(f"Credential ...{current_cred[-6:]} encountered a recoverable error ({classified_error.error_type}) during stream. Rotating key silently.")
|
| 766 |
|
| 767 |
if classified_error.error_type == 'rate_limit' and classified_error.status_code == 429:
|
| 768 |
cooldown_duration = classified_error.retry_after or 60
|
| 769 |
await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
|
| 770 |
lib_logger.warning(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
|
| 771 |
|
| 772 |
+
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 773 |
break
|
| 774 |
|
| 775 |
except (APIConnectionError, litellm.InternalServerError, litellm.ServiceUnavailableError) as e:
|
| 776 |
consecutive_quota_failures = 0
|
| 777 |
last_exception = e
|
| 778 |
+
log_failure(api_key=current_cred, model=model, attempt=attempt + 1, error=e, request_headers=dict(request.headers) if request else {})
|
| 779 |
classified_error = classify_error(e)
|
| 780 |
+
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 781 |
|
| 782 |
if attempt >= self.max_retries - 1:
|
| 783 |
+
lib_logger.warning(f"Credential ...{current_cred[-6:]} failed after max retries for model {model} due to a server error. Rotating key silently.")
|
| 784 |
# [MODIFIED] Do not yield to the client here.
|
| 785 |
break
|
| 786 |
|
|
|
|
| 791 |
break
|
| 792 |
|
| 793 |
error_message = str(e).split('\n')[0]
|
| 794 |
+
lib_logger.warning(f"Credential ...{current_cred[-6:]} encountered a server error for model {model}. Reason: '{error_message}'. Retrying in {wait_time:.2f}s.")
|
| 795 |
await asyncio.sleep(wait_time)
|
| 796 |
continue
|
| 797 |
|
| 798 |
except Exception as e:
|
| 799 |
consecutive_quota_failures = 0
|
| 800 |
last_exception = e
|
| 801 |
+
log_failure(api_key=current_cred, model=model, attempt=attempt + 1, error=e, request_headers=dict(request.headers) if request else {})
|
| 802 |
classified_error = classify_error(e)
|
| 803 |
|
| 804 |
+
lib_logger.warning(f"Credential ...{current_cred[-6:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {str(e)}. Rotating key.")
|
| 805 |
|
| 806 |
if classified_error.status_code == 429:
|
| 807 |
cooldown_duration = classified_error.retry_after or 60
|
|
|
|
| 812 |
raise last_exception
|
| 813 |
|
| 814 |
# [MODIFIED] Do not yield to the client here.
|
| 815 |
+
await self.usage_manager.record_failure(current_cred, model, classified_error)
|
| 816 |
break
|
| 817 |
|
| 818 |
finally:
|
| 819 |
+
if key_acquired and current_cred:
|
| 820 |
+
await self.usage_manager.release_key(current_cred, model)
|
| 821 |
|
| 822 |
final_error_message = "Failed to complete the streaming request: No available API keys after rotation or global timeout exceeded."
|
| 823 |
if last_exception:
|
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import shutil
|
| 3 |
+
import logging
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict, List, Optional
|
| 6 |
+
|
| 7 |
+
lib_logger = logging.getLogger('rotator_library')
|
| 8 |
+
|
| 9 |
+
# Local directory where discovered credential files are copied. Created eagerly
# at import time so later copy operations can assume it exists.
OAUTH_BASE_DIR = Path.cwd() / "oauth_creds"
OAUTH_BASE_DIR.mkdir(exist_ok=True)

# Standard paths where tools like `gemini login` store credentials.
# Used as fallbacks when a provider's configured path is empty.
DEFAULT_OAUTH_PATHS = {
    "gemini": Path.home() / ".gemini" / "oauth_creds.json",
    "qwen": Path.home() / ".qwen" / "oauth_creds.json",
    # Add other providers like 'claude' here if they have a standard CLI path
}
|
| 18 |
+
|
| 19 |
+
class CredentialManager:
    """
    Discovers OAuth credential files from standard locations, copies them locally,
    and updates the configuration to use the local paths.
    """

    def __init__(self, oauth_config: Dict[str, List[str]]):
        # Mapping of provider name -> list of credential file paths
        # (entries may be empty strings, meaning "use the default location").
        self.oauth_config = oauth_config

    def discover_and_prepare(self) -> Dict[str, List[str]]:
        """
        Process the initial OAuth config and return a config of local paths.

        For each configured entry, resolve the source file (falling back to the
        provider's default location when the entry is empty), copy it into
        OAUTH_BASE_DIR if a local copy does not already exist, and collect the
        resolved local path. Providers with no usable files are omitted.
        """
        prepared: Dict[str, List[str]] = {}
        for provider, configured_paths in self.oauth_config.items():
            local_paths: List[str] = []
            for account_id, raw_path in enumerate(configured_paths, start=1):
                source = self._resolve_source_path(provider, raw_path)
                if source is None or not source.exists():
                    lib_logger.warning(f"Could not find OAuth file for {provider} account #{account_id}. Skipping.")
                    continue

                destination = OAUTH_BASE_DIR / f"{provider}_oauth_{account_id}.json"
                if not destination.exists():
                    try:
                        shutil.copy(source, destination)
                        lib_logger.info(f"Copied '{source}' to local credentials at '{destination}'.")
                    except Exception as e:
                        lib_logger.error(f"Failed to copy OAuth file for {provider} account #{account_id}: {e}")
                        continue

                local_paths.append(str(destination.resolve()))

            if local_paths:
                prepared[provider] = local_paths

        return prepared

    def _resolve_source_path(self, provider: str, specified_path: Optional[str]) -> Optional[Path]:
        """Determines the source path for a credential file."""
        # A non-empty explicit path always wins; otherwise fall back to the
        # provider's well-known default location (None when unknown).
        if specified_path:
            return Path(specified_path).expanduser()
        return DEFAULT_OAUTH_PATHS.get(provider)
|
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import re
|
|
|
|
| 2 |
from typing import Optional, Dict, Any
|
|
|
|
| 3 |
|
| 4 |
from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError, AuthenticationError, InvalidRequestError, BadRequestError, OpenAIError, InternalServerError, Timeout, ContextWindowExceededError
|
| 5 |
|
|
@@ -22,8 +24,6 @@ class ClassifiedError:
|
|
| 22 |
def __str__(self):
|
| 23 |
return f"ClassifiedError(type={self.error_type}, status={self.status_code}, retry_after={self.retry_after}, original_exc={self.original_exception})"
|
| 24 |
|
| 25 |
-
import json
|
| 26 |
-
|
| 27 |
def get_retry_after(error: Exception) -> Optional[int]:
|
| 28 |
"""
|
| 29 |
Extracts the 'retry-after' duration in seconds from an exception message.
|
|
@@ -80,9 +80,24 @@ def get_retry_after(error: Exception) -> Optional[int]:
|
|
| 80 |
def classify_error(e: Exception) -> ClassifiedError:
|
| 81 |
"""
|
| 82 |
Classifies an exception into a structured ClassifiedError object.
|
|
|
|
| 83 |
"""
|
| 84 |
status_code = getattr(e, 'status_code', None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
|
|
|
|
|
|
|
|
|
| 86 |
if isinstance(e, PreRequestCallbackError):
|
| 87 |
return ClassifiedError(
|
| 88 |
error_type='pre_request_callback_error',
|
|
|
|
| 1 |
import re
|
| 2 |
+
import json
|
| 3 |
from typing import Optional, Dict, Any
|
| 4 |
+
import httpx
|
| 5 |
|
| 6 |
from litellm.exceptions import APIConnectionError, RateLimitError, ServiceUnavailableError, AuthenticationError, InvalidRequestError, BadRequestError, OpenAIError, InternalServerError, Timeout, ContextWindowExceededError
|
| 7 |
|
|
|
|
| 24 |
def __str__(self):
|
| 25 |
return f"ClassifiedError(type={self.error_type}, status={self.status_code}, retry_after={self.retry_after}, original_exc={self.original_exception})"
|
| 26 |
|
|
|
|
|
|
|
| 27 |
def get_retry_after(error: Exception) -> Optional[int]:
|
| 28 |
"""
|
| 29 |
Extracts the 'retry-after' duration in seconds from an exception message.
|
|
|
|
| 80 |
def classify_error(e: Exception) -> ClassifiedError:
|
| 81 |
"""
|
| 82 |
Classifies an exception into a structured ClassifiedError object.
|
| 83 |
+
Now handles both litellm and httpx exceptions.
|
| 84 |
"""
|
| 85 |
status_code = getattr(e, 'status_code', None)
|
| 86 |
+
if isinstance(e, httpx.HTTPStatusError): # [NEW] Handle httpx errors first
|
| 87 |
+
status_code = e.response.status_code
|
| 88 |
+
if status_code == 401:
|
| 89 |
+
return ClassifiedError(error_type='authentication', original_exception=e, status_code=status_code)
|
| 90 |
+
if status_code == 429:
|
| 91 |
+
retry_after = get_retry_after(e)
|
| 92 |
+
return ClassifiedError(error_type='rate_limit', original_exception=e, status_code=status_code, retry_after=retry_after)
|
| 93 |
+
if 400 <= status_code < 500:
|
| 94 |
+
return ClassifiedError(error_type='invalid_request', original_exception=e, status_code=status_code)
|
| 95 |
+
if 500 <= status_code:
|
| 96 |
+
return ClassifiedError(error_type='server_error', original_exception=e, status_code=status_code)
|
| 97 |
|
| 98 |
+
if isinstance(e, (httpx.TimeoutException, httpx.ConnectError, httpx.NetworkError)): # [NEW]
|
| 99 |
+
return ClassifiedError(error_type='api_connection', original_exception=e, status_code=status_code)
|
| 100 |
+
|
| 101 |
if isinstance(e, PreRequestCallbackError):
|
| 102 |
return ClassifiedError(
|
| 103 |
error_type='pre_request_callback_error',
|
|
@@ -26,9 +26,9 @@ def _register_providers():
|
|
| 26 |
for attribute_name in dir(module):
|
| 27 |
attribute = getattr(module, attribute_name)
|
| 28 |
if isinstance(attribute, type) and issubclass(attribute, ProviderInterface) and attribute is not ProviderInterface:
|
| 29 |
-
#
|
| 30 |
-
provider_name = module_name.replace("_provider", "")
|
| 31 |
# Remap 'nvidia' to 'nvidia_nim' to align with litellm's provider name
|
|
|
|
| 32 |
if provider_name == "nvidia":
|
| 33 |
provider_name = "nvidia_nim"
|
| 34 |
PROVIDER_PLUGINS[provider_name] = attribute
|
|
|
|
| 26 |
for attribute_name in dir(module):
|
| 27 |
attribute = getattr(module, attribute_name)
|
| 28 |
if isinstance(attribute, type) and issubclass(attribute, ProviderInterface) and attribute is not ProviderInterface:
|
| 29 |
+
# Derives 'gemini_cli' from 'gemini_cli_provider.py'
|
|
|
|
| 30 |
# Remap 'nvidia' to 'nvidia_nim' to align with litellm's provider name
|
| 31 |
+
provider_name = module_name.replace("_provider", "")
|
| 32 |
if provider_name == "nvidia":
|
| 33 |
provider_name = "nvidia_nim"
|
| 34 |
PROVIDER_PLUGINS[provider_name] = attribute
|
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# src/rotator_library/providers/gemini_auth_base.py
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import time
|
| 5 |
+
import asyncio
|
| 6 |
+
import logging
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Dict, Any
|
| 9 |
+
|
| 10 |
+
import httpx
|
| 11 |
+
|
| 12 |
+
lib_logger = logging.getLogger('rotator_library')
|
| 13 |
+
|
| 14 |
+
# Fallback OAuth client credentials used when the credential file does not
# carry its own client_id/client_secret (see _refresh_token).
# NOTE(review): these appear to be the public "installed app" client values
# shipped with the gemini-cli tool — confirm before rotating/removing.
CLIENT_ID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
CLIENT_SECRET = "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"
# Google OAuth 2.0 token endpoint used for refresh-token grants.
TOKEN_URI = "https://oauth2.googleapis.com/token"
# Treat tokens as expired this many seconds before their actual expiry,
# so a refresh happens before the token stops working mid-request.
REFRESH_EXPIRY_BUFFER_SECONDS = 300
|
| 18 |
+
|
| 19 |
+
class GeminiAuthBase:
    """
    Shared OAuth plumbing for Gemini-style providers.

    Loads credential JSON files (gemini-cli or gcloud format), detects token
    expiry, refreshes access tokens against Google's token endpoint, and
    caches everything in memory with per-file asyncio locks.
    """

    def __init__(self):
        # Cache of parsed credential dicts, keyed by credential file path.
        self._credentials_cache: Dict[str, Dict[str, Any]] = {}
        # Per-path locks so concurrent tasks don't race on load/refresh.
        self._refresh_locks: Dict[str, asyncio.Lock] = {}

    async def _load_credentials(self, path: str) -> Dict[str, Any]:
        """Load and cache the credential JSON at `path`.

        Raises:
            IOError: if the file cannot be read or parsed.
        """
        if path in self._credentials_cache:
            return self._credentials_cache[path]

        async with self._get_lock(path):
            # Double-check after acquiring the lock: another task may have
            # populated the cache while we were waiting.
            if path in self._credentials_cache:
                return self._credentials_cache[path]
            try:
                with open(path, 'r') as f:
                    creds = json.load(f)
                # Handle gcloud-style creds file which nest tokens under "credential"
                if "credential" in creds:
                    creds = creds["credential"]
                self._credentials_cache[path] = creds
                return creds
            except Exception as e:
                raise IOError(f"Failed to load Gemini OAuth credentials from '{path}': {e}")

    async def _save_credentials(self, path: str, creds: Dict[str, Any]):
        """Update the in-memory cache and best-effort persist `creds` to disk."""
        self._credentials_cache[path] = creds
        try:
            with open(path, 'w') as f:
                json.dump(creds, f, indent=2)
        except Exception as e:
            # Persisting is best-effort: the refreshed token remains usable
            # from the in-memory cache even if the write fails.
            lib_logger.error(f"Failed to save updated Gemini OAuth credentials to '{path}': {e}")

    def _is_token_expired(self, creds: Dict[str, Any]) -> bool:
        """Return True if the token expires within REFRESH_EXPIRY_BUFFER_SECONDS.

        Supports both the gemini-cli format (`expiry_date`, epoch milliseconds)
        and the gcloud format (`token_expiry`, ISO-8601 UTC string ending 'Z').
        """
        import calendar  # stdlib; local import keeps this fix self-contained

        expiry = creds.get("token_expiry")  # gcloud format
        if not expiry:  # gemini-cli format
            expiry_timestamp = creds.get("expiry_date", 0) / 1000
        else:
            # BUGFIX: the timestring is UTC ('...Z'), so it must be converted
            # with calendar.timegm. The previous time.mktime call interpreted
            # the struct_time as *local* time, skewing expiry detection by the
            # local UTC offset (tokens could be refreshed far too late/early).
            expiry_timestamp = calendar.timegm(time.strptime(expiry, "%Y-%m-%dT%H:%M:%SZ"))

        return expiry_timestamp < time.time() + REFRESH_EXPIRY_BUFFER_SECONDS

    async def _refresh_token(self, path: str, creds: Dict[str, Any]) -> Dict[str, Any]:
        """Exchange the refresh_token for a new access token and persist it.

        Raises:
            ValueError: if the credentials have no refresh_token.
            httpx.HTTPStatusError: if the token endpoint rejects the request.
        """
        async with self._get_lock(path):
            # Another task may have refreshed while we waited for the lock.
            if not self._is_token_expired(self._credentials_cache.get(path, creds)):
                return self._credentials_cache.get(path, creds)

            lib_logger.info(f"Refreshing Gemini OAuth token for '{Path(path).name}'...")
            refresh_token = creds.get("refresh_token")
            if not refresh_token:
                raise ValueError("No refresh_token found in credentials file.")

            async with httpx.AsyncClient() as client:
                response = await client.post(TOKEN_URI, data={
                    # Credential files may carry their own client; fall back to
                    # the module-level defaults otherwise.
                    "client_id": creds.get("client_id", CLIENT_ID),
                    "client_secret": creds.get("client_secret", CLIENT_SECRET),
                    "refresh_token": refresh_token,
                    "grant_type": "refresh_token",
                })
                response.raise_for_status()
                new_token_data = response.json()

            creds["access_token"] = new_token_data["access_token"]
            expiry_timestamp = time.time() + new_token_data["expires_in"]
            # Write the new expiry in both supported formats so the file stays
            # readable by either tool family.
            creds["expiry_date"] = expiry_timestamp * 1000  # gemini-cli format
            creds["token_expiry"] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime(expiry_timestamp))  # gcloud format

            await self._save_credentials(path, creds)
            lib_logger.info(f"Successfully refreshed Gemini OAuth token for '{Path(path).name}'.")
            return creds

    async def get_auth_header(self, credential_path: str) -> Dict[str, str]:
        """Return a Bearer Authorization header, refreshing the token if needed."""
        creds = await self._load_credentials(credential_path)
        if self._is_token_expired(creds):
            creds = await self._refresh_token(credential_path, creds)
        return {"Authorization": f"Bearer {creds['access_token']}"}

    async def proactively_refresh(self, credential_path: str):
        """Refresh the token ahead of demand (used by the background refresher)."""
        creds = await self._load_credentials(credential_path)
        if self._is_token_expired(creds):
            await self._refresh_token(credential_path, creds)

    def _get_lock(self, path: str) -> asyncio.Lock:
        """Return the (lazily created) lock guarding `path`."""
        if path not in self._refresh_locks:
            self._refresh_locks[path] = asyncio.Lock()
        return self._refresh_locks[path]
|
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# src/rotator_library/providers/gemini_cli_provider.py
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import httpx
|
| 5 |
+
import logging
|
| 6 |
+
import time
|
| 7 |
+
from typing import List, Dict, Any, AsyncGenerator, Union, Optional
|
| 8 |
+
from .provider_interface import ProviderInterface
|
| 9 |
+
from .gemini_auth_base import GeminiAuthBase
|
| 10 |
+
import litellm
|
| 11 |
+
import os
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
lib_logger = logging.getLogger('rotator_library')
|
| 15 |
+
|
| 16 |
+
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com/v1internal"
|
| 17 |
+
|
| 18 |
+
class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
| 19 |
+
    def __init__(self):
        super().__init__()
        # Cached Google Cloud project ID; resolved lazily by _discover_project_id.
        self.project_id: Optional[str] = None
|
| 22 |
+
|
| 23 |
+
async def _discover_project_id(self, litellm_params: Dict[str, Any]) -> str:
|
| 24 |
+
"""Discovers the Google Cloud Project ID."""
|
| 25 |
+
if self.project_id:
|
| 26 |
+
return self.project_id
|
| 27 |
+
|
| 28 |
+
# 1. Prioritize explicitly configured project_id
|
| 29 |
+
if litellm_params.get("project_id"):
|
| 30 |
+
self.project_id = litellm_params["project_id"]
|
| 31 |
+
lib_logger.info(f"Using configured Gemini CLI project ID: {self.project_id}")
|
| 32 |
+
return self.project_id
|
| 33 |
+
|
| 34 |
+
# 2. Fallback: Look for .env file in the standard .gemini directory
|
| 35 |
+
try:
|
| 36 |
+
gemini_env_path = Path.home() / ".gemini" / ".env"
|
| 37 |
+
if gemini_env_path.exists():
|
| 38 |
+
with open(gemini_env_path, 'r') as f:
|
| 39 |
+
for line in f:
|
| 40 |
+
if line.startswith("GOOGLE_CLOUD_PROJECT="):
|
| 41 |
+
self.project_id = line.strip().split("=")[1]
|
| 42 |
+
lib_logger.info(f"Discovered Gemini CLI project ID from ~/.gemini/.env: {self.project_id}")
|
| 43 |
+
return self.project_id
|
| 44 |
+
except Exception as e:
|
| 45 |
+
lib_logger.warning(f"Could not read project ID from ~/.gemini/.env: {e}")
|
| 46 |
+
|
| 47 |
+
raise ValueError(
|
| 48 |
+
"Gemini CLI project ID not found. Please set `GEMINI_CLI_PROJECT_ID` in your main .env file "
|
| 49 |
+
"or ensure it is present in `~/.gemini/.env`."
|
| 50 |
+
)
|
| 51 |
+
def has_custom_logic(self) -> bool:
|
| 52 |
+
return True
|
| 53 |
+
|
| 54 |
+
def _transform_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
| 55 |
+
# As seen in Kilo examples, system prompts are injected into the first user message.
|
| 56 |
+
gemini_contents = []
|
| 57 |
+
system_prompt = ""
|
| 58 |
+
if messages and messages[0].get('role') == 'system':
|
| 59 |
+
system_prompt = messages.pop(0).get('content', '')
|
| 60 |
+
|
| 61 |
+
for msg in messages:
|
| 62 |
+
role = "model" if msg.get("role") == "assistant" else "user"
|
| 63 |
+
content = msg.get("content", "")
|
| 64 |
+
if system_prompt and role == "user":
|
| 65 |
+
content = f"{system_prompt}\n\n{content}"
|
| 66 |
+
system_prompt = "" # Inject only once
|
| 67 |
+
gemini_contents.append({"role": role, "parts": [{"text": content}]})
|
| 68 |
+
return gemini_contents
|
| 69 |
+
|
| 70 |
+
def _convert_chunk_to_openai(self, chunk: Dict[str, Any], model_id: str) -> dict:
|
| 71 |
+
response_data = chunk.get('response', chunk)
|
| 72 |
+
candidate = response_data.get('candidates', [{}])[0]
|
| 73 |
+
|
| 74 |
+
delta = {}
|
| 75 |
+
finish_reason = None
|
| 76 |
+
|
| 77 |
+
# Correctly handle reasoning vs. content based on 'thought' flag from Kilo example
|
| 78 |
+
if 'content' in candidate and 'parts' in candidate['content']:
|
| 79 |
+
part = candidate['content']['parts'][0]
|
| 80 |
+
if part.get('text'):
|
| 81 |
+
if part.get('thought') is True:
|
| 82 |
+
# This is a reasoning/thinking step
|
| 83 |
+
delta['reasoning_content'] = part['text']
|
| 84 |
+
else:
|
| 85 |
+
delta['content'] = part['text']
|
| 86 |
+
|
| 87 |
+
raw_finish_reason = candidate.get('finishReason')
|
| 88 |
+
if raw_finish_reason:
|
| 89 |
+
mapping = {'STOP': 'stop', 'MAX_TOKENS': 'length', 'SAFETY': 'content_filter'}
|
| 90 |
+
finish_reason = mapping.get(raw_finish_reason, 'stop')
|
| 91 |
+
|
| 92 |
+
choice = {"index": 0, "delta": delta, "finish_reason": finish_reason}
|
| 93 |
+
|
| 94 |
+
openai_chunk = {
|
| 95 |
+
"choices": [choice], "model": model_id, "object": "chat.completion.chunk",
|
| 96 |
+
"id": f"chatcmpl-geminicli-{time.time()}", "created": int(time.time())
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
if 'usageMetadata' in response_data:
|
| 100 |
+
usage = response_data['usageMetadata']
|
| 101 |
+
openai_chunk["usage"] = {
|
| 102 |
+
"prompt_tokens": usage.get("promptTokenCount", 0),
|
| 103 |
+
"completion_tokens": usage.get("candidatesTokenCount", 0),
|
| 104 |
+
"total_tokens": usage.get("totalTokenCount", 0),
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
return openai_chunk
|
| 108 |
+
|
| 109 |
+
async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
|
| 110 |
+
model = kwargs["model"]
|
| 111 |
+
credential_path = kwargs.pop("credential_identifier")
|
| 112 |
+
auth_header = await self.get_auth_header(credential_path)
|
| 113 |
+
|
| 114 |
+
project_id = await self._discover_project_id(kwargs.get("litellm_params", {}))
|
| 115 |
+
|
| 116 |
+
# Handle :thinking suffix from Kilo example
|
| 117 |
+
model_name = model.split('/')[-1]
|
| 118 |
+
enable_thinking = model_name.endswith(':thinking')
|
| 119 |
+
if enable_thinking:
|
| 120 |
+
model_name = model_name.replace(':thinking', '')
|
| 121 |
+
|
| 122 |
+
gen_config = {
|
| 123 |
+
"temperature": kwargs.get("temperature", 0.7),
|
| 124 |
+
"maxOutputTokens": kwargs.get("max_tokens", 8192),
|
| 125 |
+
}
|
| 126 |
+
if enable_thinking:
|
| 127 |
+
gen_config["thinkingConfig"] = {"thinkingBudget": -1}
|
| 128 |
+
|
| 129 |
+
request_payload = {
|
| 130 |
+
"model": model_name,
|
| 131 |
+
"project": project_id,
|
| 132 |
+
"request": {
|
| 133 |
+
"contents": self._transform_messages(kwargs.get("messages", [])),
|
| 134 |
+
"generationConfig": gen_config,
|
| 135 |
+
},
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
|
| 139 |
+
|
| 140 |
+
async def stream_handler():
|
| 141 |
+
async with client.stream("POST", url, headers=auth_header, json=request_payload, params={"alt": "sse"}, timeout=600) as response:
|
| 142 |
+
response.raise_for_status()
|
| 143 |
+
async for line in response.aiter_lines():
|
| 144 |
+
if line.startswith('data: '):
|
| 145 |
+
data_str = line[6:]
|
| 146 |
+
if data_str == "[DONE]": break
|
| 147 |
+
try:
|
| 148 |
+
chunk = json.loads(data_str)
|
| 149 |
+
openai_chunk = self._convert_chunk_to_openai(chunk, model)
|
| 150 |
+
yield litellm.ModelResponse(**openai_chunk)
|
| 151 |
+
except json.JSONDecodeError:
|
| 152 |
+
lib_logger.warning(f"Could not decode JSON from Gemini CLI: {line}")
|
| 153 |
+
|
| 154 |
+
if kwargs.get("stream", False):
|
| 155 |
+
return stream_handler()
|
| 156 |
+
else:
|
| 157 |
+
# Accumulate stream for non-streaming response
|
| 158 |
+
chunks = [chunk async for chunk in stream_handler()]
|
| 159 |
+
return litellm.utils.stream_to_completion_response(chunks)
|
| 160 |
+
|
| 161 |
+
# [NEW] Hardcoded model list based on Kilo example
|
| 162 |
+
HARDCODED_MODELS = [
|
| 163 |
+
"gemini-2.5-pro",
|
| 164 |
+
"gemini-2.5-flash",
|
| 165 |
+
"gemini-2.5-flash-lite"
|
| 166 |
+
]
|
| 167 |
+
# Use the shared GeminiAuthBase for auth logic
|
| 168 |
+
# get_models is not applicable for this custom provider
|
| 169 |
+
async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
|
| 170 |
+
"""Returns a hardcoded list of known compatible Gemini CLI models."""
|
| 171 |
+
return [f"gemini_cli/{model_id}" for model_id in HARDCODED_MODELS]
|
|
@@ -1,13 +1,14 @@
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
-
from typing import List, Dict, Any
|
| 3 |
import httpx
|
|
|
|
| 4 |
|
| 5 |
class ProviderInterface(ABC):
|
| 6 |
"""
|
| 7 |
-
An interface for API provider-specific functionality,
|
| 8 |
-
|
| 9 |
"""
|
| 10 |
-
|
| 11 |
@abstractmethod
|
| 12 |
async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
|
| 13 |
"""
|
|
@@ -22,7 +23,25 @@ class ProviderInterface(ABC):
|
|
| 22 |
"""
|
| 23 |
pass
|
| 24 |
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
"""
|
| 27 |
Converts a generic safety settings dictionary to the provider-specific format.
|
| 28 |
|
|
@@ -33,3 +52,17 @@ class ProviderInterface(ABC):
|
|
| 33 |
A list of provider-specific safety setting objects or None.
|
| 34 |
"""
|
| 35 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import List, Dict, Any, Optional, AsyncGenerator, Union
|
| 3 |
import httpx
|
| 4 |
+
import litellm
|
| 5 |
|
| 6 |
class ProviderInterface(ABC):
|
| 7 |
"""
|
| 8 |
+
An interface for API provider-specific functionality, including model
|
| 9 |
+
discovery and custom API call handling for non-standard providers.
|
| 10 |
"""
|
| 11 |
+
|
| 12 |
@abstractmethod
|
| 13 |
async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
|
| 14 |
"""
|
|
|
|
| 23 |
"""
|
| 24 |
pass
|
| 25 |
|
| 26 |
+
# [NEW] Add methods for providers that need to bypass litellm
|
| 27 |
+
def has_custom_logic(self) -> bool:
|
| 28 |
+
"""
|
| 29 |
+
Returns True if the provider implements its own acompletion/aembedding logic,
|
| 30 |
+
bypassing the standard litellm call.
|
| 31 |
+
"""
|
| 32 |
+
return False
|
| 33 |
+
|
| 34 |
+
async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
|
| 35 |
+
"""
|
| 36 |
+
Handles the entire completion call for non-standard providers.
|
| 37 |
+
"""
|
| 38 |
+
raise NotImplementedError(f"{self.__class__.__name__} does not implement custom acompletion.")
|
| 39 |
+
|
| 40 |
+
async def aembedding(self, client: httpx.AsyncClient, **kwargs) -> litellm.EmbeddingResponse:
|
| 41 |
+
"""Handles the entire embedding call for non-standard providers."""
|
| 42 |
+
raise NotImplementedError(f"{self.__class__.__name__} does not implement custom aembedding.")
|
| 43 |
+
|
| 44 |
+
def convert_safety_settings(self, settings: Dict[str, str]) -> Optional[List[Dict[str, Any]]]:
|
| 45 |
"""
|
| 46 |
Converts a generic safety settings dictionary to the provider-specific format.
|
| 47 |
|
|
|
|
| 52 |
A list of provider-specific safety setting objects or None.
|
| 53 |
"""
|
| 54 |
return None
|
| 55 |
+
|
| 56 |
+
# [NEW] Add new methods for OAuth providers
|
| 57 |
+
async def get_auth_header(self, credential_identifier: str) -> Dict[str, str]:
|
| 58 |
+
"""
|
| 59 |
+
For OAuth providers, this method returns the Authorization header.
|
| 60 |
+
For API key providers, this can be a no-op or raise NotImplementedError.
|
| 61 |
+
"""
|
| 62 |
+
raise NotImplementedError("This provider does not support OAuth.")
|
| 63 |
+
|
| 64 |
+
async def proactively_refresh(self, credential_path: str):
|
| 65 |
+
"""
|
| 66 |
+
Proactively refreshes a token if it's nearing expiry.
|
| 67 |
+
"""
|
| 68 |
+
pass
|
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# src/rotator_library/providers/qwen_auth_base.py
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import time
|
| 5 |
+
import asyncio
|
| 6 |
+
import logging
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Dict, Any, Tuple
|
| 9 |
+
|
| 10 |
+
import httpx
|
| 11 |
+
|
| 12 |
+
lib_logger = logging.getLogger('rotator_library')
|
| 13 |
+
|
| 14 |
+
CLIENT_ID = "f0304373b74a44d2b584a3fb70ca9e56"
|
| 15 |
+
TOKEN_ENDPOINT = "https://chat.qwen.ai/api/v1/oauth2/token"
|
| 16 |
+
REFRESH_EXPIRY_BUFFER_SECONDS = 300
|
| 17 |
+
|
| 18 |
+
class QwenAuthBase:
    """Shared OAuth token management for Qwen credential files.

    Caches parsed credential files in memory, refreshes access tokens via the
    Qwen OAuth2 endpoint when they near expiry, and serializes loads/refreshes
    per credential file with an asyncio.Lock.
    """

    def __init__(self):
        # path -> parsed credential dict (in-memory mirror of the JSON file)
        self._credentials_cache: Dict[str, Dict[str, Any]] = {}
        # path -> lock serializing load/refresh for that file
        self._refresh_locks: Dict[str, asyncio.Lock] = {}

    def _get_lock(self, path: str) -> asyncio.Lock:
        """Return (creating on first use) the per-path refresh lock."""
        if path not in self._refresh_locks:
            self._refresh_locks[path] = asyncio.Lock()
        return self._refresh_locks[path]

    def _read_credentials_file(self, path: str) -> Dict[str, Any]:
        """Read and parse the credential JSON file. No locking, no caching."""
        try:
            with open(path, 'r') as f:
                return json.load(f)
        except Exception as e:
            raise IOError(f"Failed to load Qwen OAuth credentials from '{path}': {e}")

    async def _load_credentials(self, path: str) -> Dict[str, Any]:
        """Return cached credentials for *path*, loading from disk on first use."""
        if path in self._credentials_cache:
            return self._credentials_cache[path]

        async with self._get_lock(path):
            # Double-check after acquiring the lock: another task may have
            # loaded the file while we waited.
            if path in self._credentials_cache:
                return self._credentials_cache[path]
            creds = self._read_credentials_file(path)
            self._credentials_cache[path] = creds
            return creds

    async def _save_credentials(self, path: str, creds: Dict[str, Any]):
        """Update the cache and best-effort persist *creds* back to disk."""
        self._credentials_cache[path] = creds
        try:
            with open(path, 'w') as f:
                json.dump(creds, f, indent=2)
        except Exception as e:
            # Persisting is best-effort: the refreshed token remains usable
            # from the in-memory cache even if the write fails.
            lib_logger.error(f"Failed to save updated Qwen OAuth credentials to '{path}': {e}")

    def _is_token_expired(self, creds: Dict[str, Any]) -> bool:
        """True if the token expires within REFRESH_EXPIRY_BUFFER_SECONDS.

        `expiry_date` is stored in milliseconds since the epoch.
        """
        expiry_timestamp = creds.get("expiry_date", 0) / 1000
        return expiry_timestamp < time.time() + REFRESH_EXPIRY_BUFFER_SECONDS

    async def _refresh_token(self, path: str, force: bool = False) -> Dict[str, Any]:
        """Refresh the access token for *path* and return the updated credentials.

        Serialized per path; a caller that arrives after a concurrent refresh
        already succeeded returns the cached result instead of refreshing
        again (unless *force* is set).
        """
        async with self._get_lock(path):
            cached_creds = self._credentials_cache.get(path)
            if not force and cached_creds and not self._is_token_expired(cached_creds):
                return cached_creds

            # Read the file directly here: calling _load_credentials() would
            # try to re-acquire the same per-path lock we already hold, and
            # asyncio.Lock is NOT reentrant — a cache miss would deadlock.
            creds_from_file = cached_creds or self._read_credentials_file(path)
            self._credentials_cache[path] = creds_from_file

            lib_logger.info(f"Refreshing Qwen OAuth token for '{Path(path).name}'...")
            refresh_token = creds_from_file.get("refresh_token")
            if not refresh_token:
                raise ValueError("No refresh_token found in Qwen credentials file.")

            async with httpx.AsyncClient() as client:
                response = await client.post(TOKEN_ENDPOINT, data={
                    "grant_type": "refresh_token",
                    "refresh_token": refresh_token,
                    "client_id": CLIENT_ID,
                })
                response.raise_for_status()
                new_token_data = response.json()

            creds_from_file["access_token"] = new_token_data["access_token"]
            creds_from_file["refresh_token"] = new_token_data.get("refresh_token", creds_from_file["refresh_token"])
            # Store expiry in milliseconds, matching the on-disk format.
            creds_from_file["expiry_date"] = (time.time() + new_token_data["expires_in"]) * 1000

            await self._save_credentials(path, creds_from_file)
            lib_logger.info(f"Successfully refreshed Qwen OAuth token for '{Path(path).name}'.")
            return creds_from_file

    async def get_auth_header(self, credential_path: str) -> Dict[str, str]:
        """Return a Bearer Authorization header, refreshing the token if stale."""
        creds = await self._load_credentials(credential_path)
        if self._is_token_expired(creds):
            creds = await self._refresh_token(credential_path)
        return {"Authorization": f"Bearer {creds['access_token']}"}

    def get_api_details(self, credential_path: str) -> Tuple[str, str]:
        """Return (base_url, access_token) for already-loaded credentials.

        Raises KeyError if the path has not been loaded via _load_credentials
        or get_auth_header first.
        """
        creds = self._credentials_cache[credential_path]
        base_url = creds.get("resource_url", "https://dashscope.aliyuncs.com/compatible-mode/v1")
        if not base_url.startswith("http"):
            base_url = f"https://{base_url}"
        return base_url, creds["access_token"]

    async def proactively_refresh(self, credential_path: str):
        """Refresh the token ahead of time if it is within the expiry buffer."""
        creds = await self._load_credentials(credential_path)
        if self._is_token_expired(creds):
            await self._refresh_token(credential_path)
|
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# src/rotator_library/providers/qwen_code_provider.py

import copy
import logging
from typing import AsyncGenerator, List, Union

import httpx
import litellm

from .provider_interface import ProviderInterface
from .qwen_auth_base import QwenAuthBase
| 9 |
+
|
| 10 |
+
lib_logger = logging.getLogger('rotator_library')
|
| 11 |
+
|
| 12 |
+
# [NEW] Hardcoded model list based on Kilo example
|
| 13 |
+
HARDCODED_MODELS = [
|
| 14 |
+
"qwen3-coder-plus",
|
| 15 |
+
"qwen3-coder-flash"
|
| 16 |
+
]
|
| 17 |
+
|
| 18 |
+
class QwenCodeProvider(QwenAuthBase, ProviderInterface):
    """Custom provider for Qwen Code's OpenAI-compatible API.

    Authenticates with OAuth credentials (via QwenAuthBase), retries once
    with a forced token refresh on a 401, and rewrites streamed
    ``<think>`` segments into ``reasoning_content``.
    """

    def has_custom_logic(self) -> bool:
        return True  # We use custom logic to handle 401 retries and stream parsing

    async def get_models(self, api_key: str, client: httpx.AsyncClient) -> List[str]:
        """Returns a hardcoded list of known compatible Qwen models for the OpenAI-compatible API."""
        return [f"qwen_code/{model_id}" for model_id in HARDCODED_MODELS]

    async def _stream_parser(self, stream: AsyncGenerator, model_id: str) -> AsyncGenerator:
        """Parses the stream from litellm to handle Qwen's <think> tags.

        Text between <think> and </think> is re-emitted as
        ``reasoning_content``; everything else stays in ``content``. The open
        and close tags may arrive in different chunks, so the in-think state
        persists across chunks.
        NOTE(review): content containing a literal '||' would split
        incorrectly — confirm this cannot occur in practice.
        """
        in_think = False
        async for chunk in stream:
            content = chunk.choices[0].delta.content
            if content and ("<think>" in content or "</think>" in content):
                # After split("||") the tag markers are standalone parts
                # ("THINK" / "/THINK"), never prefixes of the text parts.
                parts = content.replace("<think>", "||THINK||").replace("</think>", "||/THINK||").split("||")
                for part in parts:
                    if not part:
                        continue
                    if part == "THINK":
                        in_think = True
                        continue
                    if part == "/THINK":
                        in_think = False
                        continue
                    # Deep copy: a shallow copy would share the delta object
                    # across emitted chunks, so mutating it for a later part
                    # would corrupt chunks already yielded downstream.
                    new_chunk = copy.deepcopy(chunk)
                    if in_think:
                        new_chunk.choices[0].delta.reasoning_content = part
                        new_chunk.choices[0].delta.content = None
                    else:
                        new_chunk.choices[0].delta.content = part
                        new_chunk.choices[0].delta.reasoning_content = None
                    yield new_chunk
            elif in_think and content:
                # A whole chunk of thinking text between tags that arrived in
                # earlier/later chunks.
                new_chunk = copy.deepcopy(chunk)
                new_chunk.choices[0].delta.reasoning_content = content
                new_chunk.choices[0].delta.content = None
                yield new_chunk
            else:
                yield chunk

    async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
        """Performs the completion via litellm against the Qwen endpoint.

        On a 401 AuthenticationError the OAuth token is force-refreshed and
        the call is retried exactly once.
        """
        credential_path = kwargs.pop("credential_identifier")
        model = kwargs["model"]

        async def do_call():
            # Re-read api_base/access_token each attempt: both may change
            # after a token refresh.
            api_base, access_token = self.get_api_details(credential_path)
            return await litellm.acompletion(
                **kwargs, api_key=access_token, api_base=api_base
            )

        try:
            response = await do_call()
        except litellm.AuthenticationError as e:
            if "401" in str(e):
                lib_logger.warning("Qwen Code returned 401. Forcing token refresh and retrying once.")
                await self._refresh_token(credential_path, force=True)
                response = await do_call()
            else:
                raise e

        if kwargs.get("stream"):
            return self._stream_parser(response, model)
        return response
|