Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Oct 3, 2025

Commit

65ec48f

1 Parent(s): d28e7c5

feat(auth): implement robust OAuth credential initialization and management

- Introduce `initialize_token` methods for Gemini and Qwen providers to facilitate interactive OAuth setup upon first use or invalid tokens.
- Enable skipping of interactive OAuth validation at startup via the `SKIP_OAUTH_INIT_CHECK` environment variable for non-interactive environments.
- Allow customization of the background OAuth token refresh interval using the `OAUTH_REFRESH_INTERVAL` environment variable.
- Enhance Gemini project ID discovery with improved mechanisms, caching, and automatic retry logic for 401 Unauthorized errors.
- Implement Qwen-specific error handling for 'slow_down' responses, mapping them to rate limit exceptions, and add 401 retry logic.
- Update `.env.example` to reflect new configuration options for refresh interval and OAuth setup.

Files changed (7) hide show

.env.example +12 -1
src/proxy_app/main.py +21 -0
src/rotator_library/background_refresher.py +10 -3
src/rotator_library/providers/gemini_auth_base.py +58 -7
src/rotator_library/providers/gemini_cli_provider.py +91 -63
src/rotator_library/providers/qwen_auth_base.py +73 -7
src/rotator_library/providers/qwen_code_provider.py +19 -0

.env.example CHANGED Viewed

@@ -21,5 +21,16 @@ GEMINI_CLI_OAUTH_1=
 # Required for Gemini CLI: Your Google Cloud Project ID
 GEMINI_CLI_PROJECT_ID="gen-lang-client-..."
 # For Qwen Code (OpenAI Compatible)
-QWEN_CODE_OAUTH_1=

 # Required for Gemini CLI: Your Google Cloud Project ID
 GEMINI_CLI_PROJECT_ID="gen-lang-client-..."
+# For Gemini CLI (uses a custom API)
+GEMINI_CLI_OAUTH_1= # Leave blank to auto-discover from ~/.gemini/oauth_creds.json
+# Optional: Overrides auto-discovery for Gemini CLI project ID
+GEMINI_CLI_PROJECT_ID=
 # For Qwen Code (OpenAI Compatible)
+QWEN_CODE_OAUTH_1= # Leave blank to auto-discover from ~/.qwen/oauth_creds.json
+# [NEW] Optional: Set background OAuth refresh interval in seconds
+OAUTH_REFRESH_INTERVAL=3600 # Default is 3600 seconds (1 hour)
+# [NEW] Optional: Skip interactive OAuth validation/setup on startup. Set to "true" for non-interactive environments.
+SKIP_OAUTH_INIT_CHECK=false

src/proxy_app/main.py CHANGED Viewed

@@ -163,6 +163,27 @@ for key, value in os.environ.items():
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Manage the RotatingClient's lifecycle with the app's lifespan."""
     # [NEW] Load provider-specific params
     litellm_provider_params = {
         "gemini_cli": {"project_id": os.getenv("GEMINI_CLI_PROJECT_ID")}

 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Manage the RotatingClient's lifecycle with the app's lifespan."""
+    # [MODIFIED] Perform skippable OAuth initialization at startup
+    skip_oauth_init = os.getenv("SKIP_OAUTH_INIT_CHECK", "false").lower() == "true"
+    if not skip_oauth_init:
+        logging.info("Performing OAuth credential validation at startup...")
+        temp_cred_manager = CredentialManager(oauth_credentials)
+        discovered_creds = temp_cred_manager.discover_and_prepare()
+        init_tasks = []
+        for provider, paths in discovered_creds.items():
+            provider_plugin_class = PROVIDER_PLUGINS.get(provider)
+            if provider_plugin_class:
+                provider_instance = provider_plugin_class()
+                if hasattr(provider_instance, 'initialize_token'):
+                    for path in paths:
+                        init_tasks.append(provider_instance.initialize_token(path))
+        if init_tasks:
+            await asyncio.gather(*init_tasks)
+        logging.info("OAuth credential validation complete.")
     # [NEW] Load provider-specific params
     litellm_provider_params = {
         "gemini_cli": {"project_id": os.getenv("GEMINI_CLI_PROJECT_ID")}

src/rotator_library/background_refresher.py CHANGED Viewed

@@ -1,8 +1,9 @@
 # src/rotator_library/background_refresher.py
 import asyncio
 import logging
-from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     from .client import RotatingClient
@@ -14,9 +15,14 @@ class BackgroundRefresher:
     A background task that periodically checks and refreshes OAuth tokens
     to ensure they remain valid.
     """
-    def __init__(self, client: 'RotatingClient', interval_seconds: int = 300):
         self._client = client
-        self._interval = interval_seconds
         self._task: Optional[asyncio.Task] = None
     def start(self):
@@ -24,6 +30,7 @@ class BackgroundRefresher:
         if self._task is None:
             self._task = asyncio.create_task(self._run())
             lib_logger.info(f"Background token refresher started. Check interval: {self._interval} seconds.")
     async def stop(self):
         """Stops the background refresh task."""

 # src/rotator_library/background_refresher.py
+import os
 import asyncio
 import logging
+from typing import TYPE_CHECKING, Optional
 if TYPE_CHECKING:
     from .client import RotatingClient
     A background task that periodically checks and refreshes OAuth tokens
     to ensure they remain valid.
     """
+    def __init__(self, client: 'RotatingClient'):
+        try:
+            interval_str = os.getenv("OAUTH_REFRESH_INTERVAL", "3600")
+            self._interval = int(interval_str)
+        except ValueError:
+            lib_logger.warning(f"Invalid OAUTH_REFRESH_INTERVAL '{interval_str}'. Falling back to 3600s.")
+            self._interval = 3600
         self._client = client
         self._task: Optional[asyncio.Task] = None
     def start(self):
         if self._task is None:
             self._task = asyncio.create_task(self._run())
             lib_logger.info(f"Background token refresher started. Check interval: {self._interval} seconds.")
+            # [NEW] Log if custom interval is set
     async def stop(self):
         """Stops the background refresh task."""

src/rotator_library/providers/gemini_auth_base.py CHANGED Viewed

@@ -1,5 +1,7 @@
 # src/rotator_library/providers/gemini_auth_base.py
 import json
 import time
 import asyncio
@@ -85,12 +87,6 @@ class GeminiAuthBase:
             lib_logger.info(f"Successfully refreshed Gemini OAuth token for '{Path(path).name}'.")
             return creds
-    async def get_auth_header(self, credential_path: str) -> Dict[str, str]:
-        creds = await self._load_credentials(credential_path)
-        if self._is_token_expired(creds):
-            creds = await self._refresh_token(credential_path, creds)
-        return {"Authorization": f"Bearer {creds['access_token']}"}
     async def proactively_refresh(self, credential_path: str):
         creds = await self._load_credentials(credential_path)
         if self._is_token_expired(creds):
@@ -99,4 +95,59 @@ class GeminiAuthBase:
     def _get_lock(self, path: str) -> asyncio.Lock:
         if path not in self._refresh_locks:
             self._refresh_locks[path] = asyncio.Lock()
-        return self._refresh_locks[path]

 # src/rotator_library/providers/gemini_auth_base.py
+import subprocess
+from typing import Optional
 import json
 import time
 import asyncio
             lib_logger.info(f"Successfully refreshed Gemini OAuth token for '{Path(path).name}'.")
             return creds
     async def proactively_refresh(self, credential_path: str):
         creds = await self._load_credentials(credential_path)
         if self._is_token_expired(creds):
     def _get_lock(self, path: str) -> asyncio.Lock:
         if path not in self._refresh_locks:
             self._refresh_locks[path] = asyncio.Lock()
+        return self._refresh_locks[path]
+    # [NEW] Add init flow for invalid/expired tokens
+    async def initialize_token(self, path: str) -> Dict[str, Any]:
+        """Initiates OAuth flow if tokens are missing or invalid."""
+        try:
+            creds = await self._load_credentials(path)
+            if not creds.get("refresh_token") or self._is_token_expired(creds):
+                lib_logger.warning(f"Invalid or missing Gemini OAuth tokens at '{path}'. Initiating setup...")
+                # Use subprocess to run gemini-cli setup or simulate web flow
+                # Based on CLIProxyAPI-main/gemini/gemini_auth.go: Use web flow with local server
+                # For simplicity, prompt user to run manual setup or integrate browser flow
+                print("Gemini CLI OAuth setup required. Please visit the authorization URL and paste the code.")
+                # Simulate getTokenFromWeb logic
+                from urllib.parse import urlencode
+                auth_url = "https://accounts.google.com/oauth2/v2/auth?" + urlencode({
+                    "client_id": CLIENT_ID,
+                    "redirect_uri": "http://localhost:8085/oauth2callback",
+                    "scope": " ".join(["https://www.googleapis.com/auth/cloud-platform", "https://www.googleapis.com/auth/userinfo.email", "https://www.googleapis.com/auth/userinfo.profile"]),
+                    "access_type": "offline",
+                    "response_type": "code",
+                    "prompt": "consent"
+                })
+                print(f"\n--- Gemini OAuth Setup Required for {Path(path).name} ---")
+                print(f"Please open this URL in your browser:\n\n{auth_url}\n")
+                auth_code = input("After authorizing, paste the 'code' from the redirected URL here: ")
+                async with httpx.AsyncClient() as client:
+                    response = await client.post(TOKEN_URI, data={
+                        "code": auth_code.strip(),
+                        "client_id": CLIENT_ID,
+                        "client_secret": CLIENT_SECRET,
+                        "redirect_uri": "http://localhost:8085/oauth2callback",
+                        "grant_type": "authorization_code"
+                    })
+                    response.raise_for_status()
+                    token_data = response.json()
+                    creds = {
+                        "access_token": token_data["access_token"],
+                        "refresh_token": token_data["refresh_token"],
+                        "expiry_date": (time.time() + token_data["expires_in"]) * 1000,
+                        "client_id": CLIENT_ID,
+                        "client_secret": CLIENT_SECRET
+                    }
+                    await self._save_credentials(path, creds)
+                    lib_logger.info(f"Gemini OAuth initialized successfully for '{path}'.")
+                return creds
+            return creds
+        except Exception as e:
+            raise ValueError(f"Failed to initialize Gemini OAuth: {e}")
+    async def get_auth_header(self, credential_path: str) -> Dict[str, str]:
+        creds = await self.initialize_token(credential_path)  # [NEW] Call init if needed
+        if self._is_token_expired(creds):
+            creds = await self._refresh_token(credential_path, creds)
+        return {"Authorization": f"Bearer {creds['access_token']}"}

src/rotator_library/providers/gemini_cli_provider.py CHANGED Viewed

@@ -18,35 +18,52 @@ CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com/v1internal"
 class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
     def __init__(self):
         super().__init__()
-        self.project_id: Optional[str] = None
-    async def _discover_project_id(self, litellm_params: Dict[str, Any]) -> str:
-        """Discovers the Google Cloud Project ID."""
-        if self.project_id:
-            return self.project_id
-        # 1. Prioritize explicitly configured project_id
         if litellm_params.get("project_id"):
-            self.project_id = litellm_params["project_id"]
-            lib_logger.info(f"Using configured Gemini CLI project ID: {self.project_id}")
-            return self.project_id
-        # 2. Fallback: Look for .env file in the standard .gemini directory
         try:
-            gemini_env_path = Path.home() / ".gemini" / ".env"
-            if gemini_env_path.exists():
-                with open(gemini_env_path, 'r') as f:
-                    for line in f:
-                        if line.startswith("GOOGLE_CLOUD_PROJECT="):
-                            self.project_id = line.strip().split("=")[1]
-                            lib_logger.info(f"Discovered Gemini CLI project ID from ~/.gemini/.env: {self.project_id}")
-                            return self.project_id
-        except Exception as e:
-            lib_logger.warning(f"Could not read project ID from ~/.gemini/.env: {e}")
         raise ValueError(
-            "Gemini CLI project ID not found. Please set `GEMINI_CLI_PROJECT_ID` in your main .env file "
-            "or ensure it is present in `~/.gemini/.env`."
         )
     def has_custom_logic(self) -> bool:
         return True
@@ -109,53 +126,64 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
     async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
         model = kwargs["model"]
         credential_path = kwargs.pop("credential_identifier")
-        auth_header = await self.get_auth_header(credential_path)
-        project_id = await self._discover_project_id(kwargs.get("litellm_params", {}))
-        # Handle :thinking suffix from Kilo example
-        model_name = model.split('/')[-1]
-        enable_thinking = model_name.endswith(':thinking')
-        if enable_thinking:
-            model_name = model_name.replace(':thinking', '')
-        gen_config = {
-            "temperature": kwargs.get("temperature", 0.7),
-            "maxOutputTokens": kwargs.get("max_tokens", 8192),
-        }
-        if enable_thinking:
-            gen_config["thinkingConfig"] = {"thinkingBudget": -1}
-        request_payload = {
-            "model": model_name,
-            "project": project_id,
-            "request": {
-                "contents": self._transform_messages(kwargs.get("messages", [])),
-                "generationConfig": gen_config,
-            },
-        }
-        url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
-        async def stream_handler():
-            async with client.stream("POST", url, headers=auth_header, json=request_payload, params={"alt": "sse"}, timeout=600) as response:
-                response.raise_for_status()
-                async for line in response.aiter_lines():
-                    if line.startswith('data: '):
-                        data_str = line[6:]
-                        if data_str == "[DONE]": break
-                        try:
-                            chunk = json.loads(data_str)
-                            openai_chunk = self._convert_chunk_to_openai(chunk, model)
-                            yield litellm.ModelResponse(**openai_chunk)
-                        except json.JSONDecodeError:
-                            lib_logger.warning(f"Could not decode JSON from Gemini CLI: {line}")
         if kwargs.get("stream", False):
-            return stream_handler()
         else:
             # Accumulate stream for non-streaming response
-            chunks = [chunk async for chunk in stream_handler()]
             return litellm.utils.stream_to_completion_response(chunks)
     # [NEW] Hardcoded model list based on Kilo example

 class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
     def __init__(self):
         super().__init__()
+        self.project_id_cache: Dict[str, str] = {} # Cache project ID per credential path
+    async def _discover_project_id(self, credential_path: str, access_token: str, litellm_params: Dict[str, Any]) -> str:
+        """Discovers the Google Cloud Project ID, with caching."""
+        if credential_path in self.project_id_cache:
+            return self.project_id_cache[credential_path]
         if litellm_params.get("project_id"):
+            project_id = litellm_params["project_id"]
+            lib_logger.info(f"Using configured Gemini CLI project ID: {project_id}")
+            self.project_id_cache[credential_path] = project_id
+            return project_id
+        headers = {'Authorization': f'Bearer {access_token}', 'Content-Type': 'application/json'}
+        # 1. Try Gemini-specific discovery endpoint
+        try:
+            async with httpx.AsyncClient() as client:
+                response = await client.post(f"{CODE_ASSIST_ENDPOINT}:loadCodeAssist", headers=headers, json={"metadata": {"pluginType": "GEMINI"}})
+                response.raise_for_status()
+                data = response.json()
+                if data.get('cloudaicompanionProject'):
+                    project_id = data['cloudaicompanionProject']
+                    lib_logger.info(f"Discovered Gemini project ID via loadCodeAssist: {project_id}")
+                    self.project_id_cache[credential_path] = project_id
+                    return project_id
+        except httpx.RequestError as e:
+            lib_logger.warning(f"Gemini loadCodeAssist failed, falling back to project listing: {e}")
+        # 2. Fallback to listing all available GCP projects
         try:
+            async with httpx.AsyncClient() as client:
+                response = await client.get("https://cloudresourcemanager.googleapis.com/v1/projects", headers=headers)
+                response.raise_for_status()
+                projects = response.json().get('projects', [])
+                active_projects = [p for p in projects if p.get('lifecycleState') == 'ACTIVE']
+                if active_projects:
+                    project_id = active_projects[0]['projectId']
+                    lib_logger.info(f"Discovered Gemini project ID from active projects list: {project_id}")
+                    self.project_id_cache[credential_path] = project_id
+                    return project_id
+        except httpx.RequestError as e:
+            lib_logger.error(f"Failed to list GCP projects: {e}")
         raise ValueError(
+            "Could not auto-discover Gemini project ID. Please set GEMINI_CLI_PROJECT_ID in your .env file."
         )
     def has_custom_logic(self) -> bool:
         return True
     async def acompletion(self, client: httpx.AsyncClient, **kwargs) -> Union[litellm.ModelResponse, AsyncGenerator[litellm.ModelResponse, None]]:
         model = kwargs["model"]
         credential_path = kwargs.pop("credential_identifier")
+        async def do_call():
+            auth_header = await self.get_auth_header(credential_path)
+            project_id = await self._discover_project_id(credential_path, auth_header['Authorization'].split(' ')[1], kwargs.get("litellm_params", {}))
+            # Handle :thinking suffix from Kilo example
+            model_name = model.split('/')[-1]
+            enable_thinking = model_name.endswith(':thinking')
+            if enable_thinking:
+                model_name = model_name.replace(':thinking', '')
+            gen_config = {
+                "temperature": kwargs.get("temperature", 0.7),
+                "maxOutputTokens": kwargs.get("max_tokens", 8192),
+            }
+            if enable_thinking:
+                gen_config["thinkingConfig"] = {"thinkingBudget": -1}
+            request_payload = {
+                "model": model_name,
+                "project": project_id,
+                "request": {
+                    "contents": self._transform_messages(kwargs.get("messages", [])),
+                    "generationConfig": gen_config,
+                },
+            }
+            url = f"{CODE_ASSIST_ENDPOINT}:streamGenerateContent"
+            async def stream_handler():
+                async with client.stream("POST", url, headers=auth_header, json=request_payload, params={"alt": "sse"}, timeout=600) as response:
+                    response.raise_for_status()
+                    async for line in response.aiter_lines():
+                        if line.startswith('data: '):
+                            data_str = line[6:]
+                            if data_str == "[DONE]": break
+                            try:
+                                chunk = json.loads(data_str)
+                                openai_chunk = self._convert_chunk_to_openai(chunk, model)
+                                yield litellm.ModelResponse(**openai_chunk)
+                            except json.JSONDecodeError:
+                                lib_logger.warning(f"Could not decode JSON from Gemini CLI: {line}")
+            return stream_handler()
+        try:
+            response_gen = await do_call()
+        except httpx.HTTPStatusError as e:
+            if e.response.status_code == 401:
+                lib_logger.warning("Gemini CLI returned 401. Forcing token refresh and retrying once.")
+                await self._refresh_token(credential_path, force=True)
+                response_gen = await do_call()
+            else:
+                raise e
         if kwargs.get("stream", False):
+            return response_gen
         else:
             # Accumulate stream for non-streaming response
+            chunks = [chunk async for chunk in response_gen]
             return litellm.utils.stream_to_completion_response(chunks)
     # [NEW] Hardcoded model list based on Kilo example

src/rotator_library/providers/qwen_auth_base.py CHANGED Viewed

@@ -1,5 +1,8 @@
 # src/rotator_library/providers/qwen_auth_base.py
 import json
 import time
 import asyncio
@@ -77,12 +80,6 @@ class QwenAuthBase:
             lib_logger.info(f"Successfully refreshed Qwen OAuth token for '{Path(path).name}'.")
             return creds_from_file
-    async def get_auth_header(self, credential_path: str) -> Dict[str, str]:
-        creds = await self._load_credentials(credential_path)
-        if self._is_token_expired(creds):
-            creds = await self._refresh_token(credential_path)
-        return {"Authorization": f"Bearer {creds['access_token']}"}
     def get_api_details(self, credential_path: str) -> Tuple[str, str]:
         creds = self._credentials_cache[credential_path]
         base_url = creds.get("resource_url", "https://dashscope.aliyuncs.com/compatible-mode/v1")
@@ -98,4 +95,73 @@ class QwenAuthBase:
     def _get_lock(self, path: str) -> asyncio.Lock:
         if path not in self._refresh_locks:
             self._refresh_locks[path] = asyncio.Lock()
-        return self._refresh_locks[path]

 # src/rotator_library/providers/qwen_auth_base.py
+import secrets
+import hashlib
+import base64
 import json
 import time
 import asyncio
             lib_logger.info(f"Successfully refreshed Qwen OAuth token for '{Path(path).name}'.")
             return creds_from_file
     def get_api_details(self, credential_path: str) -> Tuple[str, str]:
         creds = self._credentials_cache[credential_path]
         base_url = creds.get("resource_url", "https://dashscope.aliyuncs.com/compatible-mode/v1")
     def _get_lock(self, path: str) -> asyncio.Lock:
         if path not in self._refresh_locks:
             self._refresh_locks[path] = asyncio.Lock()
+        return self._refresh_locks[path]
+    # [NEW] Add init flow for invalid/expired tokens
+    async def initialize_token(self, path: str) -> Dict[str, Any]:
+        """Initiates device flow if tokens are missing or invalid."""
+        try:
+            creds = await self._load_credentials(path)
+            if not creds.get("refresh_token") or self._is_token_expired(creds):
+                lib_logger.warning(f"Invalid or missing Qwen OAuth tokens at '{path}'. Initiating device flow...")
+                # Based on CLIProxyAPI-main/qwen/qwen_auth.go: Use device code with PKCE
+                code_verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).decode('utf-8').rstrip('=')
+                code_challenge = base64.urlsafe_b64encode(
+                    hashlib.sha256(code_verifier.encode('utf-8')).digest()
+                ).decode('utf-8').rstrip('=')
+                async with httpx.AsyncClient() as client:
+                    dev_response = await client.post(
+                        "https://chat.qwen.ai/api/v1/oauth2/device/code",
+                        data={
+                            "client_id": CLIENT_ID,
+                            "scope": "openid profile email model.completion",
+                            "code_challenge": code_challenge,
+                            "code_challenge_method": "S256"
+                        }
+                    )
+                    dev_response.raise_for_status()
+                    dev_data = dev_response.json()
+                    print(f"\n--- Qwen OAuth Setup Required for {Path(path).name} ---")
+                    print(f"Please visit: {dev_data['verification_uri_complete']}")
+                    print(f"And enter code: {dev_data['user_code']}\n")
+                    token_data = None
+                    start_time = time.time()
+                    while time.time() - start_time < dev_data['expires_in']:
+                        poll_response = await client.post(
+                            TOKEN_ENDPOINT,
+                            data={
+                                "grant_type": "urn:ietf:params:oauth:grant-type:device_code",
+                                "device_code": dev_data['device_code'],
+                                "client_id": CLIENT_ID,
+                                "code_verifier": code_verifier
+                            }
+                        )
+                        if poll_response.status_code == 200:
+                            token_data = poll_response.json()
+                            break
+                        await asyncio.sleep(dev_data['interval'])
+                    if not token_data:
+                        raise TimeoutError("Qwen device flow timed out.")
+                    creds.update({
+                        "access_token": token_data["access_token"],
+                        "refresh_token": token_data.get("refresh_token"),
+                        "expiry_date": (time.time() + token_data["expires_in"]) * 1000,
+                        "resource_url": token_data.get("resource_url")
+                    })
+                    await self._save_credentials(path, creds)
+                    lib_logger.info(f"Qwen OAuth initialized successfully for '{path}'.")
+                return creds
+            return creds
+        except Exception as e:
+            raise ValueError(f"Failed to initialize Qwen OAuth: {e}")
+    async def get_auth_header(self, credential_path: str) -> Dict[str, str]:
+        creds = await self.initialize_token(credential_path)  # [NEW] Call init if needed
+        if self._is_token_expired(creds):
+            creds = await self._refresh_token(credential_path)
+        return {"Authorization": f"Bearer {creds['access_token']}"}

src/rotator_library/providers/qwen_code_provider.py CHANGED Viewed

@@ -1,5 +1,6 @@
 # src/rotator_library/providers/qwen_code_provider.py
 import httpx
 import logging
 from typing import Union, AsyncGenerator
@@ -31,6 +32,12 @@ class QwenCodeProvider(QwenAuthBase, ProviderInterface):
             if content and ("<think>" in content or "</think>" in content):
                 parts = content.replace("<think>", "||THINK||").replace("</think>", "||/THINK||").split("||")
                 for part in parts:
                     if not part: continue
                     new_chunk = chunk.copy()
                     if part.startswith("THINK||"):
@@ -52,8 +59,15 @@ class QwenCodeProvider(QwenAuthBase, ProviderInterface):
         async def do_call():
             api_base, access_token = self.get_api_details(credential_path)
             response = await litellm.acompletion(
                 **kwargs, api_key=access_token, api_base=api_base
             )
             return response
         try:
@@ -63,6 +77,11 @@ class QwenCodeProvider(QwenAuthBase, ProviderInterface):
                 lib_logger.warning("Qwen Code returned 401. Forcing token refresh and retrying once.")
                 await self._refresh_token(credential_path, force=True)
                 response = await do_call()
             else:
                 raise e

 # src/rotator_library/providers/qwen_code_provider.py
+import litellm.exceptions as litellm_exc
 import httpx
 import logging
 from typing import Union, AsyncGenerator
             if content and ("<think>" in content or "</think>" in content):
                 parts = content.replace("<think>", "||THINK||").replace("</think>", "||/THINK||").split("||")
                 for part in parts:
+                    # [NEW] Check for provider-specific errors in content
+                    if "slow_down" in part.lower():
+                        lib_logger.warning("Qwen 'slow_down' detected in response content. Treating as rate limit.")
+                        raise litellm_exc.RateLimitError(
+                            message="Qwen slow_down error detected.", llm_provider="qwen_code"
+                        )
                     if not part: continue
                     new_chunk = chunk.copy()
                     if part.startswith("THINK||"):
         async def do_call():
             api_base, access_token = self.get_api_details(credential_path)
             response = await litellm.acompletion(
+                # [NEW] Add timeout and retry params if needed, but since rotation handles retries, this is optional
                 **kwargs, api_key=access_token, api_base=api_base
             )
+            # [NEW] Post-call check for specific finish reasons or errors
+            if not kwargs.get("stream") and response.choices[0].finish_reason == "slow_down":
+                lib_logger.warning("Qwen 'slow_down' finish reason detected. Treating as rate limit.")
+                raise litellm_exc.RateLimitError(
+                    message="Qwen slow_down finish reason.", llm_provider="qwen_code"
+                )
             return response
         try:
                 lib_logger.warning("Qwen Code returned 401. Forcing token refresh and retrying once.")
                 await self._refresh_token(credential_path, force=True)
                 response = await do_call()
+            # [NEW] Catch provider-specific exceptions
+            elif "slow_down" in str(e).lower():
+                raise litellm_exc.RateLimitError(
+                    message="Qwen slow_down error in exception.", llm_provider="qwen_code"
+                )
             else:
                 raise e