Spaces:

elmerzole
/

llm-api-proxy

Paused

Mirrowel committed on Jul 9, 2025

Commit

4caa52c

1 Parent(s): 15c96ab

feat(logging): centralize and enhance application logging

Overhaul the application's logging infrastructure for better control, clarity, and operational visibility.

- Centralize logging configuration in `main.py` with file and console handlers.
- Add `src/proxy_app/provider_urls.py` for dynamic provider endpoint resolution, enhancing request context.
- Introduce `log_request_to_console` to provide real-time, concise summaries of incoming API requests.
- Migrate `RotatingClient` and other modules from `print()` statements to Python's `logging` module.
- Ensure `RotatingClient` logs propagate correctly and adjust internal log levels for clearer diagnostics.
- Suppress verbose output from external libraries (uvicorn, httpx, litellm) for focused logs.
- Improve error logging in `request_logger.py` to use `logging.error`.

Files changed (4) hide show

src/proxy_app/main.py +45 -8
src/proxy_app/provider_urls.py +65 -0
src/proxy_app/request_logger.py +27 -2
src/rotator_library/client.py +35 -41

src/proxy_app/main.py CHANGED Viewed

@@ -16,6 +16,7 @@ from pydantic import BaseModel
 import argparse
 import litellm
 # --- Pydantic Models ---
 class EmbeddingRequest(BaseModel):
     model: str
@@ -36,12 +37,34 @@ args, _ = parser.parse_known_args()
 sys.path.append(str(Path(__file__).resolve().parent.parent))
 from rotator_library import RotatingClient, PROVIDER_PLUGINS
-from proxy_app.request_logger import log_request_response
 from proxy_app.batch_manager import EmbeddingBatcher
-# Configure logging
-logging.basicConfig(level=logging.INFO)
 # Load environment variables from .env file
 load_dotenv()
@@ -70,16 +93,17 @@ if not api_keys:
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Manage the RotatingClient's lifecycle with the app's lifespan."""
     client = RotatingClient(api_keys=api_keys, configure_logging=True)
     app.state.rotating_client = client
     if USE_EMBEDDING_BATCHER:
         batcher = EmbeddingBatcher(client=client)
         app.state.embedding_batcher = batcher
-        print("RotatingClient and EmbeddingBatcher initialized.")
     else:
         app.state.embedding_batcher = None
-        print("RotatingClient initialized (EmbeddingBatcher disabled).")
     yield
@@ -88,9 +112,9 @@ async def lifespan(app: FastAPI):
     await client.close()
     if app.state.embedding_batcher:
-        print("RotatingClient and EmbeddingBatcher closed.")
     else:
-        print("RotatingClient closed.")
 # --- FastAPI App Setup ---
 app = FastAPI(lifespan=lifespan)
@@ -261,6 +285,12 @@ async def chat_completions(
     """
     try:
         request_data = await request.json()
         is_streaming = request_data.get("stream", False)
         if is_streaming:
@@ -323,6 +353,13 @@ async def embeddings(
     - False: Passes requests directly to the provider.
     """
     try:
         if USE_EMBEDDING_BATCHER and batcher:
             # --- Server-Side Batching Logic ---
             request_data = body.model_dump(exclude_none=True)

 import argparse
 import litellm
 # --- Pydantic Models ---
 class EmbeddingRequest(BaseModel):
     model: str
 sys.path.append(str(Path(__file__).resolve().parent.parent))
 from rotator_library import RotatingClient, PROVIDER_PLUGINS
+from proxy_app.request_logger import log_request_response, log_request_to_console
 from proxy_app.batch_manager import EmbeddingBatcher
+# --- Logging Configuration ---
+LOG_DIR = Path(__file__).resolve().parent.parent / "logs"
+LOG_DIR.mkdir(exist_ok=True)
+# Configure a file handler that writes timestamped records (INFO and above) to logs/proxy.log
+file_handler = logging.FileHandler(LOG_DIR / "proxy.log", encoding="utf-8")
+file_handler.setLevel(logging.INFO)
+file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+# Configure a console handler for concise, high-level info
+console_handler = logging.StreamHandler(sys.stdout)
+console_handler.setLevel(logging.INFO)
+console_handler.setFormatter(logging.Formatter('%(message)s'))
+# Get the root logger and add the handlers
+root_logger = logging.getLogger()
+root_logger.setLevel(logging.INFO) # Set root to INFO
+root_logger.addHandler(file_handler)
+root_logger.addHandler(console_handler)
+# Silence other noisy loggers by setting their level higher than root
+logging.getLogger("uvicorn").setLevel(logging.WARNING)
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("litellm").setLevel(logging.WARNING)
 # Load environment variables from .env file
 load_dotenv()
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Manage the RotatingClient's lifecycle with the app's lifespan."""
+    # The client now uses the root logger configuration
     client = RotatingClient(api_keys=api_keys, configure_logging=True)
     app.state.rotating_client = client
     if USE_EMBEDDING_BATCHER:
         batcher = EmbeddingBatcher(client=client)
         app.state.embedding_batcher = batcher
+        logging.info("RotatingClient and EmbeddingBatcher initialized.")
     else:
         app.state.embedding_batcher = None
+        logging.info("RotatingClient initialized (EmbeddingBatcher disabled).")
     yield
     await client.close()
     if app.state.embedding_batcher:
+        logging.info("RotatingClient and EmbeddingBatcher closed.")
     else:
+        logging.info("RotatingClient closed.")
 # --- FastAPI App Setup ---
 app = FastAPI(lifespan=lifespan)
     """
     try:
         request_data = await request.json()
+        log_request_to_console(
+            url=str(request.url),
+            headers=dict(request.headers),
+            client_info=(request.client.host, request.client.port),
+            request_data=request_data
+        )
         is_streaming = request_data.get("stream", False)
         if is_streaming:
     - False: Passes requests directly to the provider.
     """
     try:
+        request_data = body.model_dump(exclude_none=True)
+        log_request_to_console(
+            url=str(request.url),
+            headers=dict(request.headers),
+            client_info=(request.client.host, request.client.port),
+            request_data=request_data
+        )
         if USE_EMBEDDING_BATCHER and batcher:
             # --- Server-Side Batching Logic ---
             request_data = body.model_dump(exclude_none=True)

src/proxy_app/provider_urls.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from typing import Optional
+# A comprehensive map of provider names to their base URLs.
+PROVIDER_URL_MAP = {
+    "perplexity": "https://api.perplexity.ai",
+    "anyscale": "https://api.endpoints.anyscale.com/v1",
+    "deepinfra": "https://api.deepinfra.com/v1/openai",
+    "mistral": "https://api.mistral.ai/v1",
+    "groq": "https://api.groq.com/openai/v1",
+    "nvidia_nim": "https://integrate.api.nvidia.com/v1",
+    "cerebras": "https://api.cerebras.ai/v1",
+    "sambanova": "https://api.sambanova.ai/v1",
+    "ai21_chat": "https://api.ai21.com/studio/v1",
+    "codestral": "https://codestral.mistral.ai/v1",
+    "text-completion-codestral": "https://codestral.mistral.ai/v1",
+    "empower": "https://app.empower.dev/api/v1",
+    "deepseek": "https://api.deepseek.com/v1",
+    "friendliai": "https://api.friendli.ai/serverless/v1",
+    "galadriel": "https://api.galadriel.com/v1",
+    "meta_llama": "https://api.llama.com/compat/v1",
+    "featherless_ai": "https://api.featherless.ai/v1",
+    "nscale": "https://api.nscale.com/v1",
+    "openai": "https://api.openai.com/v1",
+    "gemini": "https://generativelanguage.googleapis.com/v1beta",
+    "anthropic": "https://api.anthropic.com/v1",
+    "cohere": "https://api.cohere.ai/v1",
+    "bedrock": "https://bedrock-runtime.us-east-1.amazonaws.com",
+    "openrouter": "https://openrouter.ai/api/v1",
+}
+def get_provider_endpoint(provider: str, model_name: str, incoming_path: str) -> Optional[str]:
+    """
+    Constructs the full provider endpoint URL based on the provider and incoming request path.
+    """
+    base_url = PROVIDER_URL_MAP.get(provider)
+    if not base_url:
+        return None
+    # Determine the specific action from the incoming path (e.g., 'chat/completions')
+    action = incoming_path.split('/v1/', 1)[-1] if '/v1/' in incoming_path else incoming_path
+    # --- Provider-specific endpoint structures ---
+    if provider == "gemini":
+        if action == "chat/completions":
+            return f"{base_url}/models/{model_name}:generateContent"
+        elif action == "embeddings":
+            return f"{base_url}/models/{model_name}:embedContent"
+    elif provider == "anthropic":
+        if action == "chat/completions":
+            return f"{base_url}/messages"
+    elif provider == "cohere":
+        if action == "chat/completions":
+            return f"{base_url}/chat"
+        elif action == "embeddings":
+            return f"{base_url}/embed"
+    # Default for OpenAI-compatible providers
+    # Most of these have /v1 in the base URL already, so we just append the action.
+    if base_url.endswith(("/v1", "/v1/openai")):
+        return f"{base_url}/{action}"
+    # Fallback for other cases
+    return f"{base_url}/v1/{action}"

src/proxy_app/request_logger.py CHANGED Viewed

@@ -3,7 +3,10 @@ import os
 from datetime import datetime
 from pathlib import Path
 import uuid
-from typing import Literal
 LOGS_DIR = Path(__file__).resolve().parent.parent.parent / "logs"
 COMPLETIONS_LOGS_DIR = LOGS_DIR / "completions"
@@ -14,6 +17,27 @@ LOGS_DIR.mkdir(exist_ok=True)
 COMPLETIONS_LOGS_DIR.mkdir(exist_ok=True)
 EMBEDDINGS_LOGS_DIR.mkdir(exist_ok=True)
 def log_request_response(
     request_data: dict,
     response_data: dict,
@@ -48,4 +72,5 @@ def log_request_response(
     except Exception as e:
         # In case of logging failure, we don't want to crash the main application
-        print(f"Error logging request/response: {e}")

 from datetime import datetime
 from pathlib import Path
 import uuid
+from typing import Literal, Dict
+import logging
+from .provider_urls import get_provider_endpoint
 LOGS_DIR = Path(__file__).resolve().parent.parent.parent / "logs"
 COMPLETIONS_LOGS_DIR = LOGS_DIR / "completions"
 COMPLETIONS_LOGS_DIR.mkdir(exist_ok=True)
 EMBEDDINGS_LOGS_DIR.mkdir(exist_ok=True)
+def log_request_to_console(url: str, headers: dict, client_info: tuple, request_data: dict):
+    """
+    Logs a concise, single-line summary of an incoming request to the console.
+    """
+    time_str = datetime.now().strftime("%H:%M")
+    model_full = request_data.get("model", "N/A")
+    provider = "N/A"
+    model_name = model_full
+    endpoint_url = "N/A"
+    if '/' in model_full:
+        parts = model_full.split('/', 1)
+        provider = parts[0]
+        model_name = parts[1]
+        # Use the helper function to get the full endpoint URL
+        endpoint_url = get_provider_endpoint(provider, model_name, url) or "N/A"
+    log_message = f"{time_str} - {client_info[0]}:{client_info[1]} - provider: {provider}, model: {model_name} - {endpoint_url}"
+    logging.info(log_message)
 def log_request_response(
     request_data: dict,
     response_data: dict,
     except Exception as e:
         # In case of logging failure, we don't want to crash the main application
+        # Report the failure through the root logger (whatever handlers the application attached) instead of print().
+        logging.error(f"Error logging request/response to file: {e}")

src/rotator_library/client.py CHANGED Viewed

@@ -10,11 +10,11 @@ import logging
 from typing import List, Dict, Any, AsyncGenerator, Optional, Union
 lib_logger = logging.getLogger('rotator_library')
 lib_logger.propagate = False
-if not lib_logger.handlers:
-    lib_logger.addHandler(logging.NullHandler())
 from .usage_manager import UsageManager
 from .failure_logger import log_failure
 from .error_handler import classify_error, AllProviders
@@ -34,13 +34,17 @@ class RotatingClient:
     with support for both streaming and non-streaming responses.
     """
     def __init__(self, api_keys: Dict[str, List[str]], max_retries: int = 2, usage_file_path: str = "key_usage.json", configure_logging: bool = True):
-        os.environ["LITELLM_LOG"] = "ERROR"
-        litellm.set_verbose = False
-        litellm.drop_params = True
         if configure_logging:
             lib_logger.propagate = True
-            if any(isinstance(h, logging.NullHandler) for h in lib_logger.handlers):
-                lib_logger.handlers = [h for h in lib_logger.handlers if not isinstance(h, logging.NullHandler)]
         if not api_keys:
             raise ValueError("API keys dictionary cannot be empty.")
         self.api_keys = api_keys
@@ -103,7 +107,7 @@ class RotatingClient:
         try:
             while True:
                 if request and await request.is_disconnected():
-                    lib_logger.warning(f"Client disconnected. Aborting stream for key ...{key[-4:]}.")
                     # Do not yield [DONE] because the client is gone.
                     # The 'finally' block will handle key release.
                     break
@@ -111,7 +115,7 @@ class RotatingClient:
                 try:
                     chunk = await stream_iterator.__anext__()
                     if json_buffer:
-                        lib_logger.warning(f"Discarding incomplete JSON buffer: {json_buffer}")
                         json_buffer = ""
                     yield f"data: {json.dumps(chunk.dict())}\n\n"
@@ -123,7 +127,7 @@ class RotatingClient:
                 except StopAsyncIteration:
                     stream_completed = True
                     if json_buffer:
-                        lib_logger.warning(f"Stream ended with incomplete data in buffer: {json_buffer}")
                     break
                 except Exception as e:
@@ -132,7 +136,7 @@ class RotatingClient:
                         json_buffer += raw_chunk
                         parsed_data = json.loads(json_buffer)
-                        lib_logger.info(f"Successfully reassembled JSON from buffer: {json_buffer}")
                         if "error" in parsed_data:
                             lib_logger.warning(f"Reassembled object is an API error. Passing it to the client and raising internally.")
@@ -144,7 +148,7 @@ class RotatingClient:
                         json_buffer = ""
                     except json.JSONDecodeError:
-                        lib_logger.info(f"Buffer still incomplete. Waiting for more chunks: {json_buffer}")
                         continue
                     except StreamedAPIError:
                         # Re-raise to be caught by the outer handler
@@ -240,17 +244,15 @@ class RotatingClient:
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         classified_error = classify_error(e)
                         error_message = str(e).split('\n')[0]
-                        print(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                         if classified_error.status_code == 429:
                             cooldown_duration = classified_error.retry_after or 60
                             await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
-                            print(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
-                            lib_logger.error(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
                         await self.usage_manager.record_failure(current_key, model, classified_error)
-                        print(f"Key ...{current_key[-4:]} encountered a rate limit. Trying next key.")
-                        lib_logger.warning(f"Key ...{current_key[-4:]} encountered a rate limit. Trying next key.")
                         break # Move to the next key
                     except (APIConnectionError, litellm.InternalServerError, litellm.ServiceUnavailableError) as e:
@@ -261,14 +263,12 @@ class RotatingClient:
                         if attempt >= self.max_retries - 1:
                             error_message = str(e).split('\n')[0]
-                            print(f"Key ...{current_key[-4:]} failed after {self.max_retries} retries with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
-                            lib_logger.warning(f"Key ...{current_key[-4:]} failed after {self.max_retries} retries for a server-side error. Trying next key.")
                             break # Move to the next key
                         wait_time = classified_error.retry_after or (1 * (2 ** attempt)) + random.uniform(0, 1)
                         error_message = str(e).split('\n')[0]
-                        print(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Retrying in {wait_time:.2f} seconds.")
-                        lib_logger.info(f"Server-side error with key ...{current_key[-4:]}. Retrying in {wait_time:.2f} seconds.")
                         await asyncio.sleep(wait_time)
                         continue # Retry with the same key
@@ -282,12 +282,11 @@ class RotatingClient:
                         classified_error = classify_error(e)
                         error_message = str(e).split('\n')[0]
-                        print(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                         if classified_error.status_code == 429:
                             cooldown_duration = classified_error.retry_after or 60
                             await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
-                            print(f"IP-based rate limit detected for {provider} from generic exception. Starting a {cooldown_duration}-second global cooldown.")
-                            lib_logger.error(f"IP-based rate limit detected for {provider} from generic exception. Starting a {cooldown_duration}-second global cooldown.")
                         if classified_error.error_type in ['invalid_request', 'context_window_exceeded', 'authentication']:
                             # For these errors, we should not retry with other keys.
@@ -366,17 +365,15 @@ class RotatingClient:
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         classified_error = classify_error(e)
                         error_message = str(e).split('\n')[0]
-                        print(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                         if classified_error.error_type == 'rate_limit' and classified_error.status_code == 429:
                             cooldown_duration = classified_error.retry_after or 60
                             await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
-                            print(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
-                            lib_logger.error(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
                         await self.usage_manager.record_failure(current_key, model, classified_error)
-                        print(f"Key ...{current_key[-4:]} failed during stream initiation. Trying next key.")
-                        lib_logger.warning(f"Key ...{current_key[-4:]} failed during stream initiation. Trying next key.")
                         break # Break inner loop to try next key
                     except (APIConnectionError, litellm.InternalServerError, litellm.ServiceUnavailableError) as e:
@@ -387,14 +384,12 @@ class RotatingClient:
                         if attempt >= self.max_retries - 1:
                             error_message = str(e).split('\n')[0]
-                            print(f"Key ...{current_key[-4:]} failed after {self.max_retries} retries with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
-                            lib_logger.warning(f"Key ...{current_key[-4:]} failed after {self.max_retries} retries for a server-side error. Trying next key.")
                             break # Move to the next key
                         wait_time = classified_error.retry_after or (1 * (2 ** attempt)) + random.uniform(0, 1)
                         error_message = str(e).split('\n')[0]
-                        print(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Retrying in {wait_time:.2f} seconds.")
-                        lib_logger.info(f"Server-side error with key ...{current_key[-4:]}. Retrying in {wait_time:.2f} seconds.")
                         await asyncio.sleep(wait_time)
                         continue # Retry with the same key
@@ -403,13 +398,12 @@ class RotatingClient:
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         classified_error = classify_error(e)
                         error_message = str(e).split('\n')[0]
-                        print(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                         if classified_error.status_code == 429:
                             cooldown_duration = classified_error.retry_after or 60
                             await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
-                            print(f"IP-based rate limit detected for {provider} from generic stream exception. Starting a {cooldown_duration}-second global cooldown.")
-                            lib_logger.error(f"IP-based rate limit detected for {provider} from generic stream exception. Starting a {cooldown_duration}-second global cooldown.")
                         if classified_error.error_type in ['invalid_request', 'context_window_exceeded', 'authentication']:
                             raise last_exception # Do not retry for these errors
@@ -463,9 +457,9 @@ class RotatingClient:
     async def get_available_models(self, provider: str) -> List[str]:
         """Returns a list of available models for a specific provider, with caching."""
-        lib_logger.info(f"Getting available models for provider: {provider}")
         if provider in self._model_list_cache:
-            lib_logger.info(f"Returning cached models for provider: {provider}")
             return self._model_list_cache[provider]
         keys_for_provider = self.api_keys.get(provider)
@@ -481,9 +475,9 @@ class RotatingClient:
         if provider_instance:
             for api_key in shuffled_keys:
                 try:
-                    lib_logger.info(f"Attempting to get models for {provider} with key ...{api_key[-4:]}")
                     models = await provider_instance.get_models(api_key, self.http_client)
-                    lib_logger.info(f"Got {len(models)} models for provider: {provider}")
                     self._model_list_cache[provider] = models
                     return models
                 except Exception as e:

 from typing import List, Dict, Any, AsyncGenerator, Optional, Union
 lib_logger = logging.getLogger('rotator_library')
+# Library logs do not propagate by default. RotatingClient enables
+# propagation when constructed with configure_logging=True, which lets the
+# root logger set up in main.py control log levels and handlers centrally.
 lib_logger.propagate = False
 from .usage_manager import UsageManager
 from .failure_logger import log_failure
 from .error_handler import classify_error, AllProviders
     with support for both streaming and non-streaming responses.
     """
     def __init__(self, api_keys: Dict[str, List[str]], max_retries: int = 2, usage_file_path: str = "key_usage.json", configure_logging: bool = True):
         if configure_logging:
+            # When True, this allows logs from this library to be handled
+            # by the parent application's logging configuration.
             lib_logger.propagate = True
+            # Replace any handlers attached directly to this logger with a
+            # NullHandler so records are emitted only once, via propagation.
+            if lib_logger.hasHandlers():
+                lib_logger.handlers.clear()
+                lib_logger.addHandler(logging.NullHandler())
+        else:
+            lib_logger.propagate = False
         if not api_keys:
             raise ValueError("API keys dictionary cannot be empty.")
         self.api_keys = api_keys
         try:
             while True:
                 if request and await request.is_disconnected():
+                    lib_logger.info(f"Client disconnected. Aborting stream for key ...{key[-4:]}.")
                     # Do not yield [DONE] because the client is gone.
                     # The 'finally' block will handle key release.
                     break
                 try:
                     chunk = await stream_iterator.__anext__()
                     if json_buffer:
+                        lib_logger.debug(f"Discarding incomplete JSON buffer: {json_buffer}")
                         json_buffer = ""
                     yield f"data: {json.dumps(chunk.dict())}\n\n"
                 except StopAsyncIteration:
                     stream_completed = True
                     if json_buffer:
+                        lib_logger.debug(f"Stream ended with incomplete data in buffer: {json_buffer}")
                     break
                 except Exception as e:
                         json_buffer += raw_chunk
                         parsed_data = json.loads(json_buffer)
+                        lib_logger.debug(f"Successfully reassembled JSON from buffer: {json_buffer}")
                         if "error" in parsed_data:
                             lib_logger.warning(f"Reassembled object is an API error. Passing it to the client and raising internally.")
                         json_buffer = ""
                     except json.JSONDecodeError:
+                        lib_logger.debug(f"Buffer still incomplete. Waiting for more chunks: {json_buffer}")
                         continue
                     except StreamedAPIError:
                         # Re-raise to be caught by the outer handler
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         classified_error = classify_error(e)
                         error_message = str(e).split('\n')[0]
+                        lib_logger.warning(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                         if classified_error.status_code == 429:
                             cooldown_duration = classified_error.retry_after or 60
                             await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
+                            lib_logger.warning(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
                         await self.usage_manager.record_failure(current_key, model, classified_error)
+                        lib_logger.info(f"Key ...{current_key[-4:]} encountered a rate limit. Trying next key.")
                         break # Move to the next key
                     except (APIConnectionError, litellm.InternalServerError, litellm.ServiceUnavailableError) as e:
                         if attempt >= self.max_retries - 1:
                             error_message = str(e).split('\n')[0]
+                            lib_logger.warning(f"Key ...{current_key[-4:]} failed after {self.max_retries} retries with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                             break # Move to the next key
                         wait_time = classified_error.retry_after or (1 * (2 ** attempt)) + random.uniform(0, 1)
                         error_message = str(e).split('\n')[0]
+                        lib_logger.warning(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Retrying in {wait_time:.2f} seconds.")
                         await asyncio.sleep(wait_time)
                         continue # Retry with the same key
                         classified_error = classify_error(e)
                         error_message = str(e).split('\n')[0]
+                        lib_logger.warning(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                         if classified_error.status_code == 429:
                             cooldown_duration = classified_error.retry_after or 60
                             await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
+                            lib_logger.warning(f"IP-based rate limit detected for {provider} from generic exception. Starting a {cooldown_duration}-second global cooldown.")
                         if classified_error.error_type in ['invalid_request', 'context_window_exceeded', 'authentication']:
                             # For these errors, we should not retry with other keys.
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         classified_error = classify_error(e)
                         error_message = str(e).split('\n')[0]
+                        lib_logger.warning(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                         if classified_error.error_type == 'rate_limit' and classified_error.status_code == 429:
                             cooldown_duration = classified_error.retry_after or 60
                             await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
+                            lib_logger.warning(f"IP-based rate limit detected for {provider}. Starting a {cooldown_duration}-second global cooldown.")
                         await self.usage_manager.record_failure(current_key, model, classified_error)
+                        lib_logger.info(f"Key ...{current_key[-4:]} failed during stream initiation. Trying next key.")
                         break # Break inner loop to try next key
                     except (APIConnectionError, litellm.InternalServerError, litellm.ServiceUnavailableError) as e:
                         if attempt >= self.max_retries - 1:
                             error_message = str(e).split('\n')[0]
+                            lib_logger.warning(f"Key ...{current_key[-4:]} failed after {self.max_retries} retries with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                             break # Move to the next key
                         wait_time = classified_error.retry_after or (1 * (2 ** attempt)) + random.uniform(0, 1)
                         error_message = str(e).split('\n')[0]
+                        lib_logger.warning(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Retrying in {wait_time:.2f} seconds.")
                         await asyncio.sleep(wait_time)
                         continue # Retry with the same key
                         log_failure(api_key=current_key, model=model, attempt=attempt + 1, error=e, request_data=kwargs)
                         classified_error = classify_error(e)
                         error_message = str(e).split('\n')[0]
+                        lib_logger.warning(f"Key ...{current_key[-4:]} failed with {classified_error.error_type} (Status: {classified_error.status_code}). Error: {error_message}. Rotating key.")
                         if classified_error.status_code == 429:
                             cooldown_duration = classified_error.retry_after or 60
                             await self.cooldown_manager.start_cooldown(provider, cooldown_duration)
+                            lib_logger.warning(f"IP-based rate limit detected for {provider} from generic stream exception. Starting a {cooldown_duration}-second global cooldown.")
                         if classified_error.error_type in ['invalid_request', 'context_window_exceeded', 'authentication']:
                             raise last_exception # Do not retry for these errors
     async def get_available_models(self, provider: str) -> List[str]:
         """Returns a list of available models for a specific provider, with caching."""
+        lib_logger.debug(f"Getting available models for provider: {provider}")
         if provider in self._model_list_cache:
+            lib_logger.debug(f"Returning cached models for provider: {provider}")
             return self._model_list_cache[provider]
         keys_for_provider = self.api_keys.get(provider)
         if provider_instance:
             for api_key in shuffled_keys:
                 try:
+                    lib_logger.debug(f"Attempting to get models for {provider} with key ...{api_key[-4:]}")
                     models = await provider_instance.get_models(api_key, self.http_client)
+                    lib_logger.debug(f"Got {len(models)} models for provider: {provider}")
                     self._model_list_cache[provider] = models
                     return models
                 except Exception as e: