Spaces:
Paused
feat(multi-provider): Implement dynamic API key loading and new endpoints
Refactor API key loading in `main.py` to dynamically support multiple providers from environment variables (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY_1`).
Introduce a pluggable provider system in `src/rotator_library/providers` with an abstract `ProviderInterface` and concrete implementations for:
- Anthropic
- AWS Bedrock
- Cohere
- Google Gemini
- Groq
- Mistral
- OpenAI
Enhance `RotatingClient` to:
- Accept a dictionary of API keys, grouped by provider.
- Dynamically fetch available models from integrated providers with caching.
- Calculate token counts using `litellm.token_counter`.
Add new API endpoints to `main.py`:
- `GET /v1/models`: Lists all available models across configured providers.
- `GET /v1/providers`: Lists all integrated providers.
- `POST /v1/token-count`: Calculates token usage for given messages or text.
Update `UsageManager` to:
- Record approximate costs for completions.
- Utilize `litellm.ModelResponse` for more comprehensive usage tracking.
- Streamline `rotator_library/__init__.py` exports.
- src/proxy_app/main.py +50 -21
- src/rotator_library/__init__.py +2 -15
- src/rotator_library/client.py +56 -11
- src/rotator_library/providers/__init__.py +35 -0
- src/rotator_library/providers/anthropic_provider.py +26 -0
- src/rotator_library/providers/bedrock_provider.py +23 -0
- src/rotator_library/providers/cohere_provider.py +23 -0
- src/rotator_library/providers/gemini_provider.py +23 -0
- src/rotator_library/providers/groq_provider.py +23 -0
- src/rotator_library/providers/mistral_provider.py +23 -0
- src/rotator_library/providers/openai_provider.py +23 -0
- src/rotator_library/providers/provider_interface.py +21 -0
- src/rotator_library/usage_manager.py +17 -5
|
@@ -10,10 +10,10 @@ import sys
|
|
| 10 |
# Add the 'src' directory to the Python path to allow importing 'rotating_api_key_client'
|
| 11 |
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
| 12 |
|
| 13 |
-
from rotator_library import RotatingClient
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
-
logging.basicConfig(level=logging.INFO)
|
| 17 |
|
| 18 |
# Load environment variables from .env file
|
| 19 |
load_dotenv()
|
|
@@ -23,27 +23,21 @@ PROXY_API_KEY = os.getenv("PROXY_API_KEY")
|
|
| 23 |
if not PROXY_API_KEY:
|
| 24 |
raise ValueError("PROXY_API_KEY environment variable not set.")
|
| 25 |
|
| 26 |
-
# Load all
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
i += 1
|
| 39 |
-
else:
|
| 40 |
-
break
|
| 41 |
-
|
| 42 |
-
if not gemini_keys:
|
| 43 |
-
raise ValueError("No GEMINI_API_KEY or GEMINI_API_KEY_n environment variables found.")
|
| 44 |
|
| 45 |
# Initialize the rotating client
|
| 46 |
-
rotating_client = RotatingClient(api_keys=
|
| 47 |
|
| 48 |
# --- FastAPI App Setup ---
|
| 49 |
app = FastAPI()
|
|
@@ -79,3 +73,38 @@ async def chat_completions(request: Request, _=Depends(verify_api_key)):
|
|
| 79 |
@app.get("/")
|
| 80 |
def read_root():
|
| 81 |
return {"Status": "API Key Proxy is running"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# Add the 'src' directory to the Python path to allow importing 'rotating_api_key_client'
|
| 11 |
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
| 12 |
|
| 13 |
+
from rotator_library import RotatingClient, PROVIDER_PLUGINS
|
| 14 |
|
| 15 |
# Configure logging
|
| 16 |
+
logging.basicConfig(level=logging.INFO) #-> moved to the rotator_library
|
| 17 |
|
| 18 |
# Load environment variables from .env file
|
| 19 |
load_dotenv()
|
|
|
|
| 23 |
if not PROXY_API_KEY:
|
| 24 |
raise ValueError("PROXY_API_KEY environment variable not set.")
|
| 25 |
|
| 26 |
+
# Load all provider API keys from environment variables.
# Recognized forms: "<PROVIDER>_API_KEY" (single key) and
# "<PROVIDER>_API_KEY_<n>" (multiple keys per provider), e.g.
# OPENAI_API_KEY, ANTHROPIC_API_KEY_1.
api_keys = {}
for key, value in os.environ.items():
    # Bug fix: PROXY_API_KEY ends with "_API_KEY" but is this proxy's own
    # auth secret, not a provider credential — it must not be registered
    # as a provider named "proxy".
    if key == "PROXY_API_KEY":
        continue
    if not value:
        # Ignore empty values so a blank env var doesn't enter rotation.
        continue
    if key.endswith("_API_KEY") or "_API_KEY_" in key:
        provider = key.split("_API_KEY")[0].lower()
        api_keys.setdefault(provider, []).append(value)

if not api_keys:
    raise ValueError("No provider API keys found in environment variables.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
# Initialize the rotating client
|
| 40 |
+
rotating_client = RotatingClient(api_keys=api_keys)
|
| 41 |
|
| 42 |
# --- FastAPI App Setup ---
|
| 43 |
app = FastAPI()
|
|
|
|
| 73 |
@app.get("/")
|
| 74 |
def read_root():
|
| 75 |
return {"Status": "API Key Proxy is running"}
|
| 76 |
+
|
| 77 |
+
@app.get("/v1/models")
|
| 78 |
+
async def list_models(_=Depends(verify_api_key)):
|
| 79 |
+
"""
|
| 80 |
+
Returns a list of available models from all configured providers.
|
| 81 |
+
"""
|
| 82 |
+
models = await rotating_client.get_all_available_models()
|
| 83 |
+
return {"data": models}
|
| 84 |
+
|
| 85 |
+
@app.get("/v1/providers")
|
| 86 |
+
async def list_providers(_=Depends(verify_api_key)):
|
| 87 |
+
"""
|
| 88 |
+
Returns a list of all available providers.
|
| 89 |
+
"""
|
| 90 |
+
return {"data": list(PROVIDER_PLUGINS.keys())}
|
| 91 |
+
|
| 92 |
+
@app.post("/v1/token-count")
|
| 93 |
+
async def token_count(request: Request, _=Depends(verify_api_key)):
|
| 94 |
+
"""
|
| 95 |
+
Calculates the token count for a given list of messages and a model.
|
| 96 |
+
"""
|
| 97 |
+
try:
|
| 98 |
+
data = await request.json()
|
| 99 |
+
model = data.get("model")
|
| 100 |
+
messages = data.get("messages")
|
| 101 |
+
|
| 102 |
+
if not model or not messages:
|
| 103 |
+
raise HTTPException(status_code=400, detail="'model' and 'messages' are required.")
|
| 104 |
+
|
| 105 |
+
count = rotating_client.token_count(model=model, messages=messages)
|
| 106 |
+
return {"token_count": count}
|
| 107 |
+
|
| 108 |
+
except Exception as e:
|
| 109 |
+
logging.error(f"Token count failed: {e}")
|
| 110 |
+
raise HTTPException(status_code=500, detail=str(e))
|
|
@@ -1,17 +1,4 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Rotating API Key Client
|
| 3 |
-
"""
|
| 4 |
from .client import RotatingClient
|
| 5 |
-
from .
|
| 6 |
-
from .error_handler import is_authentication_error, is_rate_limit_error, is_server_error, is_unrecoverable_error
|
| 7 |
-
from .failure_logger import log_failure
|
| 8 |
|
| 9 |
-
__all__ = [
|
| 10 |
-
"RotatingClient",
|
| 11 |
-
"UsageManager",
|
| 12 |
-
"is_authentication_error",
|
| 13 |
-
"is_rate_limit_error",
|
| 14 |
-
"is_server_error",
|
| 15 |
-
"is_unrecoverable_error",
|
| 16 |
-
"log_failure",
|
| 17 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from .client import RotatingClient
|
| 2 |
+
from .providers import PROVIDER_PLUGINS
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
__all__ = ["RotatingClient", "PROVIDER_PLUGINS"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -1,29 +1,31 @@
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import litellm
|
|
|
|
| 4 |
import logging
|
| 5 |
from typing import List, Dict, Any, AsyncGenerator
|
| 6 |
|
| 7 |
from src.rotator_library.usage_manager import UsageManager
|
| 8 |
from src.rotator_library.failure_logger import log_failure
|
| 9 |
-
from src.rotator_library.error_handler import
|
| 10 |
-
|
| 11 |
-
is_rate_limit_error,
|
| 12 |
-
is_server_error,
|
| 13 |
-
is_unrecoverable_error,
|
| 14 |
-
)
|
| 15 |
|
| 16 |
class RotatingClient:
|
| 17 |
"""
|
| 18 |
A client that intelligently rotates and retries API keys using LiteLLM,
|
| 19 |
with support for both streaming and non-streaming responses.
|
| 20 |
"""
|
| 21 |
-
def __init__(self, api_keys: List[str], max_retries: int = 2, usage_file_path: str = "key_usage.json"):
|
|
|
|
| 22 |
if not api_keys:
|
| 23 |
-
raise ValueError("API keys
|
| 24 |
self.api_keys = api_keys
|
| 25 |
self.max_retries = max_retries
|
| 26 |
self.usage_manager = UsageManager(file_path=usage_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
async def _streaming_wrapper(self, stream: Any, key: str, model: str) -> AsyncGenerator[Any, None]:
|
| 29 |
"""
|
|
@@ -42,7 +44,7 @@ class RotatingClient:
|
|
| 42 |
# Safely check for usage data in the chunk
|
| 43 |
if hasattr(chunk, 'usage') and chunk.usage:
|
| 44 |
logging.info(f"Usage found in chunk for key ...{key[-4:]}: {chunk.usage}")
|
| 45 |
-
self.usage_manager.record_success(key, model, chunk
|
| 46 |
|
| 47 |
finally:
|
| 48 |
# Signal the end of the stream
|
|
@@ -61,9 +63,13 @@ class RotatingClient:
|
|
| 61 |
if not model:
|
| 62 |
raise ValueError("'model' is a required parameter.")
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
while True: # Loop until a key succeeds or we decide to give up
|
| 65 |
current_key = self.usage_manager.get_next_smart_key(
|
| 66 |
-
available_keys=self.api_keys,
|
| 67 |
model=model
|
| 68 |
)
|
| 69 |
|
|
@@ -82,7 +88,7 @@ class RotatingClient:
|
|
| 82 |
return self._streaming_wrapper(response, current_key, model)
|
| 83 |
else:
|
| 84 |
# For non-streams, we can log usage immediately.
|
| 85 |
-
self.usage_manager.record_success(current_key, model, response
|
| 86 |
return response
|
| 87 |
|
| 88 |
except Exception as e:
|
|
@@ -108,3 +114,42 @@ class RotatingClient:
|
|
| 108 |
print(f"Key ...{current_key[-4:]} failed permanently. Rotating...")
|
| 109 |
self.usage_manager.record_rotation_error(current_key, model, e)
|
| 110 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import asyncio
|
| 2 |
import json
|
| 3 |
import litellm
|
| 4 |
+
from litellm.litellm_core_utils.token_counter import token_counter
|
| 5 |
import logging
|
| 6 |
from typing import List, Dict, Any, AsyncGenerator
|
| 7 |
|
| 8 |
from src.rotator_library.usage_manager import UsageManager
|
| 9 |
from src.rotator_library.failure_logger import log_failure
|
| 10 |
+
from src.rotator_library.error_handler import is_server_error, is_unrecoverable_error
|
| 11 |
+
from src.rotator_library.providers import PROVIDER_PLUGINS
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
class RotatingClient:
|
| 14 |
"""
|
| 15 |
A client that intelligently rotates and retries API keys using LiteLLM,
|
| 16 |
with support for both streaming and non-streaming responses.
|
| 17 |
"""
|
| 18 |
+
def __init__(self, api_keys: Dict[str, List[str]], max_retries: int = 2, usage_file_path: str = "key_usage.json"):
    """
    Initialize the client with per-provider key pools.

    Args:
        api_keys: Mapping of provider name to its list of API keys.
        max_retries: Attempts per key before rotating to the next one.
        usage_file_path: JSON file used to persist key-usage statistics.

    Raises:
        ValueError: If *api_keys* is empty.
    """
    litellm.set_verbose = False
    if not api_keys:
        raise ValueError("API keys dictionary cannot be empty.")
    self.api_keys = api_keys
    self.max_retries = max_retries
    self.usage_manager = UsageManager(file_path=usage_file_path)
    # Per-provider cache of fetched model lists, filled lazily.
    self._model_list_cache = {}
    # One plugin instance per registered provider.
    self._provider_instances = {name: plugin() for name, plugin in PROVIDER_PLUGINS.items()}
|
| 29 |
|
| 30 |
async def _streaming_wrapper(self, stream: Any, key: str, model: str) -> AsyncGenerator[Any, None]:
|
| 31 |
"""
|
|
|
|
| 44 |
# Safely check for usage data in the chunk
|
| 45 |
if hasattr(chunk, 'usage') and chunk.usage:
|
| 46 |
logging.info(f"Usage found in chunk for key ...{key[-4:]}: {chunk.usage}")
|
| 47 |
+
self.usage_manager.record_success(key, model, chunk)
|
| 48 |
|
| 49 |
finally:
|
| 50 |
# Signal the end of the stream
|
|
|
|
| 63 |
if not model:
|
| 64 |
raise ValueError("'model' is a required parameter.")
|
| 65 |
|
| 66 |
+
provider = model.split('/')[0]
|
| 67 |
+
if provider not in self.api_keys:
|
| 68 |
+
raise ValueError(f"No API keys configured for provider: {provider}")
|
| 69 |
+
|
| 70 |
while True: # Loop until a key succeeds or we decide to give up
|
| 71 |
current_key = self.usage_manager.get_next_smart_key(
|
| 72 |
+
available_keys=self.api_keys[provider],
|
| 73 |
model=model
|
| 74 |
)
|
| 75 |
|
|
|
|
| 88 |
return self._streaming_wrapper(response, current_key, model)
|
| 89 |
else:
|
| 90 |
# For non-streams, we can log usage immediately.
|
| 91 |
+
self.usage_manager.record_success(current_key, model, response)
|
| 92 |
return response
|
| 93 |
|
| 94 |
except Exception as e:
|
|
|
|
| 114 |
print(f"Key ...{current_key[-4:]} failed permanently. Rotating...")
|
| 115 |
self.usage_manager.record_rotation_error(current_key, model, e)
|
| 116 |
break
|
| 117 |
+
|
| 118 |
+
def token_count(self, model: str, text: str = None, messages: List[Dict[str, str]] = None) -> int:
    """
    Return the token count for *messages* (preferred) or *text* under *model*.

    Raises:
        ValueError: If neither 'text' nor 'messages' is provided.
    """
    if messages:
        return token_counter(model=model, messages=messages)
    if text:
        return token_counter(model=model, text=text)
    raise ValueError("Either 'text' or 'messages' must be provided.")
|
| 128 |
+
|
| 129 |
+
async def get_available_models(self, provider: str) -> List[str]:
    """
    Return the model list for *provider*, caching the result per provider.

    Returns an empty list when no key is configured for the provider or when
    no plugin implements model discovery for it.
    """
    if provider in self._model_list_cache:
        return self._model_list_cache[provider]

    # Bug fix: the previous `self.api_keys.get(provider, [None])[0]` raised
    # IndexError when the provider mapped to an empty key list.
    keys = self.api_keys.get(provider) or []
    api_key = keys[0] if keys else None
    if not api_key:
        return []

    plugin = self._provider_instances.get(provider)
    if plugin is None:
        logging.warning(f"Model list fetching not implemented for provider: {provider}")
        return []

    models = await plugin.get_models(api_key)
    self._model_list_cache[provider] = models
    return models
|
| 147 |
+
|
| 148 |
+
async def get_all_available_models(self) -> Dict[str, List[str]]:
    """
    Return available models for every configured provider, keyed by provider.

    Each value is the (possibly empty) list from get_available_models().
    """
    providers = list(self.api_keys.keys())
    # Improvement: query all providers concurrently instead of awaiting each
    # one sequentially; results keep the provider order.
    model_lists = await asyncio.gather(
        *(self.get_available_models(provider) for provider in providers)
    )
    return dict(zip(providers, model_lists))
|
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import importlib
import logging
import pkgutil
from typing import Dict, Type

from .provider_interface import ProviderInterface

logger = logging.getLogger(__name__)

# --- Provider Plugin System ---

# Maps provider name (e.g. "openai") to its ProviderInterface subclass.
PROVIDER_PLUGINS: Dict[str, Type[ProviderInterface]] = {}


def _register_providers():
    """
    Dynamically discover and import provider plugins from this directory.

    Each module containing a ProviderInterface subclass is registered under a
    name derived from its module name (e.g. 'openai_provider' -> 'openai').
    """
    for _, module_name, _ in pkgutil.iter_modules(__path__):
        # Import the module by its full dotted path.
        module = importlib.import_module(f"{__name__}.{module_name}")

        # Look for classes that inherit from ProviderInterface.
        for attribute_name in dir(module):
            attribute = getattr(module, attribute_name)
            if (
                isinstance(attribute, type)
                and issubclass(attribute, ProviderInterface)
                and attribute is not ProviderInterface
            ):
                provider_name = module_name.replace("_provider", "")
                PROVIDER_PLUGINS[provider_name] = attribute
                # Improvement: log instead of print so library consumers
                # control output verbosity via the logging config.
                logger.info("Registered provider: %s", provider_name)


# Discover and register providers when the package is imported.
_register_providers()
|
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import logging
|
| 3 |
+
from typing import List
|
| 4 |
+
from .provider_interface import ProviderInterface
|
| 5 |
+
|
| 6 |
+
class AnthropicProvider(ProviderInterface):
    """
    Provider implementation for the Anthropic API.
    """

    async def get_models(self, api_key: str) -> List[str]:
        """
        Fetch the available model ids from the Anthropic API.

        Returns ids prefixed with "anthropic/" for routing, or an empty list
        on any request failure.
        """
        def _fetch() -> List[str]:
            response = requests.get(
                "https://api.anthropic.com/v1/models",
                headers={
                    "x-api-key": api_key,
                    "anthropic-version": "2023-06-01",
                },
                timeout=10,  # bug fix: no timeout could hang the caller forever
            )
            response.raise_for_status()
            return [f"anthropic/{model['id']}" for model in response.json().get("data", [])]

        try:
            # Bug fix: requests is synchronous; running it directly inside an
            # async def blocks the event loop. Run it in a worker thread.
            return await asyncio.to_thread(_fetch)
        except requests.RequestException as e:
            logging.error(f"Failed to fetch Anthropic models: {e}")
            return []
|
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from typing import List
|
| 3 |
+
from .provider_interface import ProviderInterface
|
| 4 |
+
|
| 5 |
+
class BedrockProvider(ProviderInterface):
    """
    Provider implementation for AWS Bedrock.
    """

    async def get_models(self, api_key: str) -> List[str]:
        """
        Return a hardcoded list of common Bedrock models, as there is no
        simple, unauthenticated API endpoint to list them.
        """
        # NOTE: real Bedrock discovery requires AWS credentials and boto3;
        # for a simple key-based proxy a static selection suffices. This can
        # be expanded with full AWS authentication if needed.
        logging.info("Returning hardcoded list for Bedrock. Full discovery requires AWS auth.")
        static_models = (
            "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
            "bedrock/anthropic.claude-3-haiku-20240307-v1:0",
            "bedrock/cohere.command-r-plus-v1:0",
            "bedrock/mistral.mistral-large-2402-v1:0",
        )
        return list(static_models)
|
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import logging
|
| 3 |
+
from typing import List
|
| 4 |
+
from .provider_interface import ProviderInterface
|
| 5 |
+
|
| 6 |
+
class CohereProvider(ProviderInterface):
    """
    Provider implementation for the Cohere API.
    """

    async def get_models(self, api_key: str) -> List[str]:
        """
        Fetch the available model names from the Cohere API.

        Returns names prefixed with "cohere/" for routing, or an empty list
        on any request failure.
        """
        def _fetch() -> List[str]:
            response = requests.get(
                "https://api.cohere.ai/v1/models",
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=10,  # bug fix: no timeout could hang the caller forever
            )
            response.raise_for_status()
            return [f"cohere/{model['name']}" for model in response.json().get("models", [])]

        try:
            # Bug fix: requests is synchronous; running it directly inside an
            # async def blocks the event loop. Run it in a worker thread.
            return await asyncio.to_thread(_fetch)
        except requests.RequestException as e:
            logging.error(f"Failed to fetch Cohere models: {e}")
            return []
|
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import logging
|
| 3 |
+
from typing import List
|
| 4 |
+
from .provider_interface import ProviderInterface
|
| 5 |
+
|
| 6 |
+
class GeminiProvider(ProviderInterface):
    """
    Provider implementation for the Google Gemini API.
    """

    async def get_models(self, api_key: str) -> List[str]:
        """
        Fetch the available model names from the Google Gemini API.

        Strips the "models/" prefix the API returns and re-prefixes with
        "gemini/" for routing; returns an empty list on any request failure.
        """
        def _fetch() -> List[str]:
            response = requests.get(
                "https://generativelanguage.googleapis.com/v1beta/models",
                headers={"x-goog-api-key": api_key},
                timeout=10,  # bug fix: no timeout could hang the caller forever
            )
            response.raise_for_status()
            return [
                f"gemini/{model['name'].replace('models/', '')}"
                for model in response.json().get("models", [])
            ]

        try:
            # Bug fix: requests is synchronous; running it directly inside an
            # async def blocks the event loop. Run it in a worker thread.
            return await asyncio.to_thread(_fetch)
        except requests.RequestException as e:
            logging.error(f"Failed to fetch Gemini models: {e}")
            return []
|
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import logging
|
| 3 |
+
from typing import List
|
| 4 |
+
from .provider_interface import ProviderInterface
|
| 5 |
+
|
| 6 |
+
class GroqProvider(ProviderInterface):
    """
    Provider implementation for the Groq API.
    """

    async def get_models(self, api_key: str) -> List[str]:
        """
        Fetch the available model ids from the Groq API.

        Returns ids prefixed with "groq/" for routing, or an empty list on
        any request failure.
        """
        def _fetch() -> List[str]:
            response = requests.get(
                "https://api.groq.com/openai/v1/models",
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=10,  # bug fix: no timeout could hang the caller forever
            )
            response.raise_for_status()
            return [f"groq/{model['id']}" for model in response.json().get("data", [])]

        try:
            # Bug fix: requests is synchronous; running it directly inside an
            # async def blocks the event loop. Run it in a worker thread.
            return await asyncio.to_thread(_fetch)
        except requests.RequestException as e:
            logging.error(f"Failed to fetch Groq models: {e}")
            return []
|
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import logging
|
| 3 |
+
from typing import List
|
| 4 |
+
from .provider_interface import ProviderInterface
|
| 5 |
+
|
| 6 |
+
class MistralProvider(ProviderInterface):
    """
    Provider implementation for the Mistral API.
    """

    async def get_models(self, api_key: str) -> List[str]:
        """
        Fetch the available model ids from the Mistral API.

        Returns ids prefixed with "mistral/" for routing, or an empty list
        on any request failure.
        """
        def _fetch() -> List[str]:
            response = requests.get(
                "https://api.mistral.ai/v1/models",
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=10,  # bug fix: no timeout could hang the caller forever
            )
            response.raise_for_status()
            return [f"mistral/{model['id']}" for model in response.json().get("data", [])]

        try:
            # Bug fix: requests is synchronous; running it directly inside an
            # async def blocks the event loop. Run it in a worker thread.
            return await asyncio.to_thread(_fetch)
        except requests.RequestException as e:
            logging.error(f"Failed to fetch Mistral models: {e}")
            return []
|
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import logging
|
| 3 |
+
from typing import List
|
| 4 |
+
from .provider_interface import ProviderInterface
|
| 5 |
+
|
| 6 |
+
class OpenAIProvider(ProviderInterface):
    """
    Provider implementation for the OpenAI API.
    """

    async def get_models(self, api_key: str) -> List[str]:
        """
        Fetch the available model ids from the OpenAI API.

        Returns ids prefixed with "openai/" for routing, or an empty list
        on any request failure.
        """
        def _fetch() -> List[str]:
            response = requests.get(
                "https://api.openai.com/v1/models",
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=10,  # bug fix: no timeout could hang the caller forever
            )
            response.raise_for_status()
            return [f"openai/{model['id']}" for model in response.json().get("data", [])]

        try:
            # Bug fix: requests is synchronous; running it directly inside an
            # async def blocks the event loop. Run it in a worker thread.
            return await asyncio.to_thread(_fetch)
        except requests.RequestException as e:
            logging.error(f"Failed to fetch OpenAI models: {e}")
            return []
|
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from abc import ABC, abstractmethod
|
| 2 |
+
from typing import List, Any
|
| 3 |
+
|
| 4 |
+
class ProviderInterface(ABC):
    """
    Interface for API provider-specific functionality, primarily for
    discovering available models.
    """

    @abstractmethod
    async def get_models(self, api_key: str) -> List[str]:
        """
        Fetch the available model names from the provider's API.

        Args:
            api_key: The API key required for authentication.

        Returns:
            A list of model name strings.
        """
        ...
|
|
@@ -4,6 +4,7 @@ import time
|
|
| 4 |
from datetime import date, datetime
|
| 5 |
from typing import Dict, List, Optional, Any
|
| 6 |
from filelock import FileLock
|
|
|
|
| 7 |
|
| 8 |
class UsageManager:
|
| 9 |
"""
|
|
@@ -42,10 +43,11 @@ class UsageManager:
|
|
| 42 |
# Add yesterday's daily stats to global stats
|
| 43 |
global_data = data.setdefault("global", {"models": {}})
|
| 44 |
for model, stats in daily_data.get("models", {}).items():
|
| 45 |
-
global_model_stats = global_data["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0})
|
| 46 |
global_model_stats["success_count"] += stats.get("success_count", 0)
|
| 47 |
global_model_stats["prompt_tokens"] += stats.get("prompt_tokens", 0)
|
| 48 |
global_model_stats["completion_tokens"] += stats.get("completion_tokens", 0)
|
|
|
|
| 49 |
|
| 50 |
# Reset daily stats
|
| 51 |
data["daily"] = {"date": today_str, "models": {}}
|
|
@@ -82,7 +84,7 @@ class UsageManager:
|
|
| 82 |
|
| 83 |
return best_key if best_key else active_keys[0]
|
| 84 |
|
| 85 |
-
def record_success(self, key: str, model: str,
|
| 86 |
key_data = self.usage_data.setdefault(key, {"daily": {"date": date.today().isoformat(), "models": {}}, "global": {"models": {}}, "cooldown_until": None})
|
| 87 |
|
| 88 |
# Ensure daily stats are for today
|
|
@@ -90,12 +92,22 @@ class UsageManager:
|
|
| 90 |
self._reset_daily_stats_if_needed() # Should be rare, but as a safeguard
|
| 91 |
key_data = self.usage_data[key]
|
| 92 |
|
| 93 |
-
daily_model_data = key_data["daily"]["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0})
|
| 94 |
|
|
|
|
| 95 |
daily_model_data["success_count"] += 1
|
| 96 |
-
daily_model_data["prompt_tokens"] += usage.
|
| 97 |
-
daily_model_data["completion_tokens"] += usage.
|
| 98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
key_data["last_used_ts"] = time.time()
|
| 100 |
self._save_usage()
|
| 101 |
|
|
|
|
| 4 |
from datetime import date, datetime
|
| 5 |
from typing import Dict, List, Optional, Any
|
| 6 |
from filelock import FileLock
|
| 7 |
+
import litellm
|
| 8 |
|
| 9 |
class UsageManager:
|
| 10 |
"""
|
|
|
|
| 43 |
# Add yesterday's daily stats to global stats
|
| 44 |
global_data = data.setdefault("global", {"models": {}})
|
| 45 |
for model, stats in daily_data.get("models", {}).items():
|
| 46 |
+
global_model_stats = global_data["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0, "approx_cost": 0.0})
|
| 47 |
global_model_stats["success_count"] += stats.get("success_count", 0)
|
| 48 |
global_model_stats["prompt_tokens"] += stats.get("prompt_tokens", 0)
|
| 49 |
global_model_stats["completion_tokens"] += stats.get("completion_tokens", 0)
|
| 50 |
+
global_model_stats["approx_cost"] += stats.get("approx_cost", 0.0)
|
| 51 |
|
| 52 |
# Reset daily stats
|
| 53 |
data["daily"] = {"date": today_str, "models": {}}
|
|
|
|
| 84 |
|
| 85 |
return best_key if best_key else active_keys[0]
|
| 86 |
|
| 87 |
+
def record_success(self, key: str, model: str, completion_response: litellm.ModelResponse):
|
| 88 |
key_data = self.usage_data.setdefault(key, {"daily": {"date": date.today().isoformat(), "models": {}}, "global": {"models": {}}, "cooldown_until": None})
|
| 89 |
|
| 90 |
# Ensure daily stats are for today
|
|
|
|
| 92 |
self._reset_daily_stats_if_needed() # Should be rare, but as a safeguard
|
| 93 |
key_data = self.usage_data[key]
|
| 94 |
|
| 95 |
+
daily_model_data = key_data["daily"]["models"].setdefault(model, {"success_count": 0, "prompt_tokens": 0, "completion_tokens": 0, "approx_cost": 0.0})
|
| 96 |
|
| 97 |
+
usage = completion_response.usage
|
| 98 |
daily_model_data["success_count"] += 1
|
| 99 |
+
daily_model_data["prompt_tokens"] += usage.prompt_tokens
|
| 100 |
+
daily_model_data["completion_tokens"] += usage.completion_tokens
|
| 101 |
|
| 102 |
+
# Calculate approximate cost using LiteLLM
|
| 103 |
+
try:
|
| 104 |
+
cost = litellm.completion_cost(
|
| 105 |
+
completion_response=completion_response
|
| 106 |
+
)
|
| 107 |
+
daily_model_data["approx_cost"] += cost
|
| 108 |
+
except Exception as e:
|
| 109 |
+
print(f"Warning: Could not calculate cost for model {model}: {e}")
|
| 110 |
+
|
| 111 |
key_data["last_used_ts"] = time.time()
|
| 112 |
self._save_usage()
|
| 113 |
|