Spaces:
Paused
Paused
Mirrowel committed on
Commit ·
3b4c51c
1
Parent(s): 2ed5bb6
refactor(auth): generalize credential handling and improve model matching
Browse files

Refactors the RotatingClient and provider interfaces to use the generic 'credential' term instead of 'api_key'. This ensures consistency when handling non-key credentials, such as OAuth file paths.
Specific changes include:
- Renaming internal property references from 'api_keys' to 'all_credentials'.
- Updating provider interface method signatures (`get_models`) from `api_key` to `credential`.
- Enhancing model matching (whitelist/blacklist) to correctly match patterns against both the full proxy ID and the provider's native model name (e.g., allowing wildcards to match 'gemma-7b' when the full ID is 'google/gemma-7b').
src/rotator_library/client.py
CHANGED
|
@@ -97,42 +97,62 @@ class RotatingClient:
|
|
| 97 |
def _is_model_ignored(self, provider: str, model_id: str) -> bool:
|
| 98 |
"""
|
| 99 |
Checks if a model should be ignored based on the ignore list.
|
| 100 |
-
Supports exact and partial matching.
|
| 101 |
"""
|
| 102 |
-
|
|
|
|
| 103 |
return False
|
| 104 |
|
| 105 |
-
ignore_list = self.ignore_models[
|
| 106 |
if ignore_list == ['*']:
|
| 107 |
return True
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
return True
|
| 113 |
else:
|
| 114 |
-
# Exact match
|
| 115 |
-
if model_id
|
| 116 |
return True
|
| 117 |
return False
|
| 118 |
|
| 119 |
def _is_model_whitelisted(self, provider: str, model_id: str) -> bool:
|
| 120 |
"""
|
| 121 |
Checks if a model is explicitly whitelisted.
|
| 122 |
-
Supports exact and partial matching.
|
| 123 |
"""
|
| 124 |
-
|
|
|
|
| 125 |
return False
|
| 126 |
|
| 127 |
-
whitelist = self.whitelist_models[
|
| 128 |
-
for
|
| 129 |
-
if
|
| 130 |
return True
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
return True
|
| 134 |
else:
|
| 135 |
-
|
|
|
|
| 136 |
return True
|
| 137 |
return False
|
| 138 |
|
|
@@ -918,21 +938,27 @@ class RotatingClient:
|
|
| 918 |
lib_logger.debug(f"Returning cached models for provider: {provider}")
|
| 919 |
return self._model_list_cache[provider]
|
| 920 |
|
| 921 |
-
|
| 922 |
-
if not
|
| 923 |
-
lib_logger.warning(f"No
|
| 924 |
return []
|
| 925 |
|
| 926 |
-
# Create a copy and shuffle it to randomize the starting
|
| 927 |
-
|
| 928 |
-
random.shuffle(
|
| 929 |
|
| 930 |
provider_instance = self._get_provider_instance(provider)
|
| 931 |
if provider_instance:
|
| 932 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 933 |
try:
|
| 934 |
-
|
| 935 |
-
|
|
|
|
|
|
|
| 936 |
lib_logger.info(f"Got {len(models)} models for provider: {provider}")
|
| 937 |
|
| 938 |
# Whitelist and blacklist logic
|
|
@@ -955,7 +981,8 @@ class RotatingClient:
|
|
| 955 |
return final_models
|
| 956 |
except Exception as e:
|
| 957 |
classified_error = classify_error(e)
|
| 958 |
-
|
|
|
|
| 959 |
continue # Try the next credential
|
| 960 |
|
| 961 |
lib_logger.error(f"Failed to get models for provider {provider} after trying all credentials.")
|
|
@@ -964,11 +991,13 @@ class RotatingClient:
|
|
| 964 |
async def get_all_available_models(self, grouped: bool = True) -> Union[Dict[str, List[str]], List[str]]:
|
| 965 |
"""Returns a list of all available models, either grouped by provider or as a flat list."""
|
| 966 |
lib_logger.info("Getting all available models...")
|
| 967 |
-
|
|
|
|
|
|
|
| 968 |
results = await asyncio.gather(*tasks, return_exceptions=True)
|
| 969 |
|
| 970 |
all_provider_models = {}
|
| 971 |
-
for provider, result in zip(
|
| 972 |
if isinstance(result, Exception):
|
| 973 |
lib_logger.error(f"Failed to get models for provider {provider}: {result}")
|
| 974 |
all_provider_models[provider] = []
|
|
|
|
| 97 |
def _is_model_ignored(self, provider: str, model_id: str) -> bool:
|
| 98 |
"""
|
| 99 |
Checks if a model should be ignored based on the ignore list.
|
| 100 |
+
Supports exact and partial matching for both full model IDs and model names.
|
| 101 |
"""
|
| 102 |
+
model_provider = model_id.split('/')[0]
|
| 103 |
+
if model_provider not in self.ignore_models:
|
| 104 |
return False
|
| 105 |
|
| 106 |
+
ignore_list = self.ignore_models[model_provider]
|
| 107 |
if ignore_list == ['*']:
|
| 108 |
return True
|
| 109 |
+
|
| 110 |
+
try:
|
| 111 |
+
# This is the model name as the provider sees it (e.g., "gpt-4" or "google/gemma-7b")
|
| 112 |
+
provider_model_name = model_id.split('/', 1)[1]
|
| 113 |
+
except IndexError:
|
| 114 |
+
provider_model_name = model_id
|
| 115 |
+
|
| 116 |
+
for ignored_pattern in ignore_list:
|
| 117 |
+
if ignored_pattern.endswith('*'):
|
| 118 |
+
match_pattern = ignored_pattern[:-1]
|
| 119 |
+
# Match wildcard against the provider's model name
|
| 120 |
+
if provider_model_name.startswith(match_pattern):
|
| 121 |
return True
|
| 122 |
else:
|
| 123 |
+
# Exact match against the full proxy ID OR the provider's model name
|
| 124 |
+
if model_id == ignored_pattern or provider_model_name == ignored_pattern:
|
| 125 |
return True
|
| 126 |
return False
|
| 127 |
|
| 128 |
def _is_model_whitelisted(self, provider: str, model_id: str) -> bool:
|
| 129 |
"""
|
| 130 |
Checks if a model is explicitly whitelisted.
|
| 131 |
+
Supports exact and partial matching for both full model IDs and model names.
|
| 132 |
"""
|
| 133 |
+
model_provider = model_id.split('/')[0]
|
| 134 |
+
if model_provider not in self.whitelist_models:
|
| 135 |
return False
|
| 136 |
|
| 137 |
+
whitelist = self.whitelist_models[model_provider]
|
| 138 |
+
for whitelisted_pattern in whitelist:
|
| 139 |
+
if whitelisted_pattern == '*':
|
| 140 |
return True
|
| 141 |
+
|
| 142 |
+
try:
|
| 143 |
+
# This is the model name as the provider sees it (e.g., "gpt-4" or "google/gemma-7b")
|
| 144 |
+
provider_model_name = model_id.split('/', 1)[1]
|
| 145 |
+
except IndexError:
|
| 146 |
+
provider_model_name = model_id
|
| 147 |
+
|
| 148 |
+
if whitelisted_pattern.endswith('*'):
|
| 149 |
+
match_pattern = whitelisted_pattern[:-1]
|
| 150 |
+
# Match wildcard against the provider's model name
|
| 151 |
+
if provider_model_name.startswith(match_pattern):
|
| 152 |
return True
|
| 153 |
else:
|
| 154 |
+
# Exact match against the full proxy ID OR the provider's model name
|
| 155 |
+
if model_id == whitelisted_pattern or provider_model_name == whitelisted_pattern:
|
| 156 |
return True
|
| 157 |
return False
|
| 158 |
|
|
|
|
| 938 |
lib_logger.debug(f"Returning cached models for provider: {provider}")
|
| 939 |
return self._model_list_cache[provider]
|
| 940 |
|
| 941 |
+
credentials_for_provider = self.all_credentials.get(provider)
|
| 942 |
+
if not credentials_for_provider:
|
| 943 |
+
lib_logger.warning(f"No credentials for provider: {provider}")
|
| 944 |
return []
|
| 945 |
|
| 946 |
+
# Create a copy and shuffle it to randomize the starting credential
|
| 947 |
+
shuffled_credentials = list(credentials_for_provider)
|
| 948 |
+
random.shuffle(shuffled_credentials)
|
| 949 |
|
| 950 |
provider_instance = self._get_provider_instance(provider)
|
| 951 |
if provider_instance:
|
| 952 |
+
# For providers with hardcoded models (like gemini_cli), we only need to call once.
|
| 953 |
+
# For others, we might need to try multiple keys if one is invalid.
|
| 954 |
+
# The current logic of iterating works for both, as the credential is not
|
| 955 |
+
# always used in get_models.
|
| 956 |
+
for credential in shuffled_credentials:
|
| 957 |
try:
|
| 958 |
+
# Display last 6 chars for API keys, or the filename for OAuth paths
|
| 959 |
+
cred_display = credential[-6:] if not os.path.isfile(credential) else os.path.basename(credential)
|
| 960 |
+
lib_logger.debug(f"Attempting to get models for {provider} with credential ...{cred_display}")
|
| 961 |
+
models = await provider_instance.get_models(credential, self.http_client)
|
| 962 |
lib_logger.info(f"Got {len(models)} models for provider: {provider}")
|
| 963 |
|
| 964 |
# Whitelist and blacklist logic
|
|
|
|
| 981 |
return final_models
|
| 982 |
except Exception as e:
|
| 983 |
classified_error = classify_error(e)
|
| 984 |
+
cred_display = credential[-6:] if not os.path.isfile(credential) else os.path.basename(credential)
|
| 985 |
+
lib_logger.debug(f"Failed to get models for provider {provider} with credential ...{cred_display}: {classified_error.error_type}. Trying next credential.")
|
| 986 |
continue # Try the next credential
|
| 987 |
|
| 988 |
lib_logger.error(f"Failed to get models for provider {provider} after trying all credentials.")
|
|
|
|
| 991 |
async def get_all_available_models(self, grouped: bool = True) -> Union[Dict[str, List[str]], List[str]]:
|
| 992 |
"""Returns a list of all available models, either grouped by provider or as a flat list."""
|
| 993 |
lib_logger.info("Getting all available models...")
|
| 994 |
+
|
| 995 |
+
all_providers = list(self.all_credentials.keys())
|
| 996 |
+
tasks = [self.get_available_models(provider) for provider in all_providers]
|
| 997 |
results = await asyncio.gather(*tasks, return_exceptions=True)
|
| 998 |
|
| 999 |
all_provider_models = {}
|
| 1000 |
+
for provider, result in zip(all_providers, results):
|
| 1001 |
if isinstance(result, Exception):
|
| 1002 |
lib_logger.error(f"Failed to get models for provider {provider}: {result}")
|
| 1003 |
all_provider_models[provider] = []
|
src/rotator_library/providers/gemini_cli_provider.py
CHANGED
|
@@ -15,6 +15,13 @@ lib_logger = logging.getLogger('rotator_library')
|
|
| 15 |
|
| 16 |
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com/v1internal"
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
| 19 |
def __init__(self):
|
| 20 |
super().__init__()
|
|
@@ -197,14 +204,8 @@ class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
|
| 197 |
chunks = [chunk async for chunk in response_gen]
|
| 198 |
return litellm.utils.stream_to_completion_response(chunks)
|
| 199 |
|
| 200 |
-
# [NEW] Hardcoded model list based on Kilo example
|
| 201 |
-
HARDCODED_MODELS = [
|
| 202 |
-
"gemini-2.5-pro",
|
| 203 |
-
"gemini-2.5-flash",
|
| 204 |
-
"gemini-2.5-flash-lite"
|
| 205 |
-
]
|
| 206 |
# Use the shared GeminiAuthBase for auth logic
|
| 207 |
# get_models is not applicable for this custom provider
|
| 208 |
-
async def get_models(self,
|
| 209 |
"""Returns a hardcoded list of known compatible Gemini CLI models."""
|
| 210 |
return [f"gemini_cli/{model_id}" for model_id in HARDCODED_MODELS]
|
|
|
|
| 15 |
|
| 16 |
CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com/v1internal"
|
| 17 |
|
# [NEW] Hardcoded model list based on Kilo example. Module-level so both
# the provider class and its get_models implementation can reference it.
HARDCODED_MODELS = [
    "gemini-2.5-pro",
    "gemini-2.5-flash",
    "gemini-2.5-flash-lite",
]
| 24 |
+
|
| 25 |
class GeminiCliProvider(GeminiAuthBase, ProviderInterface):
|
| 26 |
def __init__(self):
|
| 27 |
super().__init__()
|
|
|
|
| 204 |
chunks = [chunk async for chunk in response_gen]
|
| 205 |
return litellm.utils.stream_to_completion_response(chunks)
|
| 206 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
# Use the shared GeminiAuthBase for auth logic
|
| 208 |
# get_models is not applicable for this custom provider
|
| 209 |
+
async def get_models(self, credential: str, client: httpx.AsyncClient) -> List[str]:
|
| 210 |
"""Returns a hardcoded list of known compatible Gemini CLI models."""
|
| 211 |
return [f"gemini_cli/{model_id}" for model_id in HARDCODED_MODELS]
|
src/rotator_library/providers/qwen_code_provider.py
CHANGED
|
@@ -21,7 +21,7 @@ class QwenCodeProvider(QwenAuthBase, ProviderInterface):
|
|
| 21 |
return True # We use custom logic to handle 401 retries and stream parsing
|
| 22 |
|
| 23 |
# [NEW] get_models implementation
|
| 24 |
-
async def get_models(self,
|
| 25 |
"""Returns a hardcoded list of known compatible Qwen models for the OpenAI-compatible API."""
|
| 26 |
return [f"qwen_code/{model_id}" for model_id in HARDCODED_MODELS]
|
| 27 |
|
|
|
|
| 21 |
return True # We use custom logic to handle 401 retries and stream parsing
|
| 22 |
|
| 23 |
# [NEW] get_models implementation
|
| 24 |
+
async def get_models(self, credential: str, client: httpx.AsyncClient) -> List[str]:
|
| 25 |
"""Returns a hardcoded list of known compatible Qwen models for the OpenAI-compatible API."""
|
| 26 |
return [f"qwen_code/{model_id}" for model_id in HARDCODED_MODELS]
|
| 27 |
|