Spaces:
Paused
Paused
Commit
·
9fde8ed
1
Parent(s):
93e5da8
added [pay] prefix for preview openai mode. removed model check
Browse files- app/routes/chat_api.py +17 -40
- app/routes/models_api.py +21 -12
app/routes/chat_api.py
CHANGED
|
@@ -36,43 +36,21 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 36 |
credential_manager_instance = fastapi_request.app.state.credential_manager
|
| 37 |
OPENAI_DIRECT_SUFFIX = "-openai"
|
| 38 |
EXPERIMENTAL_MARKER = "-exp-"
|
|
|
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
|
| 42 |
-
#
|
| 43 |
-
# The remote model config should ideally be the source of truth for all valid permutations.
|
| 44 |
-
standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
|
| 45 |
-
# No longer using special_suffix_map, will use prefix check instead
|
| 46 |
-
|
| 47 |
-
all_allowed_model_ids = set(vertex_model_ids) # Start with base models from config
|
| 48 |
-
for base_id in vertex_model_ids: # Iterate over base models to add suffixed versions
|
| 49 |
-
# Apply standard suffixes only if not gemini-2.0
|
| 50 |
-
if not base_id.startswith("gemini-2.0"):
|
| 51 |
-
for suffix in standard_suffixes:
|
| 52 |
-
all_allowed_model_ids.add(f"{base_id}{suffix}")
|
| 53 |
-
|
| 54 |
-
# Apply special suffixes for models starting with "gemini-2.5-flash"
|
| 55 |
-
if base_id.startswith("gemini-2.5-flash"):
|
| 56 |
-
special_flash_suffixes = ["-nothinking", "-max"]
|
| 57 |
-
for special_suffix in special_flash_suffixes:
|
| 58 |
-
all_allowed_model_ids.add(f"{base_id}{special_suffix}")
|
| 59 |
-
|
| 60 |
-
# Add express models to the allowed list as well.
|
| 61 |
-
# These should be full names from the remote config.
|
| 62 |
vertex_express_model_ids = await get_vertex_express_models()
|
| 63 |
-
all_allowed_model_ids.update(vertex_express_model_ids)
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
# Add potential -openai models if they contain -exp-
|
| 67 |
-
potential_openai_direct_models = set()
|
| 68 |
-
for base_id in vertex_model_ids: # vertex_model_ids are base models
|
| 69 |
-
if EXPERIMENTAL_MARKER in base_id:
|
| 70 |
-
potential_openai_direct_models.add(f"{base_id}{OPENAI_DIRECT_SUFFIX}")
|
| 71 |
-
all_allowed_model_ids.update(potential_openai_direct_models)
|
| 72 |
-
if not request.model or request.model not in all_allowed_model_ids:
|
| 73 |
-
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' not found or not supported by this adapter. Valid models are: {sorted(list(all_allowed_model_ids))}", "invalid_request_error"))
|
| 74 |
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
is_auto_model = request.model.endswith("-auto")
|
| 77 |
is_grounded_search = request.model.endswith("-search")
|
| 78 |
is_encrypted_model = request.model.endswith("-encrypt")
|
|
@@ -84,7 +62,11 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 84 |
# Determine base_model_name by stripping known suffixes
|
| 85 |
# This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
|
| 86 |
if is_openai_direct_model:
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
elif is_auto_model: base_model_name = request.model[:-len("-auto")]
|
| 89 |
elif is_grounded_search: base_model_name = request.model[:-len("-search")]
|
| 90 |
elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
|
|
@@ -119,11 +101,6 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 119 |
|
| 120 |
if client_to_use is None:
|
| 121 |
print(f"WARNING: All {len(express_api_keys_list)} Vertex Express API key(s) failed to initialize for model {base_model_name}. Falling back.")
|
| 122 |
-
# else:
|
| 123 |
-
# if not express_api_keys_list:
|
| 124 |
-
# print(f"DEBUG: No Vertex Express API keys configured. Skipping Express Mode attempt for model {base_model_name}.")
|
| 125 |
-
# elif base_model_name not in vertex_express_model_ids:
|
| 126 |
-
# print(f"DEBUG: Model {base_model_name} is not in the Vertex Express model list. Skipping Express Mode attempt.")
|
| 127 |
|
| 128 |
if client_to_use is None:
|
| 129 |
rotated_credentials, rotated_project_id = credential_manager_instance.get_random_credentials()
|
|
|
|
| 36 |
credential_manager_instance = fastapi_request.app.state.credential_manager
|
| 37 |
OPENAI_DIRECT_SUFFIX = "-openai"
|
| 38 |
EXPERIMENTAL_MARKER = "-exp-"
|
| 39 |
+
PAY_PREFIX = "[PAY]"
|
| 40 |
|
| 41 |
+
# Model validation based on a predefined list has been removed as per user request.
|
| 42 |
+
# The application will now attempt to use any provided model string.
|
| 43 |
+
# We still need to fetch vertex_express_model_ids for the Express Mode logic.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
vertex_express_model_ids = await get_vertex_express_models()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
+
# Updated logic for is_openai_direct_model
|
| 47 |
+
is_openai_direct_model = False
|
| 48 |
+
if request.model.endswith(OPENAI_DIRECT_SUFFIX):
|
| 49 |
+
temp_name_for_marker_check = request.model[:-len(OPENAI_DIRECT_SUFFIX)]
|
| 50 |
+
if temp_name_for_marker_check.startswith(PAY_PREFIX):
|
| 51 |
+
is_openai_direct_model = True
|
| 52 |
+
elif EXPERIMENTAL_MARKER in temp_name_for_marker_check:
|
| 53 |
+
is_openai_direct_model = True
|
| 54 |
is_auto_model = request.model.endswith("-auto")
|
| 55 |
is_grounded_search = request.model.endswith("-search")
|
| 56 |
is_encrypted_model = request.model.endswith("-encrypt")
|
|
|
|
| 62 |
# Determine base_model_name by stripping known suffixes
|
| 63 |
# This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
|
| 64 |
if is_openai_direct_model:
|
| 65 |
+
temp_base_name = request.model[:-len(OPENAI_DIRECT_SUFFIX)]
|
| 66 |
+
if temp_base_name.startswith(PAY_PREFIX):
|
| 67 |
+
base_model_name = temp_base_name[len(PAY_PREFIX):]
|
| 68 |
+
else:
|
| 69 |
+
base_model_name = temp_base_name
|
| 70 |
elif is_auto_model: base_model_name = request.model[:-len("-auto")]
|
| 71 |
elif is_grounded_search: base_model_name = request.model[:-len("-search")]
|
| 72 |
elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
|
|
|
|
| 101 |
|
| 102 |
if client_to_use is None:
|
| 103 |
print(f"WARNING: All {len(express_api_keys_list)} Vertex Express API key(s) failed to initialize for model {base_model_name}. Falling back.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
if client_to_use is None:
|
| 106 |
rotated_credentials, rotated_project_id = credential_manager_instance.get_random_credentials()
|
app/routes/models_api.py
CHANGED
|
@@ -14,6 +14,7 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 14 |
|
| 15 |
OPENAI_DIRECT_SUFFIX = "-openai"
|
| 16 |
EXPERIMENTAL_MARKER = "-exp-"
|
|
|
|
| 17 |
# Access credential_manager from app state
|
| 18 |
credential_manager_instance: CredentialManager = fastapi_request.app.state.credential_manager
|
| 19 |
|
|
@@ -83,19 +84,27 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 83 |
})
|
| 84 |
|
| 85 |
# Ensure uniqueness again after adding suffixes
|
| 86 |
-
# Add OpenAI direct variations
|
| 87 |
if has_sa_creds: # OpenAI direct mode only works with SA credentials
|
| 88 |
-
#
|
| 89 |
-
#
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
# final_models_data_map = {m["id"]: m for m in dynamic_models_data}
|
| 100 |
# model_list = list(final_models_data_map.values())
|
| 101 |
# model_list.sort()
|
|
|
|
| 14 |
|
| 15 |
OPENAI_DIRECT_SUFFIX = "-openai"
|
| 16 |
EXPERIMENTAL_MARKER = "-exp-"
|
| 17 |
+
PAY_PREFIX = "[PAY]"
|
| 18 |
# Access credential_manager from app state
|
| 19 |
credential_manager_instance: CredentialManager = fastapi_request.app.state.credential_manager
|
| 20 |
|
|
|
|
| 84 |
})
|
| 85 |
|
| 86 |
# Ensure uniqueness again after adding suffixes
|
| 87 |
+
# Add OpenAI direct variations if SA creds are available
|
| 88 |
if has_sa_creds: # OpenAI direct mode only works with SA credentials
|
| 89 |
+
# `all_model_ids` contains the comprehensive list of base models that are eligible given the current credentials.
|
| 90 |
+
# We iterate through this to determine which ones get an -openai variation.
|
| 91 |
+
# `raw_vertex_models` is used here to ensure we only add -openai suffix to models that are
|
| 92 |
+
# fundamentally Vertex models, not just any model that might appear in `all_model_ids` (e.g. from Express list exclusively)
|
| 93 |
+
# if only an Express-mode API key is provided.
|
| 94 |
+
# We iterate through the base models from the main Vertex list.
|
| 95 |
+
for base_model_id_for_openai in raw_vertex_models: # Iterate through original list of GAIA/Vertex base models
|
| 96 |
+
display_model_id = ""
|
| 97 |
+
if EXPERIMENTAL_MARKER in base_model_id_for_openai:
|
| 98 |
+
display_model_id = f"{base_model_id_for_openai}{OPENAI_DIRECT_SUFFIX}"
|
| 99 |
+
else:
|
| 100 |
+
display_model_id = f"{PAY_PREFIX}{base_model_id_for_openai}{OPENAI_DIRECT_SUFFIX}"
|
| 101 |
+
|
| 102 |
+
# Check if already added (e.g. if remote config somehow already listed it or added as a base model)
|
| 103 |
+
if display_model_id and not any(m['id'] == display_model_id for m in dynamic_models_data):
|
| 104 |
+
dynamic_models_data.append({
|
| 105 |
+
"id": display_model_id, "object": "model", "created": current_time, "owned_by": "google",
|
| 106 |
+
"permission": [], "root": base_model_id_for_openai, "parent": None
|
| 107 |
+
})
|
| 108 |
# final_models_data_map = {m["id"]: m for m in dynamic_models_data}
|
| 109 |
# model_list = list(final_models_data_map.values())
|
| 110 |
# model_list.sort()
|