Spaces:
Paused
Paused
Commit
·
2a81a94
1
Parent(s):
9fde8ed
smart pay prefix
Browse files
- app/routes/chat_api.py +7 -5
- app/routes/models_api.py +26 -14
app/routes/chat_api.py
CHANGED
|
@@ -62,11 +62,8 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 62 |
# Determine base_model_name by stripping known suffixes
|
| 63 |
# This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
|
| 64 |
if is_openai_direct_model:
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
base_model_name = temp_base_name[len(PAY_PREFIX):]
|
| 68 |
-
else:
|
| 69 |
-
base_model_name = temp_base_name
|
| 70 |
elif is_auto_model: base_model_name = request.model[:-len("-auto")]
|
| 71 |
elif is_grounded_search: base_model_name = request.model[:-len("-search")]
|
| 72 |
elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
|
|
@@ -74,6 +71,11 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 74 |
elif is_nothinking_model: base_model_name = request.model[:-len("-nothinking")]
|
| 75 |
elif is_max_thinking_model: base_model_name = request.model[:-len("-max")]
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
| 78 |
if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
|
| 79 |
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
|
|
|
|
| 62 |
# Determine base_model_name by stripping known suffixes
|
| 63 |
# This order matters if a model could have multiple (e.g. -encrypt-auto, though not currently a pattern)
|
| 64 |
if is_openai_direct_model:
|
| 65 |
+
# The general PAY_PREFIX stripper later will handle if this result starts with [PAY]
|
| 66 |
+
base_model_name = request.model[:-len(OPENAI_DIRECT_SUFFIX)]
|
|
|
|
|
|
|
|
|
|
| 67 |
elif is_auto_model: base_model_name = request.model[:-len("-auto")]
|
| 68 |
elif is_grounded_search: base_model_name = request.model[:-len("-search")]
|
| 69 |
elif is_encrypted_full_model: base_model_name = request.model[:-len("-encrypt-full")] # Must be before -encrypt
|
|
|
|
| 71 |
elif is_nothinking_model: base_model_name = request.model[:-len("-nothinking")]
|
| 72 |
elif is_max_thinking_model: base_model_name = request.model[:-len("-max")]
|
| 73 |
|
| 74 |
+
# After all suffix stripping, if PAY_PREFIX is still at the start of base_model_name, remove it.
|
| 75 |
+
# This handles cases like "[PAY]model-id-search" correctly.
|
| 76 |
+
if base_model_name.startswith(PAY_PREFIX):
|
| 77 |
+
base_model_name = base_model_name[len(PAY_PREFIX):]
|
| 78 |
+
|
| 79 |
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
| 80 |
if is_nothinking_model and base_model_name != "gemini-2.5-flash-preview-04-17":
|
| 81 |
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for 'gemini-2.5-flash-preview-04-17'.", "invalid_request_error"))
|
app/routes/models_api.py
CHANGED
|
@@ -54,33 +54,45 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 54 |
current_time = int(time.time())
|
| 55 |
|
| 56 |
# Add base models and their variations
|
| 57 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
dynamic_models_data.append({
|
| 59 |
-
"id":
|
| 60 |
-
"permission": [], "root":
|
| 61 |
})
|
| 62 |
|
| 63 |
# Conditionally add common variations (standard suffixes)
|
| 64 |
-
if not
|
| 65 |
standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
|
| 66 |
for suffix in standard_suffixes:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
dynamic_models_data.append({
|
| 71 |
-
"id":
|
| 72 |
-
"permission": [], "root":
|
| 73 |
})
|
| 74 |
|
| 75 |
# Apply special suffixes for models starting with "gemini-2.5-flash"
|
| 76 |
-
if
|
| 77 |
special_flash_suffixes = ["-nothinking", "-max"]
|
| 78 |
for special_suffix in special_flash_suffixes:
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
| 81 |
dynamic_models_data.append({
|
| 82 |
-
"id":
|
| 83 |
-
"permission": [], "root":
|
| 84 |
})
|
| 85 |
|
| 86 |
# Ensure uniqueness again after adding suffixes
|
|
|
|
| 54 |
current_time = int(time.time())
|
| 55 |
|
| 56 |
# Add base models and their variations
|
| 57 |
+
for original_model_id in sorted(list(all_model_ids)):
|
| 58 |
+
current_display_prefix = ""
|
| 59 |
+
if has_sa_creds and not has_express_key and EXPERIMENTAL_MARKER not in original_model_id:
|
| 60 |
+
current_display_prefix = PAY_PREFIX
|
| 61 |
+
|
| 62 |
+
base_display_id = f"{current_display_prefix}{original_model_id}"
|
| 63 |
+
|
| 64 |
dynamic_models_data.append({
|
| 65 |
+
"id": base_display_id, "object": "model", "created": current_time, "owned_by": "google",
|
| 66 |
+
"permission": [], "root": original_model_id, "parent": None
|
| 67 |
})
|
| 68 |
|
| 69 |
# Conditionally add common variations (standard suffixes)
|
| 70 |
+
if not original_model_id.startswith("gemini-2.0"): # Suffix rules based on original_model_id
|
| 71 |
standard_suffixes = ["-search", "-encrypt", "-encrypt-full", "-auto"]
|
| 72 |
for suffix in standard_suffixes:
|
| 73 |
+
# Suffix is applied to the original model ID part
|
| 74 |
+
suffixed_model_part = f"{original_model_id}{suffix}"
|
| 75 |
+
# Then the whole thing is prefixed
|
| 76 |
+
final_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
| 77 |
+
|
| 78 |
+
# Check if this suffixed ID is already in all_model_ids (unlikely with prefix) or already added
|
| 79 |
+
if final_suffixed_display_id not in all_model_ids and not any(m['id'] == final_suffixed_display_id for m in dynamic_models_data):
|
| 80 |
dynamic_models_data.append({
|
| 81 |
+
"id": final_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
|
| 82 |
+
"permission": [], "root": original_model_id, "parent": None
|
| 83 |
})
|
| 84 |
|
| 85 |
# Apply special suffixes for models starting with "gemini-2.5-flash"
|
| 86 |
+
if original_model_id.startswith("gemini-2.5-flash"): # Suffix rules based on original_model_id
|
| 87 |
special_flash_suffixes = ["-nothinking", "-max"]
|
| 88 |
for special_suffix in special_flash_suffixes:
|
| 89 |
+
suffixed_model_part = f"{original_model_id}{special_suffix}"
|
| 90 |
+
final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
| 91 |
+
|
| 92 |
+
if final_special_suffixed_display_id not in all_model_ids and not any(m['id'] == final_special_suffixed_display_id for m in dynamic_models_data):
|
| 93 |
dynamic_models_data.append({
|
| 94 |
+
"id": final_special_suffixed_display_id, "object": "model", "created": current_time, "owned_by": "google",
|
| 95 |
+
"permission": [], "root": original_model_id, "parent": None
|
| 96 |
})
|
| 97 |
|
| 98 |
# Ensure uniqueness again after adding suffixes
|