Spaces:
Paused
Paused
Commit
·
61a24c9
1
Parent(s):
df1784a
added 0605 thinking config support
Browse files
- app/routes/chat_api.py +12 -6
- app/routes/models_api.py +4 -4
- vertexModels.json +3 -1
app/routes/chat_api.py
CHANGED
|
@@ -87,10 +87,10 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 87 |
elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
|
| 88 |
|
| 89 |
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
| 90 |
-
if is_nothinking_model and not base_model_name.startswith("gemini-2.5-flash"):
|
| 91 |
-
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for models starting with 'gemini-2.5-flash'.", "invalid_request_error"))
|
| 92 |
-
if is_max_thinking_model and not base_model_name.startswith("gemini-2.5-flash"):
|
| 93 |
-
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for models starting with 'gemini-2.5-flash'.", "invalid_request_error"))
|
| 94 |
|
| 95 |
generation_config = create_generation_config(request)
|
| 96 |
|
|
@@ -213,9 +213,15 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 213 |
generation_config["system_instruction"] = ENCRYPTION_INSTRUCTIONS
|
| 214 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
| 215 |
elif is_nothinking_model:
|
| 216 |
-
generation_config["thinking_config"] = {"thinking_budget": 0}
|
|
|
|
|
|
|
|
|
| 217 |
elif is_max_thinking_model:
|
| 218 |
-
generation_config["thinking_config"] = {"thinking_budget": 24576}
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
# For non-auto models, the 'base_model_name' might have suffix stripped.
|
| 221 |
# We should use the original 'request.model' for API call if it's a suffixed one,
|
|
|
|
| 87 |
elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
|
| 88 |
|
| 89 |
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
| 90 |
+
if is_nothinking_model and not (base_model_name.startswith("gemini-2.5-flash") or base_model_name == "gemini-2.5-pro-preview-06-05"):
|
| 91 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for models starting with 'gemini-2.5-flash' or 'gemini-2.5-pro-preview-06-05'.", "invalid_request_error"))
|
| 92 |
+
if is_max_thinking_model and not (base_model_name.startswith("gemini-2.5-flash") or base_model_name == "gemini-2.5-pro-preview-06-05"):
|
| 93 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for models starting with 'gemini-2.5-flash' or 'gemini-2.5-pro-preview-06-05'.", "invalid_request_error"))
|
| 94 |
|
| 95 |
generation_config = create_generation_config(request)
|
| 96 |
|
|
|
|
| 213 |
generation_config["system_instruction"] = ENCRYPTION_INSTRUCTIONS
|
| 214 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
| 215 |
elif is_nothinking_model:
|
| 216 |
+
if base_model_name == "gemini-2.5-pro-preview-06-05":
|
| 217 |
+
generation_config["thinking_config"] = {"thinking_budget": 128}
|
| 218 |
+
else:
|
| 219 |
+
generation_config["thinking_config"] = {"thinking_budget": 0}
|
| 220 |
elif is_max_thinking_model:
|
| 221 |
+
if base_model_name == "gemini-2.5-pro-preview-06-05":
|
| 222 |
+
generation_config["thinking_config"] = {"thinking_budget": 32768}
|
| 223 |
+
else:
|
| 224 |
+
generation_config["thinking_config"] = {"thinking_budget": 24576}
|
| 225 |
|
| 226 |
# For non-auto models, the 'base_model_name' might have suffix stripped.
|
| 227 |
# We should use the original 'request.model' for API call if it's a suffixed one,
|
app/routes/models_api.py
CHANGED
|
@@ -90,10 +90,10 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 90 |
"permission": [], "root": original_model_id, "parent": None
|
| 91 |
})
|
| 92 |
|
| 93 |
-
# Apply special suffixes for models starting with "gemini-2.5-flash"
|
| 94 |
-
if "gemini-2.5-flash" in original_model_id: # Suffix rules based on original_model_id
|
| 95 |
-
|
| 96 |
-
for special_suffix in
|
| 97 |
suffixed_model_part = f"{original_model_id}{special_suffix}"
|
| 98 |
final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
| 99 |
|
|
|
|
| 90 |
"permission": [], "root": original_model_id, "parent": None
|
| 91 |
})
|
| 92 |
|
| 93 |
+
# Apply special suffixes for models starting with "gemini-2.5-flash" or specifically "gemini-2.5-pro-preview-06-05"
|
| 94 |
+
if "gemini-2.5-flash" in original_model_id or original_model_id == "gemini-2.5-pro-preview-06-05": # Suffix rules based on original_model_id
|
| 95 |
+
special_thinking_suffixes = ["-nothinking", "-max"]
|
| 96 |
+
for special_suffix in special_thinking_suffixes:
|
| 97 |
suffixed_model_part = f"{original_model_id}{special_suffix}"
|
| 98 |
final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
| 99 |
|
vertexModels.json
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
"gemini-2.5-pro-exp-03-25",
|
| 4 |
"gemini-2.5-pro-preview-03-25",
|
| 5 |
"gemini-2.5-pro-preview-05-06",
|
|
|
|
| 6 |
"gemini-2.5-flash-preview-05-20",
|
| 7 |
"gemini-2.5-flash-preview-04-17",
|
| 8 |
"gemini-2.0-flash-001",
|
|
@@ -14,6 +15,7 @@
|
|
| 14 |
"gemini-2.5-pro-preview-03-25",
|
| 15 |
"gemini-2.5-flash-preview-04-17",
|
| 16 |
"gemini-2.5-flash-preview-05-20",
|
| 17 |
-
"gemini-2.5-pro-preview-05-06"
|
|
|
|
| 18 |
]
|
| 19 |
}
|
|
|
|
| 3 |
"gemini-2.5-pro-exp-03-25",
|
| 4 |
"gemini-2.5-pro-preview-03-25",
|
| 5 |
"gemini-2.5-pro-preview-05-06",
|
| 6 |
+
"gemini-2.5-pro-preview-06-05",
|
| 7 |
"gemini-2.5-flash-preview-05-20",
|
| 8 |
"gemini-2.5-flash-preview-04-17",
|
| 9 |
"gemini-2.0-flash-001",
|
|
|
|
| 15 |
"gemini-2.5-pro-preview-03-25",
|
| 16 |
"gemini-2.5-flash-preview-04-17",
|
| 17 |
"gemini-2.5-flash-preview-05-20",
|
| 18 |
+
"gemini-2.5-pro-preview-05-06",
|
| 19 |
+
"gemini-2.5-pro-preview-06-05"
|
| 20 |
]
|
| 21 |
}
|