Spaces:
Paused
Paused
Commit
·
61a24c9
1
Parent(s):
df1784a
added 0605 thinking config support
Browse files
- app/routes/chat_api.py +12 -6
- app/routes/models_api.py +4 -4
- vertexModels.json +3 -1
app/routes/chat_api.py
CHANGED
|
@@ -87,10 +87,10 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 87 |
elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
|
| 88 |
|
| 89 |
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
| 90 |
-
if is_nothinking_model and not base_model_name.startswith("gemini-2.5-flash"):
|
| 91 |
-
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for models starting with 'gemini-2.5-flash'.", "invalid_request_error"))
|
| 92 |
-
if is_max_thinking_model and not base_model_name.startswith("gemini-2.5-flash"):
|
| 93 |
-
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for models starting with 'gemini-2.5-flash'.", "invalid_request_error"))
|
| 94 |
|
| 95 |
generation_config = create_generation_config(request)
|
| 96 |
|
|
@@ -213,9 +213,15 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 213 |
generation_config["system_instruction"] = ENCRYPTION_INSTRUCTIONS
|
| 214 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
| 215 |
elif is_nothinking_model:
|
| 216 |
-
generation_config["thinking_config"] = {"thinking_budget": 0}
|
|
|
|
|
|
|
|
|
| 217 |
elif is_max_thinking_model:
|
| 218 |
-
generation_config["thinking_config"] = {"thinking_budget": 24576}
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
# For non-auto models, the 'base_model_name' might have suffix stripped.
|
| 221 |
# We should use the original 'request.model' for API call if it's a suffixed one,
|
|
|
|
| 87 |
elif is_max_thinking_model: base_model_name = base_model_name[:-len("-max")]
|
| 88 |
|
| 89 |
# Specific model variant checks (if any remain exclusive and not covered dynamically)
|
| 90 |
+
if is_nothinking_model and not (base_model_name.startswith("gemini-2.5-flash") or base_model_name == "gemini-2.5-pro-preview-06-05"):
|
| 91 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-nothinking) is only supported for models starting with 'gemini-2.5-flash' or 'gemini-2.5-pro-preview-06-05'.", "invalid_request_error"))
|
| 92 |
+
if is_max_thinking_model and not (base_model_name.startswith("gemini-2.5-flash") or base_model_name == "gemini-2.5-pro-preview-06-05"):
|
| 93 |
+
return JSONResponse(status_code=400, content=create_openai_error_response(400, f"Model '{request.model}' (-max) is only supported for models starting with 'gemini-2.5-flash' or 'gemini-2.5-pro-preview-06-05'.", "invalid_request_error"))
|
| 94 |
|
| 95 |
generation_config = create_generation_config(request)
|
| 96 |
|
|
|
|
| 213 |
generation_config["system_instruction"] = ENCRYPTION_INSTRUCTIONS
|
| 214 |
current_prompt_func = create_encrypted_full_gemini_prompt
|
| 215 |
elif is_nothinking_model:
|
| 216 |
+
if base_model_name == "gemini-2.5-pro-preview-06-05":
|
| 217 |
+
generation_config["thinking_config"] = {"thinking_budget": 128}
|
| 218 |
+
else:
|
| 219 |
+
generation_config["thinking_config"] = {"thinking_budget": 0}
|
| 220 |
elif is_max_thinking_model:
|
| 221 |
+
if base_model_name == "gemini-2.5-pro-preview-06-05":
|
| 222 |
+
generation_config["thinking_config"] = {"thinking_budget": 32768}
|
| 223 |
+
else:
|
| 224 |
+
generation_config["thinking_config"] = {"thinking_budget": 24576}
|
| 225 |
|
| 226 |
# For non-auto models, the 'base_model_name' might have suffix stripped.
|
| 227 |
# We should use the original 'request.model' for API call if it's a suffixed one,
|
app/routes/models_api.py
CHANGED
|
@@ -90,10 +90,10 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 90 |
"permission": [], "root": original_model_id, "parent": None
|
| 91 |
})
|
| 92 |
|
| 93 |
-
# Apply special suffixes for models starting with "gemini-2.5-flash"
|
| 94 |
-
if "gemini-2.5-flash" in original_model_id: # Suffix rules based on original_model_id
|
| 95 |
-
|
| 96 |
-
for special_suffix in
|
| 97 |
suffixed_model_part = f"{original_model_id}{special_suffix}"
|
| 98 |
final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
| 99 |
|
|
|
|
| 90 |
"permission": [], "root": original_model_id, "parent": None
|
| 91 |
})
|
| 92 |
|
| 93 |
+
# Apply special suffixes for models starting with "gemini-2.5-flash" or specifically "gemini-2.5-pro-preview-06-05"
|
| 94 |
+
if "gemini-2.5-flash" in original_model_id or original_model_id == "gemini-2.5-pro-preview-06-05": # Suffix rules based on original_model_id
|
| 95 |
+
special_thinking_suffixes = ["-nothinking", "-max"]
|
| 96 |
+
for special_suffix in special_thinking_suffixes:
|
| 97 |
suffixed_model_part = f"{original_model_id}{special_suffix}"
|
| 98 |
final_special_suffixed_display_id = f"{current_display_prefix}{suffixed_model_part}"
|
| 99 |
|
vertexModels.json
CHANGED
|
@@ -3,6 +3,7 @@
|
|
| 3 |
"gemini-2.5-pro-exp-03-25",
|
| 4 |
"gemini-2.5-pro-preview-03-25",
|
| 5 |
"gemini-2.5-pro-preview-05-06",
|
|
|
|
| 6 |
"gemini-2.5-flash-preview-05-20",
|
| 7 |
"gemini-2.5-flash-preview-04-17",
|
| 8 |
"gemini-2.0-flash-001",
|
|
@@ -14,6 +15,7 @@
|
|
| 14 |
"gemini-2.5-pro-preview-03-25",
|
| 15 |
"gemini-2.5-flash-preview-04-17",
|
| 16 |
"gemini-2.5-flash-preview-05-20",
|
| 17 |
-
"gemini-2.5-pro-preview-05-06"
|
|
|
|
| 18 |
]
|
| 19 |
}
|
|
|
|
| 3 |
"gemini-2.5-pro-exp-03-25",
|
| 4 |
"gemini-2.5-pro-preview-03-25",
|
| 5 |
"gemini-2.5-pro-preview-05-06",
|
| 6 |
+
"gemini-2.5-pro-preview-06-05",
|
| 7 |
"gemini-2.5-flash-preview-05-20",
|
| 8 |
"gemini-2.5-flash-preview-04-17",
|
| 9 |
"gemini-2.0-flash-001",
|
|
|
|
| 15 |
"gemini-2.5-pro-preview-03-25",
|
| 16 |
"gemini-2.5-flash-preview-04-17",
|
| 17 |
"gemini-2.5-flash-preview-05-20",
|
| 18 |
+
"gemini-2.5-pro-preview-05-06",
|
| 19 |
+
"gemini-2.5-pro-preview-06-05"
|
| 20 |
]
|
| 21 |
}
|