vertex-pay

Paused

App Files Community

bibibi12345 committed on May 1, 2025

Commit

f3ac3da

verified ·

1 Parent(s): 48cc290

Update app/main.py

Browse files

Files changed (1) hide show

app/main.py +39 -30

app/main.py CHANGED Viewed

@@ -1552,39 +1552,48 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
         # --- End of specific OpenAI client model handling ---
-        # Check model type and extract base model name (Changed to elif)
-        elif request.model.endswith("-auto"):
              is_auto_model = True
-             is_grounded_search = False
-             is_encrypted_model = False
-        is_encrypted_full_model = request.model.endswith("-encrypt-full")
-        is_nothinking_model = request.model.endswith("-nothinking")
-        is_max_thinking_model = request.model.endswith("-max")
-        if is_auto_model:
-            base_model_name = request.model.replace("-auto", "")
-        elif is_grounded_search:
-            base_model_name = request.model.replace("-search", "")
-        elif is_encrypted_model:
-            base_model_name = request.model.replace("-encrypt", "")
-        elif is_encrypted_full_model:
-            base_model_name = request.model.replace("-encrypt-full", "")
-        elif is_nothinking_model:
-            base_model_name = request.model.replace("-nothinking","")
             # Specific check for the flash model requiring budget
-            if base_model_name != "gemini-2.5-flash-preview-04-17":
-                error_response = create_openai_error_response(
-                    400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
-                )
-                return JSONResponse(status_code=400, content=error_response)
-        elif is_max_thinking_model:
-            base_model_name = request.model.replace("-max","")
             # Specific check for the flash model requiring budget
-            if base_model_name != "gemini-2.5-flash-preview-04-17":
-                error_response = create_openai_error_response(
-                    400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
-                )
-                return JSONResponse(status_code=400, content=error_response)
         else:
             base_model_name = request.model

         # --- End of specific OpenAI client model handling ---
+        # Initialize flags before checking suffixes
+        is_auto_model = False
+        is_grounded_search = False
+        is_encrypted_model = False
+        is_encrypted_full_model = False
+        is_nothinking_model = False
+        is_max_thinking_model = False
+        base_model_name = request.model # Default to the full name
+        # Check model type and extract base model name
+        if request.model.endswith("-auto"):
              is_auto_model = True
+             base_model_name = request.model.replace("-auto", "")
+        elif request.model.endswith("-search"):
+             is_grounded_search = True
+             base_model_name = request.model.replace("-search", "")
+        elif request.model.endswith("-encrypt"):
+             is_encrypted_model = True
+             base_model_name = request.model.replace("-encrypt", "")
+        elif request.model.endswith("-encrypt-full"):
+             is_encrypted_full_model = True
+             base_model_name = request.model.replace("-encrypt-full", "")
+        elif request.model.endswith("-nothinking"):
+             is_nothinking_model = True
+             base_model_name = request.model.replace("-nothinking","")
             # Specific check for the flash model requiring budget
+             # Specific check for the flash model requiring budget
+             if base_model_name != "gemini-2.5-flash-preview-04-17":
+                 error_response = create_openai_error_response(
+                     400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
+                 )
+                 return JSONResponse(status_code=400, content=error_response)
+        elif request.model.endswith("-max"):
+             is_max_thinking_model = True
+             base_model_name = request.model.replace("-max","")
             # Specific check for the flash model requiring budget
+             # Specific check for the flash model requiring budget
+             if base_model_name != "gemini-2.5-flash-preview-04-17":
+                 error_response = create_openai_error_response(
+                     400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
+                 )
+                 return JSONResponse(status_code=400, content=error_response)
         else:
             base_model_name = request.model