Spaces:
Paused
Paused
Update app/main.py
Browse files- app/main.py +39 -30
app/main.py
CHANGED
|
@@ -1552,39 +1552,48 @@ async def chat_completions(request: OpenAIRequest, api_key: str = Depends(get_ap
|
|
| 1552 |
|
| 1553 |
# --- End of specific OpenAI client model handling ---
|
| 1554 |
|
| 1555 |
-
#
|
| 1556 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1557 |
is_auto_model = True
|
| 1558 |
-
|
| 1559 |
-
|
| 1560 |
-
|
| 1561 |
-
|
| 1562 |
-
|
| 1563 |
-
|
| 1564 |
-
|
| 1565 |
-
|
| 1566 |
-
|
| 1567 |
-
|
| 1568 |
-
elif
|
| 1569 |
-
|
| 1570 |
-
|
| 1571 |
-
base_model_name = request.model.replace("-encrypt-full", "")
|
| 1572 |
-
elif is_nothinking_model:
|
| 1573 |
-
base_model_name = request.model.replace("-nothinking","")
|
| 1574 |
# Specific check for the flash model requiring budget
|
| 1575 |
-
|
| 1576 |
-
|
| 1577 |
-
|
| 1578 |
-
|
| 1579 |
-
|
| 1580 |
-
|
| 1581 |
-
|
|
|
|
|
|
|
| 1582 |
# Specific check for the flash model requiring budget
|
| 1583 |
-
|
| 1584 |
-
|
| 1585 |
-
|
| 1586 |
-
|
| 1587 |
-
|
|
|
|
| 1588 |
else:
|
| 1589 |
base_model_name = request.model
|
| 1590 |
|
|
|
|
| 1552 |
|
| 1553 |
# --- End of specific OpenAI client model handling ---
|
| 1554 |
|
| 1555 |
+
# Initialize flags before checking suffixes
|
| 1556 |
+
is_auto_model = False
|
| 1557 |
+
is_grounded_search = False
|
| 1558 |
+
is_encrypted_model = False
|
| 1559 |
+
is_encrypted_full_model = False
|
| 1560 |
+
is_nothinking_model = False
|
| 1561 |
+
is_max_thinking_model = False
|
| 1562 |
+
base_model_name = request.model # Default to the full name
|
| 1563 |
+
|
| 1564 |
+
# Check model type and extract base model name
|
| 1565 |
+
if request.model.endswith("-auto"):
|
| 1566 |
is_auto_model = True
|
| 1567 |
+
base_model_name = request.model.replace("-auto", "")
|
| 1568 |
+
elif request.model.endswith("-search"):
|
| 1569 |
+
is_grounded_search = True
|
| 1570 |
+
base_model_name = request.model.replace("-search", "")
|
| 1571 |
+
elif request.model.endswith("-encrypt"):
|
| 1572 |
+
is_encrypted_model = True
|
| 1573 |
+
base_model_name = request.model.replace("-encrypt", "")
|
| 1574 |
+
elif request.model.endswith("-encrypt-full"):
|
| 1575 |
+
is_encrypted_full_model = True
|
| 1576 |
+
base_model_name = request.model.replace("-encrypt-full", "")
|
| 1577 |
+
elif request.model.endswith("-nothinking"):
|
| 1578 |
+
is_nothinking_model = True
|
| 1579 |
+
base_model_name = request.model.replace("-nothinking","")
|
|
|
|
|
|
|
|
|
|
| 1580 |
# Specific check for the flash model requiring budget
|
| 1581 |
+
# Only gemini-2.5-flash-preview-04-17 supports the -nothinking variant; reject any other base model with a 400
|
| 1582 |
+
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
| 1583 |
+
error_response = create_openai_error_response(
|
| 1584 |
+
400, f"Model '{request.model}' does not support -nothinking variant", "invalid_request_error"
|
| 1585 |
+
)
|
| 1586 |
+
return JSONResponse(status_code=400, content=error_response)
|
| 1587 |
+
elif request.model.endswith("-max"):
|
| 1588 |
+
is_max_thinking_model = True
|
| 1589 |
+
base_model_name = request.model.replace("-max","")
|
| 1590 |
# Specific check for the flash model requiring budget
|
| 1591 |
+
# Only gemini-2.5-flash-preview-04-17 supports the -max variant; reject any other base model with a 400
|
| 1592 |
+
if base_model_name != "gemini-2.5-flash-preview-04-17":
|
| 1593 |
+
error_response = create_openai_error_response(
|
| 1594 |
+
400, f"Model '{request.model}' does not support -max variant", "invalid_request_error"
|
| 1595 |
+
)
|
| 1596 |
+
return JSONResponse(status_code=400, content=error_response)
|
| 1597 |
else:
|
| 1598 |
base_model_name = request.model
|
| 1599 |
|