Spaces:
Paused
Paused
Commit
·
90e5954
1
Parent(s):
8e357d9
added search for openai
Browse files- app/openai_handler.py +23 -18
- app/routes/chat_api.py +10 -5
- app/routes/models_api.py +2 -0
app/openai_handler.py
CHANGED
|
@@ -140,21 +140,26 @@ class OpenAIDirectHandler:
|
|
| 140 |
api_key=gcp_token, # OAuth token
|
| 141 |
)
|
| 142 |
|
| 143 |
-
def prepare_openai_params(self, request: OpenAIRequest, model_id: str) -> Dict[str, Any]:
|
| 144 |
-
"""
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
def prepare_extra_body(self) -> Dict[str, Any]:
|
| 160 |
"""Prepare extra body parameters for OpenAI API call."""
|
|
@@ -171,7 +176,7 @@ class OpenAIDirectHandler:
|
|
| 171 |
}
|
| 172 |
|
| 173 |
async def handle_streaming_response(
|
| 174 |
-
self,
|
| 175 |
openai_client: Any, # Can be openai.AsyncOpenAI or our wrapper
|
| 176 |
openai_params: Dict[str, Any],
|
| 177 |
openai_extra_body: Dict[str, Any],
|
|
@@ -398,7 +403,7 @@ class OpenAIDirectHandler:
|
|
| 398 |
content=create_openai_error_response(500, error_msg, "server_error")
|
| 399 |
)
|
| 400 |
|
| 401 |
-
async def process_request(self, request: OpenAIRequest, base_model_name: str, is_express: bool = False):
|
| 402 |
"""Main entry point for processing OpenAI Direct mode requests."""
|
| 403 |
print(f"INFO: Using OpenAI Direct Path for model: {request.model} (Express: {is_express})")
|
| 404 |
|
|
@@ -434,7 +439,7 @@ class OpenAIDirectHandler:
|
|
| 434 |
client = self.create_openai_client(rotated_project_id, gcp_token)
|
| 435 |
|
| 436 |
model_id = f"google/{base_model_name}"
|
| 437 |
-
openai_params = self.prepare_openai_params(request, model_id)
|
| 438 |
openai_extra_body = self.prepare_extra_body()
|
| 439 |
|
| 440 |
if request.stream:
|
|
|
|
| 140 |
api_key=gcp_token, # OAuth token
|
| 141 |
)
|
| 142 |
|
| 143 |
+
def prepare_openai_params(self, request: OpenAIRequest, model_id: str, is_openai_search: bool = False) -> Dict[str, Any]:
    """Build the keyword arguments for the OpenAI chat-completions call.

    The request is dumped to a dict rather than copied field by field, so any
    field the client explicitly set is forwarded.

    Args:
        request: Incoming request; only ``model_dump(exclude_unset=True)`` is
            called on it.
        model_id: Model identifier that replaces whatever ``model`` value the
            client sent.
        is_openai_search: When true, make sure a ``web_search_options`` entry
            is present in the returned parameters.

    Returns:
        A new dict of parameters with ``model`` overridden, top-level ``None``
        values removed, and any ``reasoning_effort`` other than ``"low"``,
        ``"medium"`` or ``"high"`` dropped.
    """
    # Only fields the client actually set are dumped. `None` values nested
    # inside messages etc. are left untouched; only top-level ones are
    # filtered further down.
    params = request.model_dump(exclude_unset=True)

    # Always send the resolved model id, never the client-facing alias.
    params['model'] = model_id

    # Enable web search, but keep options the caller already supplied instead
    # of overwriting them with an empty dict.
    # NOTE(review): assumes the upstream endpoint accepts a non-empty
    # `web_search_options` object - confirm against the provider docs.
    if is_openai_search and not isinstance(params.get('web_search_options'), dict):
        params['web_search_options'] = {}

    # Drop top-level keys whose value is None so they are not sent explicitly.
    openai_params = {k: v for k, v in params.items() if v is not None}

    # Forward `reasoning_effort` only when it is one of the accepted levels.
    if openai_params.get("reasoning_effort") not in ("low", "medium", "high"):
        openai_params.pop("reasoning_effort", None)

    return openai_params
|
| 162 |
+
|
| 163 |
|
| 164 |
def prepare_extra_body(self) -> Dict[str, Any]:
|
| 165 |
"""Prepare extra body parameters for OpenAI API call."""
|
|
|
|
| 176 |
}
|
| 177 |
|
| 178 |
async def handle_streaming_response(
|
| 179 |
+
self,
|
| 180 |
openai_client: Any, # Can be openai.AsyncOpenAI or our wrapper
|
| 181 |
openai_params: Dict[str, Any],
|
| 182 |
openai_extra_body: Dict[str, Any],
|
|
|
|
| 403 |
content=create_openai_error_response(500, error_msg, "server_error")
|
| 404 |
)
|
| 405 |
|
| 406 |
+
async def process_request(self, request: OpenAIRequest, base_model_name: str, is_express: bool = False, is_openai_search: bool = False):
|
| 407 |
"""Main entry point for processing OpenAI Direct mode requests."""
|
| 408 |
print(f"INFO: Using OpenAI Direct Path for model: {request.model} (Express: {is_express})")
|
| 409 |
|
|
|
|
| 439 |
client = self.create_openai_client(rotated_project_id, gcp_token)
|
| 440 |
|
| 441 |
model_id = f"google/{base_model_name}"
|
| 442 |
+
openai_params = self.prepare_openai_params(request, model_id, is_openai_search)
|
| 443 |
openai_extra_body = self.prepare_extra_body()
|
| 444 |
|
| 445 |
if request.stream:
|
app/routes/chat_api.py
CHANGED
|
@@ -33,6 +33,7 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 33 |
try:
|
| 34 |
credential_manager_instance = fastapi_request.app.state.credential_manager
|
| 35 |
OPENAI_DIRECT_SUFFIX = "-openai"
|
|
|
|
| 36 |
EXPERIMENTAL_MARKER = "-exp-"
|
| 37 |
PAY_PREFIX = "[PAY]"
|
| 38 |
EXPRESS_PREFIX = "[EXPRESS] " # Note the space for easier stripping
|
|
@@ -44,8 +45,11 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 44 |
|
| 45 |
# Updated logic for is_openai_direct_model
|
| 46 |
is_openai_direct_model = False
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
| 49 |
# An OpenAI model can be prefixed with PAY, EXPRESS, or contain EXP
|
| 50 |
if temp_name_for_marker_check.startswith(PAY_PREFIX) or \
|
| 51 |
temp_name_for_marker_check.startswith(EXPRESS_PREFIX) or \
|
|
@@ -75,7 +79,8 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 75 |
if is_openai_direct_model: # This check is based on request.model, so it's fine here
|
| 76 |
# If it was an OpenAI direct model, its base name is request.model minus suffix.
|
| 77 |
# We need to ensure PAY_PREFIX or EXPRESS_PREFIX are also stripped if they were part of the original.
|
| 78 |
-
|
|
|
|
| 79 |
if temp_base_for_openai.startswith(EXPRESS_PREFIX):
|
| 80 |
temp_base_for_openai = temp_base_for_openai[len(EXPRESS_PREFIX):]
|
| 81 |
if temp_base_for_openai.startswith(PAY_PREFIX):
|
|
@@ -187,10 +192,10 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 187 |
# Use the new OpenAI handler
|
| 188 |
if is_express_model_request:
|
| 189 |
openai_handler = OpenAIDirectHandler(express_key_manager=express_key_manager_instance)
|
| 190 |
-
return await openai_handler.process_request(request, base_model_name, is_express=True)
|
| 191 |
else:
|
| 192 |
openai_handler = OpenAIDirectHandler(credential_manager=credential_manager_instance)
|
| 193 |
-
return await openai_handler.process_request(request, base_model_name)
|
| 194 |
elif is_auto_model:
|
| 195 |
print(f"Processing auto model: {request.model}")
|
| 196 |
attempts = [
|
|
|
|
| 33 |
try:
|
| 34 |
credential_manager_instance = fastapi_request.app.state.credential_manager
|
| 35 |
OPENAI_DIRECT_SUFFIX = "-openai"
|
| 36 |
+
OPENAI_SEARCH_SUFFIX = "-openaisearch"
|
| 37 |
EXPERIMENTAL_MARKER = "-exp-"
|
| 38 |
PAY_PREFIX = "[PAY]"
|
| 39 |
EXPRESS_PREFIX = "[EXPRESS] " # Note the space for easier stripping
|
|
|
|
| 45 |
|
| 46 |
# Updated logic for is_openai_direct_model
|
| 47 |
is_openai_direct_model = False
|
| 48 |
+
is_openai_search_model = False
|
| 49 |
+
if request.model.endswith(OPENAI_DIRECT_SUFFIX) or request.model.endswith(OPENAI_SEARCH_SUFFIX):
|
| 50 |
+
is_openai_search_model = request.model.endswith(OPENAI_SEARCH_SUFFIX)
|
| 51 |
+
suffix_to_remove = OPENAI_SEARCH_SUFFIX if is_openai_search_model else OPENAI_DIRECT_SUFFIX
|
| 52 |
+
temp_name_for_marker_check = request.model[:-len(suffix_to_remove)]
|
| 53 |
# An OpenAI model can be prefixed with PAY, EXPRESS, or contain EXP
|
| 54 |
if temp_name_for_marker_check.startswith(PAY_PREFIX) or \
|
| 55 |
temp_name_for_marker_check.startswith(EXPRESS_PREFIX) or \
|
|
|
|
| 79 |
if is_openai_direct_model: # This check is based on request.model, so it's fine here
|
| 80 |
# If it was an OpenAI direct model, its base name is request.model minus suffix.
|
| 81 |
# We need to ensure PAY_PREFIX or EXPRESS_PREFIX are also stripped if they were part of the original.
|
| 82 |
+
suffix_to_remove = OPENAI_SEARCH_SUFFIX if is_openai_search_model else OPENAI_DIRECT_SUFFIX
|
| 83 |
+
temp_base_for_openai = request.model[:-len(suffix_to_remove)]
|
| 84 |
if temp_base_for_openai.startswith(EXPRESS_PREFIX):
|
| 85 |
temp_base_for_openai = temp_base_for_openai[len(EXPRESS_PREFIX):]
|
| 86 |
if temp_base_for_openai.startswith(PAY_PREFIX):
|
|
|
|
| 192 |
# Use the new OpenAI handler
|
| 193 |
if is_express_model_request:
|
| 194 |
openai_handler = OpenAIDirectHandler(express_key_manager=express_key_manager_instance)
|
| 195 |
+
return await openai_handler.process_request(request, base_model_name, is_express=True, is_openai_search=is_openai_search_model)
|
| 196 |
else:
|
| 197 |
openai_handler = OpenAIDirectHandler(credential_manager=credential_manager_instance)
|
| 198 |
+
return await openai_handler.process_request(request, base_model_name, is_openai_search=is_openai_search_model)
|
| 199 |
elif is_auto_model:
|
| 200 |
print(f"Processing auto model: {request.model}")
|
| 201 |
attempts = [
|
app/routes/models_api.py
CHANGED
|
@@ -15,6 +15,7 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 15 |
PAY_PREFIX = "[PAY]"
|
| 16 |
EXPRESS_PREFIX = "[EXPRESS] "
|
| 17 |
OPENAI_DIRECT_SUFFIX = "-openai"
|
|
|
|
| 18 |
|
| 19 |
credential_manager_instance: CredentialManager = fastapi_request.app.state.credential_manager
|
| 20 |
express_key_manager_instance = fastapi_request.app.state.express_key_manager
|
|
@@ -41,6 +42,7 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 41 |
|
| 42 |
# Add the openai variant for all models
|
| 43 |
suffixes.append(OPENAI_DIRECT_SUFFIX)
|
|
|
|
| 44 |
|
| 45 |
for suffix in suffixes:
|
| 46 |
model_id_with_suffix = f"{base_id}{suffix}"
|
|
|
|
| 15 |
PAY_PREFIX = "[PAY]"
|
| 16 |
EXPRESS_PREFIX = "[EXPRESS] "
|
| 17 |
OPENAI_DIRECT_SUFFIX = "-openai"
|
| 18 |
+
OPENAI_SEARCH_SUFFIX = "-openaisearch"
|
| 19 |
|
| 20 |
credential_manager_instance: CredentialManager = fastapi_request.app.state.credential_manager
|
| 21 |
express_key_manager_instance = fastapi_request.app.state.express_key_manager
|
|
|
|
| 42 |
|
| 43 |
# Add the openai variant for all models
|
| 44 |
suffixes.append(OPENAI_DIRECT_SUFFIX)
|
| 45 |
+
suffixes.append(OPENAI_SEARCH_SUFFIX)
|
| 46 |
|
| 47 |
for suffix in suffixes:
|
| 48 |
model_id_with_suffix = f"{base_id}{suffix}"
|