bibibi12345 committed on
Commit
5c95d1b
·
1 Parent(s): 61a24c9

added global region to express mode

Browse files
app/message_processing.py CHANGED
@@ -241,7 +241,7 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
241
  reasoning_text_parts.append(part_text)
242
  else:
243
  normal_text_parts.append(part_text)
244
- if candidate_part_text: # Candidate had text but no parts and was not a thought itself
245
  normal_text_parts.append(candidate_part_text)
246
  # If no parts and no direct text on candidate, both lists remain empty.
247
 
@@ -291,10 +291,14 @@ def convert_to_openai_format(gemini_response: Any, model: str) -> Dict[str, Any]
291
  def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_index: int = 0) -> str:
292
  is_encrypt_full = model.endswith("-encrypt-full")
293
  delta_payload = {}
294
- finish_reason = None
295
 
296
  if hasattr(chunk, 'candidates') and chunk.candidates:
297
- candidate = chunk.candidates[0]
 
 
 
 
298
 
299
  # For a streaming chunk, candidate might be simpler, or might have candidate.content with parts.
300
  # parse_gemini_response_for_reasoning_and_content is designed to handle both candidate and candidate.content
@@ -308,7 +312,6 @@ def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_
308
  if normal_text or (not reasoning_text and not delta_payload): # Ensure content key if nothing else
309
  delta_payload['content'] = normal_text if normal_text else ""
310
 
311
-
312
  chunk_data = {
313
  "id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model,
314
  "choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": finish_reason}]
 
241
  reasoning_text_parts.append(part_text)
242
  else:
243
  normal_text_parts.append(part_text)
244
+ elif candidate_part_text: # Candidate had text but no parts and was not a thought itself
245
  normal_text_parts.append(candidate_part_text)
246
  # If no parts and no direct text on candidate, both lists remain empty.
247
 
 
291
  def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_index: int = 0) -> str:
292
  is_encrypt_full = model.endswith("-encrypt-full")
293
  delta_payload = {}
294
+ finish_reason = None
295
 
296
  if hasattr(chunk, 'candidates') and chunk.candidates:
297
+ candidate = chunk.candidates[0]
298
+
299
+ # Check for finish reason
300
+ if hasattr(candidate, 'finishReason') and candidate.finishReason:
301
+ finish_reason = "stop" # Convert Gemini finish reasons to OpenAI format
302
 
303
  # For a streaming chunk, candidate might be simpler, or might have candidate.content with parts.
304
  # parse_gemini_response_for_reasoning_and_content is designed to handle both candidate and candidate.content
 
312
  if normal_text or (not reasoning_text and not delta_payload): # Ensure content key if nothing else
313
  delta_payload['content'] = normal_text if normal_text else ""
314
 
 
315
  chunk_data = {
316
  "id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model,
317
  "choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": finish_reason}]
app/requirements.txt CHANGED
@@ -6,4 +6,5 @@ pydantic==2.6.1
6
  google-genai==1.17.0
7
  httpx>=0.25.0
8
  openai
9
- google-auth-oauthlib
 
 
6
  google-genai==1.17.0
7
  httpx>=0.25.0
8
  openai
9
+ google-auth-oauthlib
10
+ aiohttp
app/routes/chat_api.py CHANGED
@@ -24,6 +24,7 @@ from api_helpers import (
24
  execute_gemini_call,
25
  )
26
  from openai_handler import OpenAIDirectHandler
 
27
 
28
  router = APIRouter()
29
 
@@ -115,8 +116,14 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
115
  if key_tuple:
116
  original_idx, key_val = key_tuple
117
  try:
118
- client_to_use = genai.Client(vertexai=True, api_key=key_val)
119
- print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
 
 
 
 
 
 
120
  break # Successfully initialized client
121
  except Exception as e:
122
  print(f"WARNING: Attempt {attempt+1}/{total_keys} - Vertex Express Mode client init failed for API key (original index: {original_idx}) for model {request.model}: {e}. Trying next key.")
@@ -177,7 +184,11 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
177
  current_gen_config = attempt["config_modifier"](generation_config.copy())
178
  try:
179
  # Pass is_auto_attempt=True for auto-mode calls
180
- return await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
 
 
 
 
181
  except Exception as e_auto:
182
  last_err = e_auto
183
  print(f"Auto-attempt '{attempt['name']}' for model {attempt['model']} failed: {e_auto}")
@@ -185,6 +196,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
185
 
186
  print(f"All auto attempts failed. Last error: {last_err}")
187
  err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
 
 
 
188
  if not request.stream and last_err:
189
  return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
190
  elif request.stream:
@@ -231,9 +245,17 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
231
  # but the API call might need the full "gemini-1.5-pro-search".
232
  # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
233
  # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
234
- return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
 
 
 
 
 
235
 
236
  except Exception as e:
237
  error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
238
  print(error_msg)
 
 
 
239
  return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
 
24
  execute_gemini_call,
25
  )
26
  from openai_handler import OpenAIDirectHandler
27
+ from direct_vertex_client import DirectVertexClient
28
 
29
  router = APIRouter()
30
 
 
116
  if key_tuple:
117
  original_idx, key_val = key_tuple
118
  try:
119
+ # Check if model contains "gemini-2.5-pro" for direct URL approach
120
+ if "gemini-2.5-pro" in base_model_name:
121
+ client_to_use = DirectVertexClient(api_key=key_val)
122
+ await client_to_use.discover_project_id()
123
+ print(f"INFO: Attempt {attempt+1}/{total_keys} - Using DirectVertexClient for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
124
+ else:
125
+ client_to_use = genai.Client(vertexai=True, api_key=key_val)
126
+ print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
127
  break # Successfully initialized client
128
  except Exception as e:
129
  print(f"WARNING: Attempt {attempt+1}/{total_keys} - Vertex Express Mode client init failed for API key (original index: {original_idx}) for model {request.model}: {e}. Trying next key.")
 
184
  current_gen_config = attempt["config_modifier"](generation_config.copy())
185
  try:
186
  # Pass is_auto_attempt=True for auto-mode calls
187
+ result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
188
+ # Clean up DirectVertexClient session if used
189
+ if isinstance(client_to_use, DirectVertexClient):
190
+ await client_to_use.close()
191
+ return result
192
  except Exception as e_auto:
193
  last_err = e_auto
194
  print(f"Auto-attempt '{attempt['name']}' for model {attempt['model']} failed: {e_auto}")
 
196
 
197
  print(f"All auto attempts failed. Last error: {last_err}")
198
  err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
199
+ # Clean up DirectVertexClient session if used
200
+ if isinstance(client_to_use, DirectVertexClient):
201
+ await client_to_use.close()
202
  if not request.stream and last_err:
203
  return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
204
  elif request.stream:
 
245
  # but the API call might need the full "gemini-1.5-pro-search".
246
  # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
247
  # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
248
+ try:
249
+ return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
250
+ finally:
251
+ # Clean up DirectVertexClient session if used
252
+ if isinstance(client_to_use, DirectVertexClient):
253
+ await client_to_use.close()
254
 
255
  except Exception as e:
256
  error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
257
  print(error_msg)
258
+ # Clean up DirectVertexClient session if it exists
259
+ if 'client_to_use' in locals() and isinstance(client_to_use, DirectVertexClient):
260
+ await client_to_use.close()
261
  return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))