Commit 5c95d1b
Parent(s): 61a24c9

added global region to express mode

Files changed:
- app/message_processing.py +7 -4
- app/requirements.txt +2 -1
- app/routes/chat_api.py +26 -4
app/message_processing.py (CHANGED)

@@ -241,7 +241,7 @@ def parse_gemini_response_for_reasoning_and_content(gemini_response_candidate: A
                 reasoning_text_parts.append(part_text)
             else:
                 normal_text_parts.append(part_text)
-
+    elif candidate_part_text: # Candidate had text but no parts and was not a thought itself
         normal_text_parts.append(candidate_part_text)
     # If no parts and no direct text on candidate, both lists remain empty.

@@ -291,10 +291,14 @@ def convert_to_openai_format(gemini_response: Any, model: str) -> Dict[str, Any]
 def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_index: int = 0) -> str:
     is_encrypt_full = model.endswith("-encrypt-full")
     delta_payload = {}
-    finish_reason = None
+    finish_reason = None
 
     if hasattr(chunk, 'candidates') and chunk.candidates:
-        candidate = chunk.candidates[0]
+        candidate = chunk.candidates[0]
+
+        # Check for finish reason
+        if hasattr(candidate, 'finishReason') and candidate.finishReason:
+            finish_reason = "stop" # Convert Gemini finish reasons to OpenAI format
 
         # For a streaming chunk, candidate might be simpler, or might have candidate.content with parts.
         # parse_gemini_response_for_reasoning_and_content is designed to handle both candidate and candidate.content

@@ -308,7 +312,6 @@ def convert_chunk_to_openai(chunk: Any, model: str, response_id: str, candidate_
     if normal_text or (not reasoning_text and not delta_payload): # Ensure content key if nothing else
         delta_payload['content'] = normal_text if normal_text else ""
 
-
     chunk_data = {
         "id": response_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": model,
         "choices": [{"index": candidate_index, "delta": delta_payload, "finish_reason": finish_reason}]
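Note that the new `finishReason` check collapses every Gemini finish reason to `"stop"`. The two vocabularies do not map one-to-one, so a fuller translation could look like the sketch below; the `map_finish_reason` helper and its mapping table are illustrative only and are not part of this commit.

```python
# Hypothetical helper (not in this commit): map Gemini finish reasons to
# their closest OpenAI equivalents instead of always emitting "stop".
GEMINI_TO_OPENAI_FINISH_REASON = {
    "STOP": "stop",                  # natural end of generation
    "MAX_TOKENS": "length",          # output token limit reached
    "SAFETY": "content_filter",      # blocked by safety settings
    "RECITATION": "content_filter",  # blocked for recitation
    "OTHER": "stop",                 # unspecified; fall back to "stop"
}

def map_finish_reason(gemini_reason: str) -> str:
    """Return the OpenAI-style finish_reason for a Gemini finishReason value."""
    return GEMINI_TO_OPENAI_FINISH_REASON.get(gemini_reason, "stop")
```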
app/requirements.txt (CHANGED)

@@ -6,4 +6,5 @@ pydantic==2.6.1
 google-genai==1.17.0
 httpx>=0.25.0
 openai
-google-auth-oauthlib
+google-auth-oauthlib
+aiohttp
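The new aiohttp dependency presumably backs the DirectVertexClient introduced in app/routes/chat_api.py below, which is why that route code now closes the client explicitly. For reference, an aiohttp session follows the same create/close lifecycle; this `fetch_json` helper is only an illustration, not code from this repository.

```python
import aiohttp

async def fetch_json(url: str) -> dict:
    # Create the session once and close it explicitly -- the same discipline
    # behind the client_to_use.close() calls added in chat_api.py.
    session = aiohttp.ClientSession()
    try:
        async with session.get(url) as resp:
            resp.raise_for_status()
            return await resp.json()
    finally:
        await session.close()
```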
app/routes/chat_api.py (CHANGED)

@@ -24,6 +24,7 @@ from api_helpers import (
     execute_gemini_call,
 )
 from openai_handler import OpenAIDirectHandler
+from direct_vertex_client import DirectVertexClient
 
 router = APIRouter()
 
@@ -115,8 +116,14 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             if key_tuple:
                 original_idx, key_val = key_tuple
                 try:
-                    client_to_use = genai.Client(vertexai=True, api_key=key_val)
-                    print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
+                    # Check if model contains "gemini-2.5-pro" for direct URL approach
+                    if "gemini-2.5-pro" in base_model_name:
+                        client_to_use = DirectVertexClient(api_key=key_val)
+                        await client_to_use.discover_project_id()
+                        print(f"INFO: Attempt {attempt+1}/{total_keys} - Using DirectVertexClient for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
+                    else:
+                        client_to_use = genai.Client(vertexai=True, api_key=key_val)
+                        print(f"INFO: Attempt {attempt+1}/{total_keys} - Using Vertex Express Mode SDK for model {request.model} (base: {base_model_name}) with API key (original index: {original_idx}).")
                     break # Successfully initialized client
                 except Exception as e:
                     print(f"WARNING: Attempt {attempt+1}/{total_keys} - Vertex Express Mode client init failed for API key (original index: {original_idx}) for model {request.model}: {e}. Trying next key.")

@@ -177,7 +184,11 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
                 current_gen_config = attempt["config_modifier"](generation_config.copy())
                 try:
                     # Pass is_auto_attempt=True for auto-mode calls
-                    return await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
+                    result = await execute_gemini_call(client_to_use, attempt["model"], attempt["prompt_func"], current_gen_config, request, is_auto_attempt=True)
+                    # Clean up DirectVertexClient session if used
+                    if isinstance(client_to_use, DirectVertexClient):
+                        await client_to_use.close()
+                    return result
                 except Exception as e_auto:
                     last_err = e_auto
                     print(f"Auto-attempt '{attempt['name']}' for model {attempt['model']} failed: {e_auto}")

@@ -185,6 +196,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
 
         print(f"All auto attempts failed. Last error: {last_err}")
         err_msg = f"All auto-mode attempts failed for model {request.model}. Last error: {str(last_err)}"
+        # Clean up DirectVertexClient session if used
+        if isinstance(client_to_use, DirectVertexClient):
+            await client_to_use.close()
         if not request.stream and last_err:
             return JSONResponse(status_code=500, content=create_openai_error_response(500, err_msg, "server_error"))
         elif request.stream:

@@ -231,9 +245,17 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
             # but the API call might need the full "gemini-1.5-pro-search".
             # Let's use `request.model` for the API call here, and `base_model_name` for checks like Express eligibility.
             # For non-auto mode, is_auto_attempt defaults to False in execute_gemini_call
-            return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
+            try:
+                return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, generation_config, request)
+            finally:
+                # Clean up DirectVertexClient session if used
+                if isinstance(client_to_use, DirectVertexClient):
+                    await client_to_use.close()
 
     except Exception as e:
         error_msg = f"Unexpected error in chat_completions endpoint: {str(e)}"
         print(error_msg)
+        # Clean up DirectVertexClient session if it exists
+        if 'client_to_use' in locals() and isinstance(client_to_use, DirectVertexClient):
+            await client_to_use.close()
         return JSONResponse(status_code=500, content=create_openai_error_response(500, error_msg, "server_error"))
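direct_vertex_client.py itself is not touched by this commit, so only the surface the route code relies on is certain: a constructor taking `api_key`, an awaitable `discover_project_id()`, and an awaitable `close()`. Combined with the new aiohttp dependency and the commit title's mention of a global region, a plausible sketch of the class follows; everything beyond those three members is an assumption.

```python
# Hypothetical sketch of direct_vertex_client.py -- the real module is not
# shown in this diff. Only __init__(api_key=...), discover_project_id(),
# and close() are implied by chat_api.py; the endpoint and the rest are
# illustrative assumptions.
import aiohttp

# Assumed target: the global (non-regional) Vertex AI REST endpoint.
VERTEX_GLOBAL_BASE = "https://aiplatform.googleapis.com/v1"

class DirectVertexClient:
    """Async client that calls the Vertex AI REST API directly, bypassing the SDK."""

    def __init__(self, api_key: str):
        self.api_key = api_key
        self.project_id: str | None = None
        self._session = aiohttp.ClientSession()

    async def discover_project_id(self) -> None:
        """Resolve the project bound to the express-mode API key.

        chat_api.py awaits this before the client is first used; the actual
        discovery mechanism is not visible in this commit.
        """
        ...

    async def close(self) -> None:
        """Release the HTTP session (the explicit cleanup chat_api.py adds)."""
        await self._session.close()
```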