Spaces:
Paused
Paused
Commit
·
fc56b2e
1
Parent(s):
e27eb24
2.5 flash lite support and bug fixes
Browse files
- app/message_processing.py +3 -6
- app/routes/chat_api.py +17 -10
- app/routes/models_api.py +1 -1
app/message_processing.py
CHANGED
|
@@ -436,8 +436,8 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
|
|
| 436 |
openai_finish_reason = None
|
| 437 |
|
| 438 |
if hasattr(chunk, 'candidates') and chunk.candidates:
|
| 439 |
-
candidate = chunk.candidates # Process first candidate for streaming
|
| 440 |
-
|
| 441 |
raw_gemini_finish_reason = getattr(candidate, 'finish_reason', None)
|
| 442 |
if raw_gemini_finish_reason:
|
| 443 |
if hasattr(raw_gemini_finish_reason, 'name'): raw_gemini_finish_reason_str = raw_gemini_finish_reason.name.upper()
|
|
@@ -477,10 +477,7 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
|
|
| 477 |
break
|
| 478 |
|
| 479 |
if not function_call_detected_in_chunk:
|
| 480 |
-
|
| 481 |
-
reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
|
| 482 |
-
else:
|
| 483 |
-
reasoning_text, normal_text = "", "" # Default to empty if no candidates
|
| 484 |
if is_encrypt_full:
|
| 485 |
reasoning_text = deobfuscate_text(reasoning_text)
|
| 486 |
normal_text = deobfuscate_text(normal_text)
|
|
|
|
| 436 |
openai_finish_reason = None
|
| 437 |
|
| 438 |
if hasattr(chunk, 'candidates') and chunk.candidates:
|
| 439 |
+
candidate = chunk.candidates[0] # Process first candidate for streaming
|
| 440 |
+
print(candidate)
|
| 441 |
raw_gemini_finish_reason = getattr(candidate, 'finish_reason', None)
|
| 442 |
if raw_gemini_finish_reason:
|
| 443 |
if hasattr(raw_gemini_finish_reason, 'name'): raw_gemini_finish_reason_str = raw_gemini_finish_reason.name.upper()
|
|
|
|
| 477 |
break
|
| 478 |
|
| 479 |
if not function_call_detected_in_chunk:
|
| 480 |
+
reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate)
|
|
|
|
|
|
|
|
|
|
| 481 |
if is_encrypt_full:
|
| 482 |
reasoning_text = deobfuscate_text(reasoning_text)
|
| 483 |
normal_text = deobfuscate_text(normal_text)
|
app/routes/chat_api.py
CHANGED
|
@@ -97,6 +97,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 97 |
# This will now be a dictionary
|
| 98 |
gen_config_dict = create_generation_config(request)
|
| 99 |
|
|
|
|
|
|
|
|
|
|
| 100 |
client_to_use = None
|
| 101 |
express_key_manager_instance = fastapi_request.app.state.express_key_manager
|
| 102 |
|
|
@@ -243,16 +246,20 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 243 |
# Example: if is_nothinking_model: gen_config_dict["thinking_config"] = {"thinking_budget": 0}
|
| 244 |
# This is already handled by create_generation_config based on current logic.
|
| 245 |
# If specific overrides are needed here, they would modify gen_config_dict.
|
| 246 |
-
if is_nothinking_model:
|
| 247 |
-
if
|
| 248 |
-
|
| 249 |
-
else:
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, gen_config_dict, request)
|
| 258 |
|
|
|
|
| 97 |
# This will now be a dictionary
|
| 98 |
gen_config_dict = create_generation_config(request)
|
| 99 |
|
| 100 |
+
if "gemini-2.5-flash-lite" in base_model_name:
|
| 101 |
+
gen_config_dict["thinking_config"]["include_thoughts"] = False
|
| 102 |
+
|
| 103 |
client_to_use = None
|
| 104 |
express_key_manager_instance = fastapi_request.app.state.express_key_manager
|
| 105 |
|
|
|
|
| 246 |
# Example: if is_nothinking_model: gen_config_dict["thinking_config"] = {"thinking_budget": 0}
|
| 247 |
# This is already handled by create_generation_config based on current logic.
|
| 248 |
# If specific overrides are needed here, they would modify gen_config_dict.
|
| 249 |
+
if is_nothinking_model or is_max_thinking_model:
|
| 250 |
+
if is_nothinking_model:
|
| 251 |
+
budget = 128 if "gemini-2.5-pro" in base_model_name else 0
|
| 252 |
+
else: # is_max_thinking_model
|
| 253 |
+
budget = 32768 if "gemini-2.5-pro" in base_model_name else 24576
|
| 254 |
+
|
| 255 |
+
# Ensure thinking_config is a dictionary before updating
|
| 256 |
+
if not isinstance(gen_config_dict.get("thinking_config"), dict):
|
| 257 |
+
gen_config_dict["thinking_config"] = {}
|
| 258 |
+
gen_config_dict["thinking_config"]["thinking_budget"] = budget
|
| 259 |
+
if "gemini-2.5-flash-lite" in base_model_name and is_max_thinking_model:
|
| 260 |
+
gen_config_dict["thinking_config"]["include_thoughts"] = True
|
| 261 |
+
if budget == 0:
|
| 262 |
+
gen_config_dict["thinking_config"]["include_thoughts"] = False
|
| 263 |
|
| 264 |
return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, gen_config_dict, request)
|
| 265 |
|
app/routes/models_api.py
CHANGED
|
@@ -36,7 +36,7 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 36 |
suffixes = [""] # For the base model itself
|
| 37 |
if not base_id.startswith("gemini-2.0"):
|
| 38 |
suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
|
| 39 |
-
if "gemini-2.5-flash" in base_id or "gemini-2.5-pro"
|
| 40 |
suffixes.extend(["-nothinking", "-max"])
|
| 41 |
|
| 42 |
# Add the openai variant for all models
|
|
|
|
| 36 |
suffixes = [""] # For the base model itself
|
| 37 |
if not base_id.startswith("gemini-2.0"):
|
| 38 |
suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
|
| 39 |
+
if "gemini-2.5-flash" in base_id or "gemini-2.5-pro" == base_id or "gemini-2.5-pro-preview-06-05" == base_id:
|
| 40 |
suffixes.extend(["-nothinking", "-max"])
|
| 41 |
|
| 42 |
# Add the openai variant for all models
|