Spaces:
Paused
Paused
Commit
·
fc56b2e
1
Parent(s):
e27eb24
2.5 flash lite support and bug fixes
Browse files
- app/message_processing.py +3 -6
- app/routes/chat_api.py +17 -10
- app/routes/models_api.py +1 -1
app/message_processing.py
CHANGED
|
@@ -436,8 +436,8 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
|
|
| 436 |
openai_finish_reason = None
|
| 437 |
|
| 438 |
if hasattr(chunk, 'candidates') and chunk.candidates:
|
| 439 |
-
candidate = chunk.candidates # Process first candidate for streaming
|
| 440 |
-
|
| 441 |
raw_gemini_finish_reason = getattr(candidate, 'finish_reason', None)
|
| 442 |
if raw_gemini_finish_reason:
|
| 443 |
if hasattr(raw_gemini_finish_reason, 'name'): raw_gemini_finish_reason_str = raw_gemini_finish_reason.name.upper()
|
|
@@ -477,10 +477,7 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
|
|
| 477 |
break
|
| 478 |
|
| 479 |
if not function_call_detected_in_chunk:
|
| 480 |
-
|
| 481 |
-
reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
|
| 482 |
-
else:
|
| 483 |
-
reasoning_text, normal_text = "", "" # Default to empty if no candidates
|
| 484 |
if is_encrypt_full:
|
| 485 |
reasoning_text = deobfuscate_text(reasoning_text)
|
| 486 |
normal_text = deobfuscate_text(normal_text)
|
|
|
|
| 436 |
openai_finish_reason = None
|
| 437 |
|
| 438 |
if hasattr(chunk, 'candidates') and chunk.candidates:
|
| 439 |
+
candidate = chunk.candidates[0] # Process first candidate for streaming
|
| 440 |
+
print(candidate)
|
| 441 |
raw_gemini_finish_reason = getattr(candidate, 'finish_reason', None)
|
| 442 |
if raw_gemini_finish_reason:
|
| 443 |
if hasattr(raw_gemini_finish_reason, 'name'): raw_gemini_finish_reason_str = raw_gemini_finish_reason.name.upper()
|
|
|
|
| 477 |
break
|
| 478 |
|
| 479 |
if not function_call_detected_in_chunk:
|
| 480 |
+
reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate)
|
|
|
|
|
|
|
|
|
|
| 481 |
if is_encrypt_full:
|
| 482 |
reasoning_text = deobfuscate_text(reasoning_text)
|
| 483 |
normal_text = deobfuscate_text(normal_text)
|
app/routes/chat_api.py
CHANGED
|
@@ -97,6 +97,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 97 |
# This will now be a dictionary
|
| 98 |
gen_config_dict = create_generation_config(request)
|
| 99 |
|
|
|
|
|
|
|
|
|
|
| 100 |
client_to_use = None
|
| 101 |
express_key_manager_instance = fastapi_request.app.state.express_key_manager
|
| 102 |
|
|
@@ -243,16 +246,20 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
|
|
| 243 |
# Example: if is_nothinking_model: gen_config_dict["thinking_config"] = {"thinking_budget": 0}
|
| 244 |
# This is already handled by create_generation_config based on current logic.
|
| 245 |
# If specific overrides are needed here, they would modify gen_config_dict.
|
| 246 |
-
if is_nothinking_model:
|
| 247 |
-
if
|
| 248 |
-
|
| 249 |
-
else:
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
|
| 257 |
return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, gen_config_dict, request)
|
| 258 |
|
|
|
|
| 97 |
# This will now be a dictionary
|
| 98 |
gen_config_dict = create_generation_config(request)
|
| 99 |
|
| 100 |
+
if "gemini-2.5-flash-lite" in base_model_name:
|
| 101 |
+
gen_config_dict["thinking_config"]["include_thoughts"] = False
|
| 102 |
+
|
| 103 |
client_to_use = None
|
| 104 |
express_key_manager_instance = fastapi_request.app.state.express_key_manager
|
| 105 |
|
|
|
|
| 246 |
# Example: if is_nothinking_model: gen_config_dict["thinking_config"] = {"thinking_budget": 0}
|
| 247 |
# This is already handled by create_generation_config based on current logic.
|
| 248 |
# If specific overrides are needed here, they would modify gen_config_dict.
|
| 249 |
+
if is_nothinking_model or is_max_thinking_model:
|
| 250 |
+
if is_nothinking_model:
|
| 251 |
+
budget = 128 if "gemini-2.5-pro" in base_model_name else 0
|
| 252 |
+
else: # is_max_thinking_model
|
| 253 |
+
budget = 32768 if "gemini-2.5-pro" in base_model_name else 24576
|
| 254 |
+
|
| 255 |
+
# Ensure thinking_config is a dictionary before updating
|
| 256 |
+
if not isinstance(gen_config_dict.get("thinking_config"), dict):
|
| 257 |
+
gen_config_dict["thinking_config"] = {}
|
| 258 |
+
gen_config_dict["thinking_config"]["thinking_budget"] = budget
|
| 259 |
+
if "gemini-2.5-flash-lite" in base_model_name and is_max_thinking_model:
|
| 260 |
+
gen_config_dict["thinking_config"]["include_thoughts"] = True
|
| 261 |
+
if budget == 0:
|
| 262 |
+
gen_config_dict["thinking_config"]["include_thoughts"] = False
|
| 263 |
|
| 264 |
return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, gen_config_dict, request)
|
| 265 |
|
app/routes/models_api.py
CHANGED
|
@@ -36,7 +36,7 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
|
|
| 36 |
suffixes = [""] # For the base model itself
|
| 37 |
if not base_id.startswith("gemini-2.0"):
|
| 38 |
suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
|
| 39 |
-
if "gemini-2.5-flash" in base_id or "gemini-2.5-pro"
|
| 40 |
suffixes.extend(["-nothinking", "-max"])
|
| 41 |
|
| 42 |
# Add the openai variant for all models
|
|
|
|
| 36 |
suffixes = [""] # For the base model itself
|
| 37 |
if not base_id.startswith("gemini-2.0"):
|
| 38 |
suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
|
| 39 |
+
if "gemini-2.5-flash" in base_id or "gemini-2.5-pro" == base_id or "gemini-2.5-pro-preview-06-05" == base_id:
|
| 40 |
suffixes.extend(["-nothinking", "-max"])
|
| 41 |
|
| 42 |
# Add the openai variant for all models
|