bibibi12345 committed on
Commit
fc56b2e
·
1 Parent(s): e27eb24

2.5 flash lite support and bug fixes

Browse files
app/message_processing.py CHANGED
@@ -436,8 +436,8 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
436
  openai_finish_reason = None
437
 
438
  if hasattr(chunk, 'candidates') and chunk.candidates:
439
- candidate = chunk.candidates # Process first candidate for streaming
440
-
441
  raw_gemini_finish_reason = getattr(candidate, 'finish_reason', None)
442
  if raw_gemini_finish_reason:
443
  if hasattr(raw_gemini_finish_reason, 'name'): raw_gemini_finish_reason_str = raw_gemini_finish_reason.name.upper()
@@ -477,10 +477,7 @@ def convert_chunk_to_openai(chunk: Any, model_name: str, response_id: str, candi
477
  break
478
 
479
  if not function_call_detected_in_chunk:
480
- if candidate and len(candidate) > 0: # Kilo Code: Ensure candidate list is not empty
481
- reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate[0]) # Kilo Code: Pass the first Candidate object
482
- else:
483
- reasoning_text, normal_text = "", "" # Default to empty if no candidates
484
  if is_encrypt_full:
485
  reasoning_text = deobfuscate_text(reasoning_text)
486
  normal_text = deobfuscate_text(normal_text)
 
436
  openai_finish_reason = None
437
 
438
  if hasattr(chunk, 'candidates') and chunk.candidates:
439
+ candidate = chunk.candidates[0] # Process first candidate for streaming
440
+ print(candidate)
441
  raw_gemini_finish_reason = getattr(candidate, 'finish_reason', None)
442
  if raw_gemini_finish_reason:
443
  if hasattr(raw_gemini_finish_reason, 'name'): raw_gemini_finish_reason_str = raw_gemini_finish_reason.name.upper()
 
477
  break
478
 
479
  if not function_call_detected_in_chunk:
480
+ reasoning_text, normal_text = parse_gemini_response_for_reasoning_and_content(candidate)
 
 
 
481
  if is_encrypt_full:
482
  reasoning_text = deobfuscate_text(reasoning_text)
483
  normal_text = deobfuscate_text(normal_text)
app/routes/chat_api.py CHANGED
@@ -97,6 +97,9 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
97
  # This will now be a dictionary
98
  gen_config_dict = create_generation_config(request)
99
 
 
 
 
100
  client_to_use = None
101
  express_key_manager_instance = fastapi_request.app.state.express_key_manager
102
 
@@ -243,16 +246,20 @@ async def chat_completions(fastapi_request: Request, request: OpenAIRequest, api
243
  # Example: if is_nothinking_model: gen_config_dict["thinking_config"] = {"thinking_budget": 0}
244
  # This is already handled by create_generation_config based on current logic.
245
  # If specific overrides are needed here, they would modify gen_config_dict.
246
- if is_nothinking_model:
247
- if "gemini-2.5-pro" in base_model_name: # Example specific override
248
- gen_config_dict["thinking_config"] = {"thinking_budget": 128}
249
- else:
250
- gen_config_dict["thinking_config"] = {"thinking_budget": 0}
251
- elif is_max_thinking_model:
252
- if "gemini-2.5-pro" in base_model_name:
253
- gen_config_dict["thinking_config"] = {"thinking_budget": 32768}
254
- else:
255
- gen_config_dict["thinking_config"] = {"thinking_budget": 24576}
 
 
 
 
256
 
257
  return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, gen_config_dict, request)
258
 
 
97
  # This will now be a dictionary
98
  gen_config_dict = create_generation_config(request)
99
 
100
+ if "gemini-2.5-flash-lite" in base_model_name:
101
+ gen_config_dict["thinking_config"]["include_thoughts"] = False
102
+
103
  client_to_use = None
104
  express_key_manager_instance = fastapi_request.app.state.express_key_manager
105
 
 
246
  # Example: if is_nothinking_model: gen_config_dict["thinking_config"] = {"thinking_budget": 0}
247
  # This is already handled by create_generation_config based on current logic.
248
  # If specific overrides are needed here, they would modify gen_config_dict.
249
+ if is_nothinking_model or is_max_thinking_model:
250
+ if is_nothinking_model:
251
+ budget = 128 if "gemini-2.5-pro" in base_model_name else 0
252
+ else: # is_max_thinking_model
253
+ budget = 32768 if "gemini-2.5-pro" in base_model_name else 24576
254
+
255
+ # Ensure thinking_config is a dictionary before updating
256
+ if not isinstance(gen_config_dict.get("thinking_config"), dict):
257
+ gen_config_dict["thinking_config"] = {}
258
+ gen_config_dict["thinking_config"]["thinking_budget"] = budget
259
+ if "gemini-2.5-flash-lite" in base_model_name and is_max_thinking_model:
260
+ gen_config_dict["thinking_config"]["include_thoughts"] = True
261
+ if budget == 0:
262
+ gen_config_dict["thinking_config"]["include_thoughts"] = False
263
 
264
  return await execute_gemini_call(client_to_use, base_model_name, current_prompt_func, gen_config_dict, request)
265
 
app/routes/models_api.py CHANGED
@@ -36,7 +36,7 @@ async def list_models(fastapi_request: Request, api_key: str = Depends(get_api_k
36
  suffixes = [""] # For the base model itself
37
  if not base_id.startswith("gemini-2.0"):
38
  suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
39
- if "gemini-2.5-flash" in base_id or "gemini-2.5-pro" in base_id:
40
  suffixes.extend(["-nothinking", "-max"])
41
 
42
  # Add the openai variant for all models
 
36
  suffixes = [""] # For the base model itself
37
  if not base_id.startswith("gemini-2.0"):
38
  suffixes.extend(["-search", "-encrypt", "-encrypt-full", "-auto"])
39
+ if "gemini-2.5-flash" in base_id or "gemini-2.5-pro" == base_id or "gemini-2.5-pro-preview-06-05" == base_id:
40
  suffixes.extend(["-nothinking", "-max"])
41
 
42
  # Add the openai variant for all models