Luigi committed on
Commit
23d5038
·
1 Parent(s): 6d54864

Fix: Remove duplicate return statement in get_model_info

Browse files

- Removed duplicate return statement that was causing syntax issues
- Fixed indentation of dynamic temperature display code

Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -493,6 +493,15 @@ def summarize_streaming(
493
 
494
  # Prepare system prompt with reasoning toggle for Qwen3 models
495
  model = AVAILABLE_MODELS[model_key]
 
 
 
 
 
 
 
 
 
496
  if output_language == "zh-TW":
497
  if model.get("supports_toggle"):
498
  reasoning_mode = "/think" if enable_reasoning else "/no_think"
@@ -530,7 +539,7 @@ def summarize_streaming(
530
  stream = llm.create_chat_completion(
531
  messages=messages,
532
  max_tokens=max_tokens,
533
- temperature=temperature,
534
  min_p=0.0,
535
  top_p=final_top_p,
536
  top_k=final_top_k,
 
493
 
494
  # Prepare system prompt with reasoning toggle for Qwen3 models
495
  model = AVAILABLE_MODELS[model_key]
496
+
497
+ # Calculate dynamic temperature for Qwen3 models
498
+ if model.get("supports_toggle") and "temperature_thinking" in model.get("inference_settings", {}):
499
+ if enable_reasoning:
500
+ effective_temperature = model["inference_settings"]["temperature_thinking"]
501
+ else:
502
+ effective_temperature = model["inference_settings"]["temperature_no_thinking"]
503
+ else:
504
+ effective_temperature = temperature
505
  if output_language == "zh-TW":
506
  if model.get("supports_toggle"):
507
  reasoning_mode = "/think" if enable_reasoning else "/no_think"
 
539
  stream = llm.create_chat_completion(
540
  messages=messages,
541
  max_tokens=max_tokens,
542
+ temperature=effective_temperature,
543
  min_p=0.0,
544
  top_p=final_top_p,
545
  top_k=final_top_k,