gr0010 committed
Commit 3a6c17b · verified · 1 Parent(s): 7ce766e

Update app.py

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -7,7 +7,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 # -------------------------------------------------
 # Model setup (loaded once at startup)
 # -------------------------------------------------
-model_name = "gr0010/CustomThinker-0-8B"
+model_name = "CustomThinker-0-8B"
 
 # Load model and tokenizer globally
 print("Loading model and tokenizer...")
@@ -34,12 +34,12 @@ def generate_and_parse(messages: list, temperature: float = 0.6,
     and parses it into thinking and answer parts.
     Decorated with @spaces.GPU for Zero GPU allocation.
     """
-    # Apply chat template with enable_thinking=True for Qwen3
+    # Apply chat template WITHOUT enable_thinking to preserve thinking tags in history
     prompt_text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
-        enable_thinking=True  # Explicitly enable thinking mode
+        enable_thinking=False  # Changed to False to preserve <think> tags in context
     )
 
     # --- CONSOLE DEBUG OUTPUT ---
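
For reference, here is a minimal sketch of what flipping the enable_thinking flag does to the rendered prompt with a Qwen3-style chat template. The model id and messages below are illustrative, not the Space's actual setup, and assume a tokenizer whose chat template accepts the flag:

from transformers import AutoTokenizer

# Illustrative model id with a thinking-aware chat template (assumption:
# CustomThinker-0-8B ships a Qwen3-style template that accepts the flag).
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")

messages = [{"role": "user", "content": "What is 2 + 2?"}]

# enable_thinking=True: the generation prompt ends after the assistant
# header, leaving the model free to open a <think>...</think> block.
with_thinking = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=True
)

# enable_thinking=False: Qwen3's template appends an empty <think></think>
# pair to the generation prompt, suppressing a fresh reasoning trace.
without_thinking = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
)

print(with_thinking)
print(without_thinking)

The exact strings depend on the chat template bundled with the tokenizer, so the effect of flipping the flag is best verified by printing both renderings as above.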