Spaces:

gr0010
/

CustomThinker-Demo

Running on Zero

gr0010 committed 25 days ago

Commit

3a6c17b

verified ·

1 Parent(s): 7ce766e

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 # -------------------------------------------------
 # Model setup (loaded once at startup)
 # -------------------------------------------------
-model_name = "gr0010/CustomThinker-0-8B"
 # Load model and tokenizer globally
 print("Loading model and tokenizer...")
@@ -34,12 +34,12 @@ def generate_and_parse(messages: list, temperature: float = 0.6,
     and parses it into thinking and answer parts.
     Decorated with @spaces.GPU for Zero GPU allocation.
     """
-    # Apply chat template with enable_thinking=True for Qwen3
     prompt_text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
-        enable_thinking=True  # Explicitly enable thinking mode
     )
     # --- CONSOLE DEBUG OUTPUT ---

 # -------------------------------------------------
 # Model setup (loaded once at startup)
 # -------------------------------------------------
+model_name = "CustomThinker-0-8B"
 # Load model and tokenizer globally
 print("Loading model and tokenizer...")
     and parses it into thinking and answer parts.
     Decorated with @spaces.GPU for Zero GPU allocation.
     """
+    # Apply chat template WITHOUT enable_thinking to preserve thinking tags in history
     prompt_text = tokenizer.apply_chat_template(
         messages,
         tokenize=False,
         add_generation_prompt=True,
+        enable_thinking=False  # Changed to False to preserve <think> tags in context
     )
     # --- CONSOLE DEBUG OUTPUT ---