Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
| 7 |
# -------------------------------------------------
|
| 8 |
# Model setup (loaded once at startup)
|
| 9 |
# -------------------------------------------------
|
| 10 |
-
model_name = "
|
| 11 |
|
| 12 |
# Load model and tokenizer globally
|
| 13 |
print("Loading model and tokenizer...")
|
|
@@ -34,12 +34,12 @@ def generate_and_parse(messages: list, temperature: float = 0.6,
|
|
| 34 |
and parses it into thinking and answer parts.
|
| 35 |
Decorated with @spaces.GPU for Zero GPU allocation.
|
| 36 |
"""
|
| 37 |
-
# Apply chat template
|
| 38 |
prompt_text = tokenizer.apply_chat_template(
|
| 39 |
messages,
|
| 40 |
tokenize=False,
|
| 41 |
add_generation_prompt=True,
|
| 42 |
-
enable_thinking=
|
| 43 |
)
|
| 44 |
|
| 45 |
# --- CONSOLE DEBUG OUTPUT ---
|
|
|
|
| 7 |
# -------------------------------------------------
|
| 8 |
# Model setup (loaded once at startup)
|
| 9 |
# -------------------------------------------------
|
| 10 |
+
model_name = "CustomThinker-0-8B"
|
| 11 |
|
| 12 |
# Load model and tokenizer globally
|
| 13 |
print("Loading model and tokenizer...")
|
|
|
|
| 34 |
and parses it into thinking and answer parts.
|
| 35 |
Decorated with @spaces.GPU for Zero GPU allocation.
|
| 36 |
"""
|
| 37 |
+
# Apply chat template with enable_thinking=False, intended to preserve <think> tags in history
|
| 38 |
prompt_text = tokenizer.apply_chat_template(
|
| 39 |
messages,
|
| 40 |
tokenize=False,
|
| 41 |
add_generation_prompt=True,
|
| 42 |
+
enable_thinking=False # Changed to False to preserve <think> tags in context
|
| 43 |
)
|
| 44 |
|
| 45 |
# --- CONSOLE DEBUG OUTPUT ---
|