Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -34,13 +34,13 @@ def generate_and_parse(messages: list, temperature: float = 0.6,
|
|
| 34 |
and parses it into thinking and answer parts.
|
| 35 |
Decorated with @spaces.GPU for Zero GPU allocation.
|
| 36 |
"""
|
| 37 |
-
#
|
| 38 |
-
prompt_text =
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
|
| 45 |
# --- CONSOLE DEBUG OUTPUT ---
|
| 46 |
print("\n" + "="*50)
|
|
@@ -279,15 +279,6 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
|
|
| 279 |
messages_for_model.extend(model_history)
|
| 280 |
|
| 281 |
try:
|
| 282 |
-
# --- DEBUG: Print what model sees ---
|
| 283 |
-
print("\n" + "="*80)
|
| 284 |
-
print("--- MESSAGES SENT TO MODEL (model_history) ---")
|
| 285 |
-
for i, msg in enumerate(messages_for_model):
|
| 286 |
-
print(f"\n[Message {i}] Role: {msg['role']}")
|
| 287 |
-
content_preview = msg['content'][:200] + "..." if len(msg['content']) > 200 else msg['content']
|
| 288 |
-
print(f"Content: {content_preview}")
|
| 289 |
-
print("="*80 + "\n")
|
| 290 |
-
|
| 291 |
# Generate response with hyperparameters
|
| 292 |
thinking, answer = generate_and_parse(
|
| 293 |
messages_for_model,
|
|
@@ -298,11 +289,8 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
|
|
| 298 |
max_new_tokens=max_tokens
|
| 299 |
)
|
| 300 |
|
| 301 |
-
# Update model history with
|
| 302 |
-
|
| 303 |
-
model_history.append({"role": "assistant", "content": f"<think>{thinking}</think>\n{answer}"})
|
| 304 |
-
else:
|
| 305 |
-
model_history.append({"role": "assistant", "content": answer})
|
| 306 |
|
| 307 |
# Format response for display (with HTML formatting)
|
| 308 |
if thinking and thinking.strip():
|
|
|
|
| 34 |
and parses it into thinking and answer parts.
|
| 35 |
Decorated with @spaces.GPU for Zero GPU allocation.
|
| 36 |
"""
|
| 37 |
+
# Apply chat template with enable_thinking=True for Qwen3
|
| 38 |
+
prompt_text = tokenizer.apply_chat_template(
|
| 39 |
+
messages,
|
| 40 |
+
tokenize=False,
|
| 41 |
+
add_generation_prompt=True,
|
| 42 |
+
enable_thinking=True # Explicitly enable thinking mode
|
| 43 |
+
)
|
| 44 |
|
| 45 |
# --- CONSOLE DEBUG OUTPUT ---
|
| 46 |
print("\n" + "="*50)
|
|
|
|
| 279 |
messages_for_model.extend(model_history)
|
| 280 |
|
| 281 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
# Generate response with hyperparameters
|
| 283 |
thinking, answer = generate_and_parse(
|
| 284 |
messages_for_model,
|
|
|
|
| 289 |
max_new_tokens=max_tokens
|
| 290 |
)
|
| 291 |
|
| 292 |
+
# Update model history with CLEAN answer (no HTML formatting)
|
| 293 |
+
model_history.append({"role": "assistant", "content": answer})
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
# Format response for display (with HTML formatting)
|
| 296 |
if thinking and thinking.strip():
|