gr0010 committed on
Commit
d938e2c
·
verified ·
1 Parent(s): d89decd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -21
app.py CHANGED
@@ -34,13 +34,13 @@ def generate_and_parse(messages: list, temperature: float = 0.6,
34
  and parses it into thinking and answer parts.
35
  Decorated with @spaces.GPU for Zero GPU allocation.
36
  """
37
- # Build prompt manually to preserve <think> tags in context
38
- prompt_text = ""
39
- for msg in messages:
40
- role = msg["role"]
41
- content = msg["content"]
42
- prompt_text += f"<|im_start|>{role}\n{content}<|im_end|>\n"
43
- prompt_text += "<|im_start|>assistant\n"
44
 
45
  # --- CONSOLE DEBUG OUTPUT ---
46
  print("\n" + "="*50)
@@ -279,15 +279,6 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
279
  messages_for_model.extend(model_history)
280
 
281
  try:
282
- # --- DEBUG: Print what model sees ---
283
- print("\n" + "="*80)
284
- print("--- MESSAGES SENT TO MODEL (model_history) ---")
285
- for i, msg in enumerate(messages_for_model):
286
- print(f"\n[Message {i}] Role: {msg['role']}")
287
- content_preview = msg['content'][:200] + "..." if len(msg['content']) > 200 else msg['content']
288
- print(f"Content: {content_preview}")
289
- print("="*80 + "\n")
290
-
291
  # Generate response with hyperparameters
292
  thinking, answer = generate_and_parse(
293
  messages_for_model,
@@ -298,11 +289,8 @@ Think using bullet points and short sentences to simulate thoughts and emoticons
298
  max_new_tokens=max_tokens
299
  )
300
 
301
- # Update model history with thinking AND answer (CHANGED)
302
- if thinking and thinking.strip():
303
- model_history.append({"role": "assistant", "content": f"<think>{thinking}</think>\n{answer}"})
304
- else:
305
- model_history.append({"role": "assistant", "content": answer})
306
 
307
  # Format response for display (with HTML formatting)
308
  if thinking and thinking.strip():
 
34
  and parses it into thinking and answer parts.
35
  Decorated with @spaces.GPU for Zero GPU allocation.
36
  """
37
+ # Apply chat template with enable_thinking=True for Qwen3
38
+ prompt_text = tokenizer.apply_chat_template(
39
+ messages,
40
+ tokenize=False,
41
+ add_generation_prompt=True,
42
+ enable_thinking=True # Explicitly enable thinking mode
43
+ )
44
 
45
  # --- CONSOLE DEBUG OUTPUT ---
46
  print("\n" + "="*50)
 
279
  messages_for_model.extend(model_history)
280
 
281
  try:
 
 
 
 
 
 
 
 
 
282
  # Generate response with hyperparameters
283
  thinking, answer = generate_and_parse(
284
  messages_for_model,
 
289
  max_new_tokens=max_tokens
290
  )
291
 
292
+ # Update model history with CLEAN answer (no HTML formatting)
293
+ model_history.append({"role": "assistant", "content": answer})
 
 
 
294
 
295
  # Format response for display (with HTML formatting)
296
  if thinking and thinking.strip():