Spaces:

ivxivx
/

HF-customer-service-chatbot

Sleeping

ivxivx committed on Jun 5

Commit

2dca75f

unverified ·

1 Parent(s): cb75d9f

chore: parse assistant message

Files changed (1) hide show

app.py CHANGED Viewed

@@ -77,20 +77,24 @@ def predict(message, history):
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     # 3. Generate response
     outputs = model.generate(**inputs, max_new_tokens=100)
-    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # print(f"Response: {response}, outputs: {outputs}")
     # Extract only the assistant's message (after the last user message)
-    # This works for most chat templates that append the assistant's reply at the end
-    if "<|im_start|>assistant" in decoded:
         response = decoded.split("<|im_start|>assistant")[-1]
-        # Remove possible end tokens or markers
         response = response.replace("<|im_end|>", "").strip()
     else:
         # Fallback: just return the decoded output
         response = decoded.strip()
     return response
 demo = gr.ChatInterface(predict, type="messages", examples=examples)

     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     # 3. Generate response
     outputs = model.generate(**inputs, max_new_tokens=100)
+    # skip_special_tokens=False: we want to keep special tokens for easier parsing
+    decoded = tokenizer.decode(outputs[0], skip_special_tokens=False)
+    # print(f"decoded: {decoded}\n")
+    # print(f"outputs: {outputs}\n")
     # Extract only the assistant's message (after the last user message)
+    if "<|start_header_id|>assistant<|end_header_id|>" in decoded:
+        response = decoded.split("<|start_header_id|>assistant<|end_header_id|>")[-1]
+        response = response.replace("<|eot_id|>", "").strip()
+    elif "<|im_start|>assistant" in decoded:
+        # This works for most chat templates that append the assistant's reply at the end
         response = decoded.split("<|im_start|>assistant")[-1]
         response = response.replace("<|im_end|>", "").strip()
     else:
         # Fallback: just return the decoded output
         response = decoded.strip()
     return response
 demo = gr.ChatInterface(predict, type="messages", examples=examples)