ivxivx committed
Commit cb75d9f · unverified · 1 Parent(s): eab7e53

chore: use gradio instead of streamlit

Files changed (2):
  1. README.md +2 -2
  2. app.py +12 -1
README.md CHANGED
@@ -3,8 +3,8 @@ title: Customer Service Chatbot
 emoji: 🔮
 colorFrom: indigo
 colorTo: indigo
-sdk: streamlit
-sdk_version: 1.34.0
+sdk: gradio
+sdk_version: 5.23.0
 app_file: app.py
 pinned: false
 ---
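For context, `sdk: gradio` tells Spaces to run `app_file` (app.py) as a Gradio app. A minimal sketch of the entry point that configuration expects, with a stub `predict` standing in for the real one shown in the app.py diff below:

import gradio as gr

def predict(message, history):
    # Stub only: the real predict() in app.py runs the model and returns the reply.
    return f"Echo: {message}"

# type="messages" makes history a list of {"role": ..., "content": ...} dicts.
demo = gr.ChatInterface(predict, type="messages")

if __name__ == "__main__":
    demo.launch()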
app.py CHANGED
@@ -77,9 +77,20 @@ def predict(message, history):
     inputs = tokenizer(prompt, return_tensors="pt").to(device)
     # 3. Generate response
     outputs = model.generate(**inputs, max_new_tokens=100)
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
     # print(f"Response: {response}, outputs: {outputs}")
+    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Extract only the assistant's message (after the last user message)
+    # This works for most chat templates that append the assistant's reply at the end
+    if "<|im_start|>assistant" in decoded:
+        response = decoded.split("<|im_start|>assistant")[-1]
+        # Remove possible end tokens or markers
+        response = response.replace("<|im_end|>", "").strip()
+    else:
+        # Fallback: just return the decoded output
+        response = decoded.strip()
+
     return response
 
 demo = gr.ChatInterface(predict, type="messages", examples=examples)
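The marker-based extraction above assumes a ChatML-style chat template (turns wrapped in `<|im_start|>…<|im_end|>`). Below is a hedged sketch of that assumption and of a common alternative that slices off the prompt tokens instead of splitting on the marker; the model name and messages are illustrative, not taken from app.py:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative model; any ChatML-style instruct model behaves similarly.
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

messages = [{"role": "user", "content": "Where is my order?"}]
# With a ChatML template this prompt ends in "<|im_start|>assistant\n",
# which is the marker the new code in app.py splits on.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=100)

# Caveat: skip_special_tokens=True may strip the ChatML markers during decoding
# (they are usually registered as special tokens), in which case the split in
# app.py falls through to the fallback branch. Slicing off the prompt tokens
# before decoding avoids depending on the markers at all:
prompt_len = inputs["input_ids"].shape[1]
response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
print(response)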