moevis committed on
Commit
10a6457
·
verified ·
1 Parent(s): 9b74786

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -65
app.py CHANGED
@@ -65,10 +65,15 @@ def format_messages(system, history, user_text, audio_data_list=None):
65
  for item in history:
66
  # 支持 list of dicts 格式
67
  if isinstance(item, dict) and "role" in item and "content" in item:
68
- messages.append(item)
 
 
 
69
  # 支持 Gradio ChatMessage 对象
70
  elif hasattr(item, "role") and hasattr(item, "content"):
71
- messages.append({"role": item.role, "content": item.content})
 
 
72
 
73
  # 添加当前用户消息
74
  if user_text and audio_data_list:
@@ -94,6 +99,10 @@ def format_messages(system, history, user_text, audio_data_list=None):
94
  messages.append({"role": "user", "content": user_text})
95
  elif audio_data_list:
96
  content = []
 
 
 
 
97
  for audio_data in audio_data_list:
98
  content.append({
99
  "type": "input_audio",
@@ -102,10 +111,6 @@ def format_messages(system, history, user_text, audio_data_list=None):
102
  "format": "wav"
103
  }
104
  })
105
- messages.append({
106
- "role": "user",
107
- "content": content
108
- })
109
 
110
  return messages
111
 
@@ -116,7 +121,8 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
116
  model_name = MODEL_NAME
117
 
118
  if not user_text and not audio_file:
119
- return history or [], "Please enter text or upload audio"
 
120
 
121
  # Ensure history is a list and formatted correctly
122
  history = history or []
@@ -136,7 +142,8 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
136
 
137
  messages = format_messages(system_prompt, history, user_text, audio_data_list)
138
  if not messages:
139
- return history or [], "Invalid input"
 
140
 
141
  # Debug: Print message format
142
  print(f"[DEBUG] Messages to API: {json.dumps(messages, ensure_ascii=False, indent=2)}")
@@ -144,6 +151,27 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
144
  for i, msg in enumerate(messages):
145
  print(f"[DEBUG] Message {i}: {type(msg)} - {msg}")
146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  try:
148
  with httpx.Client(base_url=API_BASE_URL, timeout=120) as client:
149
  response = client.post("/chat/completions", json={
@@ -165,10 +193,13 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
165
  error_msg += " - Bad request"
166
  elif response.status_code == 500:
167
  error_msg += " - Model error"
168
- return history, error_msg
 
169
 
170
  # Process streaming response
171
- content_parts = []
 
 
172
  for line in response.iter_lines():
173
  if not line:
174
  continue
@@ -187,66 +218,45 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
187
  if 'choices' in data and len(data['choices']) > 0:
188
  delta = data['choices'][0].get('delta', {})
189
  if 'content' in delta:
190
- content_parts.append(delta['content'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  except json.JSONDecodeError:
192
  continue
193
 
194
- full_content = ''.join(content_parts)
195
-
196
- # Update history - only add when no error
197
- history = history or []
198
-
199
- # Add user message
200
- if audio_file:
201
- # If audio exists, show audio file and text (if any)
202
- # Gradio Chatbot supports tuple (file_path,) to show file
203
- # But in messages format, we need to construct proper content
204
- # Here we use tuple format to let Gradio render audio player, or use HTML
205
- # Simpler way: if multimodal, add messages separately
206
-
207
- # 1. Add audio message
208
- history.append({"role": "user", "content": gr.Audio(audio_file)})
209
-
210
- # 2. If text exists, add text message
211
- if user_text:
212
- history.append({"role": "user", "content": user_text})
213
- else:
214
- # Text only
215
- history.append({"role": "user", "content": user_text})
216
-
217
- # Split think and content
218
- if "</think>" in full_content:
219
- parts = full_content.split("</think>", 1)
220
- think_content = parts[0].strip()
221
- response_content = parts[1].strip()
222
-
223
- # Remove possible start tag
224
- if think_content.startswith("<think>"):
225
- think_content = think_content[len("<think>"):].strip()
226
-
227
- # Add thinking process message (use ChatMessage and metadata)
228
- if think_content:
229
- history.append(gr.ChatMessage(
230
- role="assistant",
231
- content=think_content,
232
- metadata={"title": "⏳ Thinking Process"}
233
- ))
234
-
235
- # Add formal response message
236
- if response_content:
237
- history.append({"role": "assistant", "content": response_content})
238
- else:
239
- # No think tag, add full response directly
240
- assistant_text = full_content.strip()
241
- if assistant_text:
242
- history.append({"role": "assistant", "content": assistant_text})
243
-
244
- return history, ""
245
-
246
  except httpx.ConnectError:
247
- return history, "❌ Cannot connect to vLLM API"
248
  except Exception as e:
249
- return history, f"❌ Error: {str(e)}"
250
 
251
  # Gradio Interface
252
  with gr.Blocks(title="Step Audio R1") as demo:
 
65
  for item in history:
66
  # 支持 list of dicts 格式
67
  if isinstance(item, dict) and "role" in item and "content" in item:
68
+ # Filter out non-serializable content (e.g. gr.Audio components)
69
+ content = item["content"]
70
+ if isinstance(content, (str, list, dict)):
71
+ messages.append(item)
72
  # 支持 Gradio ChatMessage 对象
73
  elif hasattr(item, "role") and hasattr(item, "content"):
74
+ content = item.content
75
+ if isinstance(content, (str, list, dict)):
76
+ messages.append({"role": item.role, "content": content})
77
 
78
  # 添加当前用户消息
79
  if user_text and audio_data_list:
 
99
  messages.append({"role": "user", "content": user_text})
100
  elif audio_data_list:
101
  content = []
102
+ messages.append({
103
+ "role": "user",
104
+ "content": content
105
+ })
106
  for audio_data in audio_data_list:
107
  content.append({
108
  "type": "input_audio",
 
111
  "format": "wav"
112
  }
113
  })
 
 
 
 
114
 
115
  return messages
116
 
 
121
  model_name = MODEL_NAME
122
 
123
  if not user_text and not audio_file:
124
+ yield history or [], "Please enter text or upload audio"
125
+ return
126
 
127
  # Ensure history is a list and formatted correctly
128
  history = history or []
 
142
 
143
  messages = format_messages(system_prompt, history, user_text, audio_data_list)
144
  if not messages:
145
+ yield history or [], "Invalid input"
146
+ return
147
 
148
  # Debug: Print message format
149
  print(f"[DEBUG] Messages to API: {json.dumps(messages, ensure_ascii=False, indent=2)}")
 
151
  for i, msg in enumerate(messages):
152
  print(f"[DEBUG] Message {i}: {type(msg)} - {msg}")
153
 
154
+ # Update history with user message immediately
155
+ if audio_file:
156
+ # 1. Add audio message
157
+ history.append({"role": "user", "content": gr.Audio(audio_file)})
158
+
159
+ # 2. If text exists, add text message
160
+ if user_text:
161
+ history.append({"role": "user", "content": user_text})
162
+ else:
163
+ # Text only
164
+ history.append({"role": "user", "content": user_text})
165
+
166
+ # Add thinking placeholder
167
+ history.append(gr.ChatMessage(
168
+ role="assistant",
169
+ content="",
170
+ metadata={"title": "⏳ Thinking Process"}
171
+ ))
172
+
173
+ yield history, "Generating..."
174
+
175
  try:
176
  with httpx.Client(base_url=API_BASE_URL, timeout=120) as client:
177
  response = client.post("/chat/completions", json={
 
193
  error_msg += " - Bad request"
194
  elif response.status_code == 500:
195
  error_msg += " - Model error"
196
+ yield history, error_msg
197
+ return
198
 
199
  # Process streaming response
200
+ buffer = ""
201
+ is_thinking = True
202
+
203
  for line in response.iter_lines():
204
  if not line:
205
  continue
 
218
  if 'choices' in data and len(data['choices']) > 0:
219
  delta = data['choices'][0].get('delta', {})
220
  if 'content' in delta:
221
+ content = delta['content']
222
+ buffer += content
223
+
224
+ if is_thinking:
225
+ if "</think>" in buffer:
226
+ is_thinking = False
227
+ parts = buffer.split("</think>", 1)
228
+ think_content = parts[0]
229
+ response_content = parts[1]
230
+
231
+ if think_content.startswith("<think>"):
232
+ think_content = think_content[len("<think>"):].strip()
233
+
234
+ # Update thinking message
235
+ history[-1].content = think_content
236
+
237
+ # Add response message
238
+ history.append({"role": "assistant", "content": response_content})
239
+ else:
240
+ # Update thinking message
241
+ current_think = buffer
242
+ if current_think.startswith("<think>"):
243
+ current_think = current_think[len("<think>"):]
244
+ history[-1].content = current_think
245
+ else:
246
+ # Already split, just update response message
247
+ parts = buffer.split("</think>", 1)
248
+ response_content = parts[1]
249
+ history[-1]["content"] = response_content
250
+
251
+ yield history, ""
252
+
253
  except json.JSONDecodeError:
254
  continue
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  except httpx.ConnectError:
257
+ yield history, "❌ Cannot connect to vLLM API"
258
  except Exception as e:
259
+ yield history, f"❌ Error: {str(e)}"
260
 
261
  # Gradio Interface
262
  with gr.Blocks(title="Step Audio R1") as demo: