Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -65,10 +65,15 @@ def format_messages(system, history, user_text, audio_data_list=None):
|
|
| 65 |
for item in history:
|
| 66 |
# 支持 list of dicts 格式
|
| 67 |
if isinstance(item, dict) and "role" in item and "content" in item:
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
| 69 |
# 支持 Gradio ChatMessage 对象
|
| 70 |
elif hasattr(item, "role") and hasattr(item, "content"):
|
| 71 |
-
|
|
|
|
|
|
|
| 72 |
|
| 73 |
# 添加当前用户消息
|
| 74 |
if user_text and audio_data_list:
|
|
@@ -94,6 +99,10 @@ def format_messages(system, history, user_text, audio_data_list=None):
|
|
| 94 |
messages.append({"role": "user", "content": user_text})
|
| 95 |
elif audio_data_list:
|
| 96 |
content = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
for audio_data in audio_data_list:
|
| 98 |
content.append({
|
| 99 |
"type": "input_audio",
|
|
@@ -102,10 +111,6 @@ def format_messages(system, history, user_text, audio_data_list=None):
|
|
| 102 |
"format": "wav"
|
| 103 |
}
|
| 104 |
})
|
| 105 |
-
messages.append({
|
| 106 |
-
"role": "user",
|
| 107 |
-
"content": content
|
| 108 |
-
})
|
| 109 |
|
| 110 |
return messages
|
| 111 |
|
|
@@ -116,7 +121,8 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
|
|
| 116 |
model_name = MODEL_NAME
|
| 117 |
|
| 118 |
if not user_text and not audio_file:
|
| 119 |
-
|
|
|
|
| 120 |
|
| 121 |
# Ensure history is a list and formatted correctly
|
| 122 |
history = history or []
|
|
@@ -136,7 +142,8 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
|
|
| 136 |
|
| 137 |
messages = format_messages(system_prompt, history, user_text, audio_data_list)
|
| 138 |
if not messages:
|
| 139 |
-
|
|
|
|
| 140 |
|
| 141 |
# Debug: Print message format
|
| 142 |
print(f"[DEBUG] Messages to API: {json.dumps(messages, ensure_ascii=False, indent=2)}")
|
|
@@ -144,6 +151,27 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
|
|
| 144 |
for i, msg in enumerate(messages):
|
| 145 |
print(f"[DEBUG] Message {i}: {type(msg)} - {msg}")
|
| 146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
try:
|
| 148 |
with httpx.Client(base_url=API_BASE_URL, timeout=120) as client:
|
| 149 |
response = client.post("/chat/completions", json={
|
|
@@ -165,10 +193,13 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
|
|
| 165 |
error_msg += " - Bad request"
|
| 166 |
elif response.status_code == 500:
|
| 167 |
error_msg += " - Model error"
|
| 168 |
-
|
|
|
|
| 169 |
|
| 170 |
# Process streaming response
|
| 171 |
-
|
|
|
|
|
|
|
| 172 |
for line in response.iter_lines():
|
| 173 |
if not line:
|
| 174 |
continue
|
|
@@ -187,66 +218,45 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
|
|
| 187 |
if 'choices' in data and len(data['choices']) > 0:
|
| 188 |
delta = data['choices'][0].get('delta', {})
|
| 189 |
if 'content' in delta:
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
except json.JSONDecodeError:
|
| 192 |
continue
|
| 193 |
|
| 194 |
-
full_content = ''.join(content_parts)
|
| 195 |
-
|
| 196 |
-
# Update history - only add when no error
|
| 197 |
-
history = history or []
|
| 198 |
-
|
| 199 |
-
# Add user message
|
| 200 |
-
if audio_file:
|
| 201 |
-
# If audio exists, show audio file and text (if any)
|
| 202 |
-
# Gradio Chatbot supports tuple (file_path,) to show file
|
| 203 |
-
# But in messages format, we need to construct proper content
|
| 204 |
-
# Here we use tuple format to let Gradio render audio player, or use HTML
|
| 205 |
-
# Simpler way: if multimodal, add messages separately
|
| 206 |
-
|
| 207 |
-
# 1. Add audio message
|
| 208 |
-
history.append({"role": "user", "content": gr.Audio(audio_file)})
|
| 209 |
-
|
| 210 |
-
# 2. If text exists, add text message
|
| 211 |
-
if user_text:
|
| 212 |
-
history.append({"role": "user", "content": user_text})
|
| 213 |
-
else:
|
| 214 |
-
# Text only
|
| 215 |
-
history.append({"role": "user", "content": user_text})
|
| 216 |
-
|
| 217 |
-
# Split think and content
|
| 218 |
-
if "</think>" in full_content:
|
| 219 |
-
parts = full_content.split("</think>", 1)
|
| 220 |
-
think_content = parts[0].strip()
|
| 221 |
-
response_content = parts[1].strip()
|
| 222 |
-
|
| 223 |
-
# Remove possible start tag
|
| 224 |
-
if think_content.startswith("<think>"):
|
| 225 |
-
think_content = think_content[len("<think>"):].strip()
|
| 226 |
-
|
| 227 |
-
# Add thinking process message (use ChatMessage and metadata)
|
| 228 |
-
if think_content:
|
| 229 |
-
history.append(gr.ChatMessage(
|
| 230 |
-
role="assistant",
|
| 231 |
-
content=think_content,
|
| 232 |
-
metadata={"title": "⏳ Thinking Process"}
|
| 233 |
-
))
|
| 234 |
-
|
| 235 |
-
# Add formal response message
|
| 236 |
-
if response_content:
|
| 237 |
-
history.append({"role": "assistant", "content": response_content})
|
| 238 |
-
else:
|
| 239 |
-
# No think tag, add full response directly
|
| 240 |
-
assistant_text = full_content.strip()
|
| 241 |
-
if assistant_text:
|
| 242 |
-
history.append({"role": "assistant", "content": assistant_text})
|
| 243 |
-
|
| 244 |
-
return history, ""
|
| 245 |
-
|
| 246 |
except httpx.ConnectError:
|
| 247 |
-
|
| 248 |
except Exception as e:
|
| 249 |
-
|
| 250 |
|
| 251 |
# Gradio Interface
|
| 252 |
with gr.Blocks(title="Step Audio R1") as demo:
|
|
|
|
| 65 |
for item in history:
|
| 66 |
# 支持 list of dicts 格式
|
| 67 |
if isinstance(item, dict) and "role" in item and "content" in item:
|
| 68 |
+
# Filter out non-serializable content (e.g. gr.Audio components)
|
| 69 |
+
content = item["content"]
|
| 70 |
+
if isinstance(content, (str, list, dict)):
|
| 71 |
+
messages.append(item)
|
| 72 |
# 支持 Gradio ChatMessage 对象
|
| 73 |
elif hasattr(item, "role") and hasattr(item, "content"):
|
| 74 |
+
content = item.content
|
| 75 |
+
if isinstance(content, (str, list, dict)):
|
| 76 |
+
messages.append({"role": item.role, "content": content})
|
| 77 |
|
| 78 |
# 添加当前用户消息
|
| 79 |
if user_text and audio_data_list:
|
|
|
|
| 99 |
messages.append({"role": "user", "content": user_text})
|
| 100 |
elif audio_data_list:
|
| 101 |
content = []
|
| 102 |
+
messages.append({
|
| 103 |
+
"role": "user",
|
| 104 |
+
"content": content
|
| 105 |
+
})
|
| 106 |
for audio_data in audio_data_list:
|
| 107 |
content.append({
|
| 108 |
"type": "input_audio",
|
|
|
|
| 111 |
"format": "wav"
|
| 112 |
}
|
| 113 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
return messages
|
| 116 |
|
|
|
|
| 121 |
model_name = MODEL_NAME
|
| 122 |
|
| 123 |
if not user_text and not audio_file:
|
| 124 |
+
yield history or [], "Please enter text or upload audio"
|
| 125 |
+
return
|
| 126 |
|
| 127 |
# Ensure history is a list and formatted correctly
|
| 128 |
history = history or []
|
|
|
|
| 142 |
|
| 143 |
messages = format_messages(system_prompt, history, user_text, audio_data_list)
|
| 144 |
if not messages:
|
| 145 |
+
yield history or [], "Invalid input"
|
| 146 |
+
return
|
| 147 |
|
| 148 |
# Debug: Print message format
|
| 149 |
print(f"[DEBUG] Messages to API: {json.dumps(messages, ensure_ascii=False, indent=2)}")
|
|
|
|
| 151 |
for i, msg in enumerate(messages):
|
| 152 |
print(f"[DEBUG] Message {i}: {type(msg)} - {msg}")
|
| 153 |
|
| 154 |
+
# Update history with user message immediately
|
| 155 |
+
if audio_file:
|
| 156 |
+
# 1. Add audio message
|
| 157 |
+
history.append({"role": "user", "content": gr.Audio(audio_file)})
|
| 158 |
+
|
| 159 |
+
# 2. If text exists, add text message
|
| 160 |
+
if user_text:
|
| 161 |
+
history.append({"role": "user", "content": user_text})
|
| 162 |
+
else:
|
| 163 |
+
# Text only
|
| 164 |
+
history.append({"role": "user", "content": user_text})
|
| 165 |
+
|
| 166 |
+
# Add thinking placeholder
|
| 167 |
+
history.append(gr.ChatMessage(
|
| 168 |
+
role="assistant",
|
| 169 |
+
content="",
|
| 170 |
+
metadata={"title": "⏳ Thinking Process"}
|
| 171 |
+
))
|
| 172 |
+
|
| 173 |
+
yield history, "Generating..."
|
| 174 |
+
|
| 175 |
try:
|
| 176 |
with httpx.Client(base_url=API_BASE_URL, timeout=120) as client:
|
| 177 |
response = client.post("/chat/completions", json={
|
|
|
|
| 193 |
error_msg += " - Bad request"
|
| 194 |
elif response.status_code == 500:
|
| 195 |
error_msg += " - Model error"
|
| 196 |
+
yield history, error_msg
|
| 197 |
+
return
|
| 198 |
|
| 199 |
# Process streaming response
|
| 200 |
+
buffer = ""
|
| 201 |
+
is_thinking = True
|
| 202 |
+
|
| 203 |
for line in response.iter_lines():
|
| 204 |
if not line:
|
| 205 |
continue
|
|
|
|
| 218 |
if 'choices' in data and len(data['choices']) > 0:
|
| 219 |
delta = data['choices'][0].get('delta', {})
|
| 220 |
if 'content' in delta:
|
| 221 |
+
content = delta['content']
|
| 222 |
+
buffer += content
|
| 223 |
+
|
| 224 |
+
if is_thinking:
|
| 225 |
+
if "</think>" in buffer:
|
| 226 |
+
is_thinking = False
|
| 227 |
+
parts = buffer.split("</think>", 1)
|
| 228 |
+
think_content = parts[0]
|
| 229 |
+
response_content = parts[1]
|
| 230 |
+
|
| 231 |
+
if think_content.startswith("<think>"):
|
| 232 |
+
think_content = think_content[len("<think>"):].strip()
|
| 233 |
+
|
| 234 |
+
# Update thinking message
|
| 235 |
+
history[-1].content = think_content
|
| 236 |
+
|
| 237 |
+
# Add response message
|
| 238 |
+
history.append({"role": "assistant", "content": response_content})
|
| 239 |
+
else:
|
| 240 |
+
# Update thinking message
|
| 241 |
+
current_think = buffer
|
| 242 |
+
if current_think.startswith("<think>"):
|
| 243 |
+
current_think = current_think[len("<think>"):]
|
| 244 |
+
history[-1].content = current_think
|
| 245 |
+
else:
|
| 246 |
+
# Already split, just update response message
|
| 247 |
+
parts = buffer.split("</think>", 1)
|
| 248 |
+
response_content = parts[1]
|
| 249 |
+
history[-1]["content"] = response_content
|
| 250 |
+
|
| 251 |
+
yield history, ""
|
| 252 |
+
|
| 253 |
except json.JSONDecodeError:
|
| 254 |
continue
|
| 255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
except httpx.ConnectError:
|
| 257 |
+
yield history, "❌ Cannot connect to vLLM API"
|
| 258 |
except Exception as e:
|
| 259 |
+
yield history, f"❌ Error: {str(e)}"
|
| 260 |
|
| 261 |
# Gradio Interface
|
| 262 |
with gr.Blocks(title="Step Audio R1") as demo:
|