zsolnai committed on
Commit ·
24befd4
1
Parent(s): 102e36f
Fix claude mistake v6
Browse files
app.py
CHANGED
|
@@ -33,11 +33,14 @@ tts_model = TTS(model_name=TTS_MODEL_NAME, progress_bar=False)
|
|
| 33 |
def chat_with_bot(message, history, chat_history_ids=None):
|
| 34 |
"""
|
| 35 |
Chat with the conversational AI model using DialoGPT.
|
|
|
|
| 36 |
Returns: (updated_history, updated_chat_ids, response_text)
|
| 37 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
if not message or not message.strip():
|
| 39 |
-
# Add an empty entry to history to maintain the structure expected by Gradio
|
| 40 |
-
history.append(("", ""))
|
| 41 |
return history, chat_history_ids, ""
|
| 42 |
|
| 43 |
try:
|
|
@@ -70,13 +73,13 @@ def chat_with_bot(message, history, chat_history_ids=None):
|
|
| 70 |
chat_history_ids[:, bot_input_ids.shape[-1] :][0], skip_special_tokens=True
|
| 71 |
)
|
| 72 |
|
| 73 |
-
#
|
| 74 |
history.append((message, response))
|
| 75 |
|
| 76 |
return history, chat_history_ids, response
|
| 77 |
|
| 78 |
except Exception as e:
|
| 79 |
-
#
|
| 80 |
history.append((message, f"Error: {e}"))
|
| 81 |
return history, chat_history_ids, f"Error: {e}"
|
| 82 |
|
|
@@ -134,7 +137,6 @@ def speech_to_text_and_chat(audio_file_path, history, chat_history_ids):
|
|
| 134 |
)
|
| 135 |
|
| 136 |
# 2. Chatbot
|
| 137 |
-
# The third returned value, last_response_text, is the pure text response.
|
| 138 |
updated_history, updated_chat_ids, last_response_text = chat_with_bot(
|
| 139 |
transcribed_text, history, chat_history_ids
|
| 140 |
)
|
|
@@ -164,30 +166,24 @@ custom_css = """
|
|
| 164 |
}
|
| 165 |
"""
|
| 166 |
|
| 167 |
-
# CRITICAL FIX: Removed css argument from gr.Blocks()
|
| 168 |
with gr.Blocks() as demo:
|
| 169 |
gr.Markdown("# 🗣️ Integrated Voice Assistant (CPU Only)")
|
| 170 |
gr.Markdown(
|
| 171 |
"**NOTE:** This app is running on CPU-only hardware. The full voice flow will be slow due to **Text-to-Speech**."
|
| 172 |
)
|
| 173 |
|
| 174 |
-
# The global chat state can be used if tabs share history, or use local states per tab
|
| 175 |
global_chat_state = gr.State(value=None)
|
| 176 |
|
| 177 |
with gr.Tabs():
|
| 178 |
|
| 179 |
-
# ---
|
| 180 |
with gr.TabItem("🗣️ Voice Assistant"):
|
| 181 |
gr.Markdown("## Talk to the AI Assistant")
|
| 182 |
-
gr.Markdown(
|
| 183 |
-
"Speak into the microphone. Your speech will be transcribed, sent to the chatbot, and the chatbot's text response will be converted to audio."
|
| 184 |
-
)
|
| 185 |
|
| 186 |
-
# States specific to this tab
|
| 187 |
voice_chat_history = gr.Chatbot(
|
| 188 |
label="Conversation Log", elem_classes=["chatbot"], value=[]
|
| 189 |
)
|
| 190 |
-
voice_chat_state = gr.State(value=None)
|
| 191 |
|
| 192 |
with gr.Row():
|
| 193 |
audio_in = gr.Audio(
|
|
@@ -233,7 +229,7 @@ with gr.Blocks() as demo:
|
|
| 233 |
],
|
| 234 |
)
|
| 235 |
|
| 236 |
-
# ---
|
| 237 |
with gr.TabItem("💬 Chat → Voice Output"):
|
| 238 |
gr.Markdown("## 💬 Chat with Voice Output")
|
| 239 |
|
|
@@ -261,6 +257,7 @@ with gr.Blocks() as demo:
|
|
| 261 |
def chat_and_speak(message, history, chat_ids):
|
| 262 |
"""Send message to chat and convert response to speech."""
|
| 263 |
# 1. Chatbot
|
|
|
|
| 264 |
updated_history, updated_ids, last_response = chat_with_bot(
|
| 265 |
message, history, chat_ids
|
| 266 |
)
|
|
@@ -270,7 +267,8 @@ with gr.Blocks() as demo:
|
|
| 270 |
|
| 271 |
return updated_history, updated_ids, last_response, audio_path, status
|
| 272 |
|
| 273 |
-
|
|
|
|
| 274 |
fn=chat_and_speak,
|
| 275 |
inputs=[tts_msg, tts_chatbot, tts_chat_state],
|
| 276 |
outputs=[
|
|
@@ -282,7 +280,7 @@ with gr.Blocks() as demo:
|
|
| 282 |
],
|
| 283 |
).then(lambda: "", None, tts_msg)
|
| 284 |
|
| 285 |
-
|
| 286 |
fn=chat_and_speak,
|
| 287 |
inputs=[tts_msg, tts_chatbot, tts_chat_state],
|
| 288 |
outputs=[
|
|
@@ -306,7 +304,7 @@ with gr.Blocks() as demo:
|
|
| 306 |
],
|
| 307 |
)
|
| 308 |
|
| 309 |
-
# ---
|
| 310 |
with gr.TabItem("💬 Text Chat Only"):
|
| 311 |
gr.Markdown("## Chat with AI Assistant")
|
| 312 |
|
|
@@ -323,8 +321,9 @@ with gr.Blocks() as demo:
|
|
| 323 |
submit_btn = gr.Button("Send", variant="primary")
|
| 324 |
clear_btn = gr.Button("Clear Chat")
|
| 325 |
|
| 326 |
-
#
|
| 327 |
fn_call = msg.submit(
|
|
|
|
| 328 |
lambda message, history, chat_state: chat_with_bot(
|
| 329 |
message, history, chat_state
|
| 330 |
)[:2],
|
|
@@ -333,6 +332,7 @@ with gr.Blocks() as demo:
|
|
| 333 |
).then(lambda: "", None, msg)
|
| 334 |
|
| 335 |
submit_btn.click(
|
|
|
|
| 336 |
lambda message, history, chat_state: chat_with_bot(
|
| 337 |
message, history, chat_state
|
| 338 |
)[:2],
|
|
@@ -342,7 +342,7 @@ with gr.Blocks() as demo:
|
|
| 342 |
|
| 343 |
clear_btn.click(lambda: ([], None), None, [chatbot, global_chat_state])
|
| 344 |
|
| 345 |
-
# ---
|
| 346 |
with gr.TabItem("π Text-to-Speech Only"):
|
| 347 |
gr.Markdown("## π Text-to-Speech (TTS)")
|
| 348 |
|
|
@@ -361,5 +361,4 @@ with gr.Blocks() as demo:
|
|
| 361 |
outputs=[standalone_audio_output, standalone_tts_status],
|
| 362 |
)
|
| 363 |
|
| 364 |
-
# CRITICAL FIX: Passed css argument to demo.launch()
|
| 365 |
demo.launch(css=custom_css)
|
|
|
|
| 33 |
def chat_with_bot(message, history, chat_history_ids=None):
|
| 34 |
"""
|
| 35 |
Chat with the conversational AI model using DialoGPT.
|
| 36 |
+
CRITICAL FIX: Ensures history is a list of tuples [("user", "bot"), ...]
|
| 37 |
Returns: (updated_history, updated_chat_ids, response_text)
|
| 38 |
"""
|
| 39 |
+
# Ensure history is correctly initialized as a list of tuples
|
| 40 |
+
if history is None:
|
| 41 |
+
history = []
|
| 42 |
+
|
| 43 |
if not message or not message.strip():
|
|
|
|
|
|
|
| 44 |
return history, chat_history_ids, ""
|
| 45 |
|
| 46 |
try:
|
|
|
|
| 73 |
chat_history_ids[:, bot_input_ids.shape[-1] :][0], skip_special_tokens=True
|
| 74 |
)
|
| 75 |
|
| 76 |
+
# FIX: Append to history in the required Gradio Chatbot (list of tuples) format
|
| 77 |
history.append((message, response))
|
| 78 |
|
| 79 |
return history, chat_history_ids, response
|
| 80 |
|
| 81 |
except Exception as e:
|
| 82 |
+
# FIX: Append error to history in the required Gradio Chatbot (list of tuples) format
|
| 83 |
history.append((message, f"Error: {e}"))
|
| 84 |
return history, chat_history_ids, f"Error: {e}"
|
| 85 |
|
|
|
|
| 137 |
)
|
| 138 |
|
| 139 |
# 2. Chatbot
|
|
|
|
| 140 |
updated_history, updated_chat_ids, last_response_text = chat_with_bot(
|
| 141 |
transcribed_text, history, chat_history_ids
|
| 142 |
)
|
|
|
|
| 166 |
}
|
| 167 |
"""
|
| 168 |
|
|
|
|
| 169 |
with gr.Blocks() as demo:
|
| 170 |
gr.Markdown("# 🗣️ Integrated Voice Assistant (CPU Only)")
|
| 171 |
gr.Markdown(
|
| 172 |
"**NOTE:** This app is running on CPU-only hardware. The full voice flow will be slow due to **Text-to-Speech**."
|
| 173 |
)
|
| 174 |
|
|
|
|
| 175 |
global_chat_state = gr.State(value=None)
|
| 176 |
|
| 177 |
with gr.Tabs():
|
| 178 |
|
| 179 |
+
# --- FULL VOICE CHAT TAB (STT -> CHAT -> TTS) ---
|
| 180 |
with gr.TabItem("🗣️ Voice Assistant"):
|
| 181 |
gr.Markdown("## Talk to the AI Assistant")
|
|
|
|
|
|
|
|
|
|
| 182 |
|
|
|
|
| 183 |
voice_chat_history = gr.Chatbot(
|
| 184 |
label="Conversation Log", elem_classes=["chatbot"], value=[]
|
| 185 |
)
|
| 186 |
+
voice_chat_state = gr.State(value=None)
|
| 187 |
|
| 188 |
with gr.Row():
|
| 189 |
audio_in = gr.Audio(
|
|
|
|
| 229 |
],
|
| 230 |
)
|
| 231 |
|
| 232 |
+
# --- CHAT -> VOICE OUTPUT TAB ---
|
| 233 |
with gr.TabItem("💬 Chat → Voice Output"):
|
| 234 |
gr.Markdown("## 💬 Chat with Voice Output")
|
| 235 |
|
|
|
|
| 257 |
def chat_and_speak(message, history, chat_ids):
|
| 258 |
"""Send message to chat and convert response to speech."""
|
| 259 |
# 1. Chatbot
|
| 260 |
+
# The chat_with_bot returns history in the correct tuple format
|
| 261 |
updated_history, updated_ids, last_response = chat_with_bot(
|
| 262 |
message, history, chat_ids
|
| 263 |
)
|
|
|
|
| 267 |
|
| 268 |
return updated_history, updated_ids, last_response, audio_path, status
|
| 269 |
|
| 270 |
+
# --- Submit listener for the Enter key ---
|
| 271 |
+
fn_call = tts_msg.submit(
|
| 272 |
fn=chat_and_speak,
|
| 273 |
inputs=[tts_msg, tts_chatbot, tts_chat_state],
|
| 274 |
outputs=[
|
|
|
|
| 280 |
],
|
| 281 |
).then(lambda: "", None, tts_msg)
|
| 282 |
|
| 283 |
+
tts_submit_btn.click(
|
| 284 |
fn=chat_and_speak,
|
| 285 |
inputs=[tts_msg, tts_chatbot, tts_chat_state],
|
| 286 |
outputs=[
|
|
|
|
| 304 |
],
|
| 305 |
)
|
| 306 |
|
| 307 |
+
# --- TEXT CHAT ONLY TAB ---
|
| 308 |
with gr.TabItem("💬 Text Chat Only"):
|
| 309 |
gr.Markdown("## Chat with AI Assistant")
|
| 310 |
|
|
|
|
| 321 |
submit_btn = gr.Button("Send", variant="primary")
|
| 322 |
clear_btn = gr.Button("Clear Chat")
|
| 323 |
|
| 324 |
+
# Chat functionality (uses global state, already wired for Enter key)
|
| 325 |
fn_call = msg.submit(
|
| 326 |
+
# Use slice [:2] to discard the third output (response text)
|
| 327 |
lambda message, history, chat_state: chat_with_bot(
|
| 328 |
message, history, chat_state
|
| 329 |
)[:2],
|
|
|
|
| 332 |
).then(lambda: "", None, msg)
|
| 333 |
|
| 334 |
submit_btn.click(
|
| 335 |
+
# Use slice [:2] to discard the third output (response text)
|
| 336 |
lambda message, history, chat_state: chat_with_bot(
|
| 337 |
message, history, chat_state
|
| 338 |
)[:2],
|
|
|
|
| 342 |
|
| 343 |
clear_btn.click(lambda: ([], None), None, [chatbot, global_chat_state])
|
| 344 |
|
| 345 |
+
# --- STANDALONE TTS TAB ---
|
| 346 |
with gr.TabItem("π Text-to-Speech Only"):
|
| 347 |
gr.Markdown("## π Text-to-Speech (TTS)")
|
| 348 |
|
|
|
|
| 361 |
outputs=[standalone_audio_output, standalone_tts_status],
|
| 362 |
)
|
| 363 |
|
|
|
|
| 364 |
demo.launch(css=custom_css)
|