Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,10 +19,15 @@ print("System Ready.")
|
|
| 19 |
# The core instructions for the AI
|
| 20 |
SYSTEM_PROMPT = {"role": "system", "content": "You are a professional, intelligent AI assistant demonstrating a low-latency voice architecture. Provide concise, highly accurate, and polite responses."}
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# 2. Main Processing Logic
|
| 23 |
-
def process_voice_conversation(audio_path,
|
| 24 |
if not audio_path:
|
| 25 |
-
return
|
| 26 |
|
| 27 |
try:
|
| 28 |
# Step A: Speech-to-Text
|
|
@@ -30,9 +35,9 @@ def process_voice_conversation(audio_path, chat_history, llm_state):
|
|
| 30 |
user_text = transcription["text"].strip()
|
| 31 |
|
| 32 |
if not user_text:
|
| 33 |
-
return
|
| 34 |
|
| 35 |
-
# Add
|
| 36 |
llm_state.append({"role": "user", "content": user_text})
|
| 37 |
|
| 38 |
# Step B: LLM Processing via Groq
|
|
@@ -42,25 +47,21 @@ def process_voice_conversation(audio_path, chat_history, llm_state):
|
|
| 42 |
)
|
| 43 |
ai_text = chat_completion.choices[0].message.content
|
| 44 |
|
| 45 |
-
# Add response to
|
| 46 |
llm_state.append({"role": "assistant", "content": ai_text})
|
| 47 |
|
| 48 |
-
# Add the conversation pair to the UI Chatbot
|
| 49 |
-
chat_history.append((user_text, ai_text))
|
| 50 |
-
|
| 51 |
# Step C: Text-to-Speech
|
| 52 |
tts = gTTS(text=ai_text, lang='en', slow=False)
|
| 53 |
temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
| 54 |
tts.save(temp_audio.name)
|
| 55 |
|
| 56 |
-
# Return
|
| 57 |
-
return
|
| 58 |
|
| 59 |
except Exception as e:
|
| 60 |
error_msg = f"System Error: {str(e)}"
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
return chat_history, llm_state, None, None
|
| 64 |
|
| 65 |
# Function to completely wipe the session memory and UI
|
| 66 |
def reset_conversation():
|
|
@@ -93,16 +94,16 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
|
|
| 93 |
with gr.Tab("🎙️ Live Interaction"):
|
| 94 |
with gr.Row():
|
| 95 |
with gr.Column(scale=2):
|
|
|
|
| 96 |
chatbot = gr.Chatbot(
|
| 97 |
label="Conversation Transcript",
|
| 98 |
height=450,
|
| 99 |
-
avatar_images=(None, "⚙️")
|
| 100 |
)
|
| 101 |
|
| 102 |
with gr.Column(scale=1):
|
| 103 |
gr.Markdown("### Input / Output Controls")
|
| 104 |
|
| 105 |
-
# FIX: Removed the waveform_options argument entirely to ensure perfect compatibility
|
| 106 |
audio_input = gr.Audio(
|
| 107 |
sources=["microphone"],
|
| 108 |
type="filepath",
|
|
@@ -145,14 +146,14 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
|
|
| 145 |
"""
|
| 146 |
)
|
| 147 |
|
| 148 |
-
# Event Wiring:
|
| 149 |
submit_btn.click(
|
| 150 |
fn=process_voice_conversation,
|
| 151 |
-
inputs=[audio_input,
|
| 152 |
outputs=[chatbot, llm_state, audio_output, audio_input]
|
| 153 |
)
|
| 154 |
|
| 155 |
-
# Event Wiring: Clear Session
|
| 156 |
clear_btn.click(
|
| 157 |
fn=reset_conversation,
|
| 158 |
inputs=[],
|
|
@@ -160,4 +161,5 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
|
|
| 160 |
)
|
| 161 |
|
| 162 |
if __name__ == "__main__":
    # Start the Gradio app, listening on every network interface at port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|
|
|
|
| 19 |
# The core instructions for the AI
|
| 20 |
SYSTEM_PROMPT = {"role": "system", "content": "You are a professional, intelligent AI assistant demonstrating a low-latency voice architecture. Provide concise, highly accurate, and polite responses."}
|
| 21 |
|
| 22 |
+
# Helper function to format the memory for the new Gradio UI
|
| 23 |
+
def get_ui_chat(state):
    """Return the messages from ``state`` that should appear in the chat UI.

    Args:
        state: The conversation memory, a list of message dicts that each
            carry a ``"role"`` key. ``None`` or an empty list is treated as
            an empty conversation.

    Returns:
        A new list holding every message whose role is not ``"system"``,
        in the original order. ``state`` itself is not modified.
    """
    # Guard against a missing (None) or empty state so the caller gets an
    # empty transcript rather than a TypeError while iterating.
    if not state:
        return []
    return [msg for msg in state if msg["role"] != "system"]
|
| 26 |
+
|
| 27 |
# 2. Main Processing Logic
|
| 28 |
+
def process_voice_conversation(audio_path, llm_state):
|
| 29 |
if not audio_path:
|
| 30 |
+
return get_ui_chat(llm_state), llm_state, None, None
|
| 31 |
|
| 32 |
try:
|
| 33 |
# Step A: Speech-to-Text
|
|
|
|
| 35 |
user_text = transcription["text"].strip()
|
| 36 |
|
| 37 |
if not user_text:
|
| 38 |
+
return get_ui_chat(llm_state), llm_state, None, None
|
| 39 |
|
| 40 |
+
# Add user prompt to internal memory
|
| 41 |
llm_state.append({"role": "user", "content": user_text})
|
| 42 |
|
| 43 |
# Step B: LLM Processing via Groq
|
|
|
|
| 47 |
)
|
| 48 |
ai_text = chat_completion.choices[0].message.content
|
| 49 |
|
| 50 |
+
# Add AI response to internal memory
|
| 51 |
llm_state.append({"role": "assistant", "content": ai_text})
|
| 52 |
|
|
|
|
|
|
|
|
|
|
| 53 |
# Step C: Text-to-Speech
|
| 54 |
tts = gTTS(text=ai_text, lang='en', slow=False)
|
| 55 |
temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
|
| 56 |
tts.save(temp_audio.name)
|
| 57 |
|
| 58 |
+
# Return the strictly formatted dict list, memory state, output audio, and clear input
|
| 59 |
+
return get_ui_chat(llm_state), llm_state, temp_audio.name, None
|
| 60 |
|
| 61 |
except Exception as e:
|
| 62 |
error_msg = f"System Error: {str(e)}"
|
| 63 |
+
llm_state.append({"role": "assistant", "content": error_msg})
|
| 64 |
+
return get_ui_chat(llm_state), llm_state, None, None
|
|
|
|
| 65 |
|
| 66 |
# Function to completely wipe the session memory and UI
|
| 67 |
def reset_conversation():
|
|
|
|
| 94 |
with gr.Tab("🎙️ Live Interaction"):
|
| 95 |
with gr.Row():
|
| 96 |
with gr.Column(scale=2):
|
| 97 |
+
# Chatbot component specifically ready for dict-format
|
| 98 |
chatbot = gr.Chatbot(
|
| 99 |
label="Conversation Transcript",
|
| 100 |
height=450,
|
| 101 |
+
avatar_images=(None, "⚙️")
|
| 102 |
)
|
| 103 |
|
| 104 |
with gr.Column(scale=1):
|
| 105 |
gr.Markdown("### Input / Output Controls")
|
| 106 |
|
|
|
|
| 107 |
audio_input = gr.Audio(
|
| 108 |
sources=["microphone"],
|
| 109 |
type="filepath",
|
|
|
|
| 146 |
"""
|
| 147 |
)
|
| 148 |
|
| 149 |
+
# Event Wiring: Notice how we derive the UI Chatbot purely from the llm_state now
|
| 150 |
submit_btn.click(
|
| 151 |
fn=process_voice_conversation,
|
| 152 |
+
inputs=[audio_input, llm_state],
|
| 153 |
outputs=[chatbot, llm_state, audio_output, audio_input]
|
| 154 |
)
|
| 155 |
|
| 156 |
+
# Event Wiring: Clear Session
|
| 157 |
clear_btn.click(
|
| 158 |
fn=reset_conversation,
|
| 159 |
inputs=[],
|
|
|
|
| 161 |
)
|
| 162 |
|
| 163 |
if __name__ == "__main__":
    demo.launch(
        # 0.0.0.0 binds to all interfaces, required for Docker/Hugging Face
        server_name="0.0.0.0",
        server_port=7860,
    )
|