MalikShehram commited on
Commit
d5ea916
·
verified ·
1 Parent(s): a3083d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -18
app.py CHANGED
@@ -19,10 +19,15 @@ print("System Ready.")
19
  # The core instructions for the AI
20
  SYSTEM_PROMPT = {"role": "system", "content": "You are a professional, intelligent AI assistant demonstrating a low-latency voice architecture. Provide concise, highly accurate, and polite responses."}
21
 
 
 
 
 
 
22
  # 2. Main Processing Logic
23
- def process_voice_conversation(audio_path, chat_history, llm_state):
24
  if not audio_path:
25
- return chat_history, llm_state, None, None
26
 
27
  try:
28
  # Step A: Speech-to-Text
@@ -30,9 +35,9 @@ def process_voice_conversation(audio_path, chat_history, llm_state):
30
  user_text = transcription["text"].strip()
31
 
32
  if not user_text:
33
- return chat_history, llm_state, None, None
34
 
35
- # Add to AI's internal memory
36
  llm_state.append({"role": "user", "content": user_text})
37
 
38
  # Step B: LLM Processing via Groq
@@ -42,25 +47,21 @@ def process_voice_conversation(audio_path, chat_history, llm_state):
42
  )
43
  ai_text = chat_completion.choices[0].message.content
44
 
45
- # Add response to AI's internal memory
46
  llm_state.append({"role": "assistant", "content": ai_text})
47
 
48
- # Add the conversation pair to the UI Chatbot
49
- chat_history.append((user_text, ai_text))
50
-
51
  # Step C: Text-to-Speech
52
  tts = gTTS(text=ai_text, lang='en', slow=False)
53
  temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
54
  tts.save(temp_audio.name)
55
 
56
- # Return UI history, Memory state, Output Audio, and clear Input Audio
57
- return chat_history, llm_state, temp_audio.name, None
58
 
59
  except Exception as e:
60
  error_msg = f"System Error: {str(e)}"
61
- # Display the error in the chat interface safely
62
- chat_history.append(("Audio processed...", error_msg))
63
- return chat_history, llm_state, None, None
64
 
65
  # Function to completely wipe the session memory and UI
66
  def reset_conversation():
@@ -93,16 +94,16 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
93
  with gr.Tab("🎙️ Live Interaction"):
94
  with gr.Row():
95
  with gr.Column(scale=2):
 
96
  chatbot = gr.Chatbot(
97
  label="Conversation Transcript",
98
  height=450,
99
- avatar_images=(None, "⚙️") # Professional gear icon for the AI
100
  )
101
 
102
  with gr.Column(scale=1):
103
  gr.Markdown("### Input / Output Controls")
104
 
105
- # FIX: Removed the waveform_options argument entirely to ensure perfect compatibility
106
  audio_input = gr.Audio(
107
  sources=["microphone"],
108
  type="filepath",
@@ -145,14 +146,14 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
145
  """
146
  )
147
 
148
- # Event Wiring: Submit Audio
149
  submit_btn.click(
150
  fn=process_voice_conversation,
151
- inputs=[audio_input, chatbot, llm_state],
152
  outputs=[chatbot, llm_state, audio_output, audio_input]
153
  )
154
 
155
- # Event Wiring: Clear Session (Wipes UI and AI Memory)
156
  clear_btn.click(
157
  fn=reset_conversation,
158
  inputs=[],
@@ -160,4 +161,5 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
160
  )
161
 
162
  if __name__ == "__main__":
 
163
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
19
  # The core instructions for the AI
20
  SYSTEM_PROMPT = {"role": "system", "content": "You are a professional, intelligent AI assistant demonstrating a low-latency voice architecture. Provide concise, highly accurate, and polite responses."}
21
 
22
+ # Helper function to format the memory for the new Gradio UI
23
+ def get_ui_chat(state):
24
+ # Returns all messages except the hidden system prompt
25
+ return [msg for msg in state if msg["role"] != "system"]
26
+
27
  # 2. Main Processing Logic
28
+ def process_voice_conversation(audio_path, llm_state):
29
  if not audio_path:
30
+ return get_ui_chat(llm_state), llm_state, None, None
31
 
32
  try:
33
  # Step A: Speech-to-Text
 
35
  user_text = transcription["text"].strip()
36
 
37
  if not user_text:
38
+ return get_ui_chat(llm_state), llm_state, None, None
39
 
40
+ # Add user prompt to internal memory
41
  llm_state.append({"role": "user", "content": user_text})
42
 
43
  # Step B: LLM Processing via Groq
 
47
  )
48
  ai_text = chat_completion.choices[0].message.content
49
 
50
+ # Add AI response to internal memory
51
  llm_state.append({"role": "assistant", "content": ai_text})
52
 
 
 
 
53
  # Step C: Text-to-Speech
54
  tts = gTTS(text=ai_text, lang='en', slow=False)
55
  temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
56
  tts.save(temp_audio.name)
57
 
58
+ # Return the dict-formatted chat history, the memory state, the output audio, and None to clear the input audio
59
+ return get_ui_chat(llm_state), llm_state, temp_audio.name, None
60
 
61
  except Exception as e:
62
  error_msg = f"System Error: {str(e)}"
63
+ llm_state.append({"role": "assistant", "content": error_msg})
64
+ return get_ui_chat(llm_state), llm_state, None, None
 
65
 
66
  # Function to completely wipe the session memory and UI
67
  def reset_conversation():
 
94
  with gr.Tab("🎙️ Live Interaction"):
95
  with gr.Row():
96
  with gr.Column(scale=2):
97
+ # Chatbot component specifically ready for dict-format
98
  chatbot = gr.Chatbot(
99
  label="Conversation Transcript",
100
  height=450,
101
+ avatar_images=(None, "⚙️")
102
  )
103
 
104
  with gr.Column(scale=1):
105
  gr.Markdown("### Input / Output Controls")
106
 
 
107
  audio_input = gr.Audio(
108
  sources=["microphone"],
109
  type="filepath",
 
146
  """
147
  )
148
 
149
+ # Event Wiring: Submit Audio — the UI Chatbot is now derived purely from llm_state
150
  submit_btn.click(
151
  fn=process_voice_conversation,
152
+ inputs=[audio_input, llm_state],
153
  outputs=[chatbot, llm_state, audio_output, audio_input]
154
  )
155
 
156
+ # Event Wiring: Clear Session
157
  clear_btn.click(
158
  fn=reset_conversation,
159
  inputs=[],
 
161
  )
162
 
163
  if __name__ == "__main__":
164
+ # 0.0.0.0 binds to all interfaces, required for Docker/Hugging Face
165
  demo.launch(server_name="0.0.0.0", server_port=7860)