MalikShehram commited on
Commit
d5ea916
·
verified ·
1 Parent(s): a3083d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -18
app.py CHANGED
@@ -19,10 +19,15 @@ print("System Ready.")
19
  # The core instructions for the AI
20
  SYSTEM_PROMPT = {"role": "system", "content": "You are a professional, intelligent AI assistant demonstrating a low-latency voice architecture. Provide concise, highly accurate, and polite responses."}
21
 
 
 
 
 
 
22
  # 2. Main Processing Logic
23
- def process_voice_conversation(audio_path, chat_history, llm_state):
24
  if not audio_path:
25
- return chat_history, llm_state, None, None
26
 
27
  try:
28
  # Step A: Speech-to-Text
@@ -30,9 +35,9 @@ def process_voice_conversation(audio_path, chat_history, llm_state):
30
  user_text = transcription["text"].strip()
31
 
32
  if not user_text:
33
- return chat_history, llm_state, None, None
34
 
35
- # Add to AI's internal memory
36
  llm_state.append({"role": "user", "content": user_text})
37
 
38
  # Step B: LLM Processing via Groq
@@ -42,25 +47,21 @@ def process_voice_conversation(audio_path, chat_history, llm_state):
42
  )
43
  ai_text = chat_completion.choices[0].message.content
44
 
45
- # Add response to AI's internal memory
46
  llm_state.append({"role": "assistant", "content": ai_text})
47
 
48
- # Add the conversation pair to the UI Chatbot
49
- chat_history.append((user_text, ai_text))
50
-
51
  # Step C: Text-to-Speech
52
  tts = gTTS(text=ai_text, lang='en', slow=False)
53
  temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
54
  tts.save(temp_audio.name)
55
 
56
- # Return UI history, Memory state, Output Audio, and clear Input Audio
57
- return chat_history, llm_state, temp_audio.name, None
58
 
59
  except Exception as e:
60
  error_msg = f"System Error: {str(e)}"
61
- # Display the error in the chat interface safely
62
- chat_history.append(("Audio processed...", error_msg))
63
- return chat_history, llm_state, None, None
64
 
65
  # Function to completely wipe the session memory and UI
66
  def reset_conversation():
@@ -93,16 +94,16 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
93
  with gr.Tab("🎙️ Live Interaction"):
94
  with gr.Row():
95
  with gr.Column(scale=2):
 
96
  chatbot = gr.Chatbot(
97
  label="Conversation Transcript",
98
  height=450,
99
- avatar_images=(None, "⚙️") # Professional gear icon for the AI
100
  )
101
 
102
  with gr.Column(scale=1):
103
  gr.Markdown("### Input / Output Controls")
104
 
105
- # FIX: Removed the waveform_options argument entirely to ensure perfect compatibility
106
  audio_input = gr.Audio(
107
  sources=["microphone"],
108
  type="filepath",
@@ -145,14 +146,14 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
145
  """
146
  )
147
 
148
- # Event Wiring: Submit Audio
149
  submit_btn.click(
150
  fn=process_voice_conversation,
151
- inputs=[audio_input, chatbot, llm_state],
152
  outputs=[chatbot, llm_state, audio_output, audio_input]
153
  )
154
 
155
- # Event Wiring: Clear Session (Wipes UI and AI Memory)
156
  clear_btn.click(
157
  fn=reset_conversation,
158
  inputs=[],
@@ -160,4 +161,5 @@ with gr.Blocks(title="VocaFree AI - Research Prototype", theme=custom_theme) as
160
  )
161
 
162
  if __name__ == "__main__":
 
163
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
19
  # The core instructions for the AI
20
  SYSTEM_PROMPT = {"role": "system", "content": "You are a professional, intelligent AI assistant demonstrating a low-latency voice architecture. Provide concise, highly accurate, and polite responses."}
21
 
22
+ # Helper function to format the memory for the new Gradio UI
23
+ def get_ui_chat(state):
24
+ # Returns all messages except the hidden system prompt
25
+ return [msg for msg in state if msg["role"] != "system"]
26
+
27
  # 2. Main Processing Logic
28
+ def process_voice_conversation(audio_path, llm_state):
29
  if not audio_path:
30
+ return get_ui_chat(llm_state), llm_state, None, None
31
 
32
  try:
33
  # Step A: Speech-to-Text
 
35
  user_text = transcription["text"].strip()
36
 
37
  if not user_text:
38
+ return get_ui_chat(llm_state), llm_state, None, None
39
 
40
+ # Add user prompt to internal memory
41
  llm_state.append({"role": "user", "content": user_text})
42
 
43
  # Step B: LLM Processing via Groq
 
47
  )
48
  ai_text = chat_completion.choices[0].message.content
49
 
50
+ # Add AI response to internal memory
51
  llm_state.append({"role": "assistant", "content": ai_text})
52
 
 
 
 
53
  # Step C: Text-to-Speech
54
  tts = gTTS(text=ai_text, lang='en', slow=False)
55
  temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
56
  tts.save(temp_audio.name)
57
 
58
+ # Return the dict-formatted chat history, the memory state, the output audio, and None to clear the input audio
59
+ return get_ui_chat(llm_state), llm_state, temp_audio.name, None
60
 
61
  except Exception as e:
62
  error_msg = f"System Error: {str(e)}"
63
+ llm_state.append({"role": "assistant", "content": error_msg})
64
+ return get_ui_chat(llm_state), llm_state, None, None
 
65
 
66
  # Function to completely wipe the session memory and UI
67
  def reset_conversation():
 
94
  with gr.Tab("🎙️ Live Interaction"):
95
  with gr.Row():
96
  with gr.Column(scale=2):
97
+ # Chatbot component specifically ready for dict-format
98
  chatbot = gr.Chatbot(
99
  label="Conversation Transcript",
100
  height=450,
101
+ avatar_images=(None, "⚙️")
102
  )
103
 
104
  with gr.Column(scale=1):
105
  gr.Markdown("### Input / Output Controls")
106
 
 
107
  audio_input = gr.Audio(
108
  sources=["microphone"],
109
  type="filepath",
 
146
  """
147
  )
148
 
149
+ # Event Wiring: Submit Audio — the UI Chatbot is now derived purely from llm_state
150
  submit_btn.click(
151
  fn=process_voice_conversation,
152
+ inputs=[audio_input, llm_state],
153
  outputs=[chatbot, llm_state, audio_output, audio_input]
154
  )
155
 
156
+ # Event Wiring: Clear Session
157
  clear_btn.click(
158
  fn=reset_conversation,
159
  inputs=[],
 
161
  )
162
 
163
  if __name__ == "__main__":
164
+ # 0.0.0.0 binds to all interfaces, required for Docker/Hugging Face
165
  demo.launch(server_name="0.0.0.0", server_port=7860)