Spaces:

Satyamkumar
/

Know_me-voice-bot

Build error

App Files Files Community

Satyamkumar committed on Mar 27, 2025

Commit

cf4ba1b

verified ·

1 Parent(s): 7870b54

Update app.py

Browse files

Files changed (1) hide show

app.py +119 -120

app.py CHANGED Viewed

@@ -10,38 +10,35 @@ from gtts import gTTS
 import tempfile
 import base64
 import time
-# --- Configuration & Initialization ---
-# 1. Load API Key from Hugging Face Secrets
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
-    # If running locally and not on Spaces, you might use a local env variable
-    # Or raise an error if mandatory for deployment
     print("Warning: GEMINI_API_KEY secret not found. Set it in Hugging Face Space settings.")
     # raise ValueError("GEMINI_API_KEY secret not found. Please set it in your Space settings.")
-    # For local testing, you could uncomment the line below and add your key
-    # GEMINI_API_KEY = "YOUR_LOCAL_KEY_FOR_TESTING_ONLY"
-# Configure Gemini only if the key is available
 if GEMINI_API_KEY:
     try:
         genai.configure(api_key=GEMINI_API_KEY)
-        # Initialize Gemini model
-        generation_model = genai.GenerativeModel('gemini-1.5-flash') # Use 1.5 flash as 2.0 is not public
     except Exception as e:
         print(f"Error configuring Gemini or initializing model: {e}")
         generation_model = None
 else:
     generation_model = None
-# --- File Paths (Relative to app.py) ---
 PDF_PATH = "about_me.pdf"
 PROFILE_PIC_PATH = "sk.jpeg"
-# --- Utility: Load image and convert to base64 ---
-# No caching decorator needed, load once at startup
 def get_base64_of_file(file_path):
     try:
         with open(file_path, "rb") as f:
@@ -54,10 +51,40 @@ def get_base64_of_file(file_path):
         print(f"Error reading file {file_path}: {e}")
         return None
-# --- Core Logic ---
 def extract_text_from_pdf(pdf_path):
-    """Extract text from local PDF file."""
     try:
         if not os.path.exists(pdf_path):
             print(f"Error: PDF file not found at {pdf_path}")
@@ -76,16 +103,11 @@ def extract_text_from_pdf(pdf_path):
         return ""
 def create_document_embeddings(text, model):
-    """Create embeddings for document text."""
-    if not text or model is None:
-        return [], None
     try:
-        # Simple split by newline, consider more robust chunking if needed
         chunks = text.split('\n')
         chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
-        if not chunks:
-            print("No text chunks found after splitting.")
-            return [], None
         embeddings = model.encode(chunks)
         print(f"Created {len(embeddings)} embeddings for {len(chunks)} chunks.")
         return chunks, embeddings
@@ -94,19 +116,12 @@ def create_document_embeddings(text, model):
         return [], None
 def retrieve_relevant_context(query, chunks, embeddings, model, top_k=3):
-    """Perform semantic search to find most relevant context."""
-    if not query or not chunks or embeddings is None or model is None:
-        return "No context available."
     try:
         query_embedding = model.encode([query])[0]
         similarities = cosine_similarity([query_embedding], embeddings)[0]
-        # Get top_k indices, ensure indices are within bounds
-        num_chunks = len(chunks)
-        # Handle cases where top_k > num_chunks
-        k = min(top_k, num_chunks)
-        if k == 0:
-            return "No relevant context found."
-        # Argsort gives indices of smallest values, use [-k:] and reverse
         top_indices = np.argsort(similarities)[-k:][::-1]
         relevant_contexts = [chunks[i] for i in top_indices]
         return " ".join(relevant_contexts)
@@ -115,53 +130,37 @@ def retrieve_relevant_context(query, chunks, embeddings, model, top_k=3):
         return "Error finding context."
 def generate_gemini_response(query, context):
-    """Generate response using Gemini with retrieved context."""
-    if not generation_model:
-        return "Model not initialized. Check API Key."
-    if not query:
-        return "No query provided."
     full_prompt = f"""
     Context: {context}
     Question: {query}
     Based *only* on the provided context about Satyam, answer the question concisely and in a natural, spoken style, from the first-person perspective (as Satyam).
     If the context does not contain the information needed to answer the question, respond exactly with:
-    "Hmm, that specific detail isn't in my knowledge base right now. Feel free to ask Satyam next time he is around"
     Do not invent information not present in the context.
     """
     try:
         response = generation_model.generate_content(full_prompt)
-        # Check for safety ratings or blocks if necessary (depending on Gemini version/settings)
         if response.candidates:
-             # Handle potential lack of 'text' attribute gracefully
              if hasattr(response.candidates[0].content.parts[0], 'text'):
                  return response.candidates[0].content.parts[0].text.strip()
              else:
                  print("Warning: Response part does not contain text.")
-                 # You might want to inspect response.candidates[0].content.parts[0] here
                  return "Sorry, I received an unexpected response format."
         else:
-             # Handle cases where no candidates are returned (e.g., blocked content)
              print(f"Warning: No candidates returned. Response: {response}")
-             # Check prompt feedback for block reason
              block_reason = response.prompt_feedback.block_reason if hasattr(response, 'prompt_feedback') else 'Unknown'
              return f"Sorry, I couldn't generate a response. Reason: {block_reason}"
     except Exception as e:
         print(f"Error generating response from Gemini: {e}")
         return f"Sorry, I encountered an error trying to respond: {e}"
 def text_to_speech(text):
-    """Convert text to speech using gTTS."""
-    if not text:
-        return None
     try:
-        # Create a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
-            tts = gTTS(text=text, lang='en', tld='co.za') # 'co.za' often gives a slightly deeper tone
             tts.save(temp_audio.name)
             print(f"Generated TTS audio at {temp_audio.name}")
             return temp_audio.name
@@ -169,7 +168,7 @@ def text_to_speech(text):
         print(f"Error generating text-to-speech: {e}")
         return None
-# --- Load resources once ---
 print("Loading resources...")
 embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
 document_text = extract_text_from_pdf(PDF_PATH)
@@ -178,61 +177,68 @@ profile_pic_base64 = get_base64_of_file(PROFILE_PIC_PATH)
 print("Resources loaded.")
 # --- Gradio Interface Logic ---
-# Initialize speech recognizer
 recognizer = sr.Recognizer()
-def transcribe_audio(audio_filepath, status_update_ui):
-    """Transcribes audio file to text."""
     if not audio_filepath:
         return "", "No audio input detected."
     try:
         with sr.AudioFile(audio_filepath) as source:
             status_update = "Processing audio..."
-            yield "", status_update # Update status immediately
-            audio = recognizer.record(source) # read the entire audio file
         status_update = "Transcribing..."
-        yield "", status_update
-        # Recognize speech using Google Web Speech API
         query = recognizer.recognize_google(audio)
         print(f"Transcribed query: {query}")
         status_update = f"You asked: {query}"
-        return query, status_update
     except sr.UnknownValueError:
         print("Google Speech Recognition could not understand audio")
         status_update = "Sorry, I couldn't understand what you said."
-        return "", status_update
     except sr.RequestError as e:
-        print(f"Could not request results from Google Speech Recognition service; {e}")
         status_update = "Sorry, my speech recognition service is unavailable."
-        return "", status_update
     except Exception as e:
         print(f"Error during transcription: {e}")
         status_update = f"Error during transcription: {e}"
-        return "", status_update
     finally:
-         # Clean up the temporary audio file uploaded by Gradio
         if audio_filepath and os.path.exists(audio_filepath):
-             try:
-                 os.unlink(audio_filepath)
-                 print(f"Cleaned up temp audio file: {audio_filepath}")
-             except Exception as e:
-                 print(f"Error deleting temp audio file {audio_filepath}: {e}")
 def voice_chat_pipeline(audio_filepath, chat_history_state):
-    """Main function to handle voice input, process, and generate response."""
     # 1. Transcribe Audio
-    transcription_result, status_update = transcribe_audio(audio_filepath, None) # Status handled separately now
-    if not transcription_result:
-        # If transcription failed, return current history and error status/audio
         error_audio = text_to_speech(status_update)
-        return chat_history_state, status_update, error_audio or gr.Audio(value=None) # Return None for audio if TTS fails
-    query = transcription_result
     status_update = f"Thinking about: '{query}'..."
-    yield chat_history_state, status_update, gr.Audio(value=None) # Update status, clear audio output
     # 2. Retrieve Context
     context = retrieve_relevant_context(query, document_chunks, document_embeddings, embedding_model)
@@ -244,21 +250,30 @@ def voice_chat_pipeline(audio_filepath, chat_history_state):
     response_audio_path = text_to_speech(response_text)
     # 5. Update History
-    # Ensure chat_history_state is treated as a list
     current_history = chat_history_state if chat_history_state is not None else []
     updated_history = current_history + [[query, response_text]]
-    # 6. Yield final results
     status_update = "Here's my response:"
-    # Return updated history, final status, and the path to the response audio
-    # Use gr.Audio(value=response_audio_path, autoplay=True) if you want auto-play
-    yield updated_history, status_update, gr.Audio(value=response_audio_path, autoplay=False)
-    # Clean up the generated TTS audio file after it has been sent to the user
     if response_audio_path and os.path.exists(response_audio_path):
-        # Add a small delay to ensure Gradio has served the file
-        time.sleep(2)
         try:
             os.unlink(response_audio_path)
             print(f"Cleaned up TTS audio file: {response_audio_path}")
         except Exception as e:
@@ -266,20 +281,18 @@ def voice_chat_pipeline(audio_filepath, chat_history_state):
 # --- Build Gradio App ---
-# Custom CSS (simplified)
 css = """
     .bio-card { background-color: #f4f4f4; padding: 20px; border-radius: 10px; margin: 10px 0; }
     .circular-img { width: 150px; height: 150px; object-fit: cover; border-radius: 50%; border: 3px solid #4CAF50; display: block; margin-left: auto; margin-right: auto; }
     .gradio-container { max-width: 800px !important; margin: auto; }
-    #chat_history .message.user { background-color: #e0f7fa !important; } /* Style user messages */
-    #chat_history .message.bot { background-color: #f1f8e9 !important; } /* Style bot messages */
 """
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
     gr.Markdown("# Voice QA Bot - Talk to Satyam's AI Assistant")
-    # Use gr.State to hold conversation history
     chat_history = gr.State([])
     with gr.Row():
@@ -289,46 +302,32 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
                  gr.HTML(f'<img src="data:image/jpeg;base64,{profile_pic_base64}" class="circular-img" alt="My Picture">')
             else:
                  gr.Markdown("_(Profile picture not loaded)_")
-            gr.HTML(
-                """
-                <div class="bio-card">
-                    <h3>Hi, I'm Satyam's AI Assistant!</h3>
-                    <p>
-                        Ask me questions based on Satyam's profile. I have information from his 'about_me.pdf'.
-                        I can tell you about his background in AI and Data Science, his interests, and professional goals.
-                        Just use the microphone!
-                    </p>
-                </div>
-                """
-            )
-            status_textbox = gr.Textbox(label="Status", value="Ready. Use the microphone to ask a question.", interactive=False)
         with gr.Column(scale=2):
             gr.Markdown("## Conversation")
             chatbot_ui = gr.Chatbot(label="Chat History", elem_id="chat_history", height=400)
             audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Ask your question:")
-            audio_output = gr.Audio(label="🔊 My Response", autoplay=False) # Set autoplay=True if desired, but browsers might block it
-            # Connect the audio input changing (i.e., recording finished) to the processing function
             audio_input.change(
                 fn=voice_chat_pipeline,
                 inputs=[audio_input, chat_history],
-                outputs=[chatbot_ui, status_textbox, audio_output],
-                show_progress="full" # Show progress indicator during processing
             )
     gr.Markdown("---")
-    gr.Markdown("Powered by Gradio, Google Gemini, Sentence Transformers, and gTTS.")
-# Launch the app (Gradio on Spaces handles this automatically via app.py)
 if __name__ == "__main__":
-    if not GEMINI_API_KEY:
-        print("\nERROR: GEMINI_API_KEY is not set. The app might not function correctly.")
-        print("If running locally, set the environment variable or modify the code.")
-        print("If running on Hugging Face Spaces, ensure the 'GEMINI_API_KEY' secret is added in the Space settings.\n")
-    if generation_model is None:
-         print("\nERROR: Gemini model could not be initialized. Check API Key and configuration.\n")
     print("Starting Gradio app...")
-    app.launch(debug=True) # debug=True for more logs locally

 import tempfile
 import base64
 import time
+# NOTE: `threading` is imported ONLY because the original code structure was requested.
+# Driving the old animation logic from a background thread is highly discouraged
+# and unlikely to work correctly in Gradio/Spaces.
+import threading
+# --- Configuration & Initialization ---
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
 if not GEMINI_API_KEY:
     print("Warning: GEMINI_API_KEY secret not found. Set it in Hugging Face Space settings.")
     # raise ValueError("GEMINI_API_KEY secret not found. Please set it in your Space settings.")
+    # GEMINI_API_KEY = "YOUR_LOCAL_KEY_FOR_TESTING_ONLY" # Uncomment for local testing
 if GEMINI_API_KEY:
     try:
         genai.configure(api_key=GEMINI_API_KEY)
+        generation_model = genai.GenerativeModel('gemini-1.5-flash')
     except Exception as e:
         print(f"Error configuring Gemini or initializing model: {e}")
         generation_model = None
 else:
     generation_model = None
+# --- File Paths (Relative) ---
 PDF_PATH = "about_me.pdf"
 PROFILE_PIC_PATH = "sk.jpeg"
+# --- Utility: Base64 Image ---
 def get_base64_of_file(file_path):
     try:
         with open(file_path, "rb") as f:
         print(f"Error reading file {file_path}: {e}")
         return None
+# --- Original Sound Wave Animation Functions (Adapted Attempt - HIGHLY UNLIKELY TO WORK) ---
+# The animation state needs to be managed differently in Gradio. A simple global
+# or class member might work for single-user local testing, but not reliably on Spaces.
+# Passing the state around via gr.State is the approach to try, though the threading part
+# remains problematic.
+def create_sound_wave(num_bars=20, max_height=50, color="#4CAF50"):
+    """Generates HTML for one frame of the sound wave."""
+    # Parameters: num_bars = how many bars to draw; max_height = upper bound of a bar's
+    # height in px; color = CSS color applied to every bar.
+    # Returns an HTML string: a flex container <div> holding one inline-block <div> per bar.
+    # Heights sample one sine period (0..2*pi) scaled into 0..max_height. The phase is
+    # shifted by time.time() * 5, so every call renders a different snapshot of the wave.
+    heights = [int(max_height * (0.5 + 0.5 * np.sin(i + time.time() * 5))) for i in np.linspace(0, 2 * np.pi, num_bars)]
+    # NOTE(review): `color` is interpolated into the style attribute without escaping;
+    # fine for literal color strings, not safe for untrusted input.
+    bars_html = "".join([
+        f'<div style="display: inline-block; width: 5px; height: {height}px; margin: 0 2px; background-color: {color}; transition: height 0.1s ease;"></div>'
+        for height in heights
+    ])
+    return f'<div style="display: flex; justify-content: center; align-items: center; height: 60px;">{bars_html}</div>'
+# --- !! Problem Area !! ---
+# The core issue: the original animation relied on background threading and continuous
+# updates, which doesn't map well to Gradio's event model or web server environments.
+# Trying to run it via Gradio events will likely block or fail:
+#   - `add_script_run_ctx` is Streamlit-specific.
+#   - Direct updates to `gr.HTML` from a background thread are not the standard Gradio way.
+# We can define the function, but calling it effectively from Gradio events is the challenge.
+# So the thread is deliberately *not* started here. Instead, the generator-based event
+# handler yields the *static* HTML from create_sound_wave() whenever a wave is needed.
+# This means NO ANIMATION, just a static wave picture.
+# If you absolutely need animation, you'd typically use JavaScript within gr.HTML
+# or find/build a custom Gradio component.
+# --- Core Logic (Mostly unchanged from previous Gradio version) ---
 def extract_text_from_pdf(pdf_path):
     try:
         if not os.path.exists(pdf_path):
             print(f"Error: PDF file not found at {pdf_path}")
         return ""
 def create_document_embeddings(text, model):
+    if not text or model is None: return [], None
     try:
         chunks = text.split('\n')
         chunks = [chunk.strip() for chunk in chunks if chunk.strip()]
+        if not chunks: return [], None
         embeddings = model.encode(chunks)
         print(f"Created {len(embeddings)} embeddings for {len(chunks)} chunks.")
         return chunks, embeddings
         return [], None
 def retrieve_relevant_context(query, chunks, embeddings, model, top_k=3):
+    if not query or not chunks or embeddings is None or model is None: return "No context available."
     try:
         query_embedding = model.encode([query])[0]
         similarities = cosine_similarity([query_embedding], embeddings)[0]
+        k = min(top_k, len(chunks))
+        if k == 0: return "No relevant context found."
         top_indices = np.argsort(similarities)[-k:][::-1]
         relevant_contexts = [chunks[i] for i in top_indices]
         return " ".join(relevant_contexts)
         return "Error finding context."
 def generate_gemini_response(query, context):
+    # Builds a context-grounded prompt and asks the module-level `generation_model` to answer
+    # in the first person. Always returns a string: the stripped model text on success, or a
+    # human-readable fallback message on each failure path handled below.
+    if not generation_model: return "Model not initialized. Check API Key."
+    if not query: return "No query provided."
     full_prompt = f"""
     Context: {context}
     Question: {query}
     Based *only* on the provided context about Satyam, answer the question concisely and in a natural, spoken style, from the first-person perspective (as Satyam).
     If the context does not contain the information needed to answer the question, respond exactly with:
+    "Hmm, that specific detail isn't in my knowledge base right now. You might need to ask me directly sometime!"
     Do not invent information not present in the context.
     """
     try:
         response = generation_model.generate_content(full_prompt)
+        # Only the first part of the first candidate is inspected. Any error raised while
+        # indexing (e.g. an empty `parts` sequence) falls through to the generic handler below.
         if response.candidates:
              if hasattr(response.candidates[0].content.parts[0], 'text'):
                  return response.candidates[0].content.parts[0].text.strip()
              else:
                  print("Warning: Response part does not contain text.")
                  return "Sorry, I received an unexpected response format."
         else:
+             # No candidates: report the block reason from `prompt_feedback` when that attribute exists.
              print(f"Warning: No candidates returned. Response: {response}")
              block_reason = response.prompt_feedback.block_reason if hasattr(response, 'prompt_feedback') else 'Unknown'
              return f"Sorry, I couldn't generate a response. Reason: {block_reason}"
     except Exception as e:
+        # Catch-all so a failed API call becomes a returned message; the exception text is included in it.
         print(f"Error generating response from Gemini: {e}")
         return f"Sorry, I encountered an error trying to respond: {e}"
 def text_to_speech(text):
+    if not text: return None
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
+            tts = gTTS(text=text, lang='en', tld='co.za')
             tts.save(temp_audio.name)
             print(f"Generated TTS audio at {temp_audio.name}")
             return temp_audio.name
         print(f"Error generating text-to-speech: {e}")
         return None
+# --- Load resources ---
 print("Loading resources...")
 embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
 document_text = extract_text_from_pdf(PDF_PATH)
 print("Resources loaded.")
 # --- Gradio Interface Logic ---
 recognizer = sr.Recognizer()
+def transcribe_audio(audio_filepath):
+    """Transcribes audio file to text. Returns (query, status_message)."""
+    # `query` is "" on every failure path, so callers can test `if not query`.
     if not audio_filepath:
         return "", "No audio input detected."
+    query = ""
+    status_update = ""
     try:
         with sr.AudioFile(audio_filepath) as source:
+            # Ambient-noise calibration is left disabled (less critical with file input).
+            # To enable it: recognizer.adjust_for_ambient_noise(source, duration=0.5)
             status_update = "Processing audio..."
+            # Interim status only: this function is not a generator, so the value above is
+            # always overwritten before the final return and never reaches the caller.
+            audio = recognizer.record(source)
         status_update = "Transcribing..."
+        # Interim status as well; replaced below on success or by an except branch.
         query = recognizer.recognize_google(audio)
         print(f"Transcribed query: {query}")
         status_update = f"You asked: {query}"
     except sr.UnknownValueError:
         print("Google Speech Recognition could not understand audio")
         status_update = "Sorry, I couldn't understand what you said."
     except sr.RequestError as e:
+        print(f"Could not request results; {e}")
         status_update = "Sorry, my speech recognition service is unavailable."
     except Exception as e:
         print(f"Error during transcription: {e}")
         status_update = f"Error during transcription: {e}"
     finally:
+        # Always remove the input recording once the try block has run, whether or not
+        # transcription succeeded; a failed delete is logged rather than raised.
         if audio_filepath and os.path.exists(audio_filepath):
+             try: os.unlink(audio_filepath); print(f"Cleaned up temp audio: {audio_filepath}")
+             except Exception as e: print(f"Error deleting temp audio {audio_filepath}: {e}")
+    return query, status_update
 def voice_chat_pipeline(audio_filepath, chat_history_state):
+    """Main function modified to yield updates for animation attempt."""
+    # Initial state: Clear animation, set status
+    yield chat_history_state, "Processing...", gr.HTML(value=""), gr.Audio(value=None)
     # 1. Transcribe Audio
+    # Show "listening" animation (static snapshot)
+    listening_wave_html = create_sound_wave(color="#4CAF50")
+    yield chat_history_state, "Listening (processing)...", gr.HTML(value=listening_wave_html), gr.Audio(value=None)
+    query, status_update = transcribe_audio(audio_filepath)
+    # Clear animation after transcription attempt
+    yield chat_history_state, status_update, gr.HTML(value=""), gr.Audio(value=None)
+    if not query:
         error_audio = text_to_speech(status_update)
+         # Show static "error" wave? Or just keep it clear. Let's keep clear.
+        yield chat_history_state, status_update, gr.HTML(value=""), error_audio or gr.Audio(value=None)
+        # Clean up potential error audio
+        if error_audio and os.path.exists(error_audio): time.sleep(1); os.unlink(error_audio)
+        return # Stop processing if transcription failed
+    # Update status before generation
     status_update = f"Thinking about: '{query}'..."
+    yield chat_history_state, status_update, gr.HTML(value=""), gr.Audio(value=None)
     # 2. Retrieve Context
     context = retrieve_relevant_context(query, document_chunks, document_embeddings, embedding_model)
     response_audio_path = text_to_speech(response_text)
     # 5. Update History
     current_history = chat_history_state if chat_history_state is not None else []
     updated_history = current_history + [[query, response_text]]
+    # 6. Yield final results with "speaking" animation (static snapshot)
+    speaking_wave_html = create_sound_wave(color="#FF5733") # Different color for speaking
     status_update = "Here's my response:"
+    # Yield history, status, speaking wave, and audio output
+    yield updated_history, status_update, gr.HTML(value=speaking_wave_html), gr.Audio(value=response_audio_path, autoplay=False)
+    # Keep the "speaking" wave visible briefly while audio potentially plays, then clear it.
+    # This is tricky without knowing exactly when playback finishes in the browser.
+    # A simple time delay is a crude approximation.
+    time.sleep(3) # Keep wave visible for 3 seconds (adjust as needed)
+    # Final yield to clear the animation after potential playback
+    yield updated_history, status_update, gr.HTML(value=""), gr.Audio(value=response_audio_path, autoplay=False)
+    # Clean up TTS audio file
     if response_audio_path and os.path.exists(response_audio_path):
         try:
+            # Delay slightly longer before deleting to ensure Gradio served it
+            time.sleep(2)
             os.unlink(response_audio_path)
             print(f"Cleaned up TTS audio file: {response_audio_path}")
         except Exception as e:
 # --- Build Gradio App ---
 css = """
     .bio-card { background-color: #f4f4f4; padding: 20px; border-radius: 10px; margin: 10px 0; }
     .circular-img { width: 150px; height: 150px; object-fit: cover; border-radius: 50%; border: 3px solid #4CAF50; display: block; margin-left: auto; margin-right: auto; }
     .gradio-container { max-width: 800px !important; margin: auto; }
+    #chat_history .message.user { background-color: #e0f7fa !important; }
+    #chat_history .message.bot { background-color: #f1f8e9 !important; }
+    #animation_html_output div { min-height: 60px; } /* Ensure space for wave */
 """
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as app:
     gr.Markdown("# Voice QA Bot - Talk to Satyam's AI Assistant")
     chat_history = gr.State([])
     with gr.Row():
                  gr.HTML(f'<img src="data:image/jpeg;base64,{profile_pic_base64}" class="circular-img" alt="My Picture">')
             else:
                  gr.Markdown("_(Profile picture not loaded)_")
+            gr.HTML("""<div class="bio-card"><h3>Hi, I'm Satyam's AI Assistant!</h3><p>Ask me questions based on Satyam's profile using the microphone.</p></div>""")
+            status_textbox = gr.Textbox(label="Status", value="Ready.", interactive=False)
+             # Placeholder for the "animation" (will show static wave snapshots)
+            animation_output = gr.HTML(elem_id="animation_html_output", value="")
         with gr.Column(scale=2):
             gr.Markdown("## Conversation")
             chatbot_ui = gr.Chatbot(label="Chat History", elem_id="chat_history", height=400)
             audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Ask your question:")
+            audio_output = gr.Audio(label="🔊 My Response", autoplay=False)
+            # Connect audio input to the pipeline
             audio_input.change(
                 fn=voice_chat_pipeline,
                 inputs=[audio_input, chat_history],
+                 # Output includes the HTML component for the wave snapshot
+                outputs=[chatbot_ui, status_textbox, animation_output, audio_output],
+                show_progress="minimal" # Use minimal progress as we have status textbox
             )
     gr.Markdown("---")
+    gr.Markdown("Powered by Gradio, Google Gemini, etc.")
+# Launch for local testing (Gradio on Spaces handles this)
 if __name__ == "__main__":
+    if not GEMINI_API_KEY: print("\nERROR: GEMINI_API_KEY not set.\n")
+    if generation_model is None: print("\nERROR: Gemini model not initialized.\n")
     print("Starting Gradio app...")
+    app.launch(debug=True)