Spaces:

Garvitj
/

emotion_llm_gradio

Sleeping

App Files Files Community

Garvitj committed on Nov 2, 2025

Commit

e5fca9d

verified ·

1 Parent(s): 5b85b38

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -36

app.py CHANGED Viewed

@@ -1,17 +1,15 @@
 import gradio as gr
 import os
-import tempfile
-# opencv and scipy are no longer needed here
 from dotenv import load_dotenv
-import analysis  # This is your existing analysis.py file
-# Load environment variables from .env file
 load_dotenv()
 def analyze_all(image_data_path, audio_data_path, user_query):
     """
-    This is the main function that Gradio will call.
-    It takes all inputs and returns all outputs.
     """
     # --- 1. Validation ---
@@ -27,7 +25,6 @@ def analyze_all(image_data_path, audio_data_path, user_query):
     transcript = ""
     # --- 2. Process Image ---
-    # image_data_path is now a file path. We pass it directly.
     try:
         facial_emotion = analysis.get_facial_emotion(image_data_path)
     except Exception as e:
@@ -35,7 +32,6 @@ def analyze_all(image_data_path, audio_data_path, user_query):
         facial_emotion = "Image Error"
     # --- 3. Process Audio ---
-    # audio_data_path is now a file path. We pass it directly.
     try:
         voice_emotion = analysis.get_voice_emotion(audio_data_path)
         transcript = analysis.get_transcript(audio_data_path)
@@ -56,15 +52,13 @@ def analyze_all(image_data_path, audio_data_path, user_query):
         print(f"Error getting LLM response: {e}")
         ai_response = f"Error in LLM: {e}"
-    # --- 5. Return all 6 values to the output components ---
-    # We return the paths to make the inputs "sticky" after the click.
     return (
         facial_emotion.capitalize(),
         voice_emotion.capitalize(),
         transcript if transcript else "No speech detected",
-        ai_response,
-        image_data_path,  # Return the path to the image input
-        audio_data_path   # Return the path to the audio input
     )
@@ -76,22 +70,34 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         "to provide an empathetic, context-aware response."
     )
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("## 1. Inputs")
             # Input 1: Image
             img_input = gr.Image(
                 sources=["webcam"],
                 label="📸 Capture Your Expression",
-                type="filepath"  # <-- Use filepath
             )
             # Input 2: Audio
             audio_input = gr.Audio(
                 sources=["microphone"],
                 label="🎙️ Record Your Voice",
-                type="filepath"  # <-- Use filepath
             )
             # Input 3: Text
             text_input = gr.Textbox(
@@ -105,39 +111,46 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=1):
             gr.Markdown("## 2. Analysis & Response")
-            # Output 1: Facial
             face_output = gr.Textbox(label="😊 Facial Emotion", interactive=False)
-            # Output 2: Voice
             voice_output = gr.Textbox(label="🎤 Vocal Tone", interactive=False)
-            # Output 3: Transcript
             transcript_output = gr.Textbox(label="💬 Transcription", interactive=False)
-            # Output 4: Final Response
             llm_output = gr.Textbox(label="💙 Empathetic Response", interactive=False, lines=10)
-    # Connect the button to the function
     analyze_btn.click(
         fn=analyze_all,
-        inputs=[img_input, audio_input, text_input],
-        outputs=[
-            face_output,
-            voice_output,
-            transcript_output,
-            llm_output,
-            img_input,      # <-- Make input sticky
-            audio_input     # <-- Make input sticky
-        ]
     )
-    # Add sidebar info
     with gr.Accordion("ℹ️ How to Use & Tech Stack", open=False):
         gr.Markdown("""
         ### How to Use
         1. **Allow** browser access to your webcam and microphone.
-        2. **Take a snapshot** using the webcam.
-        3. **Record** your query using the microphone.
         4. **Type** your query in the text box.
         5. **Click** the 'Analyze' button and wait for the response.
@@ -151,4 +164,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 # Launch the app
 if __name__ == "__main__":
-    demo.launch() # No debug=True on deploy

 import gradio as gr
 import os
 from dotenv import load_dotenv
+import analysis  # Your existing analysis.py
+# Load environment variables
 load_dotenv()
 def analyze_all(image_data_path, audio_data_path, user_query):
     """
+    This is the main function. It receives file paths from the
+    gr.State variables, not the components directly.
     """
     # --- 1. Validation ---
     transcript = ""
     # --- 2. Process Image ---
     try:
         facial_emotion = analysis.get_facial_emotion(image_data_path)
     except Exception as e:
         facial_emotion = "Image Error"
     # --- 3. Process Audio ---
     try:
         voice_emotion = analysis.get_voice_emotion(audio_data_path)
         transcript = analysis.get_transcript(audio_data_path)
         print(f"Error getting LLM response: {e}")
         ai_response = f"Error in LLM: {e}"
+    # --- 5. Return all outputs ---
+    # We no longer need to return the inputs, as the State holds them.
     return (
         facial_emotion.capitalize(),
         voice_emotion.capitalize(),
         transcript if transcript else "No speech detected",
+        ai_response
     )
         "to provide an empathetic, context-aware response."
     )
+    # --- 1. DECLARE HIDDEN STATE VARIABLES ---
+    # These hold the captured image and audio file paths between events.
+    img_state = gr.State(None)
+    audio_state = gr.State(None)
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("## 1. Inputs")
+            # --- 2. CONNECT COMPONENTS TO STATE ---
             # Input 1: Image
             img_input = gr.Image(
                 sources=["webcam"],
                 label="📸 Capture Your Expression",
+                type="filepath"
             )
+            # Add a "success" message to show it's saved
+            img_msg = gr.Markdown("", visible=False)
             # Input 2: Audio
             audio_input = gr.Audio(
                 sources=["microphone"],
                 label="🎙️ Record Your Voice",
+                type="filepath"
             )
+            # Add a "success" message to show it's saved
+            audio_msg = gr.Markdown("", visible=False)
             # Input 3: Text
             text_input = gr.Textbox(
         with gr.Column(scale=1):
             gr.Markdown("## 2. Analysis & Response")
             face_output = gr.Textbox(label="😊 Facial Emotion", interactive=False)
             voice_output = gr.Textbox(label="🎤 Vocal Tone", interactive=False)
             transcript_output = gr.Textbox(label="💬 Transcription", interactive=False)
             llm_output = gr.Textbox(label="💙 Empathetic Response", interactive=False, lines=10)
+    # --- 3. CREATE EVENT LISTENERS TO SAVE TO STATE ---
+    # When the image component fires its `upload` event, save the file path to img_state
+    # (note: only `upload` and `clear` are wired here, not `change`).
+    def save_image_path(img_path):
+        if img_path:
+            return img_path, gr.update(value="✅ Image Saved!", visible=True)
+        return None, gr.update(visible=False)
+    img_input.upload(save_image_path, inputs=img_input, outputs=[img_state, img_msg])
+    img_input.clear(lambda: (None, gr.update(visible=False)), outputs=[img_state, img_msg])
+    # When recording stops, save its path to audio_state
+    def save_audio_path(audio_path):
+        if audio_path:
+            return audio_path, gr.update(value="✅ Audio Saved!", visible=True)
+        return None, gr.update(visible=False)
+    audio_input.stop_recording(save_audio_path, inputs=audio_input, outputs=[audio_state, audio_msg])
+    audio_input.clear(lambda: (None, gr.update(visible=False)), outputs=[audio_state, audio_msg])
+    # --- 4. CONNECT THE BUTTON TO READ FROM STATE ---
     analyze_btn.click(
         fn=analyze_all,
+        # Inputs are now the stable state variables
+        inputs=[img_state, audio_state, text_input],
+        outputs=[face_output, voice_output, transcript_output, llm_output]
     )
+    # Collapsible help section: usage steps and tech stack
     with gr.Accordion("ℹ️ How to Use & Tech Stack", open=False):
         gr.Markdown("""
         ### How to Use
         1. **Allow** browser access to your webcam and microphone.
+        2. **Take a snapshot** (You should see 'Image Saved!').
+        3. **Record** your query (You should see 'Audio Saved!').
         4. **Type** your query in the text box.
         5. **Click** the 'Analyze' button and wait for the response.
 # Launch the app
 if __name__ == "__main__":
+    demo.launch()