Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,17 +1,15 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
-
import tempfile
|
| 4 |
-
# opencv and scipy are no longer needed here
|
| 5 |
from dotenv import load_dotenv
|
| 6 |
-
import analysis #
|
| 7 |
|
| 8 |
-
# Load environment variables
|
| 9 |
load_dotenv()
|
| 10 |
|
| 11 |
def analyze_all(image_data_path, audio_data_path, user_query):
|
| 12 |
"""
|
| 13 |
-
This is the main function
|
| 14 |
-
|
| 15 |
"""
|
| 16 |
|
| 17 |
# --- 1. Validation ---
|
|
@@ -27,7 +25,6 @@ def analyze_all(image_data_path, audio_data_path, user_query):
|
|
| 27 |
transcript = ""
|
| 28 |
|
| 29 |
# --- 2. Process Image ---
|
| 30 |
-
# image_data_path is now a file path. We pass it directly.
|
| 31 |
try:
|
| 32 |
facial_emotion = analysis.get_facial_emotion(image_data_path)
|
| 33 |
except Exception as e:
|
|
@@ -35,7 +32,6 @@ def analyze_all(image_data_path, audio_data_path, user_query):
|
|
| 35 |
facial_emotion = "Image Error"
|
| 36 |
|
| 37 |
# --- 3. Process Audio ---
|
| 38 |
-
# audio_data_path is now a file path. We pass it directly.
|
| 39 |
try:
|
| 40 |
voice_emotion = analysis.get_voice_emotion(audio_data_path)
|
| 41 |
transcript = analysis.get_transcript(audio_data_path)
|
|
@@ -56,15 +52,13 @@ def analyze_all(image_data_path, audio_data_path, user_query):
|
|
| 56 |
print(f"Error getting LLM response: {e}")
|
| 57 |
ai_response = f"Error in LLM: {e}"
|
| 58 |
|
| 59 |
-
# --- 5. Return all
|
| 60 |
-
# We
|
| 61 |
return (
|
| 62 |
facial_emotion.capitalize(),
|
| 63 |
voice_emotion.capitalize(),
|
| 64 |
transcript if transcript else "No speech detected",
|
| 65 |
-
ai_response
|
| 66 |
-
image_data_path, # Return the path to the image input
|
| 67 |
-
audio_data_path # Return the path to the audio input
|
| 68 |
)
|
| 69 |
|
| 70 |
|
|
@@ -76,22 +70,34 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 76 |
"to provide an empathetic, context-aware response."
|
| 77 |
)
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
with gr.Row():
|
| 80 |
with gr.Column(scale=1):
|
| 81 |
gr.Markdown("## 1. Inputs")
|
|
|
|
|
|
|
|
|
|
| 82 |
# Input 1: Image
|
| 83 |
img_input = gr.Image(
|
| 84 |
sources=["webcam"],
|
| 85 |
label="📸 Capture Your Expression",
|
| 86 |
-
type="filepath"
|
| 87 |
)
|
|
|
|
|
|
|
| 88 |
|
| 89 |
# Input 2: Audio
|
| 90 |
audio_input = gr.Audio(
|
| 91 |
sources=["microphone"],
|
| 92 |
label="🎙️ Record Your Voice",
|
| 93 |
-
type="filepath"
|
| 94 |
)
|
|
|
|
|
|
|
| 95 |
|
| 96 |
# Input 3: Text
|
| 97 |
text_input = gr.Textbox(
|
|
@@ -105,39 +111,46 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 105 |
|
| 106 |
with gr.Column(scale=1):
|
| 107 |
gr.Markdown("## 2. Analysis & Response")
|
| 108 |
-
# Output 1: Facial
|
| 109 |
face_output = gr.Textbox(label="😊 Facial Emotion", interactive=False)
|
| 110 |
-
|
| 111 |
-
# Output 2: Voice
|
| 112 |
voice_output = gr.Textbox(label="🎤 Vocal Tone", interactive=False)
|
| 113 |
-
|
| 114 |
-
# Output 3: Transcript
|
| 115 |
transcript_output = gr.Textbox(label="💬 Transcription", interactive=False)
|
| 116 |
-
|
| 117 |
-
# Output 4: Final Response
|
| 118 |
llm_output = gr.Textbox(label="💙 Empathetic Response", interactive=False, lines=10)
|
| 119 |
|
| 120 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
analyze_btn.click(
|
| 122 |
fn=analyze_all,
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
voice_output,
|
| 127 |
-
transcript_output,
|
| 128 |
-
llm_output,
|
| 129 |
-
img_input, # <-- Make input sticky
|
| 130 |
-
audio_input # <-- Make input sticky
|
| 131 |
-
]
|
| 132 |
)
|
| 133 |
|
| 134 |
-
#
|
| 135 |
with gr.Accordion("ℹ️ How to Use & Tech Stack", open=False):
|
| 136 |
gr.Markdown("""
|
| 137 |
### How to Use
|
| 138 |
1. **Allow** browser access to your webcam and microphone.
|
| 139 |
-
2. **Take a snapshot**
|
| 140 |
-
3. **Record** your query
|
| 141 |
4. **Type** your query in the text box.
|
| 142 |
5. **Click** the 'Analyze' button and wait for the response.
|
| 143 |
|
|
@@ -151,4 +164,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 151 |
|
| 152 |
# Launch the app
|
| 153 |
if __name__ == "__main__":
|
| 154 |
-
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
|
|
|
|
|
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
+
import analysis # Your existing analysis.py
|
| 5 |
|
| 6 |
+
# Load environment variables
|
| 7 |
load_dotenv()
|
| 8 |
|
| 9 |
def analyze_all(image_data_path, audio_data_path, user_query):
|
| 10 |
"""
|
| 11 |
+
This is the main function. It receives file paths from the
|
| 12 |
+
gr.State variables, not the components directly.
|
| 13 |
"""
|
| 14 |
|
| 15 |
# --- 1. Validation ---
|
|
|
|
| 25 |
transcript = ""
|
| 26 |
|
| 27 |
# --- 2. Process Image ---
|
|
|
|
| 28 |
try:
|
| 29 |
facial_emotion = analysis.get_facial_emotion(image_data_path)
|
| 30 |
except Exception as e:
|
|
|
|
| 32 |
facial_emotion = "Image Error"
|
| 33 |
|
| 34 |
# --- 3. Process Audio ---
|
|
|
|
| 35 |
try:
|
| 36 |
voice_emotion = analysis.get_voice_emotion(audio_data_path)
|
| 37 |
transcript = analysis.get_transcript(audio_data_path)
|
|
|
|
| 52 |
print(f"Error getting LLM response: {e}")
|
| 53 |
ai_response = f"Error in LLM: {e}"
|
| 54 |
|
| 55 |
+
# --- 5. Return all outputs ---
|
| 56 |
+
# We no longer need to return the inputs, as the State holds them.
|
| 57 |
return (
|
| 58 |
facial_emotion.capitalize(),
|
| 59 |
voice_emotion.capitalize(),
|
| 60 |
transcript if transcript else "No speech detected",
|
| 61 |
+
ai_response
|
|
|
|
|
|
|
| 62 |
)
|
| 63 |
|
| 64 |
|
|
|
|
| 70 |
"to provide an empathetic, context-aware response."
|
| 71 |
)
|
| 72 |
|
| 73 |
+
# --- 1. DECLARE HIDDEN STATE VARIABLES ---
|
| 74 |
+
# These hold the captured image/audio file paths in per-session state.
|
| 75 |
+
img_state = gr.State(None)
|
| 76 |
+
audio_state = gr.State(None)
|
| 77 |
+
|
| 78 |
with gr.Row():
|
| 79 |
with gr.Column(scale=1):
|
| 80 |
gr.Markdown("## 1. Inputs")
|
| 81 |
+
|
| 82 |
+
# --- 2. CONNECT COMPONENTS TO STATE ---
|
| 83 |
+
|
| 84 |
# Input 1: Image
|
| 85 |
img_input = gr.Image(
|
| 86 |
sources=["webcam"],
|
| 87 |
label="📸 Capture Your Expression",
|
| 88 |
+
type="filepath"
|
| 89 |
)
|
| 90 |
+
# Add a "success" message to show it's saved
|
| 91 |
+
img_msg = gr.Markdown("", visible=False)
|
| 92 |
|
| 93 |
# Input 2: Audio
|
| 94 |
audio_input = gr.Audio(
|
| 95 |
sources=["microphone"],
|
| 96 |
label="🎙️ Record Your Voice",
|
| 97 |
+
type="filepath"
|
| 98 |
)
|
| 99 |
+
# Add a "success" message to show it's saved
|
| 100 |
+
audio_msg = gr.Markdown("", visible=False)
|
| 101 |
|
| 102 |
# Input 3: Text
|
| 103 |
text_input = gr.Textbox(
|
|
|
|
| 111 |
|
| 112 |
with gr.Column(scale=1):
|
| 113 |
gr.Markdown("## 2. Analysis & Response")
|
|
|
|
| 114 |
face_output = gr.Textbox(label="😊 Facial Emotion", interactive=False)
|
|
|
|
|
|
|
| 115 |
voice_output = gr.Textbox(label="🎤 Vocal Tone", interactive=False)
|
|
|
|
|
|
|
| 116 |
transcript_output = gr.Textbox(label="💬 Transcription", interactive=False)
|
|
|
|
|
|
|
| 117 |
llm_output = gr.Textbox(label="💙 Empathetic Response", interactive=False, lines=10)
|
| 118 |
|
| 119 |
+
# --- 3. CREATE EVENT LISTENERS TO SAVE TO STATE ---
|
| 120 |
+
|
| 121 |
+
# When a picture is captured, save its path to img_state.
def save_image_path(img_path):
    """Store the captured image path and toggle the confirmation message.

    Args:
        img_path: Current value of the image component; falsy when no
            image is set.

    Returns:
        A ``(state_value, message_update)`` pair: the path together with a
        visible "Image Saved!" message, or ``None`` together with a hidden
        message when there is no image.
    """
    if img_path:
        return img_path, gr.update(value="✅ Image Saved!", visible=True)
    return None, gr.update(visible=False)

# The image component only offers the webcam source, and a webcam snapshot
# is reported through `change` rather than `upload`. Listening on `upload`
# would leave img_state empty, so register the callback on `change`.
img_input.change(save_image_path, inputs=img_input, outputs=[img_state, img_msg])
img_input.clear(lambda: (None, gr.update(visible=False)), outputs=[img_state, img_msg])
|
| 129 |
+
|
| 130 |
+
# When recording stops, save its path to audio_state
|
| 131 |
+
def save_audio_path(audio_path):
    """Turn a recorded audio path into a state value and a message update.

    Args:
        audio_path: Current value of the audio component; falsy when
            nothing has been recorded.

    Returns:
        A ``(state_value, message_update)`` pair: ``None`` with a hidden
        message when there is no recording, otherwise the path with a
        visible "Audio Saved!" message.
    """
    if not audio_path:
        # Nothing recorded: clear the stored path and hide the notice.
        return None, gr.update(visible=False)

    saved_notice = gr.update(value="✅ Audio Saved!", visible=True)
    return audio_path, saved_notice
|
| 135 |
+
|
| 136 |
+
audio_input.stop_recording(save_audio_path, inputs=audio_input, outputs=[audio_state, audio_msg])
|
| 137 |
+
audio_input.clear(lambda: (None, gr.update(visible=False)), outputs=[audio_state, audio_msg])
|
| 138 |
+
|
| 139 |
+
# --- 4. CONNECT THE BUTTON TO READ FROM STATE ---
|
| 140 |
analyze_btn.click(
|
| 141 |
fn=analyze_all,
|
| 142 |
+
# Inputs are now the stable state variables
|
| 143 |
+
inputs=[img_state, audio_state, text_input],
|
| 144 |
+
outputs=[face_output, voice_output, transcript_output, llm_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
)
|
| 146 |
|
| 147 |
+
# --- 5. HELP: HOW TO USE & TECH STACK ---
|
| 148 |
with gr.Accordion("ℹ️ How to Use & Tech Stack", open=False):
|
| 149 |
gr.Markdown("""
|
| 150 |
### How to Use
|
| 151 |
1. **Allow** browser access to your webcam and microphone.
|
| 152 |
+
2. **Take a snapshot** (You should see 'Image Saved!').
|
| 153 |
+
3. **Record** your query (You should see 'Audio Saved!').
|
| 154 |
4. **Type** your query in the text box.
|
| 155 |
5. **Click** the 'Analyze' button and wait for the response.
|
| 156 |
|
|
|
|
| 164 |
|
| 165 |
# Launch the app
|
| 166 |
if __name__ == "__main__":
|
| 167 |
+
demo.launch()
|