Garvitj committed on
Commit
e5fca9d
·
verified ·
1 Parent(s): 5b85b38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -36
app.py CHANGED
@@ -1,17 +1,15 @@
1
  import gradio as gr
2
  import os
3
- import tempfile
4
- # opencv and scipy are no longer needed here
5
  from dotenv import load_dotenv
6
- import analysis # This is your existing analysis.py file
7
 
8
- # Load environment variables from .env file
9
  load_dotenv()
10
 
11
  def analyze_all(image_data_path, audio_data_path, user_query):
12
  """
13
- This is the main function that Gradio will call.
14
- It takes all inputs and returns all outputs.
15
  """
16
 
17
  # --- 1. Validation ---
@@ -27,7 +25,6 @@ def analyze_all(image_data_path, audio_data_path, user_query):
27
  transcript = ""
28
 
29
  # --- 2. Process Image ---
30
- # image_data_path is now a file path. We pass it directly.
31
  try:
32
  facial_emotion = analysis.get_facial_emotion(image_data_path)
33
  except Exception as e:
@@ -35,7 +32,6 @@ def analyze_all(image_data_path, audio_data_path, user_query):
35
  facial_emotion = "Image Error"
36
 
37
  # --- 3. Process Audio ---
38
- # audio_data_path is now a file path. We pass it directly.
39
  try:
40
  voice_emotion = analysis.get_voice_emotion(audio_data_path)
41
  transcript = analysis.get_transcript(audio_data_path)
@@ -56,15 +52,13 @@ def analyze_all(image_data_path, audio_data_path, user_query):
56
  print(f"Error getting LLM response: {e}")
57
  ai_response = f"Error in LLM: {e}"
58
 
59
- # --- 5. Return all 6 values to the output components ---
60
- # We return the paths to make the inputs "sticky" after the click.
61
  return (
62
  facial_emotion.capitalize(),
63
  voice_emotion.capitalize(),
64
  transcript if transcript else "No speech detected",
65
- ai_response,
66
- image_data_path, # Return the path to the image input
67
- audio_data_path # Return the path to the audio input
68
  )
69
 
70
 
@@ -76,22 +70,34 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
76
  "to provide an empathetic, context-aware response."
77
  )
78
 
 
 
 
 
 
79
  with gr.Row():
80
  with gr.Column(scale=1):
81
  gr.Markdown("## 1. Inputs")
 
 
 
82
  # Input 1: Image
83
  img_input = gr.Image(
84
  sources=["webcam"],
85
  label="📸 Capture Your Expression",
86
- type="filepath" # <-- Use filepath
87
  )
 
 
88
 
89
  # Input 2: Audio
90
  audio_input = gr.Audio(
91
  sources=["microphone"],
92
  label="🎙️ Record Your Voice",
93
- type="filepath" # <-- Use filepath
94
  )
 
 
95
 
96
  # Input 3: Text
97
  text_input = gr.Textbox(
@@ -105,39 +111,46 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
105
 
106
  with gr.Column(scale=1):
107
  gr.Markdown("## 2. Analysis & Response")
108
- # Output 1: Facial
109
  face_output = gr.Textbox(label="😊 Facial Emotion", interactive=False)
110
-
111
- # Output 2: Voice
112
  voice_output = gr.Textbox(label="🎤 Vocal Tone", interactive=False)
113
-
114
- # Output 3: Transcript
115
  transcript_output = gr.Textbox(label="💬 Transcription", interactive=False)
116
-
117
- # Output 4: Final Response
118
  llm_output = gr.Textbox(label="💙 Empathetic Response", interactive=False, lines=10)
119
 
120
- # Connect the button to the function
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  analyze_btn.click(
122
  fn=analyze_all,
123
- inputs=[img_input, audio_input, text_input],
124
- outputs=[
125
- face_output,
126
- voice_output,
127
- transcript_output,
128
- llm_output,
129
- img_input, # <-- Make input sticky
130
- audio_input # <-- Make input sticky
131
- ]
132
  )
133
 
134
- # Add sidebar info
135
  with gr.Accordion("ℹ️ How to Use & Tech Stack", open=False):
136
  gr.Markdown("""
137
  ### How to Use
138
  1. **Allow** browser access to your webcam and microphone.
139
- 2. **Take a snapshot** using the webcam.
140
- 3. **Record** your query using the microphone.
141
  4. **Type** your query in the text box.
142
  5. **Click** the 'Analyze' button and wait for the response.
143
 
@@ -151,4 +164,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
151
 
152
  # Launch the app
153
  if __name__ == "__main__":
154
- demo.launch() # No debug=True on deploy
 
1
  import gradio as gr
2
  import os
 
 
3
  from dotenv import load_dotenv
4
+ import analysis # Your existing analysis.py
5
 
6
+ # Load environment variables
7
  load_dotenv()
8
 
9
  def analyze_all(image_data_path, audio_data_path, user_query):
10
  """
11
+ This is the main function. It receives file paths from the
12
+ gr.State variables, not the components directly.
13
  """
14
 
15
  # --- 1. Validation ---
 
25
  transcript = ""
26
 
27
  # --- 2. Process Image ---
 
28
  try:
29
  facial_emotion = analysis.get_facial_emotion(image_data_path)
30
  except Exception as e:
 
32
  facial_emotion = "Image Error"
33
 
34
  # --- 3. Process Audio ---
 
35
  try:
36
  voice_emotion = analysis.get_voice_emotion(audio_data_path)
37
  transcript = analysis.get_transcript(audio_data_path)
 
52
  print(f"Error getting LLM response: {e}")
53
  ai_response = f"Error in LLM: {e}"
54
 
55
+ # --- 5. Return all outputs ---
56
+ # We no longer need to return the inputs, as the State holds them.
57
  return (
58
  facial_emotion.capitalize(),
59
  voice_emotion.capitalize(),
60
  transcript if transcript else "No speech detected",
61
+ ai_response
 
 
62
  )
63
 
64
 
 
70
  "to provide an empathetic, context-aware response."
71
  )
72
 
73
+ # --- 1. DECLARE HIDDEN STATE VARIABLES ---
74
+ # These will store our file paths securely.
75
+ img_state = gr.State(None)
76
+ audio_state = gr.State(None)
77
+
78
  with gr.Row():
79
  with gr.Column(scale=1):
80
  gr.Markdown("## 1. Inputs")
81
+
82
+ # --- 2. CONNECT COMPONENTS TO STATE ---
83
+
84
  # Input 1: Image
85
  img_input = gr.Image(
86
  sources=["webcam"],
87
  label="📸 Capture Your Expression",
88
+ type="filepath"
89
  )
90
+ # Add a "success" message to show it's saved
91
+ img_msg = gr.Markdown("", visible=False)
92
 
93
  # Input 2: Audio
94
  audio_input = gr.Audio(
95
  sources=["microphone"],
96
  label="🎙️ Record Your Voice",
97
+ type="filepath"
98
  )
99
+ # Add a "success" message to show it's saved
100
+ audio_msg = gr.Markdown("", visible=False)
101
 
102
  # Input 3: Text
103
  text_input = gr.Textbox(
 
111
 
112
  with gr.Column(scale=1):
113
  gr.Markdown("## 2. Analysis & Response")
 
114
  face_output = gr.Textbox(label="😊 Facial Emotion", interactive=False)
 
 
115
  voice_output = gr.Textbox(label="🎤 Vocal Tone", interactive=False)
 
 
116
  transcript_output = gr.Textbox(label="💬 Transcription", interactive=False)
 
 
117
  llm_output = gr.Textbox(label="💙 Empathetic Response", interactive=False, lines=10)
118
 
119
+ # --- 3. CREATE EVENT LISTENERS TO SAVE TO STATE ---
120
+
121
+ # When a picture is taken (upload event), save its path to img_state
122
def save_image_path(img_path):
    """Persist the captured image's filepath into gr.State.

    Returns a pair: (value for img_state, gr.update for the status badge).
    A falsy path clears the state and hides the badge.
    """
    if not img_path:
        # No snapshot available: reset state and hide the "saved" message.
        return None, gr.update(visible=False)
    return img_path, gr.update(value="✅ Image Saved!", visible=True)
126
+
127
+ img_input.upload(save_image_path, inputs=img_input, outputs=[img_state, img_msg])
128
+ img_input.clear(lambda: (None, gr.update(visible=False)), outputs=[img_state, img_msg])
129
+
130
+ # When recording stops, save its path to audio_state
131
def save_audio_path(audio_path):
    """Persist the recorded audio's filepath into gr.State.

    Returns a pair: (value for audio_state, gr.update for the status badge).
    A falsy path clears the state and hides the badge.
    """
    if not audio_path:
        # No recording available: reset state and hide the "saved" message.
        return None, gr.update(visible=False)
    return audio_path, gr.update(value="✅ Audio Saved!", visible=True)
135
+
136
+ audio_input.stop_recording(save_audio_path, inputs=audio_input, outputs=[audio_state, audio_msg])
137
+ audio_input.clear(lambda: (None, gr.update(visible=False)), outputs=[audio_state, audio_msg])
138
+
139
+ # --- 4. CONNECT THE BUTTON TO READ FROM STATE ---
140
  analyze_btn.click(
141
  fn=analyze_all,
142
+ # Inputs are now the stable state variables
143
+ inputs=[img_state, audio_state, text_input],
144
+ outputs=[face_output, voice_output, transcript_output, llm_output]
 
 
 
 
 
 
145
  )
146
 
147
+ # ... (Your Accordion/Sidebar code remains the same) ...
148
  with gr.Accordion("ℹ️ How to Use & Tech Stack", open=False):
149
  gr.Markdown("""
150
  ### How to Use
151
  1. **Allow** browser access to your webcam and microphone.
152
+ 2. **Take a snapshot** (You should see 'Image Saved!').
153
+ 3. **Record** your query (You should see 'Audio Saved!').
154
  4. **Type** your query in the text box.
155
  5. **Click** the 'Analyze' button and wait for the response.
156
 
 
164
 
165
# Entry point: start the Gradio server only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    demo.launch()