Kevin King committed on
Commit
d2457b5
·
verified ·
1 Parent(s): 66b390c

Upload 3 files

Browse files

reworked app.py file as a skeleton

Files changed (2) hide show
  1. app.py +23 -11
  2. requirements.txt +2 -1
app.py CHANGED
@@ -60,6 +60,9 @@ def load_models():
60
  ser_feature_extractor = AutoFeatureExtractor.from_pretrained(ser_model_name)
61
  ser_model = AutoModelForAudioClassification.from_pretrained(ser_model_name)
62
 
 
 
 
63
  return whisper_model, text_classifier, ser_model, ser_feature_extractor
64
 
65
  whisper_model, text_classifier, ser_model, ser_feature_extractor = load_models()
@@ -123,16 +126,30 @@ webrtc_streamer(
123
  async_processing=True,
124
  )
125
 
 
126
  st.sidebar.header("Facial Emotion")
127
  st.sidebar.metric("Current Expression", st.session_state.get('facial_emotion', 'N/A'))
128
  st.sidebar.info("Facial emotion is updated every 5 seconds to optimize performance.")
 
129
 
130
  st.sidebar.header("Audio Analysis")
131
- is_recording = st.sidebar.checkbox("Start Recording")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  if not is_recording and st.session_state.audio_buffer:
134
- st.sidebar.info("Processing audio... please wait.")
135
-
136
  # Combine audio chunks
137
  audio_data = np.concatenate(st.session_state.audio_buffer)
138
  st.session_state.audio_buffer = [] # Clear buffer
@@ -148,15 +165,13 @@ if not is_recording and st.session_state.audio_buffer:
148
  transcribed_text = result['text']
149
  except Exception as e:
150
  transcribed_text = f"Transcription failed: {e}"
151
- st.sidebar.subheader("Transcription:")
152
- st.sidebar.write(transcribed_text)
153
 
154
  # 2. Text-based Emotion
155
  with st.spinner("Analyzing text emotion..."):
156
  if transcribed_text:
157
  try:
158
  text_emotions = text_classifier(transcribed_text)[0]
159
- # Map to unified emotions
160
  unified_text_scores = {e: 0.0 for e in UNIFIED_EMOTIONS}
161
  for EMO in text_emotions:
162
  unified_emo = TEXT_TO_UNIFIED.get(EMO['label'])
@@ -168,8 +183,7 @@ if not is_recording and st.session_state.audio_buffer:
168
  dominant_text_emotion = f"Text analysis failed: {e}"
169
  else:
170
  dominant_text_emotion = "No text to analyze."
171
- st.sidebar.subheader("Text Emotion:")
172
- st.sidebar.write(dominant_text_emotion.capitalize())
173
 
174
  # 3. Speech Emotion Recognition (SER)
175
  with st.spinner("Analyzing speech emotion..."):
@@ -180,7 +194,6 @@ if not is_recording and st.session_state.audio_buffer:
180
  logits = ser_model(**inputs).logits
181
  scores = torch.nn.functional.softmax(logits, dim=1).squeeze()
182
 
183
- # Map to unified emotions
184
  unified_ser_scores = {e: 0.0 for e in UNIFIED_EMOTIONS}
185
  for i, score in enumerate(scores):
186
  raw_emo = ser_model.config.id2label[i]
@@ -191,8 +204,7 @@ if not is_recording and st.session_state.audio_buffer:
191
  dominant_ser_emotion = max(unified_ser_scores, key=unified_ser_scores.get)
192
  except Exception as e:
193
  dominant_ser_emotion = f"Speech analysis failed: {e}"
194
- st.sidebar.subheader("Speech Emotion:")
195
- st.sidebar.write(dominant_ser_emotion.capitalize())
196
 
197
  # Clean up the temporary file
198
  os.unlink(tmp_audio_file.name)
 
60
  ser_feature_extractor = AutoFeatureExtractor.from_pretrained(ser_model_name)
61
  ser_model = AutoModelForAudioClassification.from_pretrained(ser_model_name)
62
 
63
+ # NEW: Pre-load the DeepFace model to prevent lag on first use
64
+ DeepFace.build_model("Emotion")
65
+
66
  return whisper_model, text_classifier, ser_model, ser_feature_extractor
67
 
68
  whisper_model, text_classifier, ser_model, ser_feature_extractor = load_models()
 
126
  async_processing=True,
127
  )
128
 
129
+ # --- UI Layout ---
130
  st.sidebar.header("Facial Emotion")
131
  st.sidebar.metric("Current Expression", st.session_state.get('facial_emotion', 'N/A'))
132
  st.sidebar.info("Facial emotion is updated every 5 seconds to optimize performance.")
133
+ st.sidebar.divider()
134
 
135
  st.sidebar.header("Audio Analysis")
136
+ is_recording = st.sidebar.checkbox("Start Recording Audio")
137
+
138
+ # NEW: Set up placeholders for audio results
139
+ st.sidebar.subheader("Transcription:")
140
+ transcription_placeholder = st.sidebar.empty()
141
+ transcription_placeholder.write("_Waiting for audio..._")
142
+
143
+ st.sidebar.subheader("Text Emotion:")
144
+ text_emotion_placeholder = st.sidebar.empty()
145
+ text_emotion_placeholder.write("_Waiting for audio..._")
146
+
147
+ st.sidebar.subheader("Speech Emotion:")
148
+ ser_placeholder = st.sidebar.empty()
149
+ ser_placeholder.write("_Waiting for audio..._")
150
+
151
 
152
  if not is_recording and st.session_state.audio_buffer:
 
 
153
  # Combine audio chunks
154
  audio_data = np.concatenate(st.session_state.audio_buffer)
155
  st.session_state.audio_buffer = [] # Clear buffer
 
165
  transcribed_text = result['text']
166
  except Exception as e:
167
  transcribed_text = f"Transcription failed: {e}"
168
+ transcription_placeholder.write(f'"{transcribed_text}"')
 
169
 
170
  # 2. Text-based Emotion
171
  with st.spinner("Analyzing text emotion..."):
172
  if transcribed_text:
173
  try:
174
  text_emotions = text_classifier(transcribed_text)[0]
 
175
  unified_text_scores = {e: 0.0 for e in UNIFIED_EMOTIONS}
176
  for EMO in text_emotions:
177
  unified_emo = TEXT_TO_UNIFIED.get(EMO['label'])
 
183
  dominant_text_emotion = f"Text analysis failed: {e}"
184
  else:
185
  dominant_text_emotion = "No text to analyze."
186
+ text_emotion_placeholder.write(dominant_text_emotion.capitalize())
 
187
 
188
  # 3. Speech Emotion Recognition (SER)
189
  with st.spinner("Analyzing speech emotion..."):
 
194
  logits = ser_model(**inputs).logits
195
  scores = torch.nn.functional.softmax(logits, dim=1).squeeze()
196
 
 
197
  unified_ser_scores = {e: 0.0 for e in UNIFIED_EMOTIONS}
198
  for i, score in enumerate(scores):
199
  raw_emo = ser_model.config.id2label[i]
 
204
  dominant_ser_emotion = max(unified_ser_scores, key=unified_ser_scores.get)
205
  except Exception as e:
206
  dominant_ser_emotion = f"Speech analysis failed: {e}"
207
+ ser_placeholder.write(dominant_ser_emotion.capitalize())
 
208
 
209
  # Clean up the temporary file
210
  os.unlink(tmp_audio_file.name)
requirements.txt CHANGED
@@ -11,4 +11,5 @@ torchaudio
11
  openai-whisper
12
  soundfile
13
  librosa
14
- scipy
 
 
11
  openai-whisper
12
  soundfile
13
  librosa
14
+ scipy
15
+ streamlit-autorefresh