Kevin King committed
Upload 3 files
reworked app.py file as a skeleton
- app.py +23 -11
- requirements.txt +2 -1
app.py CHANGED
```diff
@@ -60,6 +60,9 @@ def load_models():
     ser_feature_extractor = AutoFeatureExtractor.from_pretrained(ser_model_name)
     ser_model = AutoModelForAudioClassification.from_pretrained(ser_model_name)

+    # NEW: Pre-load the DeepFace model to prevent lag on first use
+    DeepFace.build_model("Emotion")
+
     return whisper_model, text_classifier, ser_model, ser_feature_extractor

 whisper_model, text_classifier, ser_model, ser_feature_extractor = load_models()
```
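Pre-building the DeepFace graph inside the loader only pays off if the loader runs once per process. A minimal sketch of that pattern, assuming (the hunk doesn't show it) that `load_models()` is wrapped in Streamlit's `st.cache_resource`:

```python
import streamlit as st
from deepface import DeepFace

@st.cache_resource  # assumed decorator: runs once per process, not on every rerun
def load_emotion_model():
    # Building the model here moves the weight download and graph construction
    # out of the first webcam frame's code path, which is what caused the lag.
    return DeepFace.build_model("Emotion")

emotion_model = load_emotion_model()
```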
```diff
@@ -123,16 +126,30 @@ webrtc_streamer(
     async_processing=True,
 )

+# --- UI Layout ---
 st.sidebar.header("Facial Emotion")
 st.sidebar.metric("Current Expression", st.session_state.get('facial_emotion', 'N/A'))
 st.sidebar.info("Facial emotion is updated every 5 seconds to optimize performance.")
+st.sidebar.divider()

 st.sidebar.header("Audio Analysis")
-is_recording = st.sidebar.checkbox("Start Recording")
+is_recording = st.sidebar.checkbox("Start Recording Audio")
+
+# NEW: Set up placeholders for audio results
+st.sidebar.subheader("Transcription:")
+transcription_placeholder = st.sidebar.empty()
+transcription_placeholder.write("_Waiting for audio..._")
+
+st.sidebar.subheader("Text Emotion:")
+text_emotion_placeholder = st.sidebar.empty()
+text_emotion_placeholder.write("_Waiting for audio..._")
+
+st.sidebar.subheader("Speech Emotion:")
+ser_placeholder = st.sidebar.empty()
+ser_placeholder.write("_Waiting for audio..._")
+

 if not is_recording and st.session_state.audio_buffer:
-    st.sidebar.info("Processing audio... please wait.")
-
     # Combine audio chunks
     audio_data = np.concatenate(st.session_state.audio_buffer)
     st.session_state.audio_buffer = []  # Clear buffer
```
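The `st.sidebar.empty()` calls reserve fixed slots in the sidebar; later writes replace a slot's content in place instead of appending below it, which is what keeps each result aligned under its subheader across reruns. A stripped-down illustration:

```python
import streamlit as st

st.sidebar.subheader("Transcription:")
slot = st.sidebar.empty()             # reserve a single updatable slot
slot.write("_Waiting for audio..._")  # initial placeholder text

# ...later in the same script run, once results exist:
slot.write('"hello world"')           # overwrites the placeholder in place
```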
```diff
@@ -148,15 +165,13 @@ if not is_recording and st.session_state.audio_buffer:
         transcribed_text = result['text']
     except Exception as e:
         transcribed_text = f"Transcription failed: {e}"
-
-    st.sidebar.write(transcribed_text)
+    transcription_placeholder.write(f'"{transcribed_text}"')

     # 2. Text-based Emotion
     with st.spinner("Analyzing text emotion..."):
         if transcribed_text:
             try:
                 text_emotions = text_classifier(transcribed_text)[0]
-                # Map to unified emotions
                 unified_text_scores = {e: 0.0 for e in UNIFIED_EMOTIONS}
                 for EMO in text_emotions:
                     unified_emo = TEXT_TO_UNIFIED.get(EMO['label'])
```
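The mapping loop is cut off by the hunk boundary; the idea is to pool the classifier's raw labels into the app's unified set. A sketch with hypothetical values for `UNIFIED_EMOTIONS` and `TEXT_TO_UNIFIED` (both are defined elsewhere in app.py, so the real contents may differ):

```python
# Hypothetical stand-ins for the constants defined elsewhere in app.py
UNIFIED_EMOTIONS = ["angry", "happy", "sad", "neutral"]
TEXT_TO_UNIFIED = {"anger": "angry", "joy": "happy", "sadness": "sad"}

def unify_text_emotions(text_emotions):
    """text_emotions: classifier output, [{'label': ..., 'score': ...}, ...]"""
    scores = {e: 0.0 for e in UNIFIED_EMOTIONS}
    for emo in text_emotions:
        unified = TEXT_TO_UNIFIED.get(emo["label"])
        if unified is not None:
            scores[unified] += emo["score"]  # pool raw labels into unified buckets
    return max(scores, key=scores.get)       # dominant unified emotion
```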
```diff
@@ -168,8 +183,7 @@ if not is_recording and st.session_state.audio_buffer:
                 dominant_text_emotion = f"Text analysis failed: {e}"
         else:
             dominant_text_emotion = "No text to analyze."
-
-    st.sidebar.write(dominant_text_emotion.capitalize())
+        text_emotion_placeholder.write(dominant_text_emotion.capitalize())

     # 3. Speech Emotion Recognition (SER)
     with st.spinner("Analyzing speech emotion..."):
```
```diff
@@ -180,7 +194,6 @@ if not is_recording and st.session_state.audio_buffer:
             logits = ser_model(**inputs).logits
             scores = torch.nn.functional.softmax(logits, dim=1).squeeze()

-            # Map to unified emotions
             unified_ser_scores = {e: 0.0 for e in UNIFIED_EMOTIONS}
             for i, score in enumerate(scores):
                 raw_emo = ser_model.config.id2label[i]
```
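The SER path mirrors the text path: softmax the logits, then fold each model-specific label into the unified set via `id2label`. A self-contained sketch (the mapping dict is passed in as a parameter here because the hunk doesn't show the actual name used in app.py):

```python
import torch

def unify_ser_scores(logits, id2label, unified_emotions, ser_to_unified):
    # Softmax over the class dimension turns logits into per-label probabilities
    probs = torch.nn.functional.softmax(logits, dim=1).squeeze()
    unified = {e: 0.0 for e in unified_emotions}
    for i, p in enumerate(probs):
        raw = id2label[i]                 # model-specific label, e.g. "ang"
        target = ser_to_unified.get(raw)  # assumed mapping, by analogy with TEXT_TO_UNIFIED
        if target is not None:
            unified[target] += float(p)
    return unified
```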
```diff
@@ -191,8 +204,7 @@ if not is_recording and st.session_state.audio_buffer:
             dominant_ser_emotion = max(unified_ser_scores, key=unified_ser_scores.get)
         except Exception as e:
             dominant_ser_emotion = f"Speech analysis failed: {e}"
-
-    st.sidebar.write(dominant_ser_emotion.capitalize())
+        ser_placeholder.write(dominant_ser_emotion.capitalize())

     # Clean up the temporary file
     os.unlink(tmp_audio_file.name)
```
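For context on `tmp_audio_file`: the usual pattern (not fully visible in this diff) is to write the buffered audio to a named temp file so Whisper can read it from a path, then delete it once all three analyses are done. A sketch under that assumption:

```python
import os
import tempfile

import numpy as np
import soundfile as sf

audio_data = np.zeros(16000, dtype=np.float32)  # stand-in for the webrtc buffer
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio_file:
    sf.write(tmp_audio_file.name, audio_data, 16000)  # 16 kHz mono WAV

# ...whisper_model.transcribe(tmp_audio_file.name), then text + SER analysis...

os.unlink(tmp_audio_file.name)  # clean up the temporary file
```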
requirements.txt CHANGED

```diff
@@ -11,4 +11,5 @@ torchaudio
 openai-whisper
 soundfile
 librosa
-scipy
+scipy
+streamlit-autorefresh
```
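`streamlit-autorefresh` is presumably what drives the "updated every 5 seconds" behavior mentioned in the sidebar: it re-runs the script on a timer so the facial-emotion metric refreshes without user interaction. Typical usage:

```python
from streamlit_autorefresh import st_autorefresh

# Re-run the script every 5000 ms; the key keeps the widget stable across reruns.
st_autorefresh(interval=5000, key="emotion_refresh")
```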