Spaces:

ashutoshroy02
/

research-emotion-wave2vec

Sleeping

App Files Files Community

ashutoshroy02 committed on Dec 14, 2025

Commit

8fed6be

verified ·

1 Parent(s): 0f74a15

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +75 -15

src/streamlit_app.py CHANGED Viewed

@@ -2,8 +2,13 @@ import streamlit as st
 import torch
 import librosa
 import numpy as np
 from transformers import Wav2Vec2Processor
 from huggingface_hub import hf_hub_download
 # -------------------------
 # CONFIG
@@ -13,10 +18,10 @@ MODEL_FILE = "model.pt"
 st.set_page_config(page_title="Emotion & Stress Detection", layout="centered")
 st.title("🎤 Emotion & Stress Detection")
-st.write("Upload or record audio to detect emotion and stress")
 # -------------------------
-# LOAD MODEL
 # -------------------------
 @st.cache_resource
 def load_model():
@@ -31,9 +36,10 @@ def load_model():
     id2emotion = {v: k for k, v in emotion2id.items()}
     num_emotions = checkpoint["num_emotions"]
-    from model import Wav2Vec2_LSTM_MultiTask
-    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base")
     model = Wav2Vec2_LSTM_MultiTask(num_emotions)
     model.load_state_dict(checkpoint["model_state"])
     model.eval()
@@ -41,23 +47,26 @@ def load_model():
     return model, processor, id2emotion
-# -------------------------
-# LOAD MODEL ON START
-# -------------------------
 with st.spinner("Loading model..."):
     model, processor, id2emotion = load_model()
 st.success("Model loaded successfully")
 # -------------------------
-# AUDIO INPUT
 # -------------------------
-uploaded_file = st.file_uploader("Upload a WAV file", type=["wav"])
-if uploaded_file is not None:
-    st.audio(uploaded_file)
-    audio, _ = librosa.load(uploaded_file, sr=16000)
     inputs = processor(
         audio,
@@ -71,6 +80,57 @@ if uploaded_file is not None:
     emotion = id2emotion[emotion_logits.argmax(dim=1).item()]
     stress = round(stress_pred.item(), 3)
-    st.subheader("🧠 Prediction")
-    st.write(f"**Emotion:** {emotion}")
-    st.write(f"**Stress Level:** {stress}")

 import torch
 import librosa
 import numpy as np
+import tempfile
 from transformers import Wav2Vec2Processor
 from huggingface_hub import hf_hub_download
+from pydub import AudioSegment
+from streamlit_mic_recorder import mic_recorder
+from model import Wav2Vec2_LSTM_MultiTask
 # -------------------------
 # CONFIG
 st.set_page_config(page_title="Emotion & Stress Detection", layout="centered")
 st.title("🎤 Emotion & Stress Detection")
+st.write("Record live audio or upload any audio file")
 # -------------------------
+# LOAD MODEL (CACHED)
 # -------------------------
 @st.cache_resource
 def load_model():
     id2emotion = {v: k for k, v in emotion2id.items()}
     num_emotions = checkpoint["num_emotions"]
+    processor = Wav2Vec2Processor.from_pretrained(
+        "facebook/wav2vec2-base"
+    )
     model = Wav2Vec2_LSTM_MultiTask(num_emotions)
     model.load_state_dict(checkpoint["model_state"])
     model.eval()
     return model, processor, id2emotion
 with st.spinner("Loading model..."):
     model, processor, id2emotion = load_model()
 st.success("Model loaded successfully")
 # -------------------------
+# AUDIO UTILITIES
 # -------------------------
+def convert_to_wav(audio_bytes):
+    """Convert any audio format to WAV (16kHz, mono)"""
+    audio = AudioSegment.from_file(audio_bytes)
+    audio = audio.set_channels(1).set_frame_rate(16000)
+    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
+    audio.export(tmp.name, format="wav")
+    return tmp.name
+def predict_from_audio(audio_path):
+    audio, _ = librosa.load(audio_path, sr=16000)
     inputs = processor(
         audio,
     emotion = id2emotion[emotion_logits.argmax(dim=1).item()]
     stress = round(stress_pred.item(), 3)
+    return emotion, stress
+# -------------------------
+# UI TABS
+# -------------------------
+# Two input paths (live recording, file upload) that both end in
+# predict_from_audio() and render the same emotion/stress summary.
+tab1, tab2 = st.tabs(["🎙️ Live Record", "📁 Upload Audio"])
+# =========================
+# 🎙️ LIVE RECORD TAB
+# =========================
+with tab1:
+    st.subheader("Record Live Audio")
+    # NOTE(review): just_once=True presumably yields the recording only on
+    # the rerun right after it finishes — confirm against the
+    # streamlit_mic_recorder documentation.
+    audio_data = mic_recorder(
+        start_prompt="🎙️ Start Recording",
+        stop_prompt="⏹️ Stop Recording",
+        just_once=True,
+        use_container_width=True
+    )
+    if audio_data:
+        st.audio(audio_data["bytes"])
+        # The recorded bytes are written unchanged to a ".wav"-suffixed
+        # temp file; this assumes the recorder output is something
+        # librosa.load can decode — TODO confirm the recorder's format.
+        # The file is created with delete=False and is not removed in the
+        # code visible here.
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+            f.write(audio_data["bytes"])
+            wav_path = f.name
+        emotion, stress = predict_from_audio(wav_path)
+        st.subheader("🧠 Prediction")
+        st.write(f"**Emotion:** {emotion}")
+        st.write(f"**Stress Level:** {stress}")
+# =========================
+# 📁 UPLOAD FILE TAB
+# =========================
+with tab2:
+    st.subheader("Upload Audio File")
+    uploaded_file = st.file_uploader(
+        "Upload audio (.wav, .mp3, .m4a, .flac)",
+        type=["wav", "mp3", "m4a", "flac"]
+    )
+    if uploaded_file:
+        st.audio(uploaded_file)
+        # Unlike the live tab, uploads go through convert_to_wav(), which
+        # re-encodes to 16 kHz mono WAV and returns a temp-file path. That
+        # temp file is likewise not removed in the code visible here.
+        wav_path = convert_to_wav(uploaded_file)
+        emotion, stress = predict_from_audio(wav_path)
+        # Same three-line result rendering as the live tab (duplicated).
+        st.subheader("🧠 Prediction")
+        st.write(f"**Emotion:** {emotion}")
+        st.write(f"**Stress Level:** {stress}")