Voice-Emotion

Sleeping

App Files Files Community

aniruddhakumarpaul committed on Dec 21, 2025

Commit

6650f28

1 Parent(s): cee4eb8

Save local changes before LFS migration

Browse files

Files changed (8) hide show

backend/__init__.py +0 -0
backend/model_manager.py +50 -17
backend/reproduce_error.py +0 -31
backend/requirements.txt +8 -13
backend/test_training.py +0 -37
deploy_to_hf.py +24 -0
encoder.pkl +0 -0
streamlit_app.py +364 -0

backend/__init__.py ADDED Viewed

File without changes

backend/model_manager.py CHANGED Viewed

@@ -12,6 +12,7 @@ from tensorflow.keras.callbacks import EarlyStopping
 import pickle
 import joblib
 from joblib import Parallel, delayed
 # Parameters
 MAX_PAD_LEN = 174
@@ -30,10 +31,38 @@ def extract_features_static(file_path, duration=DURATION, sample_rate=SAMPLE_RAT
         # Normalize path
         file_path = os.path.normpath(os.path.abspath(file_path))
-        # Load audio
-        # res_type='kaiser_fast' is faster
-        audio, sr = librosa.load(file_path, res_type='kaiser_fast', duration=duration, sr=sample_rate)
         mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
         pad_width = max_pad_len - mfccs.shape[1]
         if pad_width > 0:
@@ -45,6 +74,21 @@ def extract_features_static(file_path, duration=DURATION, sample_rate=SAMPLE_RAT
         print(f"Error extracting features from {file_path}: {e}")
         return None
 class EmotionClassifier:
     def __init__(self):
         self.model = None
@@ -108,22 +152,11 @@ class EmotionClassifier:
                 log("DEBUG: No .wav files found in os.walk")
                 raise ValueError("No .wav files found for training.")
-            from .utils import get_label_from_filename
             log(f"Processing {len(files)} files for training utilizing parallel processing...")
-            # Helper to process a single file and return (features, label)
-            def process_file(file):
-                lbl = get_label_from_filename(file)
-                if lbl:
-                    feat = extract_features_static(file)
-                    if feat is not None:
-                        return (feat, lbl)
-                return None
             # Run in parallel
-            # n_jobs=-1 uses all available cores
-            results = Parallel(n_jobs=-1, verbose=5)(delayed(process_file)(f) for f in files)
             # Filter None results
             valid_results = [r for r in results if r is not None]

 import pickle
 import joblib
 from joblib import Parallel, delayed
+from .utils import get_label_from_filename
 # Parameters
 MAX_PAD_LEN = 174
         # Normalize path
         file_path = os.path.normpath(os.path.abspath(file_path))
+        audio = None
+        sr = sample_rate
+        # Try loading with librosa first
+        try:
+            audio, sr = librosa.load(file_path, res_type='kaiser_fast', duration=duration, sr=sample_rate)
+        except Exception as e_librosa:
+            print(f"Librosa load failed for {file_path}: {e_librosa}. Trying soundfile...")
+            try:
+                import soundfile as sf
+                audio, file_sr = sf.read(file_path)
+                # If we read successfully, we might need to resample or crop/pad
+                if len(audio.shape) > 1:
+                    audio = audio[:, 0] # Take first channel if stereo
+                # Simple resampling if needed (though librosa is better at this, we can try to use librosa.resample if load failed but resample works)
+                if file_sr != sample_rate:
+                    audio = librosa.resample(audio, orig_sr=file_sr, target_sr=sample_rate)
+                # Manual duration crop
+                max_samples = int(duration * sample_rate)
+                if len(audio) > max_samples:
+                    audio = audio[:max_samples]
+                sr = sample_rate
+            except Exception as e_sf:
+                print(f"Soundfile fallback also failed for {file_path}: {e_sf}")
+                return None
+        if audio is None:
+             return None
         mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
         pad_width = max_pad_len - mfccs.shape[1]
         if pad_width > 0:
         print(f"Error extracting features from {file_path}: {e}")
         return None
+def process_file(file):
+    """
+    Turn one audio file into a (features, label) training pair.
+    Kept at module level so joblib can use it on Windows.
+    Returns None when the file has no label, yields no features, or an
+    exception is raised while processing it (the error is printed).
+    """
+    try:
+        label = get_label_from_filename(file)
+        if not label:
+            return None
+        features = extract_features_static(file)
+    except Exception as e:
+        print(f"Error processing {file}: {e}")
+        return None
+    return None if features is None else (features, label)
 class EmotionClassifier:
     def __init__(self):
         self.model = None
                 log("DEBUG: No .wav files found in os.walk")
                 raise ValueError("No .wav files found for training.")
             log(f"Processing {len(files)} files for training utilizing parallel processing...")
             # Run in parallel
+            # n_jobs=1 processes the files sequentially in this process, which avoids Windows multiprocessing issues
+            results = Parallel(n_jobs=1, verbose=5)(delayed(process_file)(f) for f in files)
             # Filter None results
             valid_results = [r for r in results if r is not None]

backend/reproduce_error.py DELETED Viewed

@@ -1,31 +0,0 @@
-import os
-import librosa
-import traceback
-import soundfile as sf
-# Path to the specific file
-file_path = r"c:\Users\aniru\OneDrive\Desktop\EDUVN\data sets\Actor_01\03-01-01-01-01-01-01.wav"
-print(f"Testing loading: {file_path}")
-print(f"Does file exist? {os.path.exists(file_path)}")
-try:
-    # Mimic parameters from model_manager.py
-    DURATION = 3
-    SAMPLE_RATE = 22050
-    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast', duration=DURATION, sr=SAMPLE_RATE)
-    print("Success! Audio loaded.")
-    print(f"Shape: {audio.shape}, Sample Rate: {sample_rate}")
-except Exception as e:
-    print("FAILED to load audio.")
-    print(f"Error: {e}")
-    traceback.print_exc()
-print("-" * 20)
-print("Testing soundfile directly...")
-try:
-    data, samplerate = sf.read(file_path)
-    print(f"Soundfile read success. Shape: {data.shape}, Rate: {samplerate}")
-except Exception as e:
-    print(f"Soundfile direct read failed: {e}")

backend/requirements.txt CHANGED Viewed

@@ -1,15 +1,13 @@
-fastapi
-uvicorn
-python-multipart
-tensorflow
-librosa
-pydub
 numpy
-scikit-learn
 soundfile
 joblib
 resampy
-SpeechRecognition
-transformers
-tf-keras
-torch

 numpy
+pandas
+librosa
 soundfile
 joblib
+# resampy backs librosa's res_type='kaiser_fast', which model_manager.py passes to librosa.load
 resampy
+tensorflow>=2.10.0
+scikit-learn
+pydub
+streamlit
+streamlit-audiorecorder
+plotly

backend/test_training.py DELETED Viewed

@@ -1,37 +0,0 @@
-import os
-import sys
-# When running as a module 'backend.test_training', we can use relative imports
-try:
-    from .model_manager import EmotionClassifier
-except ImportError:
-    # Fallback if run as script, but this will break relative imports in model_manager
-    # So we must fix path to import 'backend.model_manager'
-    sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
-    from backend.model_manager import EmotionClassifier
-def test_training():
-    print("Initializing Classifier...")
-    classifier = EmotionClassifier()
-    # Correct data path assuming we are in backend/
-    base_dir = os.path.dirname(os.path.abspath(__file__))
-    data_dir = os.path.join(base_dir, "../data sets")
-    print(f"Data directory: {data_dir}")
-    def log_callback(msg):
-        print(f"[TRAIN] {msg}")
-    print("Starting training test...")
-    try:
-        classifier.train_model(data_dir, log_callback=log_callback)
-        print("Training test passed!")
-    except Exception as e:
-        print(f"Training test failed: {e}")
-        import traceback
-        traceback.print_exc()
-if __name__ == "__main__":
-    test_training()

deploy_to_hf.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import os
+from huggingface_hub import HfApi
+def deploy():
+    """Upload the current working directory to the Hugging Face Space.
+
+    The access token is read from the HF_TOKEN environment variable.
+
+    Raises:
+        ValueError: if HF_TOKEN is unset or empty.
+    """
+    hf_token = os.environ.get("HF_TOKEN")
+    if not hf_token:
+        raise ValueError("HF_TOKEN environment variable is not set")
+    # Leave out VCS metadata, CI config, the virtualenv and bytecode caches.
+    skipped = [".git/*", ".github/*", "venv/*", "__pycache__/*", "*.pyc"]
+    client = HfApi()
+    print("Starting upload to Hugging Face Space...")
+    client.upload_folder(
+        repo_id="aniruddhakumarpaul/Vocal-Vibe",
+        repo_type="space",
+        folder_path=".",
+        ignore_patterns=skipped,
+        token=hf_token,
+    )
+    print("Upload complete!")
+if __name__ == "__main__":
+    deploy()

encoder.pkl CHANGED Viewed

Binary files a/encoder.pkl and b/encoder.pkl differ

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,364 @@

+import streamlit as st
+import os
+import numpy as np
+import plotly.graph_objects as go
+import time
+from backend.model_manager import EmotionClassifier
+from audiorecorder import audiorecorder
+# Page Config
+st.set_page_config(
+    page_title="VocalVibe - Emotion Recognition",
+    page_icon="🎙️",
+    layout="wide",
+    initial_sidebar_state="collapsed"
+)
+# Load External CSS
+def load_css(file_name):
+    """Inject the contents of a CSS file into the page inside a <style> tag.
+
+    Args:
+        file_name: Path of the stylesheet to read.
+    """
+    # Decode as UTF-8 explicitly: the platform default encoding (for example
+    # cp1252 on Windows) can mis-decode or reject non-ASCII characters.
+    with open(file_name, encoding="utf-8") as f:
+        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
+# Inject Custom CSS to override Streamlit defaults and apply original styles
+st.markdown("""
+<style>
+    /* 1. GLOBAL RESETS */
+    .stApp {
+        background-color: #0f172a; /* Dark Blue */
+    }
+    /* 2. BACKGROUND BLOBS ANIMATION */
+    @keyframes float {
+        0%, 100% { transform: translate(0, 0); }
+        50% { transform: translate(30px, -30px); }
+    }
+    @keyframes pulse-glow {
+        0%, 100% { box-shadow: 0 0 20px rgba(99, 102, 241, 0.5); }
+        50% { box-shadow: 0 0 40px rgba(99, 102, 241, 0.8); }
+    }
+    /* Hide Streamlit Default Chrome */
+    #MainMenu, footer, header {visibility: hidden;}
+    /* 3. MAIN GLASS CARD (The .block-container) */
+    .block-container {
+        background: rgba(255, 255, 255, 0.03);
+        backdrop-filter: blur(20px);
+        -webkit-backdrop-filter: blur(20px);
+        border: 1px solid rgba(255, 255, 255, 0.08); /* Faint border */
+        border-radius: 32px; /* Smoother corners */
+        padding: 3rem 2rem !important;
+        /* Layout Constraints */
+        max-width: 480px; /* Tighter width like the image */
+        margin: auto;
+        margin-top: 8vh;
+        box-shadow: 0 20px 50px rgba(0, 0, 0, 0.5); /* Deep shadow */
+    }
+    /* 4. TYPOGRAPHY */
+    h1 {
+        text-align: center;
+        font-family: 'Outfit', sans-serif !important;
+        font-size: 3.2rem !important;
+        color: white !important;
+        margin-bottom: 0px !important;
+        padding: 0 !important;
+        letter-spacing: -1px;
+    }
+    p.subtitle {
+        text-align: center;
+        font-family: 'Outfit', sans-serif !important;
+        color: #94a3b8 !important; /* Muted blue-grey */
+        font-size: 0.95rem !important;
+        margin-top: 5px !important;
+        margin-bottom: 2rem !important;
+        font-weight: 300;
+    }
+    .highlight {
+        background: linear-gradient(135deg, #a855f7 0%, #ec4899 100%); /* Purple to Pink */
+        background-clip: text;
+        -webkit-background-clip: text;
+        -webkit-text-fill-color: transparent;
+        font-weight: 700;
+    }
+    /* 5. TRAIN MODEL BUTTON (Small Pill) */
+    /* Target only the first button (Train) using nth-of-type or specific positioning logic if possible.
+       Since it's the first button in the DOM usually... */
+    div.stButton > button {
+        background: rgba(255, 255, 255, 0.05);
+        color: #818cf8; /* Light Indigo */
+        border: 1px solid rgba(99, 102, 241, 0.3);
+        border-radius: 9999px; /* Full Pill */
+        padding: 0.4rem 1.2rem;
+        font-size: 0.75rem;
+        font-family: 'Outfit', sans-serif;
+        text-transform: uppercase;
+        letter-spacing: 1px;
+        margin: 0 auto;
+        display: block;
+    }
+    div.stButton > button:hover {
+        background: rgba(99, 102, 241, 0.2);
+        border-color: #818cf8;
+        color: white;
+    }
+    /* 6. RECORDER AREA */
+    /* Since we can't fully style the iframe button, we create a visual wrapper 'look' around it or center it perfectly */
+    .recorder-wrapper {
+        display: flex;
+        justify-content: center;
+        align-items: center;
+        margin: 2rem 0;
+        flex-direction: column;
+    }
+    .recorder-label {
+        color: white;
+        font-family: 'Outfit', sans-serif;
+        font-size: 0.9rem;
+        margin-top: 1rem;
+    }
+    /* 7. DIVIDER */
+    .divider-box {
+        display: flex;
+        align-items: center;
+        color: #475569;
+        font-family: 'Outfit', sans-serif;
+        font-size: 0.8rem;
+        margin: 2.5rem 0;
+    }
+    .divider-line {
+        flex-grow: 1;
+        height: 1px;
+        background-color: #334155;
+    }
+    .divider-text {
+        margin: 0 1rem;
+    }
+    /* 8. UPLOAD BOX (Dashed) */
+    [data-testid='stFileUploader'] {
+        border: 2px dashed #334155; /* Darker dash */
+        border-radius: 20px;
+        padding: 1.5rem;
+        background-color: rgba(15, 23, 42, 0.3); /* Dark fill */
+    }
+    [data-testid='stFileUploader'] section {
+        padding: 0;
+    }
+    /* Hide the 'Drag and drop file here' default text via font-size 0 hack?
+       No, that hides the file name too on upload.
+       We rely on adding our custom specific label. */
+    /* 9. WATERMARK Pill */
+    .watermark-pill {
+        position: fixed;
+        bottom: 30px;
+        right: 30px;
+        background: rgba(15, 23, 42, 0.6);
+        border: 1px solid rgba(255,255,255,0.1);
+        padding: 8px 16px;
+        border-radius: 20px;
+        color: #cbd5e1;
+        font-family: 'Outfit', sans-serif;
+        font-size: 0.8rem;
+        backdrop-filter: blur(4px);
+    }
+</style>
+""", unsafe_allow_html=True)
+# Google Fonts
+st.markdown('<link href="https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;700&display=swap" rel="stylesheet">', unsafe_allow_html=True)
+# FontAwesome
+st.markdown('<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">', unsafe_allow_html=True)
+# Initialize Classifier
+@st.cache_resource
+def get_classifier():
+    """Construct the EmotionClassifier; wrapped in st.cache_resource so it is built once and reused."""
+    emotion_classifier = EmotionClassifier()
+    return emotion_classifier
+# Build the shared classifier up front; if construction fails, show the error
+# on the page and stop the script so nothing below runs without a classifier.
+try:
+    classifier = get_classifier()
+except Exception as e:
+    st.error(f"Failed to load model: {e}")
+    st.stop()
+def main():
+    """Render the VocalVibe page: header, recorder, uploader and analysis results.
+
+    Audio comes either from the in-browser recorder or from an uploaded WAV
+    file; when both are present the upload is the one analysed. Pressing
+    "Analyze Emotion" passes the audio to ``classifier.predict_emotion`` and
+    shows the predicted label, its confidence and the per-class distribution.
+    """
+    # Background Blobs (Fixed Position)
+    st.markdown("""
+    <div style="position: fixed; top: -100px; left: -100px; width: 500px; height: 500px; background: radial-gradient(circle, rgba(99,102,241,0.4) 0%, rgba(0,0,0,0) 70%); z-index: -1; animation: float 8s ease-in-out infinite;"></div>
+    <div style="position: fixed; bottom: -100px; right: -100px; width: 600px; height: 600px; background: radial-gradient(circle, rgba(168,85,247,0.3) 0%, rgba(0,0,0,0) 70%); z-index: -1; animation: float 12s ease-in-out infinite alternate;"></div>
+    """, unsafe_allow_html=True)
+    # --- CONTENT INSIDE GLASS CARD ---
+    # 1. Header
+    st.markdown("""
+        <h1>Vocal<span class="highlight">Vibe</span></h1>
+        <p class="subtitle">AI-Powered Speech Emotion Recognition</p>
+    """, unsafe_allow_html=True)
+    # 2. Train Model Button (Pill)
+    # Only the middle of three equal columns is used, which centers the button
+    # even if the CSS margin rule does not apply.
+    _, col_train, _ = st.columns([1, 1, 1])
+    with col_train:
+        if st.button("Training Mode 🔒"):
+            st.toast("Admin access required for training.", icon="⚠️")
+    # 3. Recorder Section (The Big Feature)
+    st.markdown("<br>", unsafe_allow_html=True)
+    # The recorder sits in the wider middle column so it appears centered.
+    _, col_recorder, _ = st.columns([1, 1.5, 1])
+    with col_recorder:
+        # User requested "Click & Hold". The lib allows 'Click to record'.
+        # We can't change the component's internal logic, but we can match the text.
+        audio = audiorecorder("Click & Hold to Record", "Recording...")
+    # 4. Divider
+    st.markdown("""
+    <div class="divider-box">
+        <div class="divider-line"></div>
+        <div class="divider-text">OR</div>
+        <div class="divider-line"></div>
+    </div>
+    """, unsafe_allow_html=True)
+    # 5. Upload Section
+    # Custom Icon Header for the Upload Box
+    st.markdown("""
+    <div style="text-align: center; color: #cbd5e1; margin-bottom: 5px; font-size: 1.2rem;">
+        <i class="fa-solid fa-cloud-arrow-up"></i>
+    </div>
+    """, unsafe_allow_html=True)
+    uploaded_file = st.file_uploader("Upload Audio (WAV)", type=['wav'], label_visibility="collapsed")
+    if not uploaded_file:
+        st.markdown("""
+         <div style="text-align: center; color: #64748b; font-size: 0.8rem; margin-top: -10px;">
+            Drag & Drop Audio File <br>
+            <span style="font-size: 0.7rem; opacity: 0.7;">Limit 200MB per file • WAV</span>
+         </div>
+         """, unsafe_allow_html=True)
+    # --- PROCESSING ---
+    audio_file = None
+    source_type = ""
+    if len(audio) > 0:
+        # Persist the recording to disk so it can be played back, analysed and
+        # later copied by save_feedback().
+        ts = int(time.time())
+        temp_filename = f"temp_rec_{ts}.wav"
+        audio.export(temp_filename, format="wav")
+        audio_file = temp_filename
+        source_type = "recording"
+    if uploaded_file is not None:
+        # An upload overrides a recording made in the same run.
+        audio_file = uploaded_file
+        source_type = "upload"
+    if audio_file:
+        st.markdown("---")
+        st.audio(audio_file)
+        if st.button("Analyze Emotion", type="primary", use_container_width=True):
+            with st.spinner("Processing Audio..."):
+                try:
+                    is_upload = source_type == "upload"
+                    # Uploads are copied to a scratch file because the classifier
+                    # is given a path; recordings are already on disk.
+                    target_path = "temp_analysis.wav" if is_upload else audio_file
+                    try:
+                        if is_upload:
+                            with open(target_path, "wb") as f:
+                                f.write(audio_file.getbuffer())
+                        result = classifier.predict_emotion(target_path)
+                    finally:
+                        # Remove the scratch copy even when writing or prediction
+                        # fails. A recording's own file is left alone because
+                        # save_feedback() copies it by path.
+                        if is_upload and os.path.exists(target_path):
+                            try:
+                                os.remove(target_path)
+                            except OSError:
+                                # Best-effort cleanup: a leftover scratch file must
+                                # not turn a finished analysis into an error.
+                                pass
+                    # Result Card
+                    st.markdown(f"""
+                    <div style="background: rgba(255,255,255,0.05); border: 1px solid rgba(255,255,255,0.1); border-radius: 24px; padding: 2rem; text-align: center; margin-top: 2rem;">
+                        <div style="font-size: 4rem; margin-bottom: 10px; filter: drop-shadow(0 0 20px rgba(255,255,255,0.3));">{get_emoji(result['label'])}</div>
+                        <h2 style="font-family: 'Outfit'; color: white; margin: 0; font-size: 2rem; letter-spacing: 1px;">{result['label'].title()}</h2>
+                        <div style="display: inline-block; background: rgba(99, 102, 241, 0.2); color: #818cf8; padding: 4px 12px; border-radius: 20px; font-size: 0.8rem; margin-top: 10px;">
+                            Confidence: {result['confidence']:.1%}
+                        </div>
+                    </div>
+                    """, unsafe_allow_html=True)
+                    # Plot: one bar per class, with the predicted class highlighted.
+                    dist = result['distribution']
+                    labels = list(dist.keys())
+                    values = list(dist.values())
+                    fig = go.Figure(data=[go.Bar(
+                        x=labels, y=values,
+                        marker_color=['#a855f7' if l == result['label'] else '#334155' for l in labels],
+                        text=[f"{v:.0%}" for v in values],
+                        textposition='auto',
+                    )])
+                    fig.update_layout(
+                        paper_bgcolor='rgba(0,0,0,0)',
+                        plot_bgcolor='rgba(0,0,0,0)',
+                        font=dict(color='#94a3b8', family="Outfit"),
+                        height=220,
+                        margin=dict(l=10, r=10, t=10, b=10),
+                        yaxis=dict(showgrid=False, showticklabels=False),
+                        xaxis=dict(showgrid=False)
+                    )
+                    st.plotly_chart(fig, use_container_width=True)
+                    # Feedback
+                    # NOTE(review): this button is nested inside the "Analyze Emotion"
+                    # branch. Clicking it presumably reruns the script with the outer
+                    # button reading False, so this branch may never execute — confirm,
+                    # and if so keep the result in st.session_state.
+                    with st.expander("📝 Provide Feedback"):
+                        correct_label = st.selectbox("Actual Emotion", options=classifier.le.classes_)
+                        if st.button("Submit Feedback"):
+                            save_feedback(audio_file, correct_label, source_type)
+                            st.success("Thank you for your feedback!")
+                except Exception as e:
+                    st.error(f"Analysis Failed: {e}")
+    # WATERMARK
+    st.markdown("""
+    <div class="watermark-pill">
+        Made by : Aniruddha Paul
+    </div>
+    """, unsafe_allow_html=True)
+def get_emoji(label):
+    """Return the emoji for an emotion label, or '🤔' when the label is not known."""
+    by_label = dict(
+        anger='😠', neutral='😐', sadness='😢', happiness='😊',
+        fear='😨', disgust='🤢', surprise='😲', calm='😌',
+    )
+    if label in by_label:
+        return by_label[label]
+    return '🤔'
+def save_feedback(audio_source, label, source_type):
+    """Store a user-labelled sample as a WAV file under "data sets/user_feedback".
+
+    Args:
+        audio_source: An uploaded-file object exposing getbuffer() when
+            source_type is "upload"; otherwise a path to an existing file.
+        label: Emotion chosen by the user; it becomes part of the file name.
+        source_type: "upload" to write the buffer, anything else to copy the path.
+
+    Any exception is reported on the page with st.error instead of being raised.
+    """
+    try:
+        out_dir = os.path.join("data sets", "user_feedback")
+        os.makedirs(out_dir, exist_ok=True)
+        # Second-resolution timestamp keeps successive submissions apart.
+        destination = os.path.join(out_dir, f"feedback_{label}_{int(time.time())}.wav")
+        if source_type != "upload":
+            import shutil
+            shutil.copy(audio_source, destination)
+            return
+        with open(destination, "wb") as out:
+            out.write(audio_source.getbuffer())
+    except Exception as e:
+        st.error(f"Could not save feedback: {e}")
+if __name__ == "__main__":
+    main()