aniruddhakumarpaul commited on
Commit
b0f32b1
·
0 Parent(s):

Initial commit - VocalVibe App

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.h5 filter=lfs diff=lfs merge=lfs -text
2
+ model.h5 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual Environment
7
+ venv/
8
+ env/
9
+ .env
10
+
11
+ # Data
12
+ data sets/
13
+ data/
14
+ *.wav
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+
20
+ # OS
21
+ Thumbs.db
22
+ Desktop.ini
README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VocalVibe - Emotion Recognition App
2
+
3
+ ## How to Run
4
+ 1. Open a terminal in the project directory.
5
+ 2. Run the startup script:
6
+ ```powershell
7
+ .\run_app.ps1
8
+ ```
9
+ *Note: If you encounter permission errors, use:*
10
+ ```powershell
11
+ PowerShell.exe -ExecutionPolicy Bypass -File .\run_app.ps1
12
+ ```
13
+ 3. The application will start at [http://localhost:8000](http://localhost:8000).
14
+
15
+ ## How to Stop
16
+ 1. Go to the terminal window where the server is running.
17
+ 2. Press **Ctrl + C** to stop the process.
18
+ 3. If prompted "Terminate batch job (Y/N)?", type `Y` and press Enter.
19
+
20
+ ## Project Structure
21
+ - `backend/`: FastAPI server and emotion recognition logic.
22
+ - `frontend/`: HTML/CSS/JS user interface.
23
+ - `data/`: Directory for storing audio samples and feedback.
backend/evaluate_model.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+ import numpy as np
5
+ import pickle
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.metrics import classification_report, accuracy_score, f1_score
8
+ from tensorflow.keras.models import load_model
9
+ from tensorflow.keras.utils import to_categorical
10
+
11
+ # Data Paths
12
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__)) # This is backend/
13
+ ROOT_DIR = os.path.dirname(BASE_DIR) # This is project root
14
+ DATA_DIR = os.path.join(ROOT_DIR, "data sets")
15
+ MODEL_PATH = os.path.join(ROOT_DIR, "model.h5")
16
+ ENCODER_PATH = os.path.join(ROOT_DIR, "encoder.pkl")
17
+ FEATURES_PATH = os.path.join(DATA_DIR, "features_cache.npy")
18
+ LABELS_PATH = os.path.join(DATA_DIR, "labels_cache.npy")
19
+
def evaluate():
    """Evaluate the saved model on the cached feature set.

    Reloads the cached features/labels, re-creates the exact train/test
    split used during training (same random_state and stratify), then
    prints accuracy, weighted F1 and a per-class report.
    """
    print("Loading data from cache...")
    if not os.path.exists(FEATURES_PATH) or not os.path.exists(LABELS_PATH):
        print("Error: Cached features not found. Please train the model first.")
        return

    X = np.load(FEATURES_PATH)
    y = np.load(LABELS_PATH)

    print(f"Loaded {len(X)} samples.")

    # Guard against missing artifacts before trying to open them.
    if not os.path.exists(ENCODER_PATH) or not os.path.exists(MODEL_PATH):
        print("Error: Model artifacts not found. Please train the model first.")
        return

    print("Loading Label Encoder...")
    with open(ENCODER_PATH, 'rb') as f:
        le = pickle.load(f)

    # Encode labels. Use transform (NOT fit_transform): the encoder was
    # already fitted during training, and refitting here could silently
    # remap class indices if the cached labels ever differ.
    y_encoded = to_categorical(le.transform(y))

    # Split (same random_state/stratify as training => same test set)
    print("Splitting data (random_state=42)...")
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y)

    print(f"Test Set Size: {len(X_test)}")

    print("Loading Model...")
    model = load_model(MODEL_PATH)

    print("Evaluating...")
    # Predict class probabilities, then collapse to hard label indices
    y_pred_prob = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_prob, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Headline metrics
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')

    print("\n" + "="*30)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score (Weighted): {f1:.4f}")
    print("="*30 + "\n")

    # Per-class precision/recall/F1
    target_names = le.classes_
    print("Classification Report:")
    print(classification_report(y_true, y_pred, target_names=target_names))

if __name__ == "__main__":
    evaluate()
backend/main.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import shutil
4
+ from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
5
+ from fastapi.staticfiles import StaticFiles
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from pydantic import BaseModel
8
+ from .model_manager import EmotionClassifier
9
+ from .nlp_manager import NLPManager
10
+ from .utils import convert_to_wav
11
+
12
+ # ... imports ...
13
+
14
+ app = FastAPI(title="Speech Emotion Recognition API")
15
+
16
+ # ... middleware ...
17
+
18
+ # Initialize Models
19
+ print("Loading Audio Model...")
20
+ classifier = EmotionClassifier()
21
+ print("Loading NLP Model...")
22
+ nlp_manager = NLPManager()
23
+
24
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25
+ DATA_DIR = os.path.join(BASE_DIR, "../data sets")
26
+ if not os.path.exists(DATA_DIR):
27
+ os.makedirs(DATA_DIR)
28
+
29
+ # Mount Frontend: the catch-all "/" static mount must be registered after
+ # every API route, so the actual mount happens at the end of this file.
31
+
32
+
33
+
def fuse_predictions(audio_dist, text_emotion, audio_confidence):
    """
    Combines Acoustic (Audio) and Semantic (Text) emotion probabilities.

    Args:
        audio_dist: dict {label: probability} from the audio classifier.
        text_emotion: dict with an 'all_scores' list of {'label', 'score'}
            items from the NLP classifier, or None when no text was found.
        audio_confidence: top-class probability of the audio prediction,
            used to weight the two modalities dynamically.

    Returns:
        dict with 'label', 'confidence', 'is_fusion' and (when fused)
        'distribution' holding the blended per-label scores.
    """
    # Map NLP label vocabulary onto the audio vocabulary (audio is the
    # master list; text labels with no audio counterpart are ignored).
    LABEL_MAP = {
        'joy': 'happiness',
        'sad': 'sadness',
        'angry': 'anger',
        'surprised': 'surprise'
    }

    # No usable text signal -> fall back to the pure audio result.
    if not text_emotion or not text_emotion.get('all_scores'):
        return {"label": max(audio_dist, key=audio_dist.get), "confidence": audio_confidence, "is_fusion": False}

    # Dynamic weighting: trust whichever modality is more certain.
    w_audio = 0.6
    w_text = 0.4
    if audio_confidence > 0.80:
        # Audio is strong -> trust audio (tone dominates)
        w_audio = 0.9
        w_text = 0.1
    elif audio_confidence < 0.55:
        # Audio is weak/uncertain -> trust text more
        w_audio = 0.4
        w_text = 0.6

    # Convert the text score list into {audio_label: score}
    text_scores_map = {}
    for item in text_emotion['all_scores']:
        l = LABEL_MAP.get(item['label'], item['label'])
        text_scores_map[l] = item['score']

    # Linear blend over the audio label set. Labels that exist only in
    # the audio vocabulary (e.g. 'calm') simply get a 0.0 text score.
    final_scores = {}
    for label in audio_dist:
        s_audio = audio_dist.get(label, 0.0)
        s_text = text_scores_map.get(label, 0.0)
        final_scores[label] = (s_audio * w_audio) + (s_text * w_text)

    # Pick the winner and re-normalize its confidence (nicer for the UI;
    # ranking is unaffected).
    winner_label = max(final_scores, key=final_scores.get)
    winner_score = final_scores[winner_label]
    total_score = sum(final_scores.values())
    normalized_confidence = winner_score / total_score if total_score > 0 else 0.0

    return {
        "label": winner_label,
        "confidence": normalized_confidence,
        "is_fusion": True,
        "distribution": final_scores
    }
class FeedbackRequest(BaseModel):
    # Request body for POST /feedback: the user's correction of a prediction.
    filename: str  # Temporary filename previously returned by /predict
    correct_emotion: str  # Label the user says is correct
    original_emotion: str  # Label the model originally predicted
@app.get("/api/health")
def health_check():
    """Liveness probe: confirms the API process is up and responding."""
    payload = {"status": "ok", "message": "SER API is running"}
    return payload
@app.post("/predict")
async def predict_audio(file: UploadFile = File(...)):
    """
    Receives an audio file, saves it temporarily, and returns the fused
    (audio + text) emotion prediction.

    The temp file is intentionally kept on disk so /feedback can later
    promote it into the dataset; its name is echoed as 'temp_filename'.
    """
    # Security fix: basename() strips any path components from the
    # client-supplied filename, so an upload named "../../x.wav" cannot
    # escape DATA_DIR.
    safe_name = os.path.basename(file.filename or "upload.wav")
    temp_filename = f"temp_{int(time.time())}_{safe_name}"
    temp_path = os.path.join(DATA_DIR, temp_filename) if False else os.path.join(DATA_DIR, temp_filename)

    with open(temp_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # Always convert to standard PCM WAV (required for SpeechRecognition):
    # browser MediaRecorder may send WebM bytes under a .wav extension.
    processed_wav_path = temp_path + "_processed.wav"
    if convert_to_wav(temp_path, processed_wav_path):
        # Conversion succeeded: drop the raw upload, keep the PCM file
        os.remove(temp_path)
        temp_path = processed_wav_path
    else:
        # Fallback: conversion failed (e.g. missing ffmpeg); try the raw
        # upload, but transcription may fail if it is not PCM WAV.
        print("Warning: Audio conversion failed. NLP might fail if format is not PCM WAV.")

    try:
        # 1. Acoustic analysis
        audio_result = classifier.predict_emotion(temp_path)

        # 2. Transcription + text emotion
        nlp_result = nlp_manager.process(temp_path)

        # 3. Hybrid fusion (best-effort: fall back to audio on any error)
        try:
            hybrid_result = fuse_predictions(
                audio_result["distribution"],
                nlp_result.get("text_emotion"),
                audio_result["confidence"]
            )
        except Exception as e:
            print(f"Fusion error: {e}")
            # Fallback to audio if fusion fails
            hybrid_result = {
                "label": audio_result["label"],
                "confidence": audio_result["confidence"],
                "is_fusion": False
            }

        return {
            "prediction": hybrid_result["label"],  # Main result
            "confidence": hybrid_result["confidence"],
            "audio_emotion": audio_result,
            "nlp_analysis": nlp_result,
            "hybrid_analysis": hybrid_result,
            "temp_filename": os.path.basename(temp_path)
        }
    except ValueError as e:
        if "Model not loaded" in str(e):
            # Cold start: no trained model yet. Return a neutral stub so
            # the UI can still collect labeled samples via /feedback.
            return {
                "prediction": "neutral",
                "confidence": 0.0,
                "audio_emotion": {"label": "neutral", "confidence": 0.0},
                "nlp_analysis": {"transcription": None, "text_emotion": None},
                "temp_filename": os.path.basename(temp_path),
                "is_fallback": True
            }
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        print(f"Prediction Error: {e}")
        raise HTTPException(status_code=500, detail="Prediction failed")
210
+ import glob
211
+
@app.post("/feedback")
async def submit_feedback(feedback: FeedbackRequest, background_tasks: BackgroundTasks):
    """
    User corrects the prediction. We rename/move the temp file to be permanently part of the dataset.
    Auto-retrains every 5000 user uploads.
    """
    # Security fix: basename() blocks path traversal. 'filename' comes
    # straight from the client, so "../../etc/x" must not resolve
    # outside DATA_DIR.
    temp_path = os.path.join(DATA_DIR, os.path.basename(feedback.filename))

    if not os.path.exists(temp_path):
        raise HTTPException(status_code=404, detail="Original temp file not found")

    # Promote to a permanent dataset file. The label is encoded in the
    # name: user_upload_{emotion}_{timestamp}.wav
    timestamp = int(time.time())
    new_filename = f"user_upload_{feedback.correct_emotion}_{timestamp}.wav"
    new_path = os.path.join(DATA_DIR, new_filename)

    try:
        shutil.move(temp_path, new_path)

        # Auto-retraining check: count user-contributed files.
        # glob is fine at this scale; a persisted counter would be needed
        # for millions of files.
        user_files = glob.glob(os.path.join(DATA_DIR, "user_upload_*.wav"))
        count = len(user_files)

        # Retrain every 5000 uploads
        if count > 0 and count % 5000 == 0:
            print(f"Auto-Retraining Threshold Reached ({count} files). Starting training...")
            global training_logs
            training_logs = []
            background_tasks.add_task(classifier.train_model, DATA_DIR, log_message)
            return {"status": "success", "saved_as": new_filename, "auto_train": True}

        return {"status": "success", "saved_as": new_filename}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to save feedback: {e}")
# Global training logs
# Shared buffer of progress lines produced by background training tasks;
# appended via log_message and read (never mutated) by GET /logs.
training_logs = []

def log_message(msg: str):
    # Callback handed to classifier.train_model to capture progress lines.
    training_logs.append(msg)
@app.get("/logs")
def get_logs(after: int = 0):
    """Return training log lines starting at index `after`, together
    with the index the client should poll from next time."""
    start = max(after, 0)
    return {"logs": training_logs[start:], "next_index": len(training_logs)}
class TrainRequest(BaseModel):
    # Request body for POST /train: admin password gating retraining.
    password: str
@app.post("/train")
async def trigger_training(request: TrainRequest, background_tasks: BackgroundTasks):
    """
    Triggers model retraining in the background. Requires Admin Password.

    The password is read from the ADMIN_PASSWORD environment variable,
    falling back to the historical default so existing deployments keep
    working. NOTE(security): set ADMIN_PASSWORD in production — keeping a
    credential hard-coded in source control is unsafe.
    """
    admin_password = os.environ.get("ADMIN_PASSWORD", "ani24680")
    if request.password != admin_password:
        raise HTTPException(status_code=401, detail="Unauthorized: Incorrect Admin Password")

    # Reset the shared log buffer so /logs only shows this run.
    global training_logs
    training_logs = []

    # Training runs in the background; the current model (if any) keeps
    # serving predictions until the new artifacts are saved.
    background_tasks.add_task(classifier.train_model, DATA_DIR, log_message)
    return {"status": "training_started", "message": "Model is training in background"}
# Mount Static Files (Frontend)
# Registered last so the API routes above take precedence over the
# catch-all "/" static mount.
FRONTEND_DIR = os.path.join(BASE_DIR, "../frontend")
app.mount("/", StaticFiles(directory=FRONTEND_DIR, html=True), name="static")
backend/model_manager.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import librosa
4
+ import tensorflow as tf
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import LabelEncoder
7
+ from tensorflow.keras.utils import to_categorical
8
+ from tensorflow.keras.models import Sequential, load_model
9
+ from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
10
+ from tensorflow.keras.optimizers import Adam
11
+ from tensorflow.keras.callbacks import EarlyStopping
12
+ import pickle
13
+ import joblib
14
+ from joblib import Parallel, delayed
15
+
16
+ # Parameters
17
+ MAX_PAD_LEN = 174
18
+ N_MFCC = 40
19
+ DURATION = 3
20
+ SAMPLE_RATE = 22050
21
+
22
+ MODEL_PATH = "model.h5"
23
+ ENCODER_PATH = "encoder.pkl"
24
+
def extract_features_static(file_path, duration=DURATION, sample_rate=SAMPLE_RATE, n_mfcc=N_MFCC, max_pad_len=MAX_PAD_LEN):
    """
    Static helper for feature extraction to allow pickling for joblib parallel processing.

    Loads up to `duration` seconds of audio, computes an MFCC matrix and
    pads/truncates its time axis to `max_pad_len` frames. Returns the
    transposed (time, coeff) array, or None when the file cannot be read.
    """
    try:
        # Normalize to an absolute path before handing it to librosa
        file_path = os.path.normpath(os.path.abspath(file_path))

        # res_type='kaiser_fast' trades a little quality for speed
        signal, rate = librosa.load(file_path, res_type='kaiser_fast', duration=duration, sr=sample_rate)

        coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)

        # Fix the time axis to exactly max_pad_len frames: zero-pad short
        # clips, truncate long ones.
        deficit = max_pad_len - coeffs.shape[1]
        if deficit > 0:
            coeffs = np.pad(coeffs, pad_width=((0, 0), (0, deficit)), mode='constant')
        else:
            coeffs = coeffs[:, :max_pad_len]
        return coeffs.T
    except Exception as e:
        print(f"Error extracting features from {file_path}: {e}")
        return None
+ class EmotionClassifier:
49
+ def __init__(self):
50
+ self.model = None
51
+ self.le = LabelEncoder()
52
+ self.is_loaded = False
53
+ self.load_artifacts()
54
+
55
+ def load_artifacts(self):
56
+ if os.path.exists(MODEL_PATH) and os.path.exists(ENCODER_PATH):
57
+ try:
58
+ self.model = load_model(MODEL_PATH)
59
+ with open(ENCODER_PATH, 'rb') as f:
60
+ self.le = pickle.load(f)
61
+ self.is_loaded = True
62
+ print("Model and encoder loaded successfully.")
63
+ except Exception as e:
64
+ print(f"Failed to load artifacts: {e}")
65
+ else:
66
+ print("No pre-trained model found. System ready for training.")
67
+
68
+ def extract_features(self, file_path):
69
+ """Wrapper for static extraction method."""
70
+ return extract_features_static(file_path)
71
+
72
+ def train_model(self, data_path, log_callback=None):
73
+ """Trains the model from scratch using data in data_path."""
74
+ def log(msg):
75
+ if log_callback:
76
+ log_callback(msg)
77
+ else:
78
+ print(msg)
79
+
80
+ # Cache paths
81
+ features_cache_path = os.path.join(data_path, "features_cache.npy")
82
+ labels_cache_path = os.path.join(data_path, "labels_cache.npy")
83
+
84
+ X = None
85
+ y = None
86
+
87
+ # Check cache
88
+ if os.path.exists(features_cache_path) and os.path.exists(labels_cache_path):
89
+ log("Found cached features. Loading from disk...")
90
+ try:
91
+ X = np.load(features_cache_path)
92
+ y = np.load(labels_cache_path)
93
+ log(f"Loaded {len(X)} cached samples.")
94
+ except Exception as e:
95
+ log(f"Failed to load cache: {e}. Recomputing...")
96
+ X = None
97
+ y = None
98
+
99
+ if X is None or y is None:
100
+ files = []
101
+ # Walk through directory
102
+ for root, _, filenames in os.walk(data_path):
103
+ for f in filenames:
104
+ if f.endswith('.wav'):
105
+ files.append(os.path.join(root, f))
106
+
107
+ if not files:
108
+ log("DEBUG: No .wav files found in os.walk")
109
+ raise ValueError("No .wav files found for training.")
110
+
111
+ from .utils import get_label_from_filename
112
+
113
+ log(f"Processing {len(files)} files for training utilizing parallel processing...")
114
+
115
+ # Helper to process a single file and return (features, label)
116
+ def process_file(file):
117
+ lbl = get_label_from_filename(file)
118
+ if lbl:
119
+ feat = extract_features_static(file)
120
+ if feat is not None:
121
+ return (feat, lbl)
122
+ return None
123
+
124
+ # Run in parallel
125
+ # n_jobs=-1 uses all available cores
126
+ results = Parallel(n_jobs=-1, verbose=5)(delayed(process_file)(f) for f in files)
127
+
128
+ # Filter None results
129
+ valid_results = [r for r in results if r is not None]
130
+
131
+ if not valid_results:
132
+ log("CRITICAL: No features extracted successfully!")
133
+ raise ValueError("No features extracted. Check files and labels.")
134
+
135
+ log(f"Successfully processed {len(valid_results)}/{len(files)} files.")
136
+
137
+ features = [r[0] for r in valid_results]
138
+ labels = [r[1] for r in valid_results]
139
+
140
+ X = np.array(features, dtype='float32')
141
+ y = np.array(labels)
142
+
143
+ # Save cache
144
+ log("Saving features to cache...")
145
+ np.save(features_cache_path, X)
146
+ np.save(labels_cache_path, y)
147
+
148
+ # Encode labels
149
+ y_encoded = to_categorical(self.le.fit_transform(y))
150
+
151
+ # Split
152
+ X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y) # stratify=y handles class imbalance better
153
+
154
+ # Build Model
155
+ self.model = Sequential([
156
+ Conv1D(128, kernel_size=5, padding="same", activation="relu", input_shape=(X_train.shape[1], X_train.shape[2])),
157
+ MaxPooling1D(pool_size=2),
158
+ BatchNormalization(),
159
+ Conv1D(256, kernel_size=5, padding="same", activation="relu"),
160
+ MaxPooling1D(pool_size=2),
161
+ BatchNormalization(),
162
+ Dropout(0.3),
163
+ Flatten(),
164
+ Dense(256, activation='relu'),
165
+ Dropout(0.4),
166
+ Dense(y_encoded.shape[1], activation='softmax')
167
+ ])
168
+
169
+ self.model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])
170
+
171
+ early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
172
+
173
+ log("Starting training...")
174
+
175
+ class LogCallback(tf.keras.callbacks.Callback):
176
+ def on_epoch_end(self, epoch, logs=None):
177
+ log(f"Epoch {epoch+1}: loss={logs['loss']:.4f}, acc={logs['accuracy']:.4f}, val_loss={logs['val_loss']:.4f}")
178
+
179
+ self.model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stop, LogCallback()], verbose=0)
180
+
181
+ # Save artifacts
182
+ self.model.save(MODEL_PATH)
183
+ with open(ENCODER_PATH, 'wb') as f:
184
+ pickle.dump(self.le, f)
185
+
186
+ self.is_loaded = True
187
+ log("Training complete and model saved.")
188
+ return {"accuracy": self.model.evaluate(X_test, y_test)[1]}
189
+
190
+ def predict_emotion(self, file_path):
191
+ if not self.is_loaded:
192
+ raise ValueError("Model not loaded. Train the model first.")
193
+
194
+ mfcc = self.extract_features(file_path)
195
+ if mfcc is None:
196
+ raise ValueError("Could not extract features.")
197
+
198
+ mfcc = mfcc[np.newaxis, :, :] # Add batch dimension
199
+ prediction = self.model.predict(mfcc, verbose=0)
200
+
201
+ # Get all probabilities
202
+ probs = prediction[0]
203
+ classes = self.le.classes_
204
+
205
+ # Create distribution dict {label: score}
206
+ distribution = {label: float(score) for label, score in zip(classes, probs)}
207
+
208
+ predicted_index = np.argmax(prediction)
209
+ predicted_label = self.le.inverse_transform([predicted_index])[0]
210
+ confidence = float(prediction[0][predicted_index])
211
+
212
+ return {
213
+ "label": predicted_label,
214
+ "confidence": confidence,
215
+ "distribution": distribution
216
+ }
backend/nlp_manager.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import speech_recognition as sr
3
+ import torch
4
+ from transformers import pipeline
5
+ import os
6
+
class NLPManager:
    """Text pipeline: speech-to-text (Google Web Speech API) followed by
    text emotion classification (DistilRoBERTa)."""

    def __init__(self):
        print("Initializing NLP Manager...")
        self.recognizer = sr.Recognizer()

        # Load emotion classification pipeline
        # Using a model fine-tuned for emotion detection
        # Falls back to default cache if downloaded
        # NOTE(review): return_all_scores is deprecated in newer
        # transformers releases (top_k=None is the replacement, with a
        # different output nesting) — confirm the pinned version before
        # changing this.
        print("Loading Sentiment Analysis Model (DistilRoBERTa)...")
        self.classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            return_all_scores=True
        )
        print("NLP Manager Ready.")

    def transcribe(self, audio_path):
        """
        Converts audio file to text using Google Web Speech API.

        Returns the transcription string, or None when speech is
        unintelligible, the service is unreachable, or the file cannot
        be read (PCM WAV input is required by sr.AudioFile).
        """
        try:
            with sr.AudioFile(audio_path) as source:
                audio_data = self.recognizer.record(source)
                # recognize_google is free and works well for short clips
                text = self.recognizer.recognize_google(audio_data)
                return text
        except sr.UnknownValueError:
            return None  # Speech is unintelligible
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
            return None
        except Exception as e:
            print(f"Transcription error: {e}")
            return None

    def analyze_sentiment(self, text):
        """
        Analyzes the emotion of the text.
        Returns the dominant emotion and confidence, plus all per-label
        scores sorted descending; None for empty input.
        """
        if not text:
            return None

        # Predict; results is a list of dicts:
        # [{'label': 'joy', 'score': 0.9}, ...]
        results = self.classifier(text)[0]

        # Sort by score descending so results[0] is the dominant emotion
        results.sort(key=lambda x: x['score'], reverse=True)

        top_result = results[0]
        return {
            "label": top_result['label'],
            "score": top_result['score'],
            "all_scores": results
        }

    def process(self, audio_path):
        """
        Full pipeline: Audio -> Text -> Emotion.

        Returns {'transcription', 'text_emotion'}; both are None when
        transcription fails.
        """
        transcription = self.transcribe(audio_path)
        if not transcription:
            return {
                "transcription": None,
                "text_emotion": None
            }

        emotion_analysis = self.analyze_sentiment(transcription)
        return {
            "transcription": transcription,
            "text_emotion": emotion_analysis
        }
backend/reproduce_error.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Standalone repro script: try to load one RAVDESS file with librosa
and, on failure, with soundfile directly, printing full tracebacks.

Usage: python reproduce_error.py [path_to_wav]
"""
import os
import sys
import librosa
import traceback
import soundfile as sf

# Path to the specific file; allow overriding on the command line so the
# repro is not tied to one machine's absolute path (old path kept as the
# backward-compatible default).
file_path = sys.argv[1] if len(sys.argv) > 1 else r"c:\Users\aniru\OneDrive\Desktop\EDUVN\data sets\Actor_01\03-01-01-01-01-01-01.wav"

print(f"Testing loading: {file_path}")
print(f"Does file exist? {os.path.exists(file_path)}")

try:
    # Mimic parameters from model_manager.py
    DURATION = 3
    SAMPLE_RATE = 22050
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast', duration=DURATION, sr=SAMPLE_RATE)
    print("Success! Audio loaded.")
    print(f"Shape: {audio.shape}, Sample Rate: {sample_rate}")
except Exception as e:
    print("FAILED to load audio.")
    print(f"Error: {e}")
    traceback.print_exc()

print("-" * 20)
print("Testing soundfile directly...")
try:
    # Bypass librosa to isolate whether the decoder itself is at fault
    data, samplerate = sf.read(file_path)
    print(f"Soundfile read success. Shape: {data.shape}, Rate: {samplerate}")
except Exception as e:
    print(f"Soundfile direct read failed: {e}")
backend/requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ tensorflow
5
+ librosa
6
+ pydub
7
+ numpy
8
+ scikit-learn
9
+ soundfile
10
+ joblib
11
+ resampy
12
+ SpeechRecognition
13
+ transformers
14
+ tf-keras
15
+ torch
backend/test_caching.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+ import shutil
5
+ import numpy as np
6
+ import librosa
7
+ import soundfile as sf
8
+
9
+ # Setup path
10
+ try:
11
+ from .model_manager import EmotionClassifier
12
+ except ImportError:
13
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
14
+ from backend.model_manager import EmotionClassifier
15
+
def create_dummy_wav(path):
    """Write a one-second 440 Hz sine tone to `path` at 22050 Hz."""
    rate = 22050
    timeline = np.linspace(0, 1, rate)
    tone = np.sin(2 * np.pi * 440 * timeline)
    sf.write(path, tone, rate)
def test_caching():
    """Integration check: train_model should write feature caches on the
    first run and load them (instead of re-extracting) on the second."""
    base_dir = os.path.dirname(os.path.abspath(__file__))
    test_data_dir = os.path.join(base_dir, "test_data_cache")

    # Start from a clean fixture directory every run
    if os.path.exists(test_data_dir):
        shutil.rmtree(test_data_dir)
    os.makedirs(test_data_dir)

    # Create dummy files
    print("Creating dummy files...")
    for i in range(10):
        # Filename must imply label: 03-01-01...
        # label is 3rd part: 01=neutral
        name = f"03-01-01-01-01-{i:02d}-01.wav"
        create_dummy_wav(os.path.join(test_data_dir, name))

    classifier = EmotionClassifier()

    logs = []
    def log_callback(msg):
        # Capture training output so we can check it below
        logs.append(msg)
        print(f"[TEST] {msg}")

    print("\n--- RUN 1: Extraction ---")
    classifier.train_model(test_data_dir, log_callback=log_callback)

    # Verify cache created
    if os.path.exists(os.path.join(test_data_dir, "features_cache.npy")):
        print("PASS: Cache file created.")
    else:
        print("FAIL: Cache file NOT created.")

    print("\n--- RUN 2: Caching ---")
    logs.clear()
    classifier.train_model(test_data_dir, log_callback=log_callback)

    # Verify log says "Found cached features"
    if any("Found cached features" in log for log in logs):
        print("PASS: Loaded from cache.")
    else:
        print("FAIL: Did NOT load from cache.")

    # Cleanup
    shutil.rmtree(test_data_dir)

if __name__ == "__main__":
    test_caching()
backend/test_nlp.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+
5
+ # Setup path
6
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
7
+
8
+ try:
9
+ from nlp_manager import NLPManager
10
+ except ImportError:
11
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
12
+ from backend.nlp_manager import NLPManager
13
+
def test_nlp():
    """Smoke test for NLPManager: initialization, sentiment analysis on
    a known-happy sentence, and presence of the transcribe method."""
    print("Testing NLP Manager Integration...")

    try:
        nlp = NLPManager()
        print("PASS: Manager Initialized")
    except Exception as e:
        print(f"FAIL: Initialization Error: {e}")
        return

    # Test Sentiment Analysis
    test_text = "I am so happy that this works!"
    print(f"Analyzing text: '{test_text}'")
    result = nlp.analyze_sentiment(test_text)

    # Accept the label spellings different model variants might emit
    if result and result['label'] in ['joy', 'happiness', 'happy']:
        print(f"PASS: Correctly identified emotion: {result['label']} (Score: {result['score']:.2f})")
    else:
        print(f"WARN: Analysis result: {result}")

    # Test Transcribe (Mocking audio not easy without file, just ensuring method exists)
    if hasattr(nlp, 'transcribe'):
        print("PASS: Transcribe method exists.")

if __name__ == "__main__":
    test_nlp()
backend/test_prediction.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+ import numpy as np
5
+ import soundfile as sf
6
+
7
+ # Setup path to import backend modules
8
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
9
+
10
+ try:
11
+ from backend.model_manager import EmotionClassifier
12
+ except ImportError:
13
+ # Fallback if running from backend dir directly
14
+ from model_manager import EmotionClassifier
15
+
def create_dummy_wav(path):
    """Write a one-second 440 Hz sine tone to `path` at 22050 Hz."""
    rate = 22050
    samples = np.linspace(0, 1, rate)
    waveform = np.sin(2 * np.pi * 440 * samples)
    sf.write(path, waveform, rate)
def test_prediction():
    """Smoke test: run predict_emotion on a synthetic sine-wave file and
    report success/failure (no assertion — console inspection script)."""
    print("Initializing Classifier...")
    try:
        classifier = EmotionClassifier()
    except Exception as e:
        print(f"FAILED to initialize classifier: {e}")
        return

    # Create a dummy file to feed through the full feature pipeline
    test_file = "test_audio_prediction.wav"
    create_dummy_wav(test_file)
    print(f"Created dummy file: {test_file}")

    print("Attempting prediction...")
    try:
        result = classifier.predict_emotion(test_file)
        print("Prediction Success!")
        print(f"Result: {result}")
    except Exception as e:
        print("Prediction FAILED.")
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Always remove the fixture, even on failure
        if os.path.exists(test_file):
            os.remove(test_file)

if __name__ == "__main__":
    test_prediction()
backend/test_training.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+
5
+ # When running as a module 'backend.test_training', we can use relative imports
6
+ try:
7
+ from .model_manager import EmotionClassifier
8
+ except ImportError:
9
+ # Fallback if run as script, but this will break relative imports in model_manager
10
+ # So we must fix path to import 'backend.model_manager'
11
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
12
+ from backend.model_manager import EmotionClassifier
13
+
14
def test_training():
    """Smoke-test EmotionClassifier.train_model against the bundled dataset.

    Prints progress via a simple log callback and a pass/fail summary;
    failures dump a full traceback rather than raising.
    """
    print("Initializing Classifier...")
    classifier = EmotionClassifier()

    # Dataset lives one level above this file: backend/../data sets
    here = os.path.dirname(os.path.abspath(__file__))
    dataset_path = os.path.join(here, "../data sets")

    print(f"Data directory: {dataset_path}")

    def log_callback(msg):
        # Prefix trainer output so it is distinguishable from test output.
        print(f"[TRAIN] {msg}")

    print("Starting training test...")
    try:
        classifier.train_model(dataset_path, log_callback=log_callback)
    except Exception as e:
        print(f"Training test failed: {e}")
        import traceback
        traceback.print_exc()
    else:
        print("Training test passed!")

if __name__ == "__main__":
    test_training()
backend/utils.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from pydub import AudioSegment
4
+
5
# RAVDESS filenames encode the emotion as the third dash-separated field;
# this maps that two-digit code onto the project's canonical label names.
RAVDESS_MAP = {
    '01': 'neutral', '02': 'calm', '03': 'happiness', '04': 'sadness',
    '05': 'anger', '06': 'fear', '07': 'disgust', '08': 'surprise'
}

# Canonical emotion labels recognized throughout the project.
KNOWN_EMOTIONS = ['anger', 'neutral', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'calm']
# Handling some common synonyms/variations: alias -> canonical label.
EMOTION_ALIASES = {
    'happy': 'happiness',
    'sad': 'sadness',
    'angry': 'anger',
    'surprised': 'surprise'
}
18
+
19
def convert_to_wav(source_path, target_path):
    """Re-encode the audio at *source_path* as WAV at *target_path*.

    Returns True on success; on any decode/encode failure the error is
    printed and False is returned (best-effort, never raises).
    """
    try:
        clip = AudioSegment.from_file(source_path)
        clip.export(target_path, format="wav")
    except Exception as e:
        print(f"Error converting {source_path}: {e}")
        return False
    return True
28
+
29
def get_label_from_filename(filename):
    """Extracts emotion label from filename based on patterns.

    Two rules are tried in order:
      1. RAVDESS-style names (e.g. ``03-01-05-01-01-01-12.wav``): the third
         dash-separated field is a two-digit emotion code (see RAVDESS_MAP).
      2. A known emotion word — or an alias such as ``happy`` — appearing as
         a whole token of the name, where tokens are delimited by ``_``,
         ``-``, ``.`` or spaces.

    Returns the canonical label (one of KNOWN_EMOTIONS) or None when no
    rule matches.
    """
    filename = os.path.basename(filename).lower()

    # Rule 1: RAVDESS dataset (e.g., 03-01-01-01-01-01-01.wav)
    if filename.count('-') == 6 and filename.startswith('03'):
        # splitext (instead of replacing '.wav') also handles .mp3/.flac etc.
        parts = os.path.splitext(filename)[0].split('-')
        if len(parts) > 2:
            return RAVDESS_MAP.get(parts[2])

    # Rule 2: general emotion words.
    # Fix: the previous substring patterns (e.g. "sad_" in name) also matched
    # inside larger words ("notsad_clip" -> 'sadness'). Matching whole tokens
    # keeps every legitimate pattern (prefix, "_emotion_", "_emotion.",
    # "upload_emotion_") while rejecting embedded matches.
    stem = os.path.splitext(filename)[0]
    for sep in ('-', '.', ' '):
        stem = stem.replace(sep, '_')
    tokens = stem.split('_')

    for emotion in KNOWN_EMOTIONS + list(EMOTION_ALIASES.keys()):
        if emotion in tokens:
            # Normalize aliases (e.g. 'happy' -> 'happiness').
            return EMOTION_ALIASES.get(emotion, emotion)

    return None
encoder.pkl ADDED
Binary file (527 Bytes). View file
 
frontend/favicon.ico ADDED
frontend/index.html ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>VocalVibe - Emotion Recognition</title>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;700&display=swap" rel="stylesheet">
11
+ <link rel="stylesheet" href="style.css">
12
+ <!-- FontAwesome for Icons -->
13
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
14
+ </head>
15
+
16
+ <body>
17
+ <div class="background-blobs">
18
+ <div class="blob blob-1"></div>
19
+ <div class="blob blob-2"></div>
20
+ <div class="blob blob-3"></div>
21
+ </div>
22
+
23
+ <!-- Toast Container -->
24
+ <div id="toast-container"></div>
25
+
26
+ <main class="glass-container">
27
+ <header>
28
+ <h1>Vocal<span class="highlight">Vibe</span></h1>
29
+ <p>AI-Powered Speech Emotion Recognition</p>
30
+ <button id="trainBtn" class="btn-secondary" style="font-size: 0.8rem; padding: 0.3rem 0.8rem;"><i
31
+ class="fa-solid fa-brain"></i> Train Model</button>
32
+ </header>
33
+
34
+ <section class="controls">
35
+ <!-- Recording Section -->
36
+ <div class="record-area">
37
+ <button id="micBtn" class="mic-button" title="Hold to Record">
38
+ <i class="fa-solid fa-microphone"></i>
39
+ </button>
40
+ <p id="statusText">Click & Hold to Record</p>
41
+ <div id="visualizer" class="visualizer hidden">
42
+ <div class="bar"></div>
43
+ <div class="bar"></div>
44
+ <div class="bar"></div>
45
+ <div class="bar"></div>
46
+ <div class="bar"></div>
47
+ </div>
48
+ </div>
49
+
50
+ <div class="divider">
51
+ <span>OR</span>
52
+ </div>
53
+
54
+ <!-- Upload Section -->
55
+ <div class="upload-area" id="dropZone">
56
+ <i class="fa-solid fa-cloud-arrow-up"></i>
57
+ <p>Drag & Drop Audio File</p>
58
+ <input type="file" id="fileInput" accept="audio/*" hidden>
59
+ <button class="btn-secondary" onclick="document.getElementById('fileInput').click()">Browse
60
+ Files</button>
61
+ </div>
62
+ </section>
63
+ </main>
64
+
65
+ <!-- Password Modal -->
66
+ <div id="passwordModal" class="modal hidden">
67
+ <div class="modal-content glass-card" style="max-width: 400px; text-align: center;">
68
+ <button class="close-btn" id="closePasswordModal">&times;</button>
69
+ <h2>Admin Access</h2>
70
+ <p style="margin-bottom: 15px; color: #cbd5e1;">Enter password to start training</p>
71
+ <input type="password" id="adminPasswordInput" class="password-input" placeholder="Password">
72
+ <button id="submitPasswordBtn" class="btn-primary"
73
+ style="margin-top: 15px; width: 100%;">Authenticate</button>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Training Terminal Modal -->
78
+ <div id="trainingModal" class="modal hidden">
79
+ <div class="modal-content terminal-card">
80
+ <div class="terminal-header">
81
+ <span class="terminal-title"><i class="fa-solid fa-terminal"></i> Model Training</span>
82
+ <button class="close-btn" id="closeTrainingModal">&times;</button>
83
+ </div>
84
+ <div class="terminal-body" id="trainingLog">
85
+ <span class="log-line">Waiting for command...</span>
86
+ </div>
87
+ </div>
88
+ </div>
89
+
90
+ <!-- Result Modal -->
91
+ <div id="resultModal" class="modal hidden">
92
+ <div class="modal-content glass-card">
93
+ <button class="close-btn" id="closeModal">&times;</button>
94
+ <div class="emoji-display" id="resultEmoji">🤔</div>
95
+ <h2 id="resultLabel">Analyzing...</h2>
96
+ <p id="resultConfidence">Confidence: --%</p>
97
+
98
+ <div class="feedback-section">
99
+ <p>Was this correct?</p>
100
+ <div class="feedback-buttons">
101
+ <button class="btn-feedback correct" id="btnCorrect"><i class="fa-solid fa-check"></i> Yes</button>
102
+ <button class="btn-feedback incorrect" id="btnIncorrect"><i class="fa-solid fa-xmark"></i>
103
+ No</button>
104
+ </div>
105
+ <!-- Correction Dropdown (Hidden initially) -->
106
+ <div id="correctionArea" class="hidden">
107
+ <select id="emotionSelect">
108
+ <option value="" disabled selected>Select actual emotion</option>
109
+ <option value="neutral">Neutral</option>
110
+ <option value="calm">Calm</option>
111
+ <option value="happiness">Happiness</option>
112
+ <option value="sadness">Sadness</option>
113
+ <option value="anger">Anger</option>
114
+ <option value="fear">Fear</option>
115
+ <option value="disgust">Disgust</option>
116
+ <option value="surprise">Surprise</option>
117
+ </select>
118
+ <button id="submitCorrection" class="btn-primary">Submit Correction</button>
119
+ </div>
120
+ </div>
121
+ </div>
122
+ </div>
123
+
124
+ <footer class="watermark">
125
+ <p>Made by: Aniruddha Paul</p>
126
+ </footer>
127
+
128
+ <script src="script.js"></script>
129
+ </body>
130
+
131
+ </html>
frontend/script.js ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const micBtn = document.getElementById('micBtn');
2
+ const statusText = document.getElementById('statusText');
3
+ const visualizer = document.getElementById('visualizer');
4
+ const dropZone = document.getElementById('dropZone');
5
+ const fileInput = document.getElementById('fileInput');
6
+
7
+ // Result Modal Elements
8
+ const resultModal = document.getElementById('resultModal');
9
+ const closeModal = document.getElementById('closeModal');
10
+ const resultEmoji = document.getElementById('resultEmoji');
11
+ const resultLabel = document.getElementById('resultLabel');
12
+ const resultConfidence = document.getElementById('resultConfidence');
13
+ const btnCorrect = document.getElementById('btnCorrect');
14
+ const btnIncorrect = document.getElementById('btnIncorrect');
15
+ const correctionArea = document.getElementById('correctionArea');
16
+ const submitCorrection = document.getElementById('submitCorrection');
17
+
18
+ let mediaRecorder;
19
+ let audioChunks = [];
20
+ let currentTempFilename = null;
21
+ let currentPrediction = null;
22
+
23
+ // Emotion to Emoji Map
24
+ const emotionEmojis = {
25
+ 'neutral': '😐',
26
+ 'calm': '😌',
27
+ 'happiness': '😄',
28
+ 'happy': '😄',
29
+ 'sadness': '😢',
30
+ 'sad': '😢',
31
+ 'anger': '😠',
32
+ 'angry': '😠',
33
+ 'fear': '😱',
34
+ 'disgust': '🤢',
35
+ 'surprise': '😲'
36
+ };
37
+
38
+ // --- Recording Logic ---
39
+ micBtn.addEventListener('mousedown', startRecording);
40
+ micBtn.addEventListener('mouseup', stopRecording);
41
+ micBtn.addEventListener('mouseleave', () => {
42
+ if (mediaRecorder && mediaRecorder.state === 'recording') {
43
+ stopRecording();
44
+ }
45
+ });
46
+
47
+ // Training Logic
48
+ const trainingModal = document.getElementById('trainingModal');
49
+ const trainingLog = document.getElementById('trainingLog');
50
+ const closeTrainingModal = document.getElementById('closeTrainingModal');
51
+
52
+ closeTrainingModal.addEventListener('click', () => {
53
+ trainingModal.classList.add('hidden');
54
+ });
55
+
56
+
57
+ // --- Training & Password Logic ---
58
+ const passwordModal = document.getElementById('passwordModal');
59
+ const closePasswordModal = document.getElementById('closePasswordModal');
60
+ const submitPasswordBtn = document.getElementById('submitPasswordBtn');
61
+ const adminPasswordInput = document.getElementById('adminPasswordInput');
62
+
63
+ // Open Password Modal
64
+ document.getElementById('trainBtn').addEventListener('click', () => {
65
+ passwordModal.classList.remove('hidden');
66
+ adminPasswordInput.value = '';
67
+ adminPasswordInput.focus();
68
+ });
69
+
70
+ // Close Password Modal
71
+ closePasswordModal.addEventListener('click', () => {
72
+ passwordModal.classList.add('hidden');
73
+ });
74
+
75
+ // Handle Password Submission
76
+ // Handle Password Submission
77
// Validate the admin password field, then hand it off to startTraining.
// An empty field only shows an error toast and leaves the modal open.
function submitPassword() {
    const entered = adminPasswordInput.value;
    if (!entered) {
        showToast("Please enter a password", "error");
        return;
    }
    passwordModal.classList.add('hidden');
    startTraining(entered);
}
87
+
88
+ submitPasswordBtn.addEventListener('click', submitPassword);
89
+
90
+ // Allow Enter key to submit password and Esc to close modals
91
+ document.addEventListener('keydown', (e) => {
92
+ // Enter Key in Password Input
93
+ if (e.key === 'Enter' && document.activeElement === adminPasswordInput) {
94
+ submitPassword();
95
+ }
96
+ // Escape Key Global
97
+ if (e.key === 'Escape') {
98
+ passwordModal.classList.add('hidden');
99
+ resultModal.classList.add('hidden');
100
+ trainingModal.classList.add('hidden');
101
+ }
102
+ });
103
+
104
// POST the admin password to /train; on acceptance, open the terminal modal
// and start polling the backend for training-log lines.
async function startTraining(password) {
    // Open the terminal modal immediately so the user sees progress.
    trainingModal.classList.remove('hidden');
    trainingLog.innerHTML = '<span class="log-line">Authenticating...</span>';

    try {
        const response = await fetch('/train', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({ password: password })
        });

        // 401 means the backend rejected the password.
        if (response.status === 401) {
            trainingLog.innerHTML += '<span class="log-line" style="color:red">Error: Unauthorized. Incorrect Password.</span>';
            showToast("Incorrect Admin Password", "error");
            return;
        }

        const data = await response.json();

        // Backend acknowledges with status 'training_started'; begin polling.
        if (data.status === 'training_started') {
            trainingLog.innerHTML += '<span class="log-line">Access Granted. Starting training sequence...</span>';
            pollLogs();
        }
    } catch (e) {
        // Network/parse failure: surface it in both the toast and terminal.
        showToast("Failed to start training.", "error");
        trainingLog.innerHTML += `<span class="log-line" style="color:red">Error: ${e.message}</span>`;
    }
}
135
+
136
// Poll GET /logs for new training output and append each line to the
// terminal modal, recursing via setTimeout until a "Training complete"
// line arrives.
// Fix: `data.logs.length` was read unguarded after the `data.logs && ...`
// block — a response without a `logs` array threw a TypeError and sent the
// poller into the 2s error/retry path forever. Also guards `next_index` so
// a missing value falls back to the current index instead of `undefined`.
async function pollLogs(startIndex = 0) {
    try {
        const response = await fetch(`/logs?after=${startIndex}`);
        const data = await response.json();
        const logs = data.logs || [];

        if (logs.length > 0) {
            logs.forEach(log => {
                const line = document.createElement('span');
                line.className = 'log-line';
                line.innerText = log;
                // Color-code obvious failures and successes.
                if (log.includes("CRITICAL") || log.includes("Error")) line.style.color = '#ff5555';
                if (log.includes("Success") || log.includes("complete")) line.style.color = '#55ff55';
                trainingLog.appendChild(line);
            });
            // Auto scroll to keep the newest line visible.
            trainingLog.scrollTop = trainingLog.scrollHeight;
        }

        // Stop polling once the backend reports the run has finished.
        const lastLog = logs.length > 0 ? logs[logs.length - 1] : "";
        if (lastLog.includes("Training complete")) {
            trainingLog.innerHTML += '<span class="log-line">>> Process finished. You may close this window.</span>';
            return;
        }

        const nextIndex = (data.next_index !== undefined) ? data.next_index : startIndex;
        setTimeout(() => pollLogs(nextIndex), 500); // Poll every 500ms

    } catch (e) {
        console.error("Polling error", e);
        setTimeout(() => pollLogs(startIndex), 2000); // Retry slower on error
    }
}
170
+
171
+ // Touch support for mobile
172
+ micBtn.addEventListener('touchstart', (e) => { e.preventDefault(); startRecording(); });
173
+ micBtn.addEventListener('touchend', (e) => { e.preventDefault(); stopRecording(); });
174
+
175
+
176
// Begin capturing microphone audio. The UI is switched to the recording
// state up front (optimistic); if mic access is denied the status line is
// reverted to an error. Collected chunks are blobbed and uploaded when the
// recorder's "stop" event fires (see stopRecording).
function startRecording() {
    statusText.innerText = "Recording...";
    micBtn.classList.add('recording');
    visualizer.classList.remove('hidden');
    // Reset the chunk buffer for this new take.
    audioChunks = [];

    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
            mediaRecorder = new MediaRecorder(stream);
            mediaRecorder.start();

            mediaRecorder.addEventListener("dataavailable", event => {
                audioChunks.push(event.data);
            });

            mediaRecorder.addEventListener("stop", () => {
                // Browsers typically emit webm/ogg regardless of the declared
                // type; the .wav name is cosmetic — the backend decoder
                // (pydub) sniffs the real container, per the original note.
                const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                uploadAudio(audioBlob, "recording.wav");
            });
        })
        .catch(err => {
            console.error("Error accessing mic:", err);
            statusText.innerText = "Error Accessing Mic";
        });
}
202
+
203
// Finalize an in-progress recording and reset the mic UI. Safe to call
// when no recorder exists or it has already stopped (no-op in that case).
function stopRecording() {
    const active = mediaRecorder && mediaRecorder.state !== 'inactive';
    if (!active) return;
    mediaRecorder.stop();
    statusText.innerText = "Processing...";
    micBtn.classList.remove('recording');
    visualizer.classList.add('hidden');
}
211
+
212
+ // --- File Upload Logic ---
213
+ fileInput.addEventListener('change', (e) => {
214
+ if (e.target.files.length > 0) {
215
+ handleFile(e.target.files[0]);
216
+ }
217
+ });
218
+
219
+ dropZone.addEventListener('dragover', (e) => {
220
+ e.preventDefault();
221
+ dropZone.classList.add('dragover');
222
+ });
223
+
224
+ dropZone.addEventListener('dragleave', () => dropZone.classList.remove('dragover'));
225
+
226
+ dropZone.addEventListener('drop', (e) => {
227
+ e.preventDefault();
228
+ dropZone.classList.remove('dragover');
229
+ if (e.dataTransfer.files.length > 0) {
230
+ handleFile(e.dataTransfer.files[0]);
231
+ }
232
+ });
233
+
234
// Forward a user-chosen audio file to the prediction endpoint,
// reflecting the upload in the status line.
function handleFile(file) {
    statusText.innerText = `Uploading ${file.name}...`;
    uploadAudio(file, file.name);
}
238
+
239
+ // --- Toast Notifications ---
240
// Show a transient notification in the top-right toast stack.
// `type` selects the icon and accent: 'info' (default), 'success', 'error'.
// The toast removes itself after ~3s once its hide animation finishes.
function showToast(message, type = 'info') {
    const container = document.getElementById('toast-container');
    const toast = document.createElement('div');
    toast.className = `toast ${type}`;

    // Icon selection
    let icon = 'fa-info-circle';
    if (type === 'success') icon = 'fa-check-circle';
    if (type === 'error') icon = 'fa-exclamation-circle';

    toast.innerHTML = `
        <i class="fa-solid ${icon}"></i>
        <span>${message}</span>
    `;

    container.appendChild(toast);

    // Auto remove: trigger the hide animation, then drop the node when done.
    setTimeout(() => {
        toast.classList.add('hide');
        toast.addEventListener('animationend', () => toast.remove());
    }, 3000);
}
263
+
264
+ // --- API Calls ---
265
// Send an audio blob/file as multipart form data to POST /predict and
// render the result. Failures are surfaced both in the status line and as
// an error toast.
async function uploadAudio(fileOrBlob, filename) {
    const formData = new FormData();
    formData.append("file", fileOrBlob, filename); // Append file

    try {
        const response = await fetch('/predict', {
            method: 'POST',
            body: formData
        });

        if (!response.ok) {
            // Backend reports failures as JSON with a `detail` field.
            const errData = await response.json();
            throw new Error(errData.detail || "Prediction failed");
        }

        const data = await response.json();
        showResult(data);
        // Restore the idle prompt once the result modal is showing.
        statusText.innerText = "Click & Hold to Record";
        showToast("Analysis Complete", "success");
    } catch (error) {
        console.error(error);
        statusText.innerText = "Error: " + error.message;
        showToast("Error: " + error.message, "error");
    }
}
290
+
291
// Populate and open the result modal for a /predict response.
// Side effects: stashes the temp filename and predicted label into the
// module-level currentTempFilename/currentPrediction (read later by the
// feedback flow), and lazily creates/refreshes the #nlp-results panel.
function showResult(data) {
    currentTempFilename = data.temp_filename;
    currentPrediction = data.prediction;

    // Unknown labels fall back to a question-mark emoji.
    resultEmoji.innerText = emotionEmojis[data.prediction.toLowerCase()] || '❓';
    resultLabel.innerText = data.prediction.charAt(0).toUpperCase() + data.prediction.slice(1);
    resultConfidence.innerText = `Confidence: ${(data.confidence * 100).toFixed(1)}%`;

    // Reset feedback UI before showing the modal.
    correctionArea.classList.add('hidden');
    resultModal.classList.remove('hidden');

    // Fallback mode: the model is untrained, so ask the user to label the
    // clip instead of presenting a prediction.
    if (data.is_fallback) {
        showToast("Model not trained. Please label this audio to build the dataset.", "info");
        correctionArea.classList.remove('hidden');
        resultLabel.innerText = "Label Required";
        resultEmoji.innerText = "🏷️";
        resultConfidence.innerText = "Help the AI learn!";
    }

    // --- NLP Analysis Display ---
    // Create the panel once and reuse it on subsequent results.
    let nlpDiv = document.getElementById('nlp-results');
    if (!nlpDiv) {
        nlpDiv = document.createElement('div');
        nlpDiv.id = 'nlp-results';
        nlpDiv.className = 'nlp-container';

        // Insert before feedback section
        const feedbackSection = resultModal.querySelector('.feedback-section');
        resultModal.querySelector('.modal-content').insertBefore(nlpDiv, feedbackSection);
    }

    // Clear previous result's content.
    nlpDiv.innerHTML = '';

    if (data.nlp_analysis && data.nlp_analysis.transcription) {
        const textEmotion = data.nlp_analysis.text_emotion;
        const confidencePct = (textEmotion.score * 100).toFixed(1);

        // Show Hybrid Breakdown: acoustic vs. text-based emotion estimates.
        nlpDiv.innerHTML = `
            <div class="divider">Hybrid Analysis</div>
            <p class="transcription">"${data.nlp_analysis.transcription}"</p>

            <div class="breakdown-grid" style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 10px; font-size: 0.9rem;">
                <div class="breakdown-item">
                    <div style="color: #94a3b8;">Audio Tone</div>
                    <div class="highlight">${data.audio_emotion.label}</div>
                    <div class="confidence-small">${(data.audio_emotion.confidence * 100).toFixed(1)}%</div>
                </div>
                <div class="breakdown-item">
                    <div style="color: #94a3b8;">Text Context</div>
                    <div class="highlight">${textEmotion.label}</div>
                    <div class="confidence-small">${confidencePct}%</div>
                </div>
            </div>
            <div style="margin-top: 10px; font-size: 0.8rem; color: #64748b;">
                Result fused from acoustic and semantic models.
            </div>
        `;
    } else {
        // No transcription available — acoustic-only result.
        nlpDiv.innerHTML = `
            <div class="divider">Context Analysis</div>
            <p style="color: #64748b; font-style: italic;">No speech detected or analysis unavailable.</p>
        `;
    }
}
358
+
359
+ // --- Modal & Feedback ---
360
+ closeModal.addEventListener('click', () => resultModal.classList.add('hidden'));
361
+ window.onclick = (event) => {
362
+ if (event.target == resultModal) resultModal.classList.add('hidden');
363
+ };
364
+
365
+ btnCorrect.addEventListener('click', () => {
366
+ submitFeedback(currentPrediction);
367
+ });
368
+
369
+ btnIncorrect.addEventListener('click', () => {
370
+ correctionArea.classList.remove('hidden');
371
+ });
372
+
373
+ submitCorrection.addEventListener('click', () => {
374
+ const selected = document.getElementById('emotionSelect').value;
375
+ if (selected) {
376
+ submitFeedback(selected);
377
+ }
378
+ });
379
+
380
// POST the user's (possibly corrected) label for the last prediction to
// /feedback so the backend can keep the sample for future training.
// Relies on currentTempFilename/currentPrediction set by showResult.
async function submitFeedback(correctLabel) {
    try {
        const response = await fetch('/feedback', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                filename: currentTempFilename,
                original_emotion: currentPrediction,
                correct_emotion: correctLabel
            })
        });

        const res = await response.json();
        if (res.status === 'success') {
            showToast("Feedback saved successfully!", "success");
            resultModal.classList.add('hidden');
        }
    } catch (e) {
        showToast("Failed to save feedback.", "error");
    }
}
frontend/style.css ADDED
@@ -0,0 +1,649 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary: #6366f1;
3
+ --secondary: #a855f7;
4
+ --accent: #ec4899;
5
+ --bg-dark: #0f172a;
6
+ --text-light: #f8fafc;
7
+ --glass-bg: rgba(255, 255, 255, 0.05);
8
+ --glass-border: rgba(255, 255, 255, 0.1);
9
+ --glass-highlight: rgba(255, 255, 255, 0.15);
10
+ --toast-success: #22c55e;
11
+ --toast-error: #ef4444;
12
+ --toast-info: #3b82f6;
13
+ }
14
+
15
+ * {
16
+ margin: 0;
17
+ padding: 0;
18
+ box-sizing: border-box;
19
+ font-family: 'Outfit', sans-serif;
20
+ }
21
+
22
+ body {
23
+ background-color: var(--bg-dark);
24
+ color: var(--text-light);
25
+ height: 100vh;
26
+ display: flex;
27
+ justify-content: center;
28
+ align-items: center;
29
+ overflow: hidden;
30
+ position: relative;
31
+ }
32
+
33
+ /* Dynamic Background Blobs */
34
+ .background-blobs {
35
+ position: absolute;
36
+ top: 0;
37
+ left: 0;
38
+ width: 100%;
39
+ height: 100%;
40
+ z-index: -1;
41
+ filter: blur(80px);
42
+ }
43
+
44
+ .blob {
45
+ position: absolute;
46
+ border-radius: 50%;
47
+ opacity: 0.6;
48
+ animation: float 10s infinite ease-in-out;
49
+ }
50
+
51
+ .blob-1 {
52
+ width: 400px;
53
+ height: 400px;
54
+ background: var(--primary);
55
+ top: -50px;
56
+ left: -50px;
57
+ }
58
+
59
+ .blob-2 {
60
+ width: 300px;
61
+ height: 300px;
62
+ background: var(--secondary);
63
+ bottom: 50px;
64
+ right: -50px;
65
+ animation-delay: 2s;
66
+ }
67
+
68
+ .blob-3 {
69
+ width: 200px;
70
+ height: 200px;
71
+ background: var(--accent);
72
+ top: 40%;
73
+ left: 40%;
74
+ animation-delay: 4s;
75
+ }
76
+
77
+ @keyframes float {
78
+
79
+ 0%,
80
+ 100% {
81
+ transform: translate(0, 0);
82
+ }
83
+
84
+ 50% {
85
+ transform: translate(20px, -20px);
86
+ }
87
+ }
88
+
89
+ /* Glass Container */
90
+ .glass-container {
91
+ background: var(--glass-bg);
92
+ backdrop-filter: blur(16px);
93
+ -webkit-backdrop-filter: blur(16px);
94
+ border: 1px solid var(--glass-border);
95
+ border-radius: 24px;
96
+ padding: 3rem;
97
+ width: 90%;
98
+ max-width: 500px;
99
+ text-align: center;
100
+ box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
101
+ }
102
+
103
+ .glass-card {
104
+ background: var(--glass-bg);
105
+ backdrop-filter: blur(16px);
106
+ -webkit-backdrop-filter: blur(16px);
107
+ border: 1px solid var(--glass-border);
108
+ box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
109
+ }
110
+
111
+ header h1 {
112
+ font-size: 3rem;
113
+ margin-bottom: 0.5rem;
114
+ }
115
+
116
+ .highlight {
117
+ background: linear-gradient(45deg, var(--primary), var(--accent));
118
+ background-clip: text;
119
+ -webkit-background-clip: text;
120
+ -webkit-text-fill-color: transparent;
121
+ }
122
+
123
+ header p {
124
+ color: #cbd5e1;
125
+ margin-bottom: 2rem;
126
+ }
127
+
128
+ /* Mic Button */
129
+ .mic-button {
130
+ width: 100px;
131
+ height: 100px;
132
+ border-radius: 50%;
133
+ border: none;
134
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
135
+ color: white;
136
+ font-size: 2.5rem;
137
+ cursor: pointer;
138
+ box-shadow: 0 0 20px rgba(99, 102, 241, 0.5);
139
+ transition: transform 0.2s, box-shadow 0.2s;
140
+ outline: none;
141
+ }
142
+
143
+ .mic-button:hover {
144
+ transform: scale(1.05);
145
+ box-shadow: 0 0 30px rgba(99, 102, 241, 0.7);
146
+ }
147
+
148
+ .mic-button:active,
149
+ .mic-button.recording {
150
+ transform: scale(0.95);
151
+ box-shadow: 0 0 10px rgba(99, 102, 241, 0.8);
152
+ background: var(--accent);
153
+ }
154
+
155
+ .mic-button.recording {
156
+ animation: pulse 1.5s infinite;
157
+ }
158
+
159
+ @keyframes pulse {
160
+ 0% {
161
+ box-shadow: 0 0 0 0 rgba(236, 72, 153, 0.7);
162
+ }
163
+
164
+ 70% {
165
+ box-shadow: 0 0 0 20px rgba(236, 72, 153, 0);
166
+ }
167
+
168
+ 100% {
169
+ box-shadow: 0 0 0 0 rgba(236, 72, 153, 0);
170
+ }
171
+ }
172
+
173
+ /* Visualizer */
174
+ .visualizer {
175
+ display: flex;
176
+ justify-content: center;
177
+ gap: 4px;
178
+ height: 30px;
179
+ align-items: center;
180
+ margin-top: 10px;
181
+ }
182
+
183
+ .visualizer.hidden {
184
+ visibility: hidden;
185
+ }
186
+
187
+ .bar {
188
+ width: 4px;
189
+ height: 10px;
190
+ background: white;
191
+ border-radius: 2px;
192
+ animation: equalise 0.5s infinite;
193
+ animation-play-state: paused;
194
+ }
195
+
196
+ .recording~.visualizer .bar {
197
+ animation-play-state: running;
198
+ }
199
+
200
+ @keyframes equalise {
201
+
202
+ 0%,
203
+ 100% {
204
+ height: 10px;
205
+ }
206
+
207
+ 50% {
208
+ height: 25px;
209
+ }
210
+ }
211
+
212
+ .bar:nth-child(1) {
213
+ animation-delay: 0.1s;
214
+ }
215
+
216
+ .bar:nth-child(2) {
217
+ animation-delay: 0.2s;
218
+ }
219
+
220
+ .bar:nth-child(3) {
221
+ animation-delay: 0.3s;
222
+ }
223
+
224
+ .bar:nth-child(4) {
225
+ animation-delay: 0.2s;
226
+ }
227
+
228
+ .bar:nth-child(5) {
229
+ animation-delay: 0.1s;
230
+ }
231
+
232
+ /* Upload Area */
233
+ .divider {
234
+ margin: 2rem 0;
235
+ position: relative;
236
+ color: #64748b;
237
+ }
238
+
239
+ .divider::before,
240
+ .divider::after {
241
+ content: '';
242
+ position: absolute;
243
+ top: 50%;
244
+ width: 40%;
245
+ height: 1px;
246
+ background: #475569;
247
+ }
248
+
249
/* Divider halves: one line on each side of the divider label. */
.divider::before { left: 0; }
.divider::after { right: 0; }

/* Drag-and-drop upload zone. */
.upload-area { border: 2px dashed #475569; border-radius: 12px; padding: 1.5rem; transition: all 0.3s; cursor: pointer; }
.upload-area:hover,
.upload-area.dragover { border-color: var(--secondary); background: rgba(168, 85, 247, 0.1); }
.upload-area i { font-size: 2rem; color: #94a3b8; margin-bottom: 0.5rem; }

/* Outlined secondary button. */
.btn-secondary { margin-top: 1rem; padding: 0.5rem 1.5rem; border-radius: 8px; border: 1px solid var(--primary); background: transparent; color: var(--primary); cursor: pointer; transition: 0.2s; }
.btn-secondary:hover { background: rgba(99, 102, 241, 0.2); }

/* Modal overlay and dialog box. */
.modal { position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(0, 0, 0, 0.7); display: flex; justify-content: center; align-items: center; z-index: 100; opacity: 1; transition: opacity 0.3s; }
.modal.hidden { opacity: 0; pointer-events: none; }
.modal-content { background: #1e293b; padding: 2rem; border-radius: 20px; text-align: center; width: 90%; max-width: 400px; position: relative; border: 1px solid var(--glass-border); }
.close-btn { position: absolute; top: 10px; right: 15px; background: none; border: none; color: white; font-size: 1.5rem; cursor: pointer; }

/* Result emoji with a springy pop-in. */
.emoji-display { font-size: 5rem; margin-bottom: 1rem; animation: pop 0.5s cubic-bezier(0.175, 0.885, 0.32, 1.275); }

@keyframes pop {
    0% { transform: scale(0); }
    100% { transform: scale(1); }
}

/* Prediction feedback controls. */
.feedback-buttons { display: flex; justify-content: center; gap: 1rem; margin-top: 1rem; }
.btn-feedback { padding: 0.5rem 1rem; border: none; border-radius: 8px; cursor: pointer; font-weight: 600; }
.correct { background: #22c55e; color: white; }
.incorrect { background: #ef4444; color: white; }
#correctionArea { margin-top: 1rem; display: flex; flex-direction: column; gap: 0.5rem; }
#emotionSelect { padding: 0.5rem; border-radius: 6px; border: 1px solid #475569; background: #0f172a; color: white; }

/* Toast notifications: fixed stack in the top-right corner. */
#toast-container { position: fixed; top: 20px; right: 20px; z-index: 1000; display: flex; flex-direction: column; gap: 10px; }
.toast { background: rgba(15, 23, 42, 0.8); backdrop-filter: blur(12px); -webkit-backdrop-filter: blur(12px); border: 1px solid var(--glass-border); color: white; padding: 12px 24px; border-radius: 12px; display: flex; align-items: center; gap: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); animation: slideIn 0.3s ease-out forwards; min-width: 300px; overflow: hidden; position: relative; }

/* Coloured status strip on the toast's left edge. */
.toast::before { content: ''; position: absolute; left: 0; top: 0; bottom: 0; width: 4px; background: var(--toast-info); }
.toast.success::before { background: var(--toast-success); }
.toast.error::before { background: var(--toast-error); }
.toast.info::before { background: var(--toast-info); }
.toast.hide { animation: slideOut 0.3s ease-in forwards; }

@keyframes slideIn {
    from { transform: translateX(100%); opacity: 0; }
    to { transform: translateX(0); opacity: 1; }
}

@keyframes slideOut {
    from { transform: translateX(0); opacity: 1; }
    to { transform: translateX(100%); opacity: 0; }
}

/* Shared button polish. */
.btn-primary,
.btn-secondary,
.btn-feedback { transition: all 0.2s ease; font-weight: 500; letter-spacing: 0.5px; }
.btn-primary:active,
.btn-secondary:active { transform: scale(0.98); }

/* Later override of the earlier .modal-content: darker glassy look
   (declaration order matters here — keep this after the first one). */
.modal-content { background: rgba(15, 23, 42, 0.95); box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5); border: 1px solid var(--glass-highlight); }

/* Training terminal. */
.terminal-card { background: #0c0c0c; border: 1px solid #333; width: 90%; max-width: 700px; height: 400px; padding: 0; display: flex; flex-direction: column; overflow: hidden; box-shadow: 0 0 40px rgba(0, 255, 0, 0.1); }
.terminal-header { background: #1a1a1a; padding: 10px 15px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #333; }
.terminal-title { color: #0f0; font-family: 'Consolas', monospace; font-size: 0.9rem; display: flex; align-items: center; gap: 8px; }
.terminal-body { flex: 1; padding: 15px; background: #000; overflow-y: auto; text-align: left; font-family: 'Consolas', monospace; font-size: 0.9rem; color: #e0e0e0; }
.log-line { display: block; margin-bottom: 4px; word-wrap: break-word; }
.log-line::before { content: '> '; color: #0f0; }

/* Terminal scrollbar (WebKit-only selectors). */
.terminal-body::-webkit-scrollbar { width: 8px; }
.terminal-body::-webkit-scrollbar-track { background: #111; }
.terminal-body::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
.terminal-body::-webkit-scrollbar-thumb:hover { background: #444; }

.terminal-header .close-btn { position: static; font-size: 1.2rem; color: #666; }
.terminal-header .close-btn:hover { color: #fff; }

/* Text-emotion (NLP) result panel. */
.nlp-container { margin-top: 1rem; text-align: left; background: rgba(0, 0, 0, 0.2); padding: 1rem; border-radius: 12px; }
.transcription { font-style: italic; color: #e2e8f0; margin-bottom: 0.5rem; border-left: 3px solid var(--secondary); padding-left: 10px; }
.nlp-emotion { font-weight: 500; }
.confidence-small { font-size: 0.8rem; color: #94a3b8; }

/* Admin password field. */
.password-input { width: 100%; padding: 10px; border-radius: 10px; border: 1px solid rgba(255, 255, 255, 0.2); background: rgba(255, 255, 255, 0.05); color: white; font-size: 1rem; outline: none; transition: all 0.3s ease; }
.password-input:focus { border-color: var(--secondary); background: rgba(255, 255, 255, 0.1); }

/* --- NEW WATERMARK CODE (3D Glassy) --- */
.watermark { position: fixed; bottom: 20px; right: 20px; padding: 12px 24px; background: rgba(255, 255, 255, 0.05); backdrop-filter: blur(12px); -webkit-backdrop-filter: blur(12px); border: 1px solid rgba(255, 255, 255, 0.1); border-top: 1px solid rgba(255, 255, 255, 0.3); border-left: 1px solid rgba(255, 255, 255, 0.3); border-radius: 24px; box-shadow: 10px 10px 20px rgba(0, 0, 0, 0.3), -5px -5px 15px rgba(255, 255, 255, 0.02); color: rgba(255, 255, 255, 0.9); font-size: 0.9rem; font-weight: 500; pointer-events: auto; /* allow hover */ z-index: 1000; font-family: 'Outfit', sans-serif; transform: perspective(1000px) rotateX(5deg) rotateY(-5deg); transition: all 0.4s ease; text-shadow: 0 2px 4px rgba(0, 0, 0, 0.5); letter-spacing: 0.5px; }
.watermark:hover { transform: perspective(1000px) rotateX(0deg) rotateY(0deg) scale(1.05); background: rgba(255, 255, 255, 0.1); color: #fff; box-shadow: 0 0 30px rgba(255, 255, 255, 0.1), 0 10px 20px rgba(0, 0, 0, 0.4); cursor: default; }
frontend/style.css_append ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* ===== Terminal Toast (appended styles) ===== */

/* Wide toast variant that stacks its children vertically. */
.toast-terminal { min-width: 400px; flex-direction: column; align-items: flex-start; gap: 8px; background: rgba(15, 23, 42, 0.95); }

/* Scrollable green-on-black log window embedded in the toast. */
.terminal-window { width: 100%; height: 200px; background: #000; color: #0f0; font-family: 'Courier New', Courier, monospace; font-size: 0.8rem; padding: 10px; border-radius: 6px; overflow-y: auto; border: 1px solid #333; white-space: pre-wrap; box-shadow: inset 0 0 10px rgba(0,0,0,0.5); }

/* WebKit-only scrollbar styling for the log window. */
.terminal-window::-webkit-scrollbar { width: 8px; }
.terminal-window::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }

/* NOTE(review): `.terminal-header` is also declared in the main stylesheet's
   training-terminal section; being later in the cascade, this declaration
   layers on top of / overrides that one — confirm the overlap is intended. */
.terminal-header { font-weight: bold; color: #fff; margin-bottom: 5px; display: flex; justify-content: space-between; width: 100%; }
frontend/test_prompt.html ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html>

<body>
    <!-- Minimal manual check that window.prompt works in this browser. -->
    <button id="testBtn">Test Prompt</button>
    <script>
        // On click: show a prompt with a default value, then echo whatever
        // the user typed (or null on cancel) back via alert.
        var button = document.getElementById('testBtn');
        button.addEventListener('click', function () {
            var result = prompt("Test Prompt", "default");
            alert("You entered: " + result);
        });
    </script>
</body>

</html>
model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:970a38d43836d6c44f6421ada2331151466d44a566a8dcc241f0a766ecfa4e77
3
+ size 36193352
run_app.ps1 ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# run_app.ps1 — start the VocalVibe FastAPI server from the project venv.
# Fail fast on any error so a broken environment is reported immediately.
$ErrorActionPreference = "Stop"

Write-Host "Starting VocalVibe Application..." -ForegroundColor Cyan

# The app requires a pre-built virtual environment; refuse to start without it.
if (-not (Test-Path "venv")) {
    Write-Host "Virtual environment not found. Please run the setup first." -ForegroundColor Red
    exit 1
}

# Activate venv and run uvicorn.
Write-Host "Activating environment and starting server..." -ForegroundColor Green
# FIX: message previously read "App will differ at:" — corrected wording.
Write-Host "App will be available at: http://localhost:8000" -ForegroundColor Yellow

# Make the backend package importable and prefer the venv's executables on PATH.
$env:PYTHONPATH = "backend"
$env:Path = "$PSScriptRoot\venv\Scripts;$env:Path"
.\venv\Scripts\uvicorn backend.main:app --reload --host 0.0.0.0 --port 8000