aniruddhakumarpaul commited on
Commit
b0f32b1
·
0 Parent(s):

Initial commit - VocalVibe App

Browse files
.gitattributes ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.h5 filter=lfs diff=lfs merge=lfs -text
2
+ model.h5 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Virtual Environment
7
+ venv/
8
+ env/
9
+ .env
10
+
11
+ # Data
12
+ data sets/
13
+ data/
14
+ *.wav
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+
20
+ # OS
21
+ Thumbs.db
22
+ Desktop.ini
README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # VocalVibe - Emotion Recognition App
2
+
3
+ ## How to Run
4
+ 1. Open a terminal in the project directory.
5
+ 2. Run the startup script:
6
+ ```powershell
7
+ .\run_app.ps1
8
+ ```
9
+ *Note: If you encounter permission errors, use:*
10
+ ```powershell
11
+ PowerShell.exe -ExecutionPolicy Bypass -File .\run_app.ps1
12
+ ```
13
+ 3. The application will start at [http://localhost:8000](http://localhost:8000).
14
+
15
+ ## How to Stop
16
+ 1. Go to the terminal window where the server is running.
17
+ 2. Press **Ctrl + C** to stop the process.
18
+ 3. If prompted "Terminate batch job (Y/N)?", type `Y` and press Enter.
19
+
20
+ ## Project Structure
21
+ - `backend/`: FastAPI server and emotion recognition logic.
22
+ - `frontend/`: HTML/CSS/JS user interface.
23
+ - `data/`: Directory for storing audio samples and feedback.
backend/evaluate_model.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+ import numpy as np
5
+ import pickle
6
+ from sklearn.model_selection import train_test_split
7
+ from sklearn.metrics import classification_report, accuracy_score, f1_score
8
+ from tensorflow.keras.models import load_model
9
+ from tensorflow.keras.utils import to_categorical
10
+
11
+ # Data Paths
12
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__)) # This is backend/
13
+ ROOT_DIR = os.path.dirname(BASE_DIR) # This is project root
14
+ DATA_DIR = os.path.join(ROOT_DIR, "data sets")
15
+ MODEL_PATH = os.path.join(ROOT_DIR, "model.h5")
16
+ ENCODER_PATH = os.path.join(ROOT_DIR, "encoder.pkl")
17
+ FEATURES_PATH = os.path.join(DATA_DIR, "features_cache.npy")
18
+ LABELS_PATH = os.path.join(DATA_DIR, "labels_cache.npy")
19
+
def evaluate():
    """Evaluate the saved model on the cached feature set.

    Reloads the cached features/labels, re-creates the exact train/test
    split used during training (same random_state and stratify), then
    prints accuracy, weighted F1 and a per-class report.
    """
    print("Loading data from cache...")
    if not os.path.exists(FEATURES_PATH) or not os.path.exists(LABELS_PATH):
        print("Error: Cached features not found. Please train the model first.")
        return

    X = np.load(FEATURES_PATH)
    y = np.load(LABELS_PATH)

    print(f"Loaded {len(X)} samples.")

    # Guard against missing artifacts before trying to open them.
    if not os.path.exists(ENCODER_PATH) or not os.path.exists(MODEL_PATH):
        print("Error: Model artifacts not found. Please train the model first.")
        return

    print("Loading Label Encoder...")
    with open(ENCODER_PATH, 'rb') as f:
        le = pickle.load(f)

    # Encode labels. Use transform (NOT fit_transform): the encoder was
    # already fitted during training, and refitting here could silently
    # remap class indices if the cached labels ever differ.
    y_encoded = to_categorical(le.transform(y))

    # Split (same random_state/stratify as training => same test set)
    print("Splitting data (random_state=42)...")
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y)

    print(f"Test Set Size: {len(X_test)}")

    print("Loading Model...")
    model = load_model(MODEL_PATH)

    print("Evaluating...")
    # Predict class probabilities, then collapse to hard label indices
    y_pred_prob = model.predict(X_test, verbose=0)
    y_pred = np.argmax(y_pred_prob, axis=1)
    y_true = np.argmax(y_test, axis=1)

    # Headline metrics
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')

    print("\n" + "="*30)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"F1 Score (Weighted): {f1:.4f}")
    print("="*30 + "\n")

    # Per-class precision/recall/F1
    target_names = le.classes_
    print("Classification Report:")
    print(classification_report(y_true, y_pred, target_names=target_names))

if __name__ == "__main__":
    evaluate()
backend/main.py ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import shutil
4
+ from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
5
+ from fastapi.staticfiles import StaticFiles
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from pydantic import BaseModel
8
+ from .model_manager import EmotionClassifier
9
+ from .nlp_manager import NLPManager
10
+ from .utils import convert_to_wav
11
+
12
+ # ... imports ...
13
+
14
+ app = FastAPI(title="Speech Emotion Recognition API")
15
+
16
+ # ... middleware ...
17
+
18
+ # Initialize Models
19
+ print("Loading Audio Model...")
20
+ classifier = EmotionClassifier()
21
+ print("Loading NLP Model...")
22
+ nlp_manager = NLPManager()
23
+
24
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25
+ DATA_DIR = os.path.join(BASE_DIR, "../data sets")
26
+ if not os.path.exists(DATA_DIR):
27
+ os.makedirs(DATA_DIR)
28
+
29
+ # Mount Frontend: the catch-all "/" static mount must be registered after
+ # every API route, so the actual mount happens at the end of this file.
31
+
32
+
33
+
def fuse_predictions(audio_dist, text_emotion, audio_confidence):
    """
    Combines Acoustic (Audio) and Semantic (Text) emotion probabilities.

    Args:
        audio_dist: dict {label: probability} from the audio classifier.
        text_emotion: dict with an 'all_scores' list of {'label', 'score'}
            items from the NLP classifier, or None when no text was found.
        audio_confidence: top-class probability of the audio prediction,
            used to weight the two modalities dynamically.

    Returns:
        dict with 'label', 'confidence', 'is_fusion' and (when fused)
        'distribution' holding the blended per-label scores.
    """
    # Map NLP label vocabulary onto the audio vocabulary (audio is the
    # master list; text labels with no audio counterpart are ignored).
    LABEL_MAP = {
        'joy': 'happiness',
        'sad': 'sadness',
        'angry': 'anger',
        'surprised': 'surprise'
    }

    # No usable text signal -> fall back to the pure audio result.
    if not text_emotion or not text_emotion.get('all_scores'):
        return {"label": max(audio_dist, key=audio_dist.get), "confidence": audio_confidence, "is_fusion": False}

    # Dynamic weighting: trust whichever modality is more certain.
    w_audio = 0.6
    w_text = 0.4
    if audio_confidence > 0.80:
        # Audio is strong -> trust audio (tone dominates)
        w_audio = 0.9
        w_text = 0.1
    elif audio_confidence < 0.55:
        # Audio is weak/uncertain -> trust text more
        w_audio = 0.4
        w_text = 0.6

    # Convert the text score list into {audio_label: score}
    text_scores_map = {}
    for item in text_emotion['all_scores']:
        l = LABEL_MAP.get(item['label'], item['label'])
        text_scores_map[l] = item['score']

    # Linear blend over the audio label set. Labels that exist only in
    # the audio vocabulary (e.g. 'calm') simply get a 0.0 text score.
    final_scores = {}
    for label in audio_dist:
        s_audio = audio_dist.get(label, 0.0)
        s_text = text_scores_map.get(label, 0.0)
        final_scores[label] = (s_audio * w_audio) + (s_text * w_text)

    # Pick the winner and re-normalize its confidence (nicer for the UI;
    # ranking is unaffected).
    winner_label = max(final_scores, key=final_scores.get)
    winner_score = final_scores[winner_label]
    total_score = sum(final_scores.values())
    normalized_confidence = winner_score / total_score if total_score > 0 else 0.0

    return {
        "label": winner_label,
        "confidence": normalized_confidence,
        "is_fusion": True,
        "distribution": final_scores
    }
class FeedbackRequest(BaseModel):
    # Request body for POST /feedback: the user's correction of a prediction.
    filename: str  # Temporary filename previously returned by /predict
    correct_emotion: str  # Label the user says is correct
    original_emotion: str  # Label the model originally predicted
@app.get("/api/health")
def health_check():
    """Liveness probe: confirms the API process is up and responding."""
    payload = {"status": "ok", "message": "SER API is running"}
    return payload
@app.post("/predict")
async def predict_audio(file: UploadFile = File(...)):
    """
    Receives an audio file, saves it temporarily, and returns the fused
    (audio + text) emotion prediction.

    The temp file is intentionally kept on disk so /feedback can later
    promote it into the dataset; its name is echoed as 'temp_filename'.
    """
    # Security fix: basename() strips any path components from the
    # client-supplied filename, so an upload named "../../x.wav" cannot
    # escape DATA_DIR.
    safe_name = os.path.basename(file.filename or "upload.wav")
    temp_filename = f"temp_{int(time.time())}_{safe_name}"
    temp_path = os.path.join(DATA_DIR, temp_filename) if False else os.path.join(DATA_DIR, temp_filename)

    with open(temp_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # Always convert to standard PCM WAV (required for SpeechRecognition):
    # browser MediaRecorder may send WebM bytes under a .wav extension.
    processed_wav_path = temp_path + "_processed.wav"
    if convert_to_wav(temp_path, processed_wav_path):
        # Conversion succeeded: drop the raw upload, keep the PCM file
        os.remove(temp_path)
        temp_path = processed_wav_path
    else:
        # Fallback: conversion failed (e.g. missing ffmpeg); try the raw
        # upload, but transcription may fail if it is not PCM WAV.
        print("Warning: Audio conversion failed. NLP might fail if format is not PCM WAV.")

    try:
        # 1. Acoustic analysis
        audio_result = classifier.predict_emotion(temp_path)

        # 2. Transcription + text emotion
        nlp_result = nlp_manager.process(temp_path)

        # 3. Hybrid fusion (best-effort: fall back to audio on any error)
        try:
            hybrid_result = fuse_predictions(
                audio_result["distribution"],
                nlp_result.get("text_emotion"),
                audio_result["confidence"]
            )
        except Exception as e:
            print(f"Fusion error: {e}")
            # Fallback to audio if fusion fails
            hybrid_result = {
                "label": audio_result["label"],
                "confidence": audio_result["confidence"],
                "is_fusion": False
            }

        return {
            "prediction": hybrid_result["label"],  # Main result
            "confidence": hybrid_result["confidence"],
            "audio_emotion": audio_result,
            "nlp_analysis": nlp_result,
            "hybrid_analysis": hybrid_result,
            "temp_filename": os.path.basename(temp_path)
        }
    except ValueError as e:
        if "Model not loaded" in str(e):
            # Cold start: no trained model yet. Return a neutral stub so
            # the UI can still collect labeled samples via /feedback.
            return {
                "prediction": "neutral",
                "confidence": 0.0,
                "audio_emotion": {"label": "neutral", "confidence": 0.0},
                "nlp_analysis": {"transcription": None, "text_emotion": None},
                "temp_filename": os.path.basename(temp_path),
                "is_fallback": True
            }
        raise HTTPException(status_code=500, detail=str(e))
    except Exception as e:
        print(f"Prediction Error: {e}")
        raise HTTPException(status_code=500, detail="Prediction failed")
210
+ import glob
211
+
@app.post("/feedback")
async def submit_feedback(feedback: FeedbackRequest, background_tasks: BackgroundTasks):
    """
    User corrects the prediction. We rename/move the temp file to be permanently part of the dataset.
    Auto-retrains every 5000 user uploads.
    """
    # Security fix: basename() blocks path traversal. 'filename' comes
    # straight from the client, so "../../etc/x" must not resolve
    # outside DATA_DIR.
    temp_path = os.path.join(DATA_DIR, os.path.basename(feedback.filename))

    if not os.path.exists(temp_path):
        raise HTTPException(status_code=404, detail="Original temp file not found")

    # Promote to a permanent dataset file. The label is encoded in the
    # name: user_upload_{emotion}_{timestamp}.wav
    timestamp = int(time.time())
    new_filename = f"user_upload_{feedback.correct_emotion}_{timestamp}.wav"
    new_path = os.path.join(DATA_DIR, new_filename)

    try:
        shutil.move(temp_path, new_path)

        # Auto-retraining check: count user-contributed files.
        # glob is fine at this scale; a persisted counter would be needed
        # for millions of files.
        user_files = glob.glob(os.path.join(DATA_DIR, "user_upload_*.wav"))
        count = len(user_files)

        # Retrain every 5000 uploads
        if count > 0 and count % 5000 == 0:
            print(f"Auto-Retraining Threshold Reached ({count} files). Starting training...")
            global training_logs
            training_logs = []
            background_tasks.add_task(classifier.train_model, DATA_DIR, log_message)
            return {"status": "success", "saved_as": new_filename, "auto_train": True}

        return {"status": "success", "saved_as": new_filename}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to save feedback: {e}")
# Global training logs
# Shared buffer of progress lines produced by background training tasks;
# appended via log_message and read (never mutated) by GET /logs.
training_logs = []

def log_message(msg: str):
    # Callback handed to classifier.train_model to capture progress lines.
    training_logs.append(msg)
@app.get("/logs")
def get_logs(after: int = 0):
    """Return training log lines starting at index `after`, together
    with the index the client should poll from next time."""
    start = max(after, 0)
    return {"logs": training_logs[start:], "next_index": len(training_logs)}
class TrainRequest(BaseModel):
    # Request body for POST /train: admin password gating retraining.
    password: str
@app.post("/train")
async def trigger_training(request: TrainRequest, background_tasks: BackgroundTasks):
    """
    Triggers model retraining in the background. Requires Admin Password.

    The password is read from the ADMIN_PASSWORD environment variable,
    falling back to the historical default so existing deployments keep
    working. NOTE(security): set ADMIN_PASSWORD in production — keeping a
    credential hard-coded in source control is unsafe.
    """
    admin_password = os.environ.get("ADMIN_PASSWORD", "ani24680")
    if request.password != admin_password:
        raise HTTPException(status_code=401, detail="Unauthorized: Incorrect Admin Password")

    # Reset the shared log buffer so /logs only shows this run.
    global training_logs
    training_logs = []

    # Training runs in the background; the current model (if any) keeps
    # serving predictions until the new artifacts are saved.
    background_tasks.add_task(classifier.train_model, DATA_DIR, log_message)
    return {"status": "training_started", "message": "Model is training in background"}
# Mount Static Files (Frontend)
# Registered last so the API routes above take precedence over the
# catch-all "/" static mount.
FRONTEND_DIR = os.path.join(BASE_DIR, "../frontend")
app.mount("/", StaticFiles(directory=FRONTEND_DIR, html=True), name="static")
backend/model_manager.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import librosa
4
+ import tensorflow as tf
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import LabelEncoder
7
+ from tensorflow.keras.utils import to_categorical
8
+ from tensorflow.keras.models import Sequential, load_model
9
+ from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
10
+ from tensorflow.keras.optimizers import Adam
11
+ from tensorflow.keras.callbacks import EarlyStopping
12
+ import pickle
13
+ import joblib
14
+ from joblib import Parallel, delayed
15
+
16
+ # Parameters
17
+ MAX_PAD_LEN = 174
18
+ N_MFCC = 40
19
+ DURATION = 3
20
+ SAMPLE_RATE = 22050
21
+
22
+ MODEL_PATH = "model.h5"
23
+ ENCODER_PATH = "encoder.pkl"
24
+
def extract_features_static(file_path, duration=DURATION, sample_rate=SAMPLE_RATE, n_mfcc=N_MFCC, max_pad_len=MAX_PAD_LEN):
    """
    Static helper for feature extraction to allow pickling for joblib parallel processing.

    Loads up to `duration` seconds of audio, computes an MFCC matrix and
    pads/truncates its time axis to `max_pad_len` frames. Returns the
    transposed (time, coeff) array, or None when the file cannot be read.
    """
    try:
        # Normalize to an absolute path before handing it to librosa
        file_path = os.path.normpath(os.path.abspath(file_path))

        # res_type='kaiser_fast' trades a little quality for speed
        signal, rate = librosa.load(file_path, res_type='kaiser_fast', duration=duration, sr=sample_rate)

        coeffs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)

        # Fix the time axis to exactly max_pad_len frames: zero-pad short
        # clips, truncate long ones.
        deficit = max_pad_len - coeffs.shape[1]
        if deficit > 0:
            coeffs = np.pad(coeffs, pad_width=((0, 0), (0, deficit)), mode='constant')
        else:
            coeffs = coeffs[:, :max_pad_len]
        return coeffs.T
    except Exception as e:
        print(f"Error extracting features from {file_path}: {e}")
        return None
+ class EmotionClassifier:
49
+ def __init__(self):
50
+ self.model = None
51
+ self.le = LabelEncoder()
52
+ self.is_loaded = False
53
+ self.load_artifacts()
54
+
55
+ def load_artifacts(self):
56
+ if os.path.exists(MODEL_PATH) and os.path.exists(ENCODER_PATH):
57
+ try:
58
+ self.model = load_model(MODEL_PATH)
59
+ with open(ENCODER_PATH, 'rb') as f:
60
+ self.le = pickle.load(f)
61
+ self.is_loaded = True
62
+ print("Model and encoder loaded successfully.")
63
+ except Exception as e:
64
+ print(f"Failed to load artifacts: {e}")
65
+ else:
66
+ print("No pre-trained model found. System ready for training.")
67
+
68
+ def extract_features(self, file_path):
69
+ """Wrapper for static extraction method."""
70
+ return extract_features_static(file_path)
71
+
72
+ def train_model(self, data_path, log_callback=None):
73
+ """Trains the model from scratch using data in data_path."""
74
+ def log(msg):
75
+ if log_callback:
76
+ log_callback(msg)
77
+ else:
78
+ print(msg)
79
+
80
+ # Cache paths
81
+ features_cache_path = os.path.join(data_path, "features_cache.npy")
82
+ labels_cache_path = os.path.join(data_path, "labels_cache.npy")
83
+
84
+ X = None
85
+ y = None
86
+
87
+ # Check cache
88
+ if os.path.exists(features_cache_path) and os.path.exists(labels_cache_path):
89
+ log("Found cached features. Loading from disk...")
90
+ try:
91
+ X = np.load(features_cache_path)
92
+ y = np.load(labels_cache_path)
93
+ log(f"Loaded {len(X)} cached samples.")
94
+ except Exception as e:
95
+ log(f"Failed to load cache: {e}. Recomputing...")
96
+ X = None
97
+ y = None
98
+
99
+ if X is None or y is None:
100
+ files = []
101
+ # Walk through directory
102
+ for root, _, filenames in os.walk(data_path):
103
+ for f in filenames:
104
+ if f.endswith('.wav'):
105
+ files.append(os.path.join(root, f))
106
+
107
+ if not files:
108
+ log("DEBUG: No .wav files found in os.walk")
109
+ raise ValueError("No .wav files found for training.")
110
+
111
+ from .utils import get_label_from_filename
112
+
113
+ log(f"Processing {len(files)} files for training utilizing parallel processing...")
114
+
115
+ # Helper to process a single file and return (features, label)
116
+ def process_file(file):
117
+ lbl = get_label_from_filename(file)
118
+ if lbl:
119
+ feat = extract_features_static(file)
120
+ if feat is not None:
121
+ return (feat, lbl)
122
+ return None
123
+
124
+ # Run in parallel
125
+ # n_jobs=-1 uses all available cores
126
+ results = Parallel(n_jobs=-1, verbose=5)(delayed(process_file)(f) for f in files)
127
+
128
+ # Filter None results
129
+ valid_results = [r for r in results if r is not None]
130
+
131
+ if not valid_results:
132
+ log("CRITICAL: No features extracted successfully!")
133
+ raise ValueError("No features extracted. Check files and labels.")
134
+
135
+ log(f"Successfully processed {len(valid_results)}/{len(files)} files.")
136
+
137
+ features = [r[0] for r in valid_results]
138
+ labels = [r[1] for r in valid_results]
139
+
140
+ X = np.array(features, dtype='float32')
141
+ y = np.array(labels)
142
+
143
+ # Save cache
144
+ log("Saving features to cache...")
145
+ np.save(features_cache_path, X)
146
+ np.save(labels_cache_path, y)
147
+
148
+ # Encode labels
149
+ y_encoded = to_categorical(self.le.fit_transform(y))
150
+
151
+ # Split
152
+ X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y) # stratify=y handles class imbalance better
153
+
154
+ # Build Model
155
+ self.model = Sequential([
156
+ Conv1D(128, kernel_size=5, padding="same", activation="relu", input_shape=(X_train.shape[1], X_train.shape[2])),
157
+ MaxPooling1D(pool_size=2),
158
+ BatchNormalization(),
159
+ Conv1D(256, kernel_size=5, padding="same", activation="relu"),
160
+ MaxPooling1D(pool_size=2),
161
+ BatchNormalization(),
162
+ Dropout(0.3),
163
+ Flatten(),
164
+ Dense(256, activation='relu'),
165
+ Dropout(0.4),
166
+ Dense(y_encoded.shape[1], activation='softmax')
167
+ ])
168
+
169
+ self.model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])
170
+
171
+ early_stop = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
172
+
173
+ log("Starting training...")
174
+
175
+ class LogCallback(tf.keras.callbacks.Callback):
176
+ def on_epoch_end(self, epoch, logs=None):
177
+ log(f"Epoch {epoch+1}: loss={logs['loss']:.4f}, acc={logs['accuracy']:.4f}, val_loss={logs['val_loss']:.4f}")
178
+
179
+ self.model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stop, LogCallback()], verbose=0)
180
+
181
+ # Save artifacts
182
+ self.model.save(MODEL_PATH)
183
+ with open(ENCODER_PATH, 'wb') as f:
184
+ pickle.dump(self.le, f)
185
+
186
+ self.is_loaded = True
187
+ log("Training complete and model saved.")
188
+ return {"accuracy": self.model.evaluate(X_test, y_test)[1]}
189
+
190
+ def predict_emotion(self, file_path):
191
+ if not self.is_loaded:
192
+ raise ValueError("Model not loaded. Train the model first.")
193
+
194
+ mfcc = self.extract_features(file_path)
195
+ if mfcc is None:
196
+ raise ValueError("Could not extract features.")
197
+
198
+ mfcc = mfcc[np.newaxis, :, :] # Add batch dimension
199
+ prediction = self.model.predict(mfcc, verbose=0)
200
+
201
+ # Get all probabilities
202
+ probs = prediction[0]
203
+ classes = self.le.classes_
204
+
205
+ # Create distribution dict {label: score}
206
+ distribution = {label: float(score) for label, score in zip(classes, probs)}
207
+
208
+ predicted_index = np.argmax(prediction)
209
+ predicted_label = self.le.inverse_transform([predicted_index])[0]
210
+ confidence = float(prediction[0][predicted_index])
211
+
212
+ return {
213
+ "label": predicted_label,
214
+ "confidence": confidence,
215
+ "distribution": distribution
216
+ }
backend/nlp_manager.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import speech_recognition as sr
3
+ import torch
4
+ from transformers import pipeline
5
+ import os
6
+
class NLPManager:
    """Text pipeline: speech-to-text (Google Web Speech API) followed by
    text emotion classification (DistilRoBERTa)."""

    def __init__(self):
        print("Initializing NLP Manager...")
        self.recognizer = sr.Recognizer()

        # Load emotion classification pipeline
        # Using a model fine-tuned for emotion detection
        # Falls back to default cache if downloaded
        # NOTE(review): return_all_scores is deprecated in newer
        # transformers releases (top_k=None is the replacement, with a
        # different output nesting) — confirm the pinned version before
        # changing this.
        print("Loading Sentiment Analysis Model (DistilRoBERTa)...")
        self.classifier = pipeline(
            "text-classification",
            model="j-hartmann/emotion-english-distilroberta-base",
            return_all_scores=True
        )
        print("NLP Manager Ready.")

    def transcribe(self, audio_path):
        """
        Converts audio file to text using Google Web Speech API.

        Returns the transcription string, or None when speech is
        unintelligible, the service is unreachable, or the file cannot
        be read (PCM WAV input is required by sr.AudioFile).
        """
        try:
            with sr.AudioFile(audio_path) as source:
                audio_data = self.recognizer.record(source)
                # recognize_google is free and works well for short clips
                text = self.recognizer.recognize_google(audio_data)
                return text
        except sr.UnknownValueError:
            return None  # Speech is unintelligible
        except sr.RequestError as e:
            print(f"Could not request results from Google Speech Recognition service; {e}")
            return None
        except Exception as e:
            print(f"Transcription error: {e}")
            return None

    def analyze_sentiment(self, text):
        """
        Analyzes the emotion of the text.
        Returns the dominant emotion and confidence, plus all per-label
        scores sorted descending; None for empty input.
        """
        if not text:
            return None

        # Predict; results is a list of dicts:
        # [{'label': 'joy', 'score': 0.9}, ...]
        results = self.classifier(text)[0]

        # Sort by score descending so results[0] is the dominant emotion
        results.sort(key=lambda x: x['score'], reverse=True)

        top_result = results[0]
        return {
            "label": top_result['label'],
            "score": top_result['score'],
            "all_scores": results
        }

    def process(self, audio_path):
        """
        Full pipeline: Audio -> Text -> Emotion.

        Returns {'transcription', 'text_emotion'}; both are None when
        transcription fails.
        """
        transcription = self.transcribe(audio_path)
        if not transcription:
            return {
                "transcription": None,
                "text_emotion": None
            }

        emotion_analysis = self.analyze_sentiment(transcription)
        return {
            "transcription": transcription,
            "text_emotion": emotion_analysis
        }
backend/reproduce_error.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Standalone repro script: try to load one RAVDESS file with librosa
and, on failure, with soundfile directly, printing full tracebacks.

Usage: python reproduce_error.py [path_to_wav]
"""
import os
import sys
import librosa
import traceback
import soundfile as sf

# Path to the specific file; allow overriding on the command line so the
# repro is not tied to one machine's absolute path (old path kept as the
# backward-compatible default).
file_path = sys.argv[1] if len(sys.argv) > 1 else r"c:\Users\aniru\OneDrive\Desktop\EDUVN\data sets\Actor_01\03-01-01-01-01-01-01.wav"

print(f"Testing loading: {file_path}")
print(f"Does file exist? {os.path.exists(file_path)}")

try:
    # Mimic parameters from model_manager.py
    DURATION = 3
    SAMPLE_RATE = 22050
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast', duration=DURATION, sr=SAMPLE_RATE)
    print("Success! Audio loaded.")
    print(f"Shape: {audio.shape}, Sample Rate: {sample_rate}")
except Exception as e:
    print("FAILED to load audio.")
    print(f"Error: {e}")
    traceback.print_exc()

print("-" * 20)
print("Testing soundfile directly...")
try:
    # Bypass librosa to isolate whether the decoder itself is at fault
    data, samplerate = sf.read(file_path)
    print(f"Soundfile read success. Shape: {data.shape}, Rate: {samplerate}")
except Exception as e:
    print(f"Soundfile direct read failed: {e}")
backend/requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ tensorflow
5
+ librosa
6
+ pydub
7
+ numpy
8
+ scikit-learn
9
+ soundfile
10
+ joblib
11
+ resampy
12
+ SpeechRecognition
13
+ transformers
14
+ tf-keras
15
+ torch
backend/test_caching.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+ import shutil
5
+ import numpy as np
6
+ import librosa
7
+ import soundfile as sf
8
+
9
+ # Setup path
10
+ try:
11
+ from .model_manager import EmotionClassifier
12
+ except ImportError:
13
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
14
+ from backend.model_manager import EmotionClassifier
15
+
def create_dummy_wav(path):
    """Write a one-second 440 Hz sine tone to `path` at 22050 Hz."""
    rate = 22050
    timeline = np.linspace(0, 1, rate)
    tone = np.sin(2 * np.pi * 440 * timeline)
    sf.write(path, tone, rate)
def test_caching():
    """Integration check: train_model should write feature caches on the
    first run and load them (instead of re-extracting) on the second."""
    base_dir = os.path.dirname(os.path.abspath(__file__))
    test_data_dir = os.path.join(base_dir, "test_data_cache")

    # Start from a clean fixture directory every run
    if os.path.exists(test_data_dir):
        shutil.rmtree(test_data_dir)
    os.makedirs(test_data_dir)

    # Create dummy files
    print("Creating dummy files...")
    for i in range(10):
        # Filename must imply label: 03-01-01...
        # label is 3rd part: 01=neutral
        name = f"03-01-01-01-01-{i:02d}-01.wav"
        create_dummy_wav(os.path.join(test_data_dir, name))

    classifier = EmotionClassifier()

    logs = []
    def log_callback(msg):
        # Capture training output so we can check it below
        logs.append(msg)
        print(f"[TEST] {msg}")

    print("\n--- RUN 1: Extraction ---")
    classifier.train_model(test_data_dir, log_callback=log_callback)

    # Verify cache created
    if os.path.exists(os.path.join(test_data_dir, "features_cache.npy")):
        print("PASS: Cache file created.")
    else:
        print("FAIL: Cache file NOT created.")

    print("\n--- RUN 2: Caching ---")
    logs.clear()
    classifier.train_model(test_data_dir, log_callback=log_callback)

    # Verify log says "Found cached features"
    if any("Found cached features" in log for log in logs):
        print("PASS: Loaded from cache.")
    else:
        print("FAIL: Did NOT load from cache.")

    # Cleanup
    shutil.rmtree(test_data_dir)

if __name__ == "__main__":
    test_caching()
backend/test_nlp.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+
5
+ # Setup path
6
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
7
+
8
+ try:
9
+ from nlp_manager import NLPManager
10
+ except ImportError:
11
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
12
+ from backend.nlp_manager import NLPManager
13
+
def test_nlp():
    """Smoke test for NLPManager: initialization, sentiment analysis on
    a known-happy sentence, and presence of the transcribe method."""
    print("Testing NLP Manager Integration...")

    try:
        nlp = NLPManager()
        print("PASS: Manager Initialized")
    except Exception as e:
        print(f"FAIL: Initialization Error: {e}")
        return

    # Test Sentiment Analysis
    test_text = "I am so happy that this works!"
    print(f"Analyzing text: '{test_text}'")
    result = nlp.analyze_sentiment(test_text)

    # Accept the label spellings different model variants might emit
    if result and result['label'] in ['joy', 'happiness', 'happy']:
        print(f"PASS: Correctly identified emotion: {result['label']} (Score: {result['score']:.2f})")
    else:
        print(f"WARN: Analysis result: {result}")

    # Test Transcribe (Mocking audio not easy without file, just ensuring method exists)
    if hasattr(nlp, 'transcribe'):
        print("PASS: Transcribe method exists.")

if __name__ == "__main__":
    test_nlp()
backend/test_prediction.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+ import numpy as np
5
+ import soundfile as sf
6
+
7
+ # Setup path to import backend modules
8
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
9
+
10
+ try:
11
+ from backend.model_manager import EmotionClassifier
12
+ except ImportError:
13
+ # Fallback if running from backend dir directly
14
+ from model_manager import EmotionClassifier
15
+
def create_dummy_wav(path):
    """Write a one-second 440 Hz sine tone to `path` at 22050 Hz."""
    rate = 22050
    samples = np.linspace(0, 1, rate)
    waveform = np.sin(2 * np.pi * 440 * samples)
    sf.write(path, waveform, rate)
def test_prediction():
    """Smoke test: run predict_emotion on a synthetic sine-wave file and
    report success/failure (no assertion — console inspection script)."""
    print("Initializing Classifier...")
    try:
        classifier = EmotionClassifier()
    except Exception as e:
        print(f"FAILED to initialize classifier: {e}")
        return

    # Create a dummy file to feed through the full feature pipeline
    test_file = "test_audio_prediction.wav"
    create_dummy_wav(test_file)
    print(f"Created dummy file: {test_file}")

    print("Attempting prediction...")
    try:
        result = classifier.predict_emotion(test_file)
        print("Prediction Success!")
        print(f"Result: {result}")
    except Exception as e:
        print("Prediction FAILED.")
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Always remove the fixture, even on failure
        if os.path.exists(test_file):
            os.remove(test_file)

if __name__ == "__main__":
    test_prediction()
backend/test_training.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import os
3
+ import sys
4
+
5
+ # When running as a module 'backend.test_training', we can use relative imports
6
+ try:
7
+ from .model_manager import EmotionClassifier
8
+ except ImportError:
9
+ # Fallback if run as script, but this will break relative imports in model_manager
10
+ # So we must fix path to import 'backend.model_manager'
11
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
12
+ from backend.model_manager import EmotionClassifier
13
+
14
def test_training():
    """Smoke-test EmotionClassifier.train_model against the bundled dataset.

    Prints progress via a simple log callback and a pass/fail summary;
    failures dump a full traceback rather than raising.
    """
    print("Initializing Classifier...")
    classifier = EmotionClassifier()

    # Dataset lives one level above this file: backend/../data sets
    here = os.path.dirname(os.path.abspath(__file__))
    dataset_path = os.path.join(here, "../data sets")

    print(f"Data directory: {dataset_path}")

    def log_callback(msg):
        # Prefix trainer output so it is distinguishable from test output.
        print(f"[TRAIN] {msg}")

    print("Starting training test...")
    try:
        classifier.train_model(dataset_path, log_callback=log_callback)
    except Exception as e:
        print(f"Training test failed: {e}")
        import traceback
        traceback.print_exc()
    else:
        print("Training test passed!")

if __name__ == "__main__":
    test_training()
backend/utils.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ from pydub import AudioSegment
4
+
5
# RAVDESS filenames encode the emotion as the third dash-separated field;
# this maps that two-digit code onto the project's canonical label names.
RAVDESS_MAP = {
    '01': 'neutral', '02': 'calm', '03': 'happiness', '04': 'sadness',
    '05': 'anger', '06': 'fear', '07': 'disgust', '08': 'surprise'
}

# Canonical emotion labels recognized throughout the project.
KNOWN_EMOTIONS = ['anger', 'neutral', 'sadness', 'happiness', 'fear', 'disgust', 'surprise', 'calm']
# Handling some common synonyms/variations: alias -> canonical label.
EMOTION_ALIASES = {
    'happy': 'happiness',
    'sad': 'sadness',
    'angry': 'anger',
    'surprised': 'surprise'
}
18
+
19
def convert_to_wav(source_path, target_path):
    """Re-encode the audio at *source_path* as WAV at *target_path*.

    Returns True on success; on any decode/encode failure the error is
    printed and False is returned (best-effort, never raises).
    """
    try:
        clip = AudioSegment.from_file(source_path)
        clip.export(target_path, format="wav")
    except Exception as e:
        print(f"Error converting {source_path}: {e}")
        return False
    return True
28
+
29
def get_label_from_filename(filename):
    """Extracts emotion label from filename based on patterns.

    Two rules are tried in order:
      1. RAVDESS-style names (e.g. ``03-01-05-01-01-01-12.wav``): the third
         dash-separated field is a two-digit emotion code (see RAVDESS_MAP).
      2. A known emotion word — or an alias such as ``happy`` — appearing as
         a whole token of the name, where tokens are delimited by ``_``,
         ``-``, ``.`` or spaces.

    Returns the canonical label (one of KNOWN_EMOTIONS) or None when no
    rule matches.
    """
    filename = os.path.basename(filename).lower()

    # Rule 1: RAVDESS dataset (e.g., 03-01-01-01-01-01-01.wav)
    if filename.count('-') == 6 and filename.startswith('03'):
        # splitext (instead of replacing '.wav') also handles .mp3/.flac etc.
        parts = os.path.splitext(filename)[0].split('-')
        if len(parts) > 2:
            return RAVDESS_MAP.get(parts[2])

    # Rule 2: general emotion words.
    # Fix: the previous substring patterns (e.g. "sad_" in name) also matched
    # inside larger words ("notsad_clip" -> 'sadness'). Matching whole tokens
    # keeps every legitimate pattern (prefix, "_emotion_", "_emotion.",
    # "upload_emotion_") while rejecting embedded matches.
    stem = os.path.splitext(filename)[0]
    for sep in ('-', '.', ' '):
        stem = stem.replace(sep, '_')
    tokens = stem.split('_')

    for emotion in KNOWN_EMOTIONS + list(EMOTION_ALIASES.keys()):
        if emotion in tokens:
            # Normalize aliases (e.g. 'happy' -> 'happiness').
            return EMOTION_ALIASES.get(emotion, emotion)

    return None
encoder.pkl ADDED
Binary file (527 Bytes). View file
 
frontend/favicon.ico ADDED
frontend/index.html ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>VocalVibe - Emotion Recognition</title>
8
+ <link rel="preconnect" href="https://fonts.googleapis.com">
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
+ <link href="https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;600;700&display=swap" rel="stylesheet">
11
+ <link rel="stylesheet" href="style.css">
12
+ <!-- FontAwesome for Icons -->
13
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
14
+ </head>
15
+
16
+ <body>
17
+ <div class="background-blobs">
18
+ <div class="blob blob-1"></div>
19
+ <div class="blob blob-2"></div>
20
+ <div class="blob blob-3"></div>
21
+ </div>
22
+
23
+ <!-- Toast Container -->
24
+ <div id="toast-container"></div>
25
+
26
+ <main class="glass-container">
27
+ <header>
28
+ <h1>Vocal<span class="highlight">Vibe</span></h1>
29
+ <p>AI-Powered Speech Emotion Recognition</p>
30
+ <button id="trainBtn" class="btn-secondary" style="font-size: 0.8rem; padding: 0.3rem 0.8rem;"><i
31
+ class="fa-solid fa-brain"></i> Train Model</button>
32
+ </header>
33
+
34
+ <section class="controls">
35
+ <!-- Recording Section -->
36
+ <div class="record-area">
37
+ <button id="micBtn" class="mic-button" title="Hold to Record">
38
+ <i class="fa-solid fa-microphone"></i>
39
+ </button>
40
+ <p id="statusText">Click & Hold to Record</p>
41
+ <div id="visualizer" class="visualizer hidden">
42
+ <div class="bar"></div>
43
+ <div class="bar"></div>
44
+ <div class="bar"></div>
45
+ <div class="bar"></div>
46
+ <div class="bar"></div>
47
+ </div>
48
+ </div>
49
+
50
+ <div class="divider">
51
+ <span>OR</span>
52
+ </div>
53
+
54
+ <!-- Upload Section -->
55
+ <div class="upload-area" id="dropZone">
56
+ <i class="fa-solid fa-cloud-arrow-up"></i>
57
+ <p>Drag & Drop Audio File</p>
58
+ <input type="file" id="fileInput" accept="audio/*" hidden>
59
+ <button class="btn-secondary" onclick="document.getElementById('fileInput').click()">Browse
60
+ Files</button>
61
+ </div>
62
+ </section>
63
+ </main>
64
+
65
+ <!-- Password Modal -->
66
+ <div id="passwordModal" class="modal hidden">
67
+ <div class="modal-content glass-card" style="max-width: 400px; text-align: center;">
68
+ <button class="close-btn" id="closePasswordModal">&times;</button>
69
+ <h2>Admin Access</h2>
70
+ <p style="margin-bottom: 15px; color: #cbd5e1;">Enter password to start training</p>
71
+ <input type="password" id="adminPasswordInput" class="password-input" placeholder="Password">
72
+ <button id="submitPasswordBtn" class="btn-primary"
73
+ style="margin-top: 15px; width: 100%;">Authenticate</button>
74
+ </div>
75
+ </div>
76
+
77
+ <!-- Training Terminal Modal -->
78
+ <div id="trainingModal" class="modal hidden">
79
+ <div class="modal-content terminal-card">
80
+ <div class="terminal-header">
81
+ <span class="terminal-title"><i class="fa-solid fa-terminal"></i> Model Training</span>
82
+ <button class="close-btn" id="closeTrainingModal">&times;</button>
83
+ </div>
84
+ <div class="terminal-body" id="trainingLog">
85
+ <span class="log-line">Waiting for command...</span>
86
+ </div>
87
+ </div>
88
+ </div>
89
+
90
+ <!-- Result Modal -->
91
+ <div id="resultModal" class="modal hidden">
92
+ <div class="modal-content glass-card">
93
+ <button class="close-btn" id="closeModal">&times;</button>
94
+ <div class="emoji-display" id="resultEmoji">🤔</div>
95
+ <h2 id="resultLabel">Analyzing...</h2>
96
+ <p id="resultConfidence">Confidence: --%</p>
97
+
98
+ <div class="feedback-section">
99
+ <p>Was this correct?</p>
100
+ <div class="feedback-buttons">
101
+ <button class="btn-feedback correct" id="btnCorrect"><i class="fa-solid fa-check"></i> Yes</button>
102
+ <button class="btn-feedback incorrect" id="btnIncorrect"><i class="fa-solid fa-xmark"></i>
103
+ No</button>
104
+ </div>
105
+ <!-- Correction Dropdown (Hidden initially) -->
106
+ <div id="correctionArea" class="hidden">
107
+ <select id="emotionSelect">
108
+ <option value="" disabled selected>Select actual emotion</option>
109
+ <option value="neutral">Neutral</option>
110
+ <option value="calm">Calm</option>
111
+ <option value="happiness">Happiness</option>
112
+ <option value="sadness">Sadness</option>
113
+ <option value="anger">Anger</option>
114
+ <option value="fear">Fear</option>
115
+ <option value="disgust">Disgust</option>
116
+ <option value="surprise">Surprise</option>
117
+ </select>
118
+ <button id="submitCorrection" class="btn-primary">Submit Correction</button>
119
+ </div>
120
+ </div>
121
+ </div>
122
+ </div>
123
+
124
+ <footer class="watermark">
125
+ <p>Made by: Aniruddha Paul</p>
126
+ </footer>
127
+
128
+ <script src="script.js"></script>
129
+ </body>
130
+
131
+ </html>
frontend/script.js ADDED
@@ -0,0 +1,400 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const micBtn = document.getElementById('micBtn');
2
+ const statusText = document.getElementById('statusText');
3
+ const visualizer = document.getElementById('visualizer');
4
+ const dropZone = document.getElementById('dropZone');
5
+ const fileInput = document.getElementById('fileInput');
6
+
7
+ // Result Modal Elements
8
+ const resultModal = document.getElementById('resultModal');
9
+ const closeModal = document.getElementById('closeModal');
10
+ const resultEmoji = document.getElementById('resultEmoji');
11
+ const resultLabel = document.getElementById('resultLabel');
12
+ const resultConfidence = document.getElementById('resultConfidence');
13
+ const btnCorrect = document.getElementById('btnCorrect');
14
+ const btnIncorrect = document.getElementById('btnIncorrect');
15
+ const correctionArea = document.getElementById('correctionArea');
16
+ const submitCorrection = document.getElementById('submitCorrection');
17
+
18
+ let mediaRecorder;
19
+ let audioChunks = [];
20
+ let currentTempFilename = null;
21
+ let currentPrediction = null;
22
+
23
+ // Emotion to Emoji Map
24
+ const emotionEmojis = {
25
+ 'neutral': '😐',
26
+ 'calm': '😌',
27
+ 'happiness': '😄',
28
+ 'happy': '😄',
29
+ 'sadness': '😢',
30
+ 'sad': '😢',
31
+ 'anger': '😠',
32
+ 'angry': '😠',
33
+ 'fear': '😱',
34
+ 'disgust': '🤢',
35
+ 'surprise': '😲'
36
+ };
37
+
38
+ // --- Recording Logic ---
39
+ micBtn.addEventListener('mousedown', startRecording);
40
+ micBtn.addEventListener('mouseup', stopRecording);
41
+ micBtn.addEventListener('mouseleave', () => {
42
+ if (mediaRecorder && mediaRecorder.state === 'recording') {
43
+ stopRecording();
44
+ }
45
+ });
46
+
47
+ // Training Logic
48
+ const trainingModal = document.getElementById('trainingModal');
49
+ const trainingLog = document.getElementById('trainingLog');
50
+ const closeTrainingModal = document.getElementById('closeTrainingModal');
51
+
52
+ closeTrainingModal.addEventListener('click', () => {
53
+ trainingModal.classList.add('hidden');
54
+ });
55
+
56
+
57
+ // --- Training & Password Logic ---
58
+ const passwordModal = document.getElementById('passwordModal');
59
+ const closePasswordModal = document.getElementById('closePasswordModal');
60
+ const submitPasswordBtn = document.getElementById('submitPasswordBtn');
61
+ const adminPasswordInput = document.getElementById('adminPasswordInput');
62
+
63
+ // Open Password Modal
64
+ document.getElementById('trainBtn').addEventListener('click', () => {
65
+ passwordModal.classList.remove('hidden');
66
+ adminPasswordInput.value = '';
67
+ adminPasswordInput.focus();
68
+ });
69
+
70
+ // Close Password Modal
71
+ closePasswordModal.addEventListener('click', () => {
72
+ passwordModal.classList.add('hidden');
73
+ });
74
+
75
+ // Handle Password Submission
76
+ // Handle Password Submission
77
// Validate the admin password field, then hand it off to startTraining.
// An empty field only shows an error toast and leaves the modal open.
function submitPassword() {
    const entered = adminPasswordInput.value;
    if (!entered) {
        showToast("Please enter a password", "error");
        return;
    }
    passwordModal.classList.add('hidden');
    startTraining(entered);
}
87
+
88
+ submitPasswordBtn.addEventListener('click', submitPassword);
89
+
90
+ // Allow Enter key to submit password and Esc to close modals
91
+ document.addEventListener('keydown', (e) => {
92
+ // Enter Key in Password Input
93
+ if (e.key === 'Enter' && document.activeElement === adminPasswordInput) {
94
+ submitPassword();
95
+ }
96
+ // Escape Key Global
97
+ if (e.key === 'Escape') {
98
+ passwordModal.classList.add('hidden');
99
+ resultModal.classList.add('hidden');
100
+ trainingModal.classList.add('hidden');
101
+ }
102
+ });
103
+
104
// POST the admin password to /train; on acceptance, open the terminal modal
// and start polling the backend for training-log lines.
async function startTraining(password) {
    // Open the terminal modal immediately so the user sees progress.
    trainingModal.classList.remove('hidden');
    trainingLog.innerHTML = '<span class="log-line">Authenticating...</span>';

    try {
        const response = await fetch('/train', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({ password: password })
        });

        // 401 means the backend rejected the password.
        if (response.status === 401) {
            trainingLog.innerHTML += '<span class="log-line" style="color:red">Error: Unauthorized. Incorrect Password.</span>';
            showToast("Incorrect Admin Password", "error");
            return;
        }

        const data = await response.json();

        // Backend acknowledges with status 'training_started'; begin polling.
        if (data.status === 'training_started') {
            trainingLog.innerHTML += '<span class="log-line">Access Granted. Starting training sequence...</span>';
            pollLogs();
        }
    } catch (e) {
        // Network/parse failure: surface it in both the toast and terminal.
        showToast("Failed to start training.", "error");
        trainingLog.innerHTML += `<span class="log-line" style="color:red">Error: ${e.message}</span>`;
    }
}
135
+
136
// Poll GET /logs for new training output and append each line to the
// terminal modal, recursing via setTimeout until a "Training complete"
// line arrives.
// Fix: `data.logs.length` was read unguarded after the `data.logs && ...`
// block — a response without a `logs` array threw a TypeError and sent the
// poller into the 2s error/retry path forever. Also guards `next_index` so
// a missing value falls back to the current index instead of `undefined`.
async function pollLogs(startIndex = 0) {
    try {
        const response = await fetch(`/logs?after=${startIndex}`);
        const data = await response.json();
        const logs = data.logs || [];

        if (logs.length > 0) {
            logs.forEach(log => {
                const line = document.createElement('span');
                line.className = 'log-line';
                line.innerText = log;
                // Color-code obvious failures and successes.
                if (log.includes("CRITICAL") || log.includes("Error")) line.style.color = '#ff5555';
                if (log.includes("Success") || log.includes("complete")) line.style.color = '#55ff55';
                trainingLog.appendChild(line);
            });
            // Auto scroll to keep the newest line visible.
            trainingLog.scrollTop = trainingLog.scrollHeight;
        }

        // Stop polling once the backend reports the run has finished.
        const lastLog = logs.length > 0 ? logs[logs.length - 1] : "";
        if (lastLog.includes("Training complete")) {
            trainingLog.innerHTML += '<span class="log-line">>> Process finished. You may close this window.</span>';
            return;
        }

        const nextIndex = (data.next_index !== undefined) ? data.next_index : startIndex;
        setTimeout(() => pollLogs(nextIndex), 500); // Poll every 500ms

    } catch (e) {
        console.error("Polling error", e);
        setTimeout(() => pollLogs(startIndex), 2000); // Retry slower on error
    }
}
170
+
171
+ // Touch support for mobile
172
+ micBtn.addEventListener('touchstart', (e) => { e.preventDefault(); startRecording(); });
173
+ micBtn.addEventListener('touchend', (e) => { e.preventDefault(); stopRecording(); });
174
+
175
+
176
// Begin capturing microphone audio. The UI is switched to the recording
// state up front (optimistic); if mic access is denied the status line is
// reverted to an error. Collected chunks are blobbed and uploaded when the
// recorder's "stop" event fires (see stopRecording).
function startRecording() {
    statusText.innerText = "Recording...";
    micBtn.classList.add('recording');
    visualizer.classList.remove('hidden');
    // Reset the chunk buffer for this new take.
    audioChunks = [];

    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
            mediaRecorder = new MediaRecorder(stream);
            mediaRecorder.start();

            mediaRecorder.addEventListener("dataavailable", event => {
                audioChunks.push(event.data);
            });

            mediaRecorder.addEventListener("stop", () => {
                // Browsers typically emit webm/ogg regardless of the declared
                // type; the .wav name is cosmetic — the backend decoder
                // (pydub) sniffs the real container, per the original note.
                const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
                uploadAudio(audioBlob, "recording.wav");
            });
        })
        .catch(err => {
            console.error("Error accessing mic:", err);
            statusText.innerText = "Error Accessing Mic";
        });
}
202
+
203
// Finalize an in-progress recording and reset the mic UI. Safe to call
// when no recorder exists or it has already stopped (no-op in that case).
function stopRecording() {
    const active = mediaRecorder && mediaRecorder.state !== 'inactive';
    if (!active) return;
    mediaRecorder.stop();
    statusText.innerText = "Processing...";
    micBtn.classList.remove('recording');
    visualizer.classList.add('hidden');
}
211
+
212
+ // --- File Upload Logic ---
213
+ fileInput.addEventListener('change', (e) => {
214
+ if (e.target.files.length > 0) {
215
+ handleFile(e.target.files[0]);
216
+ }
217
+ });
218
+
219
+ dropZone.addEventListener('dragover', (e) => {
220
+ e.preventDefault();
221
+ dropZone.classList.add('dragover');
222
+ });
223
+
224
+ dropZone.addEventListener('dragleave', () => dropZone.classList.remove('dragover'));
225
+
226
+ dropZone.addEventListener('drop', (e) => {
227
+ e.preventDefault();
228
+ dropZone.classList.remove('dragover');
229
+ if (e.dataTransfer.files.length > 0) {
230
+ handleFile(e.dataTransfer.files[0]);
231
+ }
232
+ });
233
+
234
// Forward a user-chosen audio file to the prediction endpoint,
// reflecting the upload in the status line.
function handleFile(file) {
    statusText.innerText = `Uploading ${file.name}...`;
    uploadAudio(file, file.name);
}
238
+
239
+ // --- Toast Notifications ---
240
// Show a transient notification in the top-right toast stack.
// `type` selects the icon and accent: 'info' (default), 'success', 'error'.
// The toast removes itself after ~3s once its hide animation finishes.
function showToast(message, type = 'info') {
    const container = document.getElementById('toast-container');
    const toast = document.createElement('div');
    toast.className = `toast ${type}`;

    // Icon selection
    let icon = 'fa-info-circle';
    if (type === 'success') icon = 'fa-check-circle';
    if (type === 'error') icon = 'fa-exclamation-circle';

    toast.innerHTML = `
        <i class="fa-solid ${icon}"></i>
        <span>${message}</span>
    `;

    container.appendChild(toast);

    // Auto remove: trigger the hide animation, then drop the node when done.
    setTimeout(() => {
        toast.classList.add('hide');
        toast.addEventListener('animationend', () => toast.remove());
    }, 3000);
}
263
+
264
+ // --- API Calls ---
265
// Send an audio blob/file as multipart form data to POST /predict and
// render the result. Failures are surfaced both in the status line and as
// an error toast.
async function uploadAudio(fileOrBlob, filename) {
    const formData = new FormData();
    formData.append("file", fileOrBlob, filename); // Append file

    try {
        const response = await fetch('/predict', {
            method: 'POST',
            body: formData
        });

        if (!response.ok) {
            // Backend reports failures as JSON with a `detail` field.
            const errData = await response.json();
            throw new Error(errData.detail || "Prediction failed");
        }

        const data = await response.json();
        showResult(data);
        // Restore the idle prompt once the result modal is showing.
        statusText.innerText = "Click & Hold to Record";
        showToast("Analysis Complete", "success");
    } catch (error) {
        console.error(error);
        statusText.innerText = "Error: " + error.message;
        showToast("Error: " + error.message, "error");
    }
}
290
+
291
// Populate and open the result modal for a /predict response.
// Side effects: stashes the temp filename and predicted label into the
// module-level currentTempFilename/currentPrediction (read later by the
// feedback flow), and lazily creates/refreshes the #nlp-results panel.
function showResult(data) {
    currentTempFilename = data.temp_filename;
    currentPrediction = data.prediction;

    // Unknown labels fall back to a question-mark emoji.
    resultEmoji.innerText = emotionEmojis[data.prediction.toLowerCase()] || '❓';
    resultLabel.innerText = data.prediction.charAt(0).toUpperCase() + data.prediction.slice(1);
    resultConfidence.innerText = `Confidence: ${(data.confidence * 100).toFixed(1)}%`;

    // Reset feedback UI before showing the modal.
    correctionArea.classList.add('hidden');
    resultModal.classList.remove('hidden');

    // Fallback mode: the model is untrained, so ask the user to label the
    // clip instead of presenting a prediction.
    if (data.is_fallback) {
        showToast("Model not trained. Please label this audio to build the dataset.", "info");
        correctionArea.classList.remove('hidden');
        resultLabel.innerText = "Label Required";
        resultEmoji.innerText = "🏷️";
        resultConfidence.innerText = "Help the AI learn!";
    }

    // --- NLP Analysis Display ---
    // Create the panel once and reuse it on subsequent results.
    let nlpDiv = document.getElementById('nlp-results');
    if (!nlpDiv) {
        nlpDiv = document.createElement('div');
        nlpDiv.id = 'nlp-results';
        nlpDiv.className = 'nlp-container';

        // Insert before feedback section
        const feedbackSection = resultModal.querySelector('.feedback-section');
        resultModal.querySelector('.modal-content').insertBefore(nlpDiv, feedbackSection);
    }

    // Clear previous result's content.
    nlpDiv.innerHTML = '';

    if (data.nlp_analysis && data.nlp_analysis.transcription) {
        const textEmotion = data.nlp_analysis.text_emotion;
        const confidencePct = (textEmotion.score * 100).toFixed(1);

        // Show Hybrid Breakdown: acoustic vs. text-based emotion estimates.
        nlpDiv.innerHTML = `
            <div class="divider">Hybrid Analysis</div>
            <p class="transcription">"${data.nlp_analysis.transcription}"</p>

            <div class="breakdown-grid" style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 10px; font-size: 0.9rem;">
                <div class="breakdown-item">
                    <div style="color: #94a3b8;">Audio Tone</div>
                    <div class="highlight">${data.audio_emotion.label}</div>
                    <div class="confidence-small">${(data.audio_emotion.confidence * 100).toFixed(1)}%</div>
                </div>
                <div class="breakdown-item">
                    <div style="color: #94a3b8;">Text Context</div>
                    <div class="highlight">${textEmotion.label}</div>
                    <div class="confidence-small">${confidencePct}%</div>
                </div>
            </div>
            <div style="margin-top: 10px; font-size: 0.8rem; color: #64748b;">
                Result fused from acoustic and semantic models.
            </div>
        `;
    } else {
        // No transcription available — acoustic-only result.
        nlpDiv.innerHTML = `
            <div class="divider">Context Analysis</div>
            <p style="color: #64748b; font-style: italic;">No speech detected or analysis unavailable.</p>
        `;
    }
}
358
+
359
+ // --- Modal & Feedback ---
360
+ closeModal.addEventListener('click', () => resultModal.classList.add('hidden'));
361
+ window.onclick = (event) => {
362
+ if (event.target == resultModal) resultModal.classList.add('hidden');
363
+ };
364
+
365
+ btnCorrect.addEventListener('click', () => {
366
+ submitFeedback(currentPrediction);
367
+ });
368
+
369
+ btnIncorrect.addEventListener('click', () => {
370
+ correctionArea.classList.remove('hidden');
371
+ });
372
+
373
+ submitCorrection.addEventListener('click', () => {
374
+ const selected = document.getElementById('emotionSelect').value;
375
+ if (selected) {
376
+ submitFeedback(selected);
377
+ }
378
+ });
379
+
380
// POST the user's (possibly corrected) label for the last prediction to
// /feedback so the backend can keep the sample for future training.
// Relies on currentTempFilename/currentPrediction set by showResult.
async function submitFeedback(correctLabel) {
    try {
        const response = await fetch('/feedback', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                filename: currentTempFilename,
                original_emotion: currentPrediction,
                correct_emotion: correctLabel
            })
        });

        const res = await response.json();
        if (res.status === 'success') {
            showToast("Feedback saved successfully!", "success");
            resultModal.classList.add('hidden');
        }
    } catch (e) {
        showToast("Failed to save feedback.", "error");
    }
}
frontend/style.css ADDED
@@ -0,0 +1,649 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --primary: #6366f1;
3
+ --secondary: #a855f7;
4
+ --accent: #ec4899;
5
+ --bg-dark: #0f172a;
6
+ --text-light: #f8fafc;
7
+ --glass-bg: rgba(255, 255, 255, 0.05);
8
+ --glass-border: rgba(255, 255, 255, 0.1);
9
+ --glass-highlight: rgba(255, 255, 255, 0.15);
10
+ --toast-success: #22c55e;
11
+ --toast-error: #ef4444;
12
+ --toast-info: #3b82f6;
13
+ }
14
+
15
+ * {
16
+ margin: 0;
17
+ padding: 0;
18
+ box-sizing: border-box;
19
+ font-family: 'Outfit', sans-serif;
20
+ }
21
+
22
+ body {
23
+ background-color: var(--bg-dark);
24
+ color: var(--text-light);
25
+ height: 100vh;
26
+ display: flex;
27
+ justify-content: center;
28
+ align-items: center;
29
+ overflow: hidden;
30
+ position: relative;
31
+ }
32
+
33
+ /* Dynamic Background Blobs */
34
+ .background-blobs {
35
+ position: absolute;
36
+ top: 0;
37
+ left: 0;
38
+ width: 100%;
39
+ height: 100%;
40
+ z-index: -1;
41
+ filter: blur(80px);
42
+ }
43
+
44
+ .blob {
45
+ position: absolute;
46
+ border-radius: 50%;
47
+ opacity: 0.6;
48
+ animation: float 10s infinite ease-in-out;
49
+ }
50
+
51
+ .blob-1 {
52
+ width: 400px;
53
+ height: 400px;
54
+ background: var(--primary);
55
+ top: -50px;
56
+ left: -50px;
57
+ }
58
+
59
+ .blob-2 {
60
+ width: 300px;
61
+ height: 300px;
62
+ background: var(--secondary);
63
+ bottom: 50px;
64
+ right: -50px;
65
+ animation-delay: 2s;
66
+ }
67
+
68
+ .blob-3 {
69
+ width: 200px;
70
+ height: 200px;
71
+ background: var(--accent);
72
+ top: 40%;
73
+ left: 40%;
74
+ animation-delay: 4s;
75
+ }
76
+
77
+ @keyframes float {
78
+
79
+ 0%,
80
+ 100% {
81
+ transform: translate(0, 0);
82
+ }
83
+
84
+ 50% {
85
+ transform: translate(20px, -20px);
86
+ }
87
+ }
88
+
89
+ /* Glass Container */
90
+ .glass-container {
91
+ background: var(--glass-bg);
92
+ backdrop-filter: blur(16px);
93
+ -webkit-backdrop-filter: blur(16px);
94
+ border: 1px solid var(--glass-border);
95
+ border-radius: 24px;
96
+ padding: 3rem;
97
+ width: 90%;
98
+ max-width: 500px;
99
+ text-align: center;
100
+ box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
101
+ }
102
+
103
+ .glass-card {
104
+ background: var(--glass-bg);
105
+ backdrop-filter: blur(16px);
106
+ -webkit-backdrop-filter: blur(16px);
107
+ border: 1px solid var(--glass-border);
108
+ box-shadow: 0 8px 32px 0 rgba(0, 0, 0, 0.37);
109
+ }
110
+
111
+ header h1 {
112
+ font-size: 3rem;
113
+ margin-bottom: 0.5rem;
114
+ }
115
+
116
+ .highlight {
117
+ background: linear-gradient(45deg, var(--primary), var(--accent));
118
+ background-clip: text;
119
+ -webkit-background-clip: text;
120
+ -webkit-text-fill-color: transparent;
121
+ }
122
+
123
+ header p {
124
+ color: #cbd5e1;
125
+ margin-bottom: 2rem;
126
+ }
127
+
128
+ /* Mic Button */
129
+ .mic-button {
130
+ width: 100px;
131
+ height: 100px;
132
+ border-radius: 50%;
133
+ border: none;
134
+ background: linear-gradient(135deg, var(--primary), var(--secondary));
135
+ color: white;
136
+ font-size: 2.5rem;
137
+ cursor: pointer;
138
+ box-shadow: 0 0 20px rgba(99, 102, 241, 0.5);
139
+ transition: transform 0.2s, box-shadow 0.2s;
140
+ outline: none;
141
+ }
142
+
143
+ .mic-button:hover {
144
+ transform: scale(1.05);
145
+ box-shadow: 0 0 30px rgba(99, 102, 241, 0.7);
146
+ }
147
+
148
+ .mic-button:active,
149
+ .mic-button.recording {
150
+ transform: scale(0.95);
151
+ box-shadow: 0 0 10px rgba(99, 102, 241, 0.8);
152
+ background: var(--accent);
153
+ }
154
+
155
+ .mic-button.recording {
156
+ animation: pulse 1.5s infinite;
157
+ }
158
+
159
+ @keyframes pulse {
160
+ 0% {
161
+ box-shadow: 0 0 0 0 rgba(236, 72, 153, 0.7);
162
+ }
163
+
164
+ 70% {
165
+ box-shadow: 0 0 0 20px rgba(236, 72, 153, 0);
166
+ }
167
+
168
+ 100% {
169
+ box-shadow: 0 0 0 0 rgba(236, 72, 153, 0);
170
+ }
171
+ }
172
+
173
+ /* Visualizer */
174
+ .visualizer {
175
+ display: flex;
176
+ justify-content: center;
177
+ gap: 4px;
178
+ height: 30px;
179
+ align-items: center;
180
+ margin-top: 10px;
181
+ }
182
+
183
+ .visualizer.hidden {
184
+ visibility: hidden;
185
+ }
186
+
187
+ .bar {
188
+ width: 4px;
189
+ height: 10px;
190
+ background: white;
191
+ border-radius: 2px;
192
+ animation: equalise 0.5s infinite;
193
+ animation-play-state: paused;
194
+ }
195
+
196
+ .recording~.visualizer .bar {
197
+ animation-play-state: running;
198
+ }
199
+
200
+ @keyframes equalise {
201
+
202
+ 0%,
203
+ 100% {
204
+ height: 10px;
205
+ }
206
+
207
+ 50% {
208
+ height: 25px;
209
+ }
210
+ }
211
+
212
+ .bar:nth-child(1) {
213
+ animation-delay: 0.1s;
214
+ }
215
+
216
+ .bar:nth-child(2) {
217
+ animation-delay: 0.2s;
218
+ }
219
+
220
+ .bar:nth-child(3) {
221
+ animation-delay: 0.3s;
222
+ }
223
+
224
+ .bar:nth-child(4) {
225
+ animation-delay: 0.2s;
226
+ }
227
+
228
+ .bar:nth-child(5) {
229
+ animation-delay: 0.1s;
230
+ }
231
+
232
+ /* Upload Area */
233
+ .divider {
234
+ margin: 2rem 0;
235
+ position: relative;
236
+ color: #64748b;
237
+ }
238
+
239
+ .divider::before,
240
+ .divider::after {
241
+ content: '';
242
+ position: absolute;
243
+ top: 50%;
244
+ width: 40%;
245
+ height: 1px;
246
+ background: #475569;
247
+ }
248
+
249
/* Divider halves: one line on each side of the divider label. */
.divider::before { left: 0; }
.divider::after { right: 0; }

/* Drag-and-drop upload zone. */
.upload-area { border: 2px dashed #475569; border-radius: 12px; padding: 1.5rem; transition: all 0.3s; cursor: pointer; }
.upload-area:hover,
.upload-area.dragover { border-color: var(--secondary); background: rgba(168, 85, 247, 0.1); }
.upload-area i { font-size: 2rem; color: #94a3b8; margin-bottom: 0.5rem; }

/* Outlined secondary button. */
.btn-secondary { margin-top: 1rem; padding: 0.5rem 1.5rem; border-radius: 8px; border: 1px solid var(--primary); background: transparent; color: var(--primary); cursor: pointer; transition: 0.2s; }
.btn-secondary:hover { background: rgba(99, 102, 241, 0.2); }

/* Modal overlay and dialog box. */
.modal { position: fixed; top: 0; left: 0; width: 100%; height: 100%; background: rgba(0, 0, 0, 0.7); display: flex; justify-content: center; align-items: center; z-index: 100; opacity: 1; transition: opacity 0.3s; }
.modal.hidden { opacity: 0; pointer-events: none; }
.modal-content { background: #1e293b; padding: 2rem; border-radius: 20px; text-align: center; width: 90%; max-width: 400px; position: relative; border: 1px solid var(--glass-border); }
.close-btn { position: absolute; top: 10px; right: 15px; background: none; border: none; color: white; font-size: 1.5rem; cursor: pointer; }

/* Result emoji with a springy pop-in. */
.emoji-display { font-size: 5rem; margin-bottom: 1rem; animation: pop 0.5s cubic-bezier(0.175, 0.885, 0.32, 1.275); }

@keyframes pop {
    0% { transform: scale(0); }
    100% { transform: scale(1); }
}

/* Prediction feedback controls. */
.feedback-buttons { display: flex; justify-content: center; gap: 1rem; margin-top: 1rem; }
.btn-feedback { padding: 0.5rem 1rem; border: none; border-radius: 8px; cursor: pointer; font-weight: 600; }
.correct { background: #22c55e; color: white; }
.incorrect { background: #ef4444; color: white; }
#correctionArea { margin-top: 1rem; display: flex; flex-direction: column; gap: 0.5rem; }
#emotionSelect { padding: 0.5rem; border-radius: 6px; border: 1px solid #475569; background: #0f172a; color: white; }

/* Toast notifications: fixed stack in the top-right corner. */
#toast-container { position: fixed; top: 20px; right: 20px; z-index: 1000; display: flex; flex-direction: column; gap: 10px; }
.toast { background: rgba(15, 23, 42, 0.8); backdrop-filter: blur(12px); -webkit-backdrop-filter: blur(12px); border: 1px solid var(--glass-border); color: white; padding: 12px 24px; border-radius: 12px; display: flex; align-items: center; gap: 12px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); animation: slideIn 0.3s ease-out forwards; min-width: 300px; overflow: hidden; position: relative; }

/* Coloured status strip on the toast's left edge. */
.toast::before { content: ''; position: absolute; left: 0; top: 0; bottom: 0; width: 4px; background: var(--toast-info); }
.toast.success::before { background: var(--toast-success); }
.toast.error::before { background: var(--toast-error); }
.toast.info::before { background: var(--toast-info); }
.toast.hide { animation: slideOut 0.3s ease-in forwards; }

@keyframes slideIn {
    from { transform: translateX(100%); opacity: 0; }
    to { transform: translateX(0); opacity: 1; }
}

@keyframes slideOut {
    from { transform: translateX(0); opacity: 1; }
    to { transform: translateX(100%); opacity: 0; }
}

/* Shared button polish. */
.btn-primary,
.btn-secondary,
.btn-feedback { transition: all 0.2s ease; font-weight: 500; letter-spacing: 0.5px; }
.btn-primary:active,
.btn-secondary:active { transform: scale(0.98); }

/* Later override of the earlier .modal-content: darker glassy look
   (declaration order matters here — keep this after the first one). */
.modal-content { background: rgba(15, 23, 42, 0.95); box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5); border: 1px solid var(--glass-highlight); }

/* Training terminal. */
.terminal-card { background: #0c0c0c; border: 1px solid #333; width: 90%; max-width: 700px; height: 400px; padding: 0; display: flex; flex-direction: column; overflow: hidden; box-shadow: 0 0 40px rgba(0, 255, 0, 0.1); }
.terminal-header { background: #1a1a1a; padding: 10px 15px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #333; }
.terminal-title { color: #0f0; font-family: 'Consolas', monospace; font-size: 0.9rem; display: flex; align-items: center; gap: 8px; }
.terminal-body { flex: 1; padding: 15px; background: #000; overflow-y: auto; text-align: left; font-family: 'Consolas', monospace; font-size: 0.9rem; color: #e0e0e0; }
.log-line { display: block; margin-bottom: 4px; word-wrap: break-word; }
.log-line::before { content: '> '; color: #0f0; }

/* Terminal scrollbar (WebKit-only selectors). */
.terminal-body::-webkit-scrollbar { width: 8px; }
.terminal-body::-webkit-scrollbar-track { background: #111; }
.terminal-body::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }
.terminal-body::-webkit-scrollbar-thumb:hover { background: #444; }

.terminal-header .close-btn { position: static; font-size: 1.2rem; color: #666; }
.terminal-header .close-btn:hover { color: #fff; }

/* Text-emotion (NLP) result panel. */
.nlp-container { margin-top: 1rem; text-align: left; background: rgba(0, 0, 0, 0.2); padding: 1rem; border-radius: 12px; }
.transcription { font-style: italic; color: #e2e8f0; margin-bottom: 0.5rem; border-left: 3px solid var(--secondary); padding-left: 10px; }
.nlp-emotion { font-weight: 500; }
.confidence-small { font-size: 0.8rem; color: #94a3b8; }

/* Admin password field. */
.password-input { width: 100%; padding: 10px; border-radius: 10px; border: 1px solid rgba(255, 255, 255, 0.2); background: rgba(255, 255, 255, 0.05); color: white; font-size: 1rem; outline: none; transition: all 0.3s ease; }
.password-input:focus { border-color: var(--secondary); background: rgba(255, 255, 255, 0.1); }

/* --- NEW WATERMARK CODE (3D Glassy) --- */
.watermark { position: fixed; bottom: 20px; right: 20px; padding: 12px 24px; background: rgba(255, 255, 255, 0.05); backdrop-filter: blur(12px); -webkit-backdrop-filter: blur(12px); border: 1px solid rgba(255, 255, 255, 0.1); border-top: 1px solid rgba(255, 255, 255, 0.3); border-left: 1px solid rgba(255, 255, 255, 0.3); border-radius: 24px; box-shadow: 10px 10px 20px rgba(0, 0, 0, 0.3), -5px -5px 15px rgba(255, 255, 255, 0.02); color: rgba(255, 255, 255, 0.9); font-size: 0.9rem; font-weight: 500; pointer-events: auto; /* allow hover */ z-index: 1000; font-family: 'Outfit', sans-serif; transform: perspective(1000px) rotateX(5deg) rotateY(-5deg); transition: all 0.4s ease; text-shadow: 0 2px 4px rgba(0, 0, 0, 0.5); letter-spacing: 0.5px; }
.watermark:hover { transform: perspective(1000px) rotateX(0deg) rotateY(0deg) scale(1.05); background: rgba(255, 255, 255, 0.1); color: #fff; box-shadow: 0 0 30px rgba(255, 255, 255, 0.1), 0 10px 20px rgba(0, 0, 0, 0.4); cursor: default; }
frontend/style.css_append ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* ===== Terminal Toast (appended styles) ===== */

/* Wide toast variant that stacks its children vertically. */
.toast-terminal { min-width: 400px; flex-direction: column; align-items: flex-start; gap: 8px; background: rgba(15, 23, 42, 0.95); }

/* Scrollable green-on-black log window embedded in the toast. */
.terminal-window { width: 100%; height: 200px; background: #000; color: #0f0; font-family: 'Courier New', Courier, monospace; font-size: 0.8rem; padding: 10px; border-radius: 6px; overflow-y: auto; border: 1px solid #333; white-space: pre-wrap; box-shadow: inset 0 0 10px rgba(0,0,0,0.5); }

/* WebKit-only scrollbar styling for the log window. */
.terminal-window::-webkit-scrollbar { width: 8px; }
.terminal-window::-webkit-scrollbar-thumb { background: #333; border-radius: 4px; }

/* NOTE(review): `.terminal-header` is also declared in the main stylesheet's
   training-terminal section; being later in the cascade, this declaration
   layers on top of / overrides that one — confirm the overlap is intended. */
.terminal-header { font-weight: bold; color: #fff; margin-bottom: 5px; display: flex; justify-content: space-between; width: 100%; }
frontend/test_prompt.html ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html>

<body>
    <!-- Minimal manual check that window.prompt works in this browser. -->
    <button id="testBtn">Test Prompt</button>
    <script>
        // On click: show a prompt with a default value, then echo whatever
        // the user typed (or null on cancel) back via alert.
        var button = document.getElementById('testBtn');
        button.addEventListener('click', function () {
            var result = prompt("Test Prompt", "default");
            alert("You entered: " + result);
        });
    </script>
</body>

</html>
model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:970a38d43836d6c44f6421ada2331151466d44a566a8dcc241f0a766ecfa4e77
3
+ size 36193352
run_app.ps1 ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# run_app.ps1 — start the VocalVibe FastAPI server from the project venv.
# Fail fast on any error so a broken environment is reported immediately.
$ErrorActionPreference = "Stop"

Write-Host "Starting VocalVibe Application..." -ForegroundColor Cyan

# The app requires a pre-built virtual environment; refuse to start without it.
if (-not (Test-Path "venv")) {
    Write-Host "Virtual environment not found. Please run the setup first." -ForegroundColor Red
    exit 1
}

# Activate venv and run uvicorn.
Write-Host "Activating environment and starting server..." -ForegroundColor Green
# FIX: message previously read "App will differ at:" — corrected wording.
Write-Host "App will be available at: http://localhost:8000" -ForegroundColor Yellow

# Make the backend package importable and prefer the venv's executables on PATH.
$env:PYTHONPATH = "backend"
$env:Path = "$PSScriptRoot\venv\Scripts;$env:Path"
.\venv\Scripts\uvicorn backend.main:app --reload --host 0.0.0.0 --port 8000