from flask import Flask, request, jsonify, render_template import os import numpy as np import librosa import joblib import speech_recognition as sr from werkzeug.utils import secure_filename app = Flask(__name__) UPLOAD_FOLDER = 'uploads' ALLOWED_EXTENSIONS = {'wav', 'mp3', 'ogg'} app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max upload os.makedirs(UPLOAD_FOLDER, exist_ok=True) model = joblib.load("mlp_emotion_model.pkl") scaler = joblib.load("scaler.pkl") label_encoder = joblib.load("label_encoder.pkl") def allowed_file(filename): return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS def extract_features(file_path): """Extract audio features for emotion recognition""" try: y_data, sr = librosa.load(file_path, sr=None, mono=True) if len(y_data) == 0: return None features = np.hstack([ np.mean(librosa.feature.zero_crossing_rate(y_data).T, axis=0), np.mean(librosa.feature.chroma_stft(y=y_data, sr=sr).T, axis=0), np.mean(librosa.feature.mfcc(y=y_data, sr=sr, n_mfcc=40).T, axis=0), np.mean(librosa.feature.melspectrogram(y=y_data, sr=sr).T, axis=0) ]) return features except Exception as e: print(f"Error extracting features: {e}") return None def recognize_emotion(file_path): """Recognize emotion in audio file""" features = extract_features(file_path) if features is None: return {"error": "Failed to extract audio features"} scaled_features = scaler.transform(features.reshape(1, -1)) prediction = model.predict(scaled_features) emotion = label_encoder.inverse_transform(prediction)[0] probs = model.predict_proba(scaled_features)[0] emotion_probs = {emotion: float(prob) for emotion, prob in zip(label_encoder.classes_, probs)} emotion_map = { 'ANG': 'Angry', 'DIS': 'Disgusted', 'FEA': 'Fearful', 'HAP': 'Happy', 'NEU': 'Neutral', 'SAD': 'Sad' } full_emotion = emotion_map.get(emotion, emotion) return { "emotion": emotion, "emotion_name": full_emotion, "confidence": float(max(probs)), "probabilities": emotion_probs } def transcribe_audio(file_path): """Transcribe speech to text from audio file""" recognizer = sr.Recognizer() try: with sr.AudioFile(file_path) as source: audio_data = recognizer.record(source) text = recognizer.recognize_google(audio_data) return {"text": text} except sr.UnknownValueError: return {"text": "Speech unclear", "error": "Could not understand audio"} except sr.RequestError as e: return {"text": "", "error": f"Speech service error: {e}"} except Exception as e: return {"text": "", "error": f"Error transcribing audio: {e}"} @app.route('/') def index(): return render_template('index.html') @app.route('/analyze', methods=['POST']) def analyze_audio(): if 'file' not in request.files: return jsonify({"error": "No file uploaded"}), 400 file = request.files['file'] if file.filename == '': return jsonify({"error": "No file selected"}), 400 if not allowed_file(file.filename): return jsonify({"error": f"File type not allowed. Supported types: {', '.join(ALLOWED_EXTENSIONS)}"}), 400 try: filename = secure_filename(file.filename) file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename) file.save(file_path) emotion_result = recognize_emotion(file_path) transcription_result = transcribe_audio(file_path) result = { "filename": filename, "emotion": emotion_result, "transcription": transcription_result } return jsonify(result) except Exception as e: return jsonify({"error": f"Error processing audio: {str(e)}"}), 500 finally: pass # if __name__ == '__main__': # app.run(debug=True)