# Audio analysis web app (Flask): upload an audio clip to get a predicted
# emotion (pre-trained MLP classifier) plus a speech-to-text transcription.
from flask import Flask, request, jsonify, render_template
import os
import numpy as np
import librosa
import joblib
import speech_recognition as sr
from werkzeug.utils import secure_filename
# Flask application and upload handling configuration.
app = Flask(__name__)
UPLOAD_FOLDER = 'uploads'
# Extensions accepted by /analyze (checked by allowed_file below).
ALLOWED_EXTENSIONS = {'wav', 'mp3', 'ogg'}
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max upload
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Pre-trained artifacts, loaded once at import time; the process fails fast
# here if the .pkl files are missing from the working directory.
# model: classifier exposing predict/predict_proba; scaler: feature scaler
# matching extract_features' output; label_encoder: maps class indices to
# short emotion codes ('ANG', 'HAP', ... — see emotion_map in
# recognize_emotion).
model = joblib.load("mlp_emotion_model.pkl")
scaler = joblib.load("scaler.pkl")
label_encoder = joblib.load("label_encoder.pkl")
def allowed_file(filename):
    """Return True if *filename* carries one of the allowed audio extensions."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
def extract_features(file_path):
    """Extract a fixed-length audio feature vector for emotion classification.

    Loads the whole clip (native sample rate, mono) and concatenates the
    time-averaged zero-crossing rate, chroma, 40 MFCCs, and mel spectrogram.

    Args:
        file_path: path to an audio file readable by librosa.

    Returns:
        A 1-D numpy array of features, or None when the clip is empty or
        cannot be decoded (callers treat None as "features unavailable").
    """
    try:
        # Named `sample_rate` (not `sr`) so it cannot be confused with the
        # module-level `speech_recognition as sr` import.
        y_data, sample_rate = librosa.load(file_path, sr=None, mono=True)
        if len(y_data) == 0:
            return None
        features = np.hstack([
            np.mean(librosa.feature.zero_crossing_rate(y_data).T, axis=0),
            np.mean(librosa.feature.chroma_stft(y=y_data, sr=sample_rate).T, axis=0),
            np.mean(librosa.feature.mfcc(y=y_data, sr=sample_rate, n_mfcc=40).T, axis=0),
            np.mean(librosa.feature.melspectrogram(y=y_data, sr=sample_rate).T, axis=0),
        ])
        return features
    except Exception as e:
        # Best-effort by design: log and signal failure with None rather
        # than propagate decoding errors to the request handler.
        print(f"Error extracting features: {e}")
        return None
def recognize_emotion(file_path):
    """Predict the emotion expressed in an audio file.

    Returns a dict with the predicted emotion code, a human-readable name,
    the classifier's confidence, and per-class probabilities — or an error
    dict when feature extraction fails.
    """
    feature_vector = extract_features(file_path)
    if feature_vector is None:
        return {"error": "Failed to extract audio features"}

    scaled = scaler.transform(feature_vector.reshape(1, -1))
    predicted_code = label_encoder.inverse_transform(model.predict(scaled))[0]
    probabilities = model.predict_proba(scaled)[0]

    # Per-class probabilities keyed by the encoder's class codes.
    emotion_probs = dict(zip(label_encoder.classes_, map(float, probabilities)))

    # Human-readable names for the short class codes; unknown codes fall
    # back to the code itself.
    code_to_name = {
        'ANG': 'Angry',
        'DIS': 'Disgusted',
        'FEA': 'Fearful',
        'HAP': 'Happy',
        'NEU': 'Neutral',
        'SAD': 'Sad',
    }
    return {
        "emotion": predicted_code,
        "emotion_name": code_to_name.get(predicted_code, predicted_code),
        "confidence": float(max(probabilities)),
        "probabilities": emotion_probs,
    }
def transcribe_audio(file_path):
    """Transcribe speech from an audio file via Google's web recognizer.

    Returns {"text": ...} on success; on failure the dict also carries an
    "error" key describing what went wrong.

    NOTE(review): sr.AudioFile reads WAV/AIFF/FLAC natively, so mp3/ogg
    uploads likely land in the generic error branch — confirm.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(file_path) as audio_source:
            recorded = recognizer.record(audio_source)
        return {"text": recognizer.recognize_google(recorded)}
    except sr.UnknownValueError:
        return {"text": "Speech unclear", "error": "Could not understand audio"}
    except sr.RequestError as e:
        return {"text": "", "error": f"Speech service error: {e}"}
    except Exception as e:
        return {"text": "", "error": f"Error transcribing audio: {e}"}
@app.route('/')
def index():
    """Serve the upload UI (templates/index.html)."""
    return render_template('index.html')
@app.route('/analyze', methods=['POST'])
def analyze_audio():
    """Analyze an uploaded audio file: emotion recognition + transcription.

    Expects a multipart form with a 'file' field. Returns JSON with the
    saved filename, the emotion-recognition result, and the transcription
    result; 400 for missing/invalid uploads, 500 on processing errors.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file uploaded"}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({"error": "No file selected"}), 400
    if not allowed_file(file.filename):
        return jsonify({"error": f"File type not allowed. Supported types: {', '.join(ALLOWED_EXTENSIONS)}"}), 400
    filename = secure_filename(file.filename)
    # secure_filename strips unsafe characters and can yield an empty
    # string (e.g. a name that is all path separators) — reject early
    # rather than trying to save to the bare upload-folder path.
    if not filename:
        return jsonify({"error": "No file selected"}), 400
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    try:
        file.save(file_path)
        emotion_result = recognize_emotion(file_path)
        transcription_result = transcribe_audio(file_path)
        result = {
            "filename": filename,
            "emotion": emotion_result,
            "transcription": transcription_result
        }
        return jsonify(result)
    except Exception as e:
        return jsonify({"error": f"Error processing audio: {str(e)}"}), 500
    finally:
        # Delete the upload once analyzed so the uploads folder does not
        # grow without bound (the original `finally: pass` left every
        # file behind). Ignore the case where the save itself failed.
        try:
            os.remove(file_path)
        except OSError:
            pass
# if __name__ == '__main__':
# app.run(debug=True)