| |
|
| | import tensorflow as tf |
| | import numpy as np |
| | import json |
| | import librosa |
| | import os |
| |
|
def load_model(model_path):
    """Load and return the trained emotion recognition Keras model.

    Args:
        model_path: Path to the saved ``.keras`` model file.

    Returns:
        The deserialized ``tf.keras`` model, ready for inference.
    """
    model = tf.keras.models.load_model(model_path)
    return model
| |
|
def predict_emotion(model, audio_path, preprocessor_config):
    """Predict the emotion expressed in an audio file.

    Args:
        model: Trained Keras classifier; fed a log-mel spectrogram with
            batch and channel dimensions added (see helper below).
        audio_path: Path to the audio file to analyze.
        preprocessor_config: Dict with keys "sample_rate", "duration",
            "offset", "frame_length", "hop_length", and "emotions"
            (a mapping from class index as a *string* to emotion label).

    Returns:
        Tuple of ``(emotion_label, confidence)`` where confidence is the
        predicted probability of the winning class as a plain ``float``.
    """
    waveform = _load_fixed_length_waveform(audio_path, preprocessor_config)
    features = _extract_log_mel_features(waveform, preprocessor_config)

    # predict() returns a batch of class-probability vectors; take the first.
    prediction = model.predict(features)[0]

    emotion_idx = int(np.argmax(prediction))
    # The emotions mapping is keyed by the class index as a string
    # (JSON object keys are always strings).
    emotion = preprocessor_config["emotions"][str(emotion_idx)]
    # Cast so callers receive a plain float, not a numpy scalar.
    return emotion, float(prediction[emotion_idx])


def _load_fixed_length_waveform(audio_path, config):
    """Load audio and pad/trim it to exactly sample_rate * duration samples."""
    waveform, _ = librosa.load(
        audio_path,
        sr=config["sample_rate"],
        duration=config["duration"],
        offset=config["offset"],
    )

    target_length = int(config["sample_rate"] * config["duration"])
    if len(waveform) < target_length:
        # Zero-pad short clips at the end to the expected length.
        waveform = np.pad(waveform, (0, target_length - len(waveform)))
    elif len(waveform) > target_length:
        # `elif` (not a second `if`): padding and trimming are mutually
        # exclusive, so the original back-to-back checks had dead code.
        waveform = waveform[:target_length]
    return waveform


def _extract_log_mel_features(waveform, config):
    """Compute a log-mel spectrogram with batch/channel dims for the model."""
    mel_spec = librosa.feature.melspectrogram(
        y=waveform,
        sr=config["sample_rate"],
        n_fft=config["frame_length"],
        hop_length=config["hop_length"],
        n_mels=128,
    )
    # Clamp before log to avoid -inf on silent frames.
    log_mel = np.log(np.maximum(mel_spec, 1e-10))
    # Add leading batch and trailing channel dimensions — presumably what
    # the model's input layer expects (e.g. Conv2D); TODO confirm.
    return np.expand_dims(np.expand_dims(log_mel, axis=0), axis=-1)
| |
|
| | |
if __name__ == "__main__":
    import sys

    # Load the trained model saved in the native Keras format.
    model = load_model("emotion_recognition_model.keras")

    # Preprocessing parameters must match those used at training time.
    with open("preprocessing.json", "r") as f:
        preprocessor_config = json.load(f)

    # Allow the audio file to be given on the command line; keep the
    # original placeholder as the default for backward compatibility.
    audio_path = sys.argv[1] if len(sys.argv) > 1 else "path/to/your/audio.wav"

    emotion, confidence = predict_emotion(model, audio_path, preprocessor_config)
    print(f"Predicted emotion: {emotion} with confidence {confidence:.2f}")
| |
|