import tensorflow as tf
import numpy as np
import json
import librosa
import os


def load_model(model_path):
    """Load the emotion recognition model from disk."""
    return tf.keras.models.load_model(model_path)


def predict_emotion(model, audio_path, preprocessor_config):
    """Predict the emotion expressed in an audio file.

    Parameters
    ----------
    model : Keras model returned by ``load_model``.
    audio_path : str
        Path to the audio file to classify.
    preprocessor_config : dict
        Must contain "sample_rate", "duration", "offset", "frame_length",
        "hop_length", and "emotions" (a mapping from stringified class
        index to emotion label).

    Returns
    -------
    tuple[str, float]
        The predicted emotion label and its prediction score.
    """
    # Load the audio, resampled and clipped per the preprocessing config.
    waveform, _ = librosa.load(
        audio_path,
        sr=preprocessor_config["sample_rate"],
        duration=preprocessor_config["duration"],
        offset=preprocessor_config["offset"],
    )

    # Zero-pad or truncate so every clip has the exact length the model
    # expects; the two branches are mutually exclusive, hence elif.
    target_length = int(
        preprocessor_config["sample_rate"] * preprocessor_config["duration"]
    )
    if len(waveform) < target_length:
        waveform = np.pad(waveform, (0, target_length - len(waveform)))
    elif len(waveform) > target_length:
        waveform = waveform[:target_length]

    # Mel-spectrogram features matching the training-time preprocessing.
    mel_spec = librosa.feature.melspectrogram(
        y=waveform,
        sr=preprocessor_config["sample_rate"],
        n_fft=preprocessor_config["frame_length"],
        hop_length=preprocessor_config["hop_length"],
        n_mels=128,
    )

    # Log scale; the 1e-10 floor avoids log(0) on silent frames.
    log_mel = np.log(np.maximum(mel_spec, 1e-10))

    # Add batch and channel axes: (n_mels, frames) -> (1, n_mels, frames, 1).
    features = log_mel[np.newaxis, ..., np.newaxis]

    prediction = model.predict(features)[0]

    # Map the argmax index to its label via the config's string-keyed table.
    emotion_idx = int(np.argmax(prediction))
    emotion = preprocessor_config["emotions"][str(emotion_idx)]
    # Cast to a plain float so callers get a JSON-serializable value
    # rather than a NumPy scalar.
    return emotion, float(prediction[emotion_idx])


# Example usage
if __name__ == "__main__":
    # Load model
    model = load_model("emotion_recognition_model.keras")

    # Load preprocessor_config
    with open("preprocessing.json", "r") as f:
        preprocessor_config = json.load(f)

    # Path to your audio file
    audio_path = "path/to/your/audio.wav"

    # Predict emotion
    emotion, confidence = predict_emotion(model, audio_path, preprocessor_config)
    print(f"Predicted emotion: {emotion} with confidence {confidence:.2f}")