File size: 2,087 Bytes
3cd07d4
 
 
 
 
 
 
 
 
 
 
b5c4dba
3cd07d4
 
 
 
b5c4dba
 
 
3cd07d4
 
 
b5c4dba
3cd07d4
 
 
 
 
 
 
 
b5c4dba
 
 
3cd07d4
 
 
 
 
 
 
 
 
 
 
 
 
 
b5c4dba
3cd07d4
 
 
 
 
 
 
 
b5c4dba
3cd07d4
b5c4dba
3cd07d4
 
 
 
 
b5c4dba
3cd07d4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

import tensorflow as tf
import numpy as np
import json
import librosa
import os

def load_model(model_path):
    """Load and return the trained emotion-recognition Keras model at *model_path*."""
    emotion_model = tf.keras.models.load_model(model_path)
    return emotion_model

def predict_emotion(model, audio_path, preprocessor_config):
    """Predict the emotion expressed in an audio file.

    Parameters
    ----------
    model : keras.Model
        Trained emotion-recognition model; receives a log-mel spectrogram
        of shape (1, 128, frames, 1).
    audio_path : str
        Path to the audio file to classify.
    preprocessor_config : dict
        Preprocessing settings with keys "sample_rate", "duration",
        "offset", "frame_length", "hop_length", and "emotions" (a mapping
        of stringified class index -> emotion label).

    Returns
    -------
    tuple
        (emotion_label, confidence) where confidence is the model's score
        for the predicted class.
    """
    # Load audio at the configured sample rate/window; the returned sample
    # rate equals the requested one, so it is deliberately discarded.
    waveform, _ = librosa.load(
        audio_path,
        sr=preprocessor_config["sample_rate"],
        duration=preprocessor_config["duration"],
        offset=preprocessor_config["offset"]
    )

    # Zero-pad or truncate so the spectrogram width matches the model input.
    # The two cases are mutually exclusive, hence elif.
    target_length = int(preprocessor_config["sample_rate"] * preprocessor_config["duration"])
    if len(waveform) < target_length:
        waveform = np.pad(waveform, (0, target_length - len(waveform)))
    elif len(waveform) > target_length:
        waveform = waveform[:target_length]

    # Mel spectrogram with the same STFT parameters used at training time.
    mel_spec = librosa.feature.melspectrogram(
        y=waveform,
        sr=preprocessor_config["sample_rate"],
        n_fft=preprocessor_config["frame_length"],
        hop_length=preprocessor_config["hop_length"],
        n_mels=128
    )

    # Log-compress; the 1e-10 floor prevents log(0) = -inf on silent frames.
    log_mel = np.log(np.maximum(mel_spec, 1e-10))

    # (n_mels, frames) -> (1, n_mels, frames, 1): add batch and channel axes.
    features = np.expand_dims(np.expand_dims(log_mel, axis=0), axis=-1)

    prediction = model.predict(features)[0]

    # Map the winning class index to its label; config keys are strings,
    # so coerce the numpy integer explicitly before formatting.
    emotion_idx = int(np.argmax(prediction))
    emotion = preprocessor_config["emotions"][str(emotion_idx)]

    return emotion, prediction[emotion_idx]

# Example usage
if __name__ == "__main__":
    # The trained model file is expected alongside this script.
    emotion_model = load_model("emotion_recognition_model.keras")

    # Preprocessing settings saved at training time.
    with open("preprocessing.json", "r") as config_file:
        config = json.load(config_file)

    # Replace with the recording you want to classify.
    sample_path = "path/to/your/audio.wav"

    label, score = predict_emotion(emotion_model, sample_path, config)
    print(f"Predicted emotion: {label} with confidence {score:.2f}")