File size: 2,087 Bytes
3cd07d4
 
 
 
 
 
 
 
 
 
 
b5c4dba
3cd07d4
 
 
 
b5c4dba
 
 
3cd07d4
 
 
b5c4dba
3cd07d4
 
 
 
 
 
 
 
b5c4dba
 
 
3cd07d4
 
 
 
 
 
 
 
 
 
 
 
 
 
b5c4dba
3cd07d4
 
 
 
 
 
 
 
b5c4dba
3cd07d4
b5c4dba
3cd07d4
 
 
 
 
b5c4dba
3cd07d4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

import tensorflow as tf
import numpy as np
import json
import librosa
import os

def load_model(model_path):
    """Load and return the trained emotion-recognition Keras model at *model_path*."""
    emotion_model = tf.keras.models.load_model(model_path)
    return emotion_model

def predict_emotion(model, audio_path, preprocessor_config):
    """Predict the emotion expressed in an audio file.

    Parameters
    ----------
    model : keras.Model
        Trained emotion-recognition model; receives a log-mel spectrogram
        of shape (1, 128, frames, 1).
    audio_path : str
        Path to the audio file to classify.
    preprocessor_config : dict
        Preprocessing settings with keys "sample_rate", "duration",
        "offset", "frame_length", "hop_length", and "emotions" (a mapping
        of stringified class index -> emotion label).

    Returns
    -------
    tuple
        (emotion_label, confidence) where confidence is the model's score
        for the predicted class.
    """
    # Load audio at the configured sample rate/window; the returned sample
    # rate equals the requested one, so it is deliberately discarded.
    waveform, _ = librosa.load(
        audio_path,
        sr=preprocessor_config["sample_rate"],
        duration=preprocessor_config["duration"],
        offset=preprocessor_config["offset"]
    )

    # Zero-pad or truncate so the spectrogram width matches the model input.
    # The two cases are mutually exclusive, hence elif.
    target_length = int(preprocessor_config["sample_rate"] * preprocessor_config["duration"])
    if len(waveform) < target_length:
        waveform = np.pad(waveform, (0, target_length - len(waveform)))
    elif len(waveform) > target_length:
        waveform = waveform[:target_length]

    # Mel spectrogram with the same STFT parameters used at training time.
    mel_spec = librosa.feature.melspectrogram(
        y=waveform,
        sr=preprocessor_config["sample_rate"],
        n_fft=preprocessor_config["frame_length"],
        hop_length=preprocessor_config["hop_length"],
        n_mels=128
    )

    # Log-compress; the 1e-10 floor prevents log(0) = -inf on silent frames.
    log_mel = np.log(np.maximum(mel_spec, 1e-10))

    # (n_mels, frames) -> (1, n_mels, frames, 1): add batch and channel axes.
    features = np.expand_dims(np.expand_dims(log_mel, axis=0), axis=-1)

    prediction = model.predict(features)[0]

    # Map the winning class index to its label; config keys are strings,
    # so coerce the numpy integer explicitly before formatting.
    emotion_idx = int(np.argmax(prediction))
    emotion = preprocessor_config["emotions"][str(emotion_idx)]

    return emotion, prediction[emotion_idx]

# Example usage
if __name__ == "__main__":
    # The trained model file is expected alongside this script.
    emotion_model = load_model("emotion_recognition_model.keras")

    # Preprocessing settings saved at training time.
    with open("preprocessing.json", "r") as config_file:
        config = json.load(config_file)

    # Replace with the recording you want to classify.
    sample_path = "path/to/your/audio.wav"

    label, score = predict_emotion(emotion_model, sample_path, config)
    print(f"Predicted emotion: {label} with confidence {score:.2f}")