# Audio_CNN1D / example_usage.py
# (vkushwahaa — uploaded via huggingface_hub, commit b5c4dba verified)
import tensorflow as tf
import numpy as np
import json
import librosa
import os
def load_model(model_path):
    """Load the trained emotion recognition model from disk.

    Args:
        model_path: Path to a saved Keras model (e.g. a ``.keras`` file).

    Returns:
        The loaded ``tf.keras`` model, ready for ``predict``.
    """
    return tf.keras.models.load_model(model_path)
def predict_emotion(model, audio_path, preprocessor_config):
    """Predict the emotion expressed in a single audio file.

    Args:
        model: A loaded ``tf.keras`` model that takes a batch of log-mel
            spectrograms shaped ``(1, n_mels, frames, 1)``.
        audio_path: Path to the audio file to classify.
        preprocessor_config: Dict with keys ``sample_rate``, ``duration``,
            ``offset``, ``frame_length``, ``hop_length`` and ``emotions``
            (a mapping from class-index *strings* to emotion labels).

    Returns:
        Tuple ``(emotion, confidence)`` — the predicted label and the
        model's score for it as a plain Python float.
    """
    # Load the audio, resampling/trimming with the same settings the
    # model was trained with. The returned sample rate is not needed.
    waveform, _ = librosa.load(
        audio_path,
        sr=preprocessor_config["sample_rate"],
        duration=preprocessor_config["duration"],
        offset=preprocessor_config["offset"],
    )

    # Zero-pad short clips / truncate long ones to the exact training
    # length. The two cases are mutually exclusive, hence elif.
    target_length = int(preprocessor_config["sample_rate"] * preprocessor_config["duration"])
    if len(waveform) < target_length:
        waveform = np.pad(waveform, (0, target_length - len(waveform)))
    elif len(waveform) > target_length:
        waveform = waveform[:target_length]

    # Mel spectrogram with the STFT parameters used at training time.
    mel_spec = librosa.feature.melspectrogram(
        y=waveform,
        sr=preprocessor_config["sample_rate"],
        n_fft=preprocessor_config["frame_length"],
        hop_length=preprocessor_config["hop_length"],
        n_mels=128,
    )

    # Log-compress; the 1e-10 floor avoids log(0) on silent frames.
    log_mel = np.log(np.maximum(mel_spec, 1e-10))

    # Add batch (axis 0) and channel (axis -1) dimensions.
    features = np.expand_dims(np.expand_dims(log_mel, axis=0), axis=-1)

    prediction = model.predict(features)[0]

    # argmax returns a numpy integer; cast to int so the str() key
    # matches the JSON config's string-keyed "emotions" mapping.
    emotion_idx = int(np.argmax(prediction))
    emotion = preprocessor_config["emotions"][str(emotion_idx)]
    # float() so callers get a plain Python float, not a numpy scalar.
    return emotion, float(prediction[emotion_idx])
def main():
    """Example usage: classify one audio file and print the result."""
    # Load the trained model and the preprocessing settings saved with it.
    model = load_model("emotion_recognition_model.keras")
    with open("preprocessing.json", "r") as f:
        preprocessor_config = json.load(f)

    # Path to your audio file
    audio_path = "path/to/your/audio.wav"

    emotion, confidence = predict_emotion(model, audio_path, preprocessor_config)
    print(f"Predicted emotion: {emotion} with confidence {confidence:.2f}")


if __name__ == "__main__":
    main()