import os
import torch
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from main import Config, HybridEmotionRecognitionModel, extract_advanced_features
class EmotionPredictor:
    """Load a trained HybridEmotionRecognitionModel and predict emotions for audio files."""

    def __init__(self, model_path="best_emotion_model.pth"):
        """
        Initialize the emotion predictor.

        Args:
            model_path (str): Path to the saved model weights.
        """
        # Feature-extraction settings shared with the training script.
        self.features = Config.FEATURES
        # Emotion code -> label mapping (same as in the original script);
        # codes are the third dash-separated field of each filename.
        self.emotion_map = {
            "01": "neutral",
            "02": "calm",
            "03": "happy",
            "04": "sad",
            "05": "angry",
            "06": "fearful",
            "07": "disgust",
            "08": "surprised",
        }
        # Re-process a small sample of the training data to recover the model's
        # input dimension, the label set, and the feature scaler.
        # NOTE(review): the scaler/encoder are re-fit on a *subset* of the data,
        # so scaling may differ slightly from training — ideally these would be
        # persisted alongside the model weights.
        dummy_features, dummy_labels = self._prepare_dummy_dataset()
        # Initialize the model with the recovered dimensions.
        self.model = HybridEmotionRecognitionModel(
            input_dim=len(dummy_features[0]), num_classes=len(np.unique(dummy_labels))
        )
        # map_location makes GPU-saved checkpoints loadable on CPU-only
        # machines; all inference below runs on CPU tensors anyway.
        self.model.load_state_dict(
            torch.load(model_path, map_location=torch.device("cpu"))
        )
        self.model.eval()  # Set to evaluation mode (disable dropout etc.)
        # Fit the label encoder on the sampled labels.
        self.label_encoder = LabelEncoder()
        self.label_encoder.fit(dummy_labels)
        # Fit the feature scaler on the sampled features.
        self.scaler = StandardScaler()
        self.scaler.fit(dummy_features)

    def _prepare_dummy_dataset(self, max_files=100):
        """
        Prepare a dataset sample similar to the original preparation method.

        Walks Config.DATA_DIR, extracting features and emotion labels from up
        to ``max_files`` .wav files (capped for efficiency).

        Args:
            max_files (int): Maximum number of files to process.

        Returns:
            tuple: (features, labels) as numpy arrays.
        """
        features = []
        labels = []
        # Walk through all directories and subdirectories.
        for root, _dirs, files in os.walk(Config.DATA_DIR):
            for filename in files:
                if not filename.endswith(".wav"):
                    continue
                file_path = os.path.join(root, filename)
                try:
                    # Emotion code is the third dash-separated filename field.
                    emotion_code = filename.split("-")[2]
                    emotion = self.emotion_map.get(emotion_code, "unknown")
                    features.append(extract_advanced_features(file_path))
                    labels.append(emotion)
                except Exception as e:
                    # Bug fix: report which file failed (was "(unknown)").
                    print(f"Error processing {file_path}: {e}")
                # Early exit once enough samples have been collected
                # (replaces the original triple-break ladder).
                if len(features) >= max_files:
                    return np.array(features), np.array(labels)
        return np.array(features), np.array(labels)

    def predict_emotion(self, audio_file_path):
        """
        Predict the emotion expressed in a given audio file.

        Args:
            audio_file_path (str): Path to the audio file.

        Returns:
            str: Predicted emotion label, or "Unknown" if feature
            extraction fails.
        """
        try:
            features = extract_advanced_features(audio_file_path)
        except Exception as e:
            print(f"Error extracting features: {e}")
            return "Unknown"
        # Standardize with the scaler fitted in __init__ (single-sample batch).
        features = self.scaler.transform(features.reshape(1, -1))
        features_tensor = torch.FloatTensor(features)
        # Inference only — no gradients needed.
        with torch.no_grad():
            outputs = self.model(features_tensor)
            _, predicted = torch.max(outputs, 1)
            predicted_label_index = predicted.numpy()[0]
        # Map the numeric class index back to its emotion string.
        return self.label_encoder.classes_[predicted_label_index]
def main():
    """Interactive entry point: prompt for audio paths and print predictions."""
    predictor = EmotionPredictor()

    # Banner
    print("Emotion Prediction Script")
    print("------------------------")

    # Keep prompting until the user quits with 'q'.
    while True:
        audio_path = input("Enter the path to an audio file (or 'q' to quit): ").strip()

        if audio_path.lower() == "q":
            break

        # Guard clause: skip nonexistent paths without crashing.
        if not os.path.exists(audio_path):
            print("File does not exist. Please check the path.")
            continue

        try:
            emotion = predictor.predict_emotion(audio_path)
        except Exception as e:
            print(f"Error predicting emotion: {e}")
        else:
            print(f"Predicted Emotion: {emotion}")
# Run the interactive prediction loop only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    main()