File size: 4,907 Bytes
3d9d3c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import os
import torch
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from main import Config, HybridEmotionRecognitionModel, extract_advanced_features


class EmotionPredictor:
    """Load a trained emotion model and classify individual audio files.

    On construction the predictor samples up to ``_MAX_DUMMY_FILES`` ``.wav``
    files found under ``Config.DATA_DIR`` and uses them to:

    * derive the model's input dimension and number of classes,
    * fit the ``LabelEncoder`` that maps class indices back to emotion names,
    * fit the ``StandardScaler`` applied to features before inference.

    NOTE(review): the scaler and label encoder are re-fitted from this sample
    rather than restored from training artifacts; confirm that this matches
    the preprocessing the saved weights were trained with.
    """

    # Maximum number of audio files sampled to rebuild preprocessing state.
    _MAX_DUMMY_FILES = 100

    def __init__(self, model_path="best_emotion_model.pth"):
        """Build the model, load its weights and fit the preprocessing objects.

        Args:
            model_path (str): Path to the saved model weights (a state dict
                accepted by ``load_state_dict``).

        Raises:
            ValueError: If no usable ``.wav`` file is found under
                ``Config.DATA_DIR``, so the model dimensions cannot be derived.
        """
        # Prepare feature extraction specifics
        self.features = Config.FEATURES

        # Emotion mapping keyed by the third dash-separated filename field
        self.emotion_map = {
            "01": "neutral",
            "02": "calm",
            "03": "happy",
            "04": "sad",
            "05": "angry",
            "06": "fearful",
            "07": "disgust",
            "08": "surprised",
        }

        # A sample of the data provides the input dimension and the class set.
        dummy_features, dummy_labels = self._prepare_dummy_dataset()
        if len(dummy_features) == 0:
            # Fail with an actionable message instead of an IndexError below.
            raise ValueError(
                f"No usable .wav files found under {Config.DATA_DIR!r}; "
                "cannot determine the model input dimension or classes."
            )

        self.model = HybridEmotionRecognitionModel(
            input_dim=len(dummy_features[0]),
            num_classes=len(np.unique(dummy_labels)),
        )

        # Inference below runs on the CPU, so remap any CUDA-saved tensors to
        # the CPU; without this, loading fails on hosts that have no GPU.
        # NOTE: torch.load unpickles the file - only load trusted checkpoints.
        state_dict = torch.load(model_path, map_location=torch.device("cpu"))
        self.model.load_state_dict(state_dict)
        self.model.eval()  # Set to evaluation mode

        # Index -> emotion name mapping (classes_ is sorted by LabelEncoder)
        self.label_encoder = LabelEncoder()
        self.label_encoder.fit(dummy_labels)

        # Feature standardisation fitted on the sampled files
        self.scaler = StandardScaler()
        self.scaler.fit(dummy_features)

    def _prepare_dummy_dataset(self):
        """Collect features and labels from a bounded sample of ``.wav`` files.

        Walks ``Config.DATA_DIR`` recursively and stops as soon as
        ``_MAX_DUMMY_FILES`` files have been processed successfully. Files
        whose name or contents cannot be processed are reported and skipped.

        Returns:
            tuple: ``(features, labels)`` as NumPy arrays; both are empty when
            no file could be processed.
        """
        features = []
        labels = []
        limit = self._MAX_DUMMY_FILES

        for root, dirs, files in os.walk(Config.DATA_DIR):
            # os.walk yields entries in filesystem order; sorting in place
            # makes the sampled subset reproducible across machines.
            dirs.sort()
            for filename in sorted(files):
                if not filename.endswith(".wav"):
                    continue

                file_path = os.path.join(root, filename)
                try:
                    # The emotion code is the third dash-separated field.
                    # NOTE(review): unmapped codes become an extra "unknown"
                    # class - confirm training treated them the same way.
                    emotion_code = filename.split("-")[2]
                    emotion = self.emotion_map.get(emotion_code, "unknown")
                    file_features = extract_advanced_features(file_path)
                except Exception as e:
                    # Best effort: one bad file must not abort the sampling.
                    print(f"Error processing {filename}: {e}")
                    continue

                features.append(file_features)
                labels.append(emotion)

                # Limit to a small number of files for efficiency
                if len(features) >= limit:
                    return np.array(features), np.array(labels)

        return np.array(features), np.array(labels)

    def predict_emotion(self, audio_file_path):
        """Predict the emotion expressed in a single audio file.

        Args:
            audio_file_path (str): Path to the audio file.

        Returns:
            str: The predicted emotion name, or ``"Unknown"`` when feature
            extraction fails (the error is printed, not raised).
        """
        try:
            features = extract_advanced_features(audio_file_path)
        except Exception as e:
            print(f"Error extracting features: {e}")
            return "Unknown"

        # Standardize as a single-row batch, matching what the scaler was
        # fitted on.
        scaled = self.scaler.transform(np.asarray(features).reshape(1, -1))
        features_tensor = torch.FloatTensor(scaled)

        # No gradients are needed for inference.
        with torch.no_grad():
            outputs = self.model(features_tensor)
            predicted_label_index = int(torch.argmax(outputs, dim=1)[0])

        # Convert numeric label to emotion string
        return self.label_encoder.classes_[predicted_label_index]


def main():
    """Run an interactive prompt that predicts emotions for audio files.

    Builds an ``EmotionPredictor`` with its default model path, then
    repeatedly asks for an audio file path and prints the prediction. The
    loop ends when the user enters ``q`` (case-insensitive) or when standard
    input is closed.
    """
    # Initialize predictor
    predictor = EmotionPredictor()

    print("Emotion Prediction Script")
    print("------------------------")

    while True:
        try:
            audio_path = input(
                "Enter the path to an audio file (or 'q' to quit): "
            ).strip()
        except EOFError:
            # stdin was closed (Ctrl-D or exhausted piped input): treat it as
            # a quit request instead of crashing with a traceback.
            print()
            break

        if audio_path.lower() == "q":
            break

        if not os.path.exists(audio_path):
            print("File does not exist. Please check the path.")
            continue

        try:
            emotion = predictor.predict_emotion(audio_path)
        except Exception as e:
            # Keep the prompt alive; report the failure and ask again.
            print(f"Error predicting emotion: {e}")
        else:
            print(f"Predicted Emotion: {emotion}")

# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()