Spaces:

chaitanya9
/

emotion_recognizer

Runtime error

App Files Files Community

chaitanya9 commited on Nov 30, 2021

Commit

27bf1d6

1 Parent(s): 9a38ba2

Upload data_extractor.py

Browse files

Files changed (1) hide show

data_extractor.py +249 -0

data_extractor.py ADDED Viewed

	@@ -0,0 +1,249 @@

+import numpy as np
+import pandas as pd
+import pickle
+import tqdm
+import os
+from utils import get_label, extract_feature, get_first_letters
+from collections import defaultdict
+class AudioExtractor:
+    """A class that is used to featurize audio clips, and provide
+    them to the machine learning algorithms for training and testing"""
+    def __init__(self, audio_config=None, verbose=1, features_folder_name="features", classification=True,
+                    emotions=['sad', 'neutral', 'happy'], balance=True):
+        """
+        Params:
+            audio_config (dict): the dictionary that indicates what features to extract from the audio file,
+                default is {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
+                (i.e mfcc, chroma and mel)
+            verbose (bool/int): verbosity level, 0 for silence, 1 for info, default is 1
+            features_folder_name (str): the folder to store output features extracted, default is "features".
+            classification (bool): whether it is a classification or regression, default is True (i.e classification)
+            emotions (list): list of emotions to be extracted, default is ['sad', 'neutral', 'happy']
+            balance (bool): whether to balance dataset (both training and testing), default is True
+        """
+        self.audio_config = audio_config if audio_config else {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
+        self.verbose = verbose
+        self.features_folder_name = features_folder_name
+        self.classification = classification
+        self.emotions = emotions
+        self.balance = balance
+        # input dimension
+        self.input_dimension = None
+    def _load_data(self, desc_files, partition, shuffle):
+        self.load_metadata_from_desc_file(desc_files, partition)
+        # balancing the datasets ( both training or testing )
+        if partition == "train" and self.balance:
+            self.balance_training_data()
+        elif partition == "test" and self.balance:
+            self.balance_testing_data()
+        else:
+            if self.balance:
+                raise TypeError("Invalid partition, must be either train/test")
+        if shuffle:
+            self.shuffle_data_by_partition(partition)
+    def load_train_data(self, desc_files=["train_speech.csv"], shuffle=False):
+        """Loads training data from the metadata files `desc_files`"""
+        self._load_data(desc_files, "train", shuffle)
+    def load_test_data(self, desc_files=["test_speech.csv"], shuffle=False):
+        """Loads testing data from the metadata files `desc_files`"""
+        self._load_data(desc_files, "test", shuffle)
+    def shuffle_data_by_partition(self, partition):
+        if partition == "train":
+            self.train_audio_paths, self.train_emotions, self.train_features = shuffle_data(self.train_audio_paths,
+            self.train_emotions, self.train_features)
+        elif partition == "test":
+            self.test_audio_paths, self.test_emotions, self.test_features = shuffle_data(self.test_audio_paths,
+            self.test_emotions, self.test_features)
+        else:
+            raise TypeError("Invalid partition, must be either train/test")
+    def load_metadata_from_desc_file(self, desc_files, partition):
+        """Read metadata from a CSV file & Extract and loads features of audio files
+        Params:
+            desc_files (list): list of description files (csv files) to read from
+            partition (str): whether is "train" or "test"
+        """
+        # empty dataframe
+        df = pd.DataFrame({'path': [], 'emotion': []})
+        for desc_file in desc_files:
+            # concat dataframes
+            df = pd.concat((df, pd.read_csv(desc_file)), sort=False)
+        if self.verbose:
+            print("[*] Loading audio file paths and its corresponding labels...")
+        # get columns
+        audio_paths, emotions = list(df['path']), list(df['emotion'])
+        # if not classification, convert emotions to numbers
+        if not self.classification:
+            # so naive and need to be implemented
+            # in a better way
+            if len(self.emotions) == 3:
+                self.categories = {'sad': 1, 'neutral': 2, 'happy': 3}
+            elif len(self.emotions) == 5:
+                self.categories = {'angry': 1, 'sad': 2, 'neutral': 3, 'ps': 4, 'happy': 5}
+            else:
+                raise TypeError("Regression is only for either ['sad', 'neutral', 'happy'] or ['angry', 'sad', 'neutral', 'ps', 'happy']")
+            emotions = [ self.categories[e] for e in emotions ]
+        # make features folder if does not exist
+        if not os.path.isdir(self.features_folder_name):
+            os.mkdir(self.features_folder_name)
+        # get label for features
+        label = get_label(self.audio_config)
+        # construct features file name
+        n_samples = len(audio_paths)
+        first_letters = get_first_letters(self.emotions)
+        name = os.path.join(self.features_folder_name, f"{partition}_{label}_{first_letters}_{n_samples}.npy")
+        if os.path.isfile(name):
+            # if file already exists, just load then
+            if self.verbose:
+                print("[+] Feature file already exists, loading...")
+            features = np.load(name)
+        else:
+            # file does not exist, extract those features and dump them into the file
+            features = []
+            append = features.append
+            for audio_file in tqdm.tqdm(audio_paths, f"Extracting features for {partition}"):
+                feature = extract_feature(audio_file, **self.audio_config)
+                if self.input_dimension is None:
+                    self.input_dimension = feature.shape[0]
+                append(feature)
+            # convert to numpy array
+            features = np.array(features)
+            # save it
+            np.save(name, features)
+        if partition == "train":
+            try:
+                self.train_audio_paths
+            except AttributeError:
+                self.train_audio_paths = audio_paths
+                self.train_emotions = emotions
+                self.train_features = features
+            else:
+                if self.verbose:
+                    print("[*] Adding additional training samples")
+                self.train_audio_paths += audio_paths
+                self.train_emotions += emotions
+                self.train_features = np.vstack((self.train_features, features))
+        elif partition == "test":
+            try:
+                self.test_audio_paths
+            except AttributeError:
+                self.test_audio_paths = audio_paths
+                self.test_emotions = emotions
+                self.test_features = features
+            else:
+                if self.verbose:
+                    print("[*] Adding additional testing samples")
+                self.test_audio_paths += audio_paths
+                self.test_emotions += emotions
+                self.test_features = np.vstack((self.test_features, features))
+        else:
+            raise TypeError("Invalid partition, must be either train/test")
+    def _balance_data(self, partition):
+        if partition == "train":
+            emotions = self.train_emotions
+            features = self.train_features
+            audio_paths = self.train_audio_paths
+        elif partition == "test":
+            emotions = self.test_emotions
+            features = self.test_features
+            audio_paths = self.test_audio_paths
+        else:
+            raise TypeError("Invalid partition, must be either train/test")
+        count = []
+        if self.classification:
+            for emotion in self.emotions:
+                count.append(len([ e for e in emotions if e == emotion]))
+        else:
+            # regression, take actual numbers, not label emotion
+            for emotion in self.categories.values():
+                count.append(len([ e for e in emotions if e == emotion]))
+        # get the minimum data samples to balance to
+        minimum = min(count)
+        if minimum == 0:
+            # won't balance, otherwise 0 samples will be loaded
+            print("[!] One class has 0 samples, setting balance to False")
+            self.balance = False
+            return
+        if self.verbose:
+            print("[*] Balancing the dataset to the minimum value:", minimum)
+        d = defaultdict(list)
+        if self.classification:
+            counter = {e: 0 for e in self.emotions }
+        else:
+            counter = { e: 0 for e in self.categories.values() }
+        for emotion, feature, audio_path in zip(emotions, features, audio_paths):
+            if counter[emotion] >= minimum:
+                # minimum value exceeded
+                continue
+            counter[emotion] += 1
+            d[emotion].append((feature, audio_path))
+        emotions, features, audio_paths = [], [], []
+        for emotion, features_audio_paths in d.items():
+            for feature, audio_path in features_audio_paths:
+                emotions.append(emotion)
+                features.append(feature)
+                audio_paths.append(audio_path)
+        if partition == "train":
+            self.train_emotions = emotions
+            self.train_features = features
+            self.train_audio_paths = audio_paths
+        elif partition == "test":
+            self.test_emotions = emotions
+            self.test_features = features
+            self.test_audio_paths = audio_paths
+        else:
+            raise TypeError("Invalid partition, must be either train/test")
+    def balance_training_data(self):
+        self._balance_data("train")
+    def balance_testing_data(self):
+        self._balance_data("test")
+def shuffle_data(audio_paths, emotions, features):
+    """ Shuffle the data (called after making a complete pass through
+        training or validation data during the training process)
+    Params:
+        audio_paths (list): Paths to audio clips
+        emotions (list): Emotions in each audio clip
+        features (list): features audio clips
+    """
+    p = np.random.permutation(len(audio_paths))
+    audio_paths = [audio_paths[i] for i in p]
+    emotions = [emotions[i] for i in p]
+    features = [features[i] for i in p]
+    return audio_paths, emotions, features
+def load_data(train_desc_files, test_desc_files, audio_config=None, classification=True, shuffle=True,
+                balance=True, emotions=['sad', 'neutral', 'happy']):
+    # instantiate the class
+    audiogen = AudioExtractor(audio_config=audio_config, classification=classification, emotions=emotions,
+                                balance=balance, verbose=0)
+    # Loads training data
+    audiogen.load_train_data(train_desc_files, shuffle=shuffle)
+    # Loads testing data
+    audiogen.load_test_data(test_desc_files, shuffle=shuffle)
+    # X_train, X_test, y_train, y_test
+    return {
+        "X_train": np.array(audiogen.train_features),
+        "X_test": np.array(audiogen.test_features),
+        "y_train": np.array(audiogen.train_emotions),
+        "y_test": np.array(audiogen.test_emotions),
+        "train_audio_paths": audiogen.train_audio_paths,
+        "test_audio_paths": audiogen.test_audio_paths,
+        "balance": audiogen.balance,
+    }