Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import pandas as pd | |
| import pickle | |
| import tqdm | |
| import os | |
| from utils import get_label, extract_feature, get_first_letters | |
| from collections import defaultdict | |
class AudioExtractor:
    """A class that is used to featurize audio clips, and provide
    them to the machine learning algorithms for training and testing."""

    def __init__(self, audio_config=None, verbose=1, features_folder_name="features",
                 classification=True, emotions=None, balance=True):
        """
        Params:
            audio_config (dict): the dictionary that indicates what features to extract from the audio file,
                default is {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
                (i.e mfcc, chroma and mel)
            verbose (bool/int): verbosity level, 0 for silence, 1 for info, default is 1
            features_folder_name (str): the folder to store output features extracted, default is "features".
            classification (bool): whether it is a classification or regression, default is True (i.e classification)
            emotions (list): list of emotions to be extracted, default is ['sad', 'neutral', 'happy']
            balance (bool): whether to balance dataset (both training and testing), default is True
        """
        self.audio_config = audio_config if audio_config else \
            {'mfcc': True, 'chroma': True, 'mel': True, 'contrast': False, 'tonnetz': False}
        self.verbose = verbose
        self.features_folder_name = features_folder_name
        self.classification = classification
        # None default (copied to a fresh list) instead of a shared mutable default argument
        self.emotions = list(emotions) if emotions is not None else ['sad', 'neutral', 'happy']
        self.balance = balance
        # dimensionality of a single feature vector; set lazily on first extract/load
        self.input_dimension = None

    def _load_data(self, desc_files, partition, shuffle):
        """Load metadata + features for `partition`, then balance/shuffle as configured."""
        # BUGFIX: validate the partition up front — previously an invalid
        # partition was only rejected when self.balance was True
        if partition not in ("train", "test"):
            raise TypeError("Invalid partition, must be either train/test")
        self.load_metadata_from_desc_file(desc_files, partition)
        # balancing the dataset (train or test) if requested
        if self.balance:
            self._balance_data(partition)
        if shuffle:
            self.shuffle_data_by_partition(partition)

    def load_train_data(self, desc_files=None, shuffle=False):
        """Loads training data from the metadata files `desc_files`"""
        if desc_files is None:
            desc_files = ["train_speech.csv"]
        self._load_data(desc_files, "train", shuffle)

    def load_test_data(self, desc_files=None, shuffle=False):
        """Loads testing data from the metadata files `desc_files`"""
        if desc_files is None:
            desc_files = ["test_speech.csv"]
        self._load_data(desc_files, "test", shuffle)

    def shuffle_data_by_partition(self, partition):
        """Shuffle audio paths, emotions and features of `partition` in unison."""
        if partition == "train":
            self.train_audio_paths, self.train_emotions, self.train_features = shuffle_data(
                self.train_audio_paths, self.train_emotions, self.train_features)
        elif partition == "test":
            self.test_audio_paths, self.test_emotions, self.test_features = shuffle_data(
                self.test_audio_paths, self.test_emotions, self.test_features)
        else:
            raise TypeError("Invalid partition, must be either train/test")

    def load_metadata_from_desc_file(self, desc_files, partition):
        """Read metadata from CSV file(s) & extract and load features of audio files.
        Params:
            desc_files (list): list of description files (csv files) to read from;
                each is expected to have 'path' and 'emotion' columns
            partition (str): whether is "train" or "test"
        """
        # concatenate all description files into a single dataframe
        df = pd.DataFrame({'path': [], 'emotion': []})
        for desc_file in desc_files:
            df = pd.concat((df, pd.read_csv(desc_file)), sort=False)
        if self.verbose:
            print("[*] Loading audio file paths and its corresponding labels...")
        audio_paths, emotions = list(df['path']), list(df['emotion'])
        # if not classification (i.e. regression), map emotion labels to numbers
        if not self.classification:
            # NOTE: naive mapping — only the two hard-coded emotion sets are supported
            if len(self.emotions) == 3:
                self.categories = {'sad': 1, 'neutral': 2, 'happy': 3}
            elif len(self.emotions) == 5:
                self.categories = {'angry': 1, 'sad': 2, 'neutral': 3, 'ps': 4, 'happy': 5}
            else:
                raise TypeError("Regression is only for either ['sad', 'neutral', 'happy'] or ['angry', 'sad', 'neutral', 'ps', 'happy']")
            emotions = [self.categories[e] for e in emotions]
        # make features folder if it does not exist
        if not os.path.isdir(self.features_folder_name):
            os.mkdir(self.features_folder_name)
        # construct the features file name from config, emotions and sample count
        label = get_label(self.audio_config)
        n_samples = len(audio_paths)
        first_letters = get_first_letters(self.emotions)
        name = os.path.join(self.features_folder_name, f"{partition}_{label}_{first_letters}_{n_samples}.npy")
        if os.path.isfile(name):
            # if file already exists, just load it
            if self.verbose:
                print("[+] Feature file already exists, loading...")
            features = np.load(name)
            # BUGFIX: also record the feature dimensionality on a cache hit
            # (previously input_dimension stayed None when loading from disk)
            if self.input_dimension is None and features.ndim == 2:
                self.input_dimension = features.shape[1]
        else:
            # file does not exist, extract those features and dump them into the file
            features = []
            append = features.append
            for audio_file in tqdm.tqdm(audio_paths, f"Extracting features for {partition}"):
                feature = extract_feature(audio_file, **self.audio_config)
                if self.input_dimension is None:
                    self.input_dimension = feature.shape[0]
                append(feature)
            # convert to numpy array and cache on disk
            features = np.array(features)
            np.save(name, features)
        # initialize (first call) or append to (subsequent calls) the partition's data
        if partition == "train":
            try:
                self.train_audio_paths
            except AttributeError:
                self.train_audio_paths = audio_paths
                self.train_emotions = emotions
                self.train_features = features
            else:
                if self.verbose:
                    print("[*] Adding additional training samples")
                self.train_audio_paths += audio_paths
                self.train_emotions += emotions
                self.train_features = np.vstack((self.train_features, features))
        elif partition == "test":
            try:
                self.test_audio_paths
            except AttributeError:
                self.test_audio_paths = audio_paths
                self.test_emotions = emotions
                self.test_features = features
            else:
                if self.verbose:
                    print("[*] Adding additional testing samples")
                self.test_audio_paths += audio_paths
                self.test_emotions += emotions
                self.test_features = np.vstack((self.test_features, features))
        else:
            raise TypeError("Invalid partition, must be either train/test")

    def _balance_data(self, partition):
        """Down-sample `partition` so every class has the same number of samples
        (the minimum class count). If any class has 0 samples, balancing is
        disabled (self.balance set to False) instead of dropping everything."""
        if partition == "train":
            emotions = self.train_emotions
            features = self.train_features
            audio_paths = self.train_audio_paths
        elif partition == "test":
            emotions = self.test_emotions
            features = self.test_features
            audio_paths = self.test_audio_paths
        else:
            raise TypeError("Invalid partition, must be either train/test")
        # classes are emotion labels for classification, category numbers for regression
        classes = self.emotions if self.classification else list(self.categories.values())
        count = [sum(1 for e in emotions if e == c) for c in classes]
        # get the minimum data samples to balance to
        minimum = min(count)
        if minimum == 0:
            # won't balance, otherwise 0 samples will be loaded
            print("[!] One class has 0 samples, setting balance to False")
            self.balance = False
            return
        if self.verbose:
            print("[*] Balancing the dataset to the minimum value:", minimum)
        d = defaultdict(list)
        counter = {c: 0 for c in classes}
        for emotion, feature, audio_path in zip(emotions, features, audio_paths):
            if counter[emotion] >= minimum:
                # minimum value exceeded for this class, skip the sample
                continue
            counter[emotion] += 1
            d[emotion].append((feature, audio_path))
        # rebuild the (now balanced) parallel lists
        emotions, features, audio_paths = [], [], []
        for emotion, features_audio_paths in d.items():
            for feature, audio_path in features_audio_paths:
                emotions.append(emotion)
                features.append(feature)
                audio_paths.append(audio_path)
        if partition == "train":
            self.train_emotions = emotions
            self.train_features = features
            self.train_audio_paths = audio_paths
        else:
            self.test_emotions = emotions
            self.test_features = features
            self.test_audio_paths = audio_paths

    def balance_training_data(self):
        """Balance the training partition (see _balance_data)."""
        self._balance_data("train")

    def balance_testing_data(self):
        """Balance the testing partition (see _balance_data)."""
        self._balance_data("test")
def shuffle_data(audio_paths, emotions, features):
    """Shuffle the three parallel lists with one shared random permutation,
    keeping each (path, emotion, feature) triple aligned.
    Params:
        audio_paths (list): Paths to audio clips
        emotions (list): Emotions in each audio clip
        features (list): features audio clips
    """
    order = np.random.permutation(len(audio_paths))
    reordered = tuple([seq[i] for i in order]
                      for seq in (audio_paths, emotions, features))
    return reordered
def load_data(train_desc_files, test_desc_files, audio_config=None, classification=True,
              shuffle=True, balance=True, emotions=None):
    """Load, featurize and return train/test data described by CSV metadata files.
    Params:
        train_desc_files (list): description (csv) files for the training samples
        test_desc_files (list): description (csv) files for the testing samples
        audio_config (dict): features to extract (see AudioExtractor), default is mfcc+chroma+mel
        classification (bool): classification vs regression, default is True
        shuffle (bool): whether to shuffle each partition after loading, default is True
        balance (bool): whether to balance each partition, default is True
        emotions (list): emotions to load, default is ['sad', 'neutral', 'happy']
    Returns:
        dict with "X_train"/"X_test" (feature arrays), "y_train"/"y_test" (labels),
        "train_audio_paths"/"test_audio_paths", and the final "balance" flag.
    """
    # None default (resolved here) instead of a shared mutable list default
    if emotions is None:
        emotions = ['sad', 'neutral', 'happy']
    # instantiate the class
    audiogen = AudioExtractor(audio_config=audio_config, classification=classification,
                              emotions=emotions, balance=balance, verbose=0)
    # Loads training data
    audiogen.load_train_data(train_desc_files, shuffle=shuffle)
    # Loads testing data
    audiogen.load_test_data(test_desc_files, shuffle=shuffle)
    # X_train, X_test, y_train, y_test
    return {
        "X_train": np.array(audiogen.train_features),
        "X_test": np.array(audiogen.test_features),
        "y_train": np.array(audiogen.train_emotions),
        "y_test": np.array(audiogen.test_emotions),
        "train_audio_paths": audiogen.train_audio_paths,
        "test_audio_paths": audiogen.test_audio_paths,
        # may have been flipped to False if a class had 0 samples during balancing
        "balance": audiogen.balance,
    }