Spaces:
Runtime error
Runtime error
| import soundfile | |
| import librosa | |
| import numpy as np | |
| import pickle | |
| import os | |
| from convert_wavs import convert_audio | |
# Emotion classes supported by this project's models.
AVAILABLE_EMOTIONS = {
    "angry",
    "boredom",
    "calm",
    "disgust",
    "fear",
    "happy",
    "neutral",
    "ps",  # pleasant surprised
    "sad",
}
def get_label(audio_config):
    """Return a label naming which features are to be extracted.

    e.g:
    audio_config = {'mfcc': True, 'chroma': True, 'contrast': False, 'tonnetz': False, 'mel': False}
    get_label(audio_config): 'mfcc-chroma'
    """
    # Fixed ordering keeps the label deterministic regardless of dict order.
    features = ("mfcc", "chroma", "mel", "contrast", "tonnetz")
    # .get() tolerates configs that omit a key (treated as disabled),
    # where the original raised KeyError; join avoids the +=/rstrip dance.
    return "-".join(f for f in features if audio_config.get(f))
def get_dropout_str(dropout, n_layers=3):
    """Return an underscore-joined string of per-layer dropout rates.

    A list/tuple is joined as-is; a single number is repeated for each of
    the `n_layers` layers.

    Raises:
        TypeError: for any other input type (the original silently
            returned None, e.g. for an integer dropout like 0).
    """
    if isinstance(dropout, (list, tuple)):
        return "_".join(str(d) for d in dropout)
    # Accept int as well as float so e.g. dropout=0 works.
    if isinstance(dropout, (int, float)):
        return "_".join([str(dropout)] * n_layers)
    raise TypeError(f"dropout must be a number or a sequence of numbers, got {type(dropout).__name__}")
def get_first_letters(emotions):
    """Return the sorted uppercase initials of `emotions` as one string."""
    initials = [emotion[0].upper() for emotion in emotions]
    initials.sort()
    return "".join(initials)
def extract_feature(file_name, **kwargs):
    """
    Extract feature from audio `file_name`.

    NOTE(review): despite its name, `file_name` is expected to be a
    (sample_rate, samples) tuple, not a path — the soundfile-based path
    loading was disabled and `file_name[0]`/`file_name[1]` are indexed
    directly. Confirm against callers.

    Features supported:
        - MFCC (mfcc)
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)
    e.g:
    `features = extract_feature(audio, mel=True, mfcc=True)`

    Returns a 1-D numpy array: the per-frame mean of each enabled feature,
    stacked in the order mfcc, chroma, mel, contrast, tonnetz.
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")
    X = file_name[1].astype("float32")
    sample_rate = file_name[0]
    # The STFT is only needed by the chroma and contrast features.
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))
    result = np.array([])
    if mfcc:
        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        result = np.hstack((result, mfccs))
    if chroma:
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, chroma))
    if mel:
        # BUG FIX: librosa >= 0.10 removed the positional audio argument;
        # `y=` must be passed as a keyword or this raises TypeError.
        mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        result = np.hstack((result, mel))
    if contrast:
        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        result = np.hstack((result, contrast))
    if tonnetz:
        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
        result = np.hstack((result, tonnetz))
    return result
def get_best_estimators(classification):
    """
    Load the estimators that are pickled in the `grid` folder.

    Note that if you want to use different or more estimators,
    you can fine tune the parameters in `grid_search.py` script
    and run it again (may take hours).

    `classification` truthy loads the classifiers pickle, otherwise the
    regressors pickle.
    """
    path = "grid/best_classifiers.pickle" if classification else "grid/best_regressors.pickle"
    # Context manager closes the file even if unpickling fails
    # (the original `pickle.load(open(...))` leaked the handle).
    with open(path, "rb") as f:
        return pickle.load(f)
def get_audio_config(features_list):
    """
    Convert a list of feature names into a dictionary understandable by
    the `data_extractor.AudioExtractor` class.

    Raises TypeError if a name outside the supported set is passed.
    """
    supported = ("mfcc", "chroma", "mel", "contrast", "tonnetz")
    for feature in features_list:
        if feature not in supported:
            raise TypeError(f"Feature passed: {feature} is not recognized.")
    return {name: name in features_list for name in supported}