| | """ |
| | This script extracts filter banks from audio files. Audio files are split |
| | into frames of 25 ms (taken every 10 ms) and 64 filter bank energies are extracted from each frame. |
| | 64 such frames are grouped together to create a sample which is a |
| | 64 x 64 matrix. Each matrix is saved as a .npy file into the output folder. |
| | Samples from different speakers are in different folders and can be easily read |
| | by torchvision's DatasetFolder. |
| | """ |
| |
|
| | import os |
| | import re |
| | from io import StringIO |
| | from pathlib import Path |
| |
|
| | import numpy as np |
| | import pandas as pd |
| | import librosa |
| | import python_speech_features as psf |
| |
|
# Root directory of the LibriSpeech data; SPEAKERS.TXT and the per-subset
# speaker folders are read relative to this path.
BASE_PATH = 'LibriSpeech'
# Root directory for the generated .npy samples; one sub-folder is created
# beneath it per speaker index.
OUTPUT_PATH = 'fbanks'
# Seed NumPy's global random generator. No code visible in this file draws
# random numbers, so this is presumably kept for reproducibility -- TODO confirm.
np.random.seed(42)
| |
|
| |
|
def read_metadata(speakers_file=None, subset='train-clean-100'):
    """Load the LibriSpeech speaker table and keep the speakers of one subset.

    The first 11 lines of SPEAKERS.TXT are skipped, the leading character of
    the following header line is dropped, and every space is removed before
    the text is parsed as a ``|``-separated table. Because all spaces are
    removed, values that contain spaces (such as speaker names) end up
    concatenated. Rows with an unexpected number of fields are skipped with a
    warning instead of aborting the parse.

    Args:
        speakers_file: Path to SPEAKERS.TXT. Defaults to
            ``BASE_PATH + '/SPEAKERS.TXT'``.
        subset: Value of the ``SUBSET`` column to keep.

    Returns:
        A ``pandas.DataFrame`` with the matching rows, a fresh 0-based index
        and an extra ``LABEL`` column holding the category code of each
        speaker ``ID``.
    """
    if speakers_file is None:
        speakers_file = BASE_PATH + '/SPEAKERS.TXT'

    with open(speakers_file, 'r') as meta:
        lines = meta.readlines()

    # Skip the 11 leading lines, then drop the first character of the header.
    table = ''.join(lines[11:])[1:]
    # The original pattern ' +|' only ever removed runs of spaces (the empty
    # alternative is a no-op), so a plain replace is equivalent and clearer.
    table = table.replace(' ', '')
    buffer = StringIO(table)

    try:
        # pandas >= 1.3: 'warn' skips malformed rows and reports them, which
        # matches the old error_bad_lines=False / warn_bad_lines=True default.
        speakers = pd.read_csv(buffer, sep='|', on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 does not know on_bad_lines; use the legacy flag.
        buffer.seek(0)
        speakers = pd.read_csv(buffer, sep='|', error_bad_lines=False)

    # Copy so that adding LABEL does not write into a view of `speakers`.
    speakers_filtered = speakers[speakers['SUBSET'] == subset].copy()
    speakers_filtered['LABEL'] = speakers_filtered['ID'].astype('category').cat.codes
    return speakers_filtered.reset_index(drop=True)
| |
|
| |
|
def get_fbanks(audio_file):
    """Compute per-frame normalized filter bank features for one audio file.

    The file is loaded at its native sample rate, which must be 16000 Hz.
    Clips shorter than one second yield ``None``. Otherwise a quarter of a
    second is cut from both ends, 64 filter banks are computed with a 25 ms
    window and a 10 ms step, and each frame is standardized on its own.

    Args:
        audio_file: Path of the audio file to load.

    Returns:
        An array of shape ``(num_frames, 64, 1)``, or ``None`` when the clip
        is shorter than one second.
    """

    def normalize_frames(signal, epsilon=1e-12):
        # Standardize every frame separately; epsilon guards against a zero
        # standard deviation.
        standardized = []
        for frame in signal:
            spread = max(np.std(frame), epsilon)
            standardized.append((frame - np.mean(frame)) / spread)
        return np.array(standardized)

    samples, sample_rate = librosa.load(audio_file, sr=None)
    assert sample_rate == 16000

    if samples.shape[0] < 1 * sample_rate:
        # Too short to be useful once both ends are trimmed.
        return None

    edge = int(0.25 * sample_rate)
    samples = samples[edge:-edge]

    # psf.fbank also returns per-frame energies, which are not used here.
    features, _ = psf.fbank(samples, samplerate=sample_rate, nfilt=64, winlen=0.025, winstep=0.01)
    features = normalize_frames(signal=features)

    return features.reshape((features.shape[0], 64, 1))
| |
|
| |
|
def assert_out_dir_exists(index):
    """Make sure the output folder for ``index`` exists and return its path.

    The folder is ``OUTPUT_PATH/<index>``. It is created (including missing
    parents) when absent; either way a status line is printed.

    Args:
        index: Value used (via ``str``) as the folder name under OUTPUT_PATH.

    Returns:
        The folder path as a string.
    """
    target = OUTPUT_PATH + '/' + str(index)

    if os.path.exists(target):
        print('dir {} already exists'.format(target))
    else:
        os.makedirs(target)
        print('crated dir {}'.format(target))

    return target
| |
|
| |
|
def _iter_samples(fbanks, frames_per_sample=64):
    """Yield consecutive, non-overlapping chunks of ``frames_per_sample`` frames.

    Trailing frames that do not fill a complete chunk are dropped.
    """
    last_start = fbanks.shape[0] - frames_per_sample
    for start in range(0, last_start + 1, frames_per_sample):
        yield fbanks[start:start + frames_per_sample]


def main():
    """Extract 64 x 64 filter bank samples for every speaker in the metadata.

    For each speaker row returned by ``read_metadata`` all ``.flac`` files
    below ``BASE_PATH/<SUBSET>/<ID>/`` are processed. Each file's filter banks
    are cut into non-overlapping groups of 64 frames, and every group is saved
    as ``<sample_counter>.npy`` inside the speaker's output folder. Files for
    which ``get_fbanks`` returns ``None`` (too short) are skipped.
    """
    speakers = read_metadata()

    print('read metadata from file, number of rows in in are: {}'.format(speakers.shape))
    print('numer of unique labels in the dataset is: {}'.format(speakers['LABEL'].unique().shape))
    print('max label in the dataset is: {}'.format(speakers['LABEL'].max()))
    print('number of unique index: {}, max index: {}'.format(speakers.index.shape, max(speakers.index)))

    for index, row in speakers.iterrows():
        subset = row['SUBSET']
        id_ = row['ID']
        dir_ = BASE_PATH + '/' + subset + '/' + str(id_) + '/'

        print('working for id: {}, index: {}, at path: {}'.format(id_, index, dir_))

        # Sort so the sample numbering does not depend on filesystem order.
        files_ = sorted(str(f) for f in Path(dir_).glob('**/*.flac'))

        index_target_dir = assert_out_dir_exists(index)

        # Numbering runs across all files of a speaker.
        sample_counter = 0

        for f in files_:
            fbanks = get_fbanks(f)
            if fbanks is None:
                # get_fbanks signals clips shorter than one second with None.
                print('skipping {}: too short to extract samples'.format(f))
                continue

            file_sample_counter = 0
            for slice_ in _iter_samples(fbanks):
                # Internal sanity check on the layout produced by get_fbanks.
                assert slice_.shape == (64, 64, 1)
                np.save(index_target_dir + '/' + str(sample_counter) + '.npy', slice_)

                file_sample_counter += 1
                sample_counter += 1

            print('done for index: {}, Samples from this file: {}'.format(index, file_sample_counter))

        print('done for id: {}, index: {}, total number of samples for this id: {}'.format(id_, index, sample_counter))
        print('')

    print('All done, YAY!, look at the files')
| |
|
| |
|
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()