|
|
import pathlib |
|
|
import shutil |
|
|
|
|
|
import click |
|
|
import librosa |
|
|
import numpy as np |
|
|
import soundfile |
|
|
import tqdm |
|
|
|
|
|
|
|
|
@click.command(help='Reformat the WAV files to 16kHz, 16bit PCM mono format and copy labels')
@click.option('--src', required=True, help='Source segments directory')
@click.option('--dst', required=True, help='Target segments directory')
@click.option(
    '--normalize',
    is_flag=True, show_default=True, default=False,
    help='Normalize the audio (peak calculated over all segments)'
)
def reformat_wavs(src, dst, normalize):
    """Rewrite every ``*.wav`` in ``src`` as 16 kHz mono 16-bit PCM in ``dst``.

    For each WAV file directly inside ``src`` (non-recursive), the audio is
    loaded at 16 kHz mono, optionally scaled, and written to ``dst`` under the
    same file name with subtype ``PCM_16``. The ``.lab`` file that shares the
    WAV's stem is then copied into ``dst``.

    Args:
        src: Directory containing the source ``.wav`` / ``.lab`` pairs.
        dst: Output directory; created (with parents) if it does not exist.
        normalize: When true, all files are first scanned for the largest
            absolute sample value, and every file is divided by that peak
            plus 0.01. When false, samples are written unscaled.

    Raises:
        click.UsageError: If ``src`` and ``dst`` resolve to the same path, if
            ``src`` is not a directory, or if ``dst`` exists but is not a
            directory.
        FileNotFoundError: Propagated from ``shutil.copy`` when a WAV file has
            no matching ``.lab`` file next to it.
    """
    src = pathlib.Path(src).resolve()
    dst = pathlib.Path(dst).resolve()

    # Explicit raises instead of ``assert``: assertions are removed under
    # ``python -O``, which would silently disable these safety checks.
    if src == dst:
        raise click.UsageError('src and dst should not be the same path')
    if not src.is_dir() or (dst.exists() and not dst.is_dir()):
        raise click.UsageError('src and dst must be directories')
    dst.mkdir(parents=True, exist_ok=True)

    samplerate = 16000
    # Materialised because the list is iterated twice when normalizing.
    filelist = list(src.glob('*.wav'))

    # A divisor of 1.0 leaves the samples untouched when not normalizing.
    max_y = 1.0
    if normalize:
        max_y = 0.0
        for file in tqdm.tqdm(filelist):
            y, _ = librosa.load(file, sr=samplerate, mono=True)
            # np.max raises ValueError on an empty array, so a zero-length
            # recording must not take part in the peak search.
            if y.size:
                max_y = max(max_y, np.max(np.abs(y)))
        # The small offset keeps the divisor non-zero for all-silent input;
        # presumably it also leaves a little headroom below full scale.
        max_y += 0.01

    for file in tqdm.tqdm(filelist):
        y, _ = librosa.load(file, sr=samplerate, mono=True)
        soundfile.write((dst / file.name), y / max_y, samplerate, subtype='PCM_16')
        # The label file is expected next to the WAV, with the same stem.
        annotation = file.with_suffix('.lab')
        shutil.copy(annotation, dst)
    print('Reformatting and copying done.')
|
|
|
|
|
|
|
|
# Run the click command only when this file is executed as a script;
# importing the module must not trigger any conversion.
if __name__ == '__main__':
    reformat_wavs()
|
|
|