File size: 1,473 Bytes
79cf5f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import pathlib
import shutil

import click
import librosa
import numpy as np
import soundfile
import tqdm


@click.command(help='Reformat the WAV files to 16kHz, 16bit PCM mono format and copy labels')
@click.option('--src', required=True, help='Source segments directory')
@click.option('--dst', required=True, help='Target segments directory')
@click.option(
    '--normalize',
    is_flag=True, show_default=True, default=False,
    help='Normalize the audio (peak calculated over all segments)'
)
def reformat_wavs(src, dst, normalize):
    """Resample every ``*.wav`` in ``src`` and write it to ``dst`` with its label.

    Each file matched by ``src/*.wav`` is loaded as mono at 16000 Hz, written
    to ``dst`` under the same file name with the ``PCM_16`` subtype, and the
    sibling ``.lab`` file (same stem) is copied next to it.

    Args:
        src: Path of the directory holding the source segments.
        dst: Path of the output directory; created (with parents) if missing.
        normalize: When true, all samples are divided by a single global
            peak (the largest absolute sample over all segments, plus 0.01)
            instead of being written unscaled.

    Raises:
        click.UsageError: If ``src`` and ``dst`` resolve to the same path, if
            ``src`` is not a directory, or if ``dst`` exists but is not a
            directory.

    Any error from ``shutil.copy`` (e.g. a segment without a matching
    ``.lab`` file) is propagated unchanged.
    """
    src = pathlib.Path(src).resolve()
    dst = pathlib.Path(dst).resolve()
    # Explicit raises rather than ``assert``: assertions are removed under
    # ``python -O``, which would let src == dst overwrite the source files.
    if src == dst:
        raise click.UsageError('src and dst should not be the same path')
    if not src.is_dir() or (dst.exists() and not dst.is_dir()):
        raise click.UsageError('src and dst must be directories')
    dst.mkdir(parents=True, exist_ok=True)
    samplerate = 16000
    # Materialized once because the list is walked twice when normalizing.
    filelist = list(src.glob('*.wav'))
    max_y = 1.0  # divisor used when no normalization is requested
    if normalize:
        # First pass: find the global peak over all segments.
        max_y = 0.0
        for file in tqdm.tqdm(filelist):
            y, _ = librosa.load(file, sr=samplerate, mono=True)
            # np.max raises ValueError on a zero-size array, so skip
            # segments that contain no samples.
            if y.size:
                max_y = max(max_y, np.max(np.abs(y)))
        # The offset keeps the divisor non-zero for all-silent input and the
        # scaled peak strictly below 1.0 (presumably headroom against clipping).
        max_y += 0.01
    # Second pass: rescale, write as 16-bit PCM, and copy the annotation.
    for file in tqdm.tqdm(filelist):
        y, _ = librosa.load(file, sr=samplerate, mono=True)
        soundfile.write((dst / file.name), y / max_y, samplerate, subtype='PCM_16')
        annotation = file.with_suffix('.lab')
        shutil.copy(annotation, dst)
    print('Reformatting and copying done.')


# Script entry point: run the click command only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    reformat_wavs()