Upload folder using huggingface_hub

79cf5f5 verified over 1 year ago

1.47 kB

	import pathlib
	import shutil

	import click
	import librosa
	import numpy as np
	import soundfile
	import tqdm


	@click.command(help='Reformat the WAV files to 16kHz, 16bit PCM mono format and copy labels')
	@click.option('--src', required=True, help='Source segments directory')
	@click.option('--dst', required=True, help='Target segments directory')
	@click.option(
	    '--normalize',
	    is_flag=True, show_default=True, default=False,
	    help='Normalize the audio (peak calculated over all segments)'
	)
	def reformat_wavs(src, dst, normalize):
	    """Convert every ``*.wav`` in *src* to 16 kHz mono PCM_16 and copy its label.

	    Each WAV file found directly inside *src* (non-recursive glob) is loaded
	    with librosa at 16 kHz as mono, written to *dst* under the same file name
	    with subtype ``PCM_16``, and the ``.lab`` file that shares its stem is
	    copied next to it.

	    Args:
	        src: Path of the directory holding the source ``.wav``/``.lab`` pairs.
	        dst: Path of the output directory; created (with parents) if missing.
	        normalize: When true, every signal is divided by the peak absolute
	            amplitude found over all files plus a margin of 0.01. When false,
	            samples are written unscaled.

	    Raises:
	        click.UsageError: If *src* and *dst* resolve to the same path.
	        click.BadParameter: If *src* is not a directory, or *dst* exists but
	            is not a directory.
	        FileNotFoundError: Propagated from ``shutil.copy`` when a WAV file has
	            no matching ``.lab`` file.
	    """
	    src = pathlib.Path(src).resolve()
	    dst = pathlib.Path(dst).resolve()

	    # Explicit exceptions rather than ``assert``: assertions are removed under
	    # ``python -O``, which would let an invalid --src/--dst slip through.
	    if src == dst:
	        raise click.UsageError('src and dst should not be the same path')
	    if not src.is_dir():
	        raise click.BadParameter(f'{src} is not a directory', param_hint='--src')
	    if dst.exists() and not dst.is_dir():
	        raise click.BadParameter(
	            f'{dst} exists but is not a directory', param_hint='--dst'
	        )
	    dst.mkdir(parents=True, exist_ok=True)

	    samplerate = 16000
	    filelist = list(src.glob('*.wav'))

	    # Divisor applied to every signal; 1.0 leaves the samples unchanged.
	    max_y = 1.0
	    if normalize:
	        # First pass: find the global peak so all segments share one gain.
	        max_y = 0.0
	        for file in tqdm.tqdm(filelist):
	            y, _ = librosa.load(file, sr=samplerate, mono=True)
	            # np.max raises ValueError on a zero-length array; an empty
	            # signal cannot contribute to the peak, so skip it.
	            if y.size:
	                max_y = max(max_y, np.max(np.abs(y)))
	        # Margin keeps the scaled peak strictly below 1.0 and guarantees a
	        # non-zero divisor when all input is silent or there are no files.
	        max_y += 0.01

	    # Second pass: resample, scale, write, and copy the matching label file.
	    for file in tqdm.tqdm(filelist):
	        y, _ = librosa.load(file, sr=samplerate, mono=True)
	        soundfile.write((dst / file.name), y / max_y, samplerate, subtype='PCM_16')
	        annotation = file.with_suffix('.lab')
	        shutil.copy(annotation, dst)
	    print('Reformatting and copying done.')


	# Script entry point: run the click command only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == '__main__':
	    reformat_wavs()