Spaces:
Runtime error
Runtime error
| import os | |
| import shlex | |
| import subprocess | |
| import progressbar | |
| from time import time | |
| from pathlib import Path | |
def find_all_files(path_dir, extension):
    """Recursively collect the files under *path_dir* with a given extension.

    Args:
        path_dir: Root directory to walk.
        extension: File extension to look for, with or without the leading
            dot (``"wav"`` and ``".wav"`` are treated the same). An empty
            string matches every file.

    Returns:
        A list of ``(stem, full_path)`` tuples, where ``stem`` is the file
        name without its last suffix and ``full_path`` is the walked
        directory joined with the file name.
    """
    # Anchor the match on the dot so that "wav" does not also match names
    # such as "notawav" that merely end with the same letters.
    if extension and not extension.startswith("."):
        suffix = "." + extension
    else:
        suffix = extension

    matches = []
    for root, _dirs, filenames in os.walk(path_dir):
        for name in filenames:
            if name.endswith(suffix):
                matches.append((Path(name).stem, os.path.join(root, name)))
    return matches
def convert16k(inputfile, outputfile16k):
    """Convert one audio file to a 16 kHz WAV file by invoking ``sox``.

    Args:
        inputfile: Path of the audio file to read.
        outputfile16k: Path of the WAV file to write.

    Returns:
        The exit status returned by the ``sox`` process.
    """
    # Build the argument vector directly instead of formatting a command
    # string and re-splitting it: paths containing spaces or quotes would
    # otherwise be broken into several arguments.
    command = [
        'sox', '-c', '1', '-b', '16', inputfile,
        '-t', 'wav', outputfile16k,
        'rate', '16k',
    ]
    return subprocess.call(command)
def main():
    """Convert every matching audio file under ``input_dir`` to a 16 kHz WAV.

    Command-line arguments:
        input_dir: Directory that is searched recursively for audio files.
        output_dir: Directory that receives the converted files; the
            sub-directory layout of ``input_dir`` is reproduced inside it.
        --extension: Extension of the input audio files (default ``wav``).
    """
    import argparse

    parser = argparse.ArgumentParser(description='Convert to wav 16k audio using sox.')
    parser.add_argument('input_dir', type=str,
                        help='Path to the input dir.')
    parser.add_argument('output_dir', type=str,
                        help='Path to the output dir.')
    # The help text previously advertised "mp3" although the default is "wav".
    parser.add_argument('--extension', type=str, default='wav',
                        help='Audio file extension in the input. Default: wav')
    args = parser.parse_args()

    # Find all sequences
    print(f"Finding all audio files with extension '{args.extension}' from {args.input_dir}...")
    found = find_all_files(args.input_dir, args.extension)
    print(f"Done! Found {len(found)} files.")
    if not found:
        # Nothing to convert; also avoids starting a zero-length progress bar.
        return

    # Work with paths relative to input_dir so that the same layout can be
    # recreated under output_dir.
    rel_paths = [os.path.relpath(path, start=args.input_dir) for _stem, path in found]

    # Create all the directories needed
    for rel_dir in {os.path.dirname(rel_path) for rel_path in rel_paths}:
        Path(os.path.join(args.output_dir, rel_dir)).mkdir(parents=True, exist_ok=True)

    # Converting wavs files
    print("Converting the audio to wav files...")
    bar = progressbar.ProgressBar(maxval=len(rel_paths))
    bar.start()
    start_time = time()
    for done, rel_path in enumerate(rel_paths, start=1):
        input_file = os.path.join(args.input_dir, rel_path)
        # The output always gets a ".wav" suffix, whatever the input extension.
        output_file = os.path.join(args.output_dir, os.path.splitext(rel_path)[0] + ".wav")
        convert16k(input_file, output_file)
        # Report progress once the file has actually been processed.
        bar.update(done)
    bar.finish()
    print(f"...done {len(rel_paths)} files in {time()-start_time} seconds.")


if __name__ == "__main__":
    main()