| | import argparse |
| | import glob |
| | import multiprocessing |
| | import os |
| | import pathlib |
| |
|
| | import torch |
| | from tqdm import tqdm |
| |
|
| | from TTS.utils.vad import get_vad_model_and_utils, remove_silence |
| |
|
# Limit PyTorch to a single thread for CPU intra-op parallelism.
# NOTE(review): presumably so that several worker processes (--num_processes)
# do not oversubscribe the CPU cores -- confirm.
torch.set_num_threads(1)
| |
|
| |
|
def adjust_path_and_remove_silence(audio_path):
    """Mirror one input audio file into the output tree with silence removed.

    Relies on the module-level ``args`` namespace and ``model_and_utils``
    objects, which the script entry point creates before this is called.

    Args:
        audio_path: Path of the audio file to process; expected to start
            with ``args.input_dir``.

    Returns:
        A tuple ``(output_path, is_speech)``. If the output file already
        exists and ``args.force`` is not set, the file is skipped and
        ``is_speech`` is reported as ``False`` without running the VAD.
        Otherwise both values are whatever ``remove_silence`` returns.
    """
    # os.path.join(x, "") appends a trailing separator, so "data" cannot
    # accidentally match "database/...".
    input_prefix = os.path.join(args.input_dir, "")
    output_prefix = os.path.join(args.output_dir, "")

    if audio_path.startswith(input_prefix):
        # Swap only the leading directory. A plain str.replace() would also
        # rewrite later occurrences of the same string inside the path
        # (e.g. "data/spk/data/a.wav") and corrupt the output location.
        output_path = output_prefix + audio_path[len(input_prefix):]
    else:
        # Unexpected layout: keep the historical substitution, but limit it
        # to the first occurrence.
        output_path = audio_path.replace(input_prefix, output_prefix, 1)

    # Skip files that were already produced unless the user forces a redo.
    if os.path.exists(output_path) and not args.force:
        return output_path, False

    # Recreate the input directory structure under the output directory.
    pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # Run the VAD-based silence removal and save the result to output_path.
    output_path, is_speech = remove_silence(
        model_and_utils,
        audio_path,
        output_path,
        trim_just_beginning_and_end=args.trim_just_beginning_and_end,
        use_cuda=args.use_cuda,
    )
    return output_path, is_speech
| |
|
| |
|
def preprocess_audios():
    """Remove silence from every audio file matched under the input directory.

    Reads its configuration from the module-level ``args`` namespace:
    ``input_dir`` and ``glob`` select the files, ``num_processes`` chooses
    between a multiprocessing pool and a sequential loop, and ``output_dir``
    receives ``filtered_files.txt``.

    Every output path for which ``adjust_path_and_remove_silence`` reports
    ``is_speech`` as false is written to ``filtered_files.txt``, one path per
    line. If no file matches the glob, only a message is printed and nothing
    is written.
    """
    files = sorted(glob.glob(os.path.join(args.input_dir, args.glob), recursive=True))
    print("> Number of files: ", len(files))
    if not args.force:
        print("> Ignoring files that already exist in the output directory.")

    if args.trim_just_beginning_and_end:
        print("> Trimming just the beginning and the end with nonspeech parts.")
    else:
        print("> Trimming all nonspeech parts.")

    if not files:
        print("> No files Found !")
        return

    if args.num_processes > 1:
        # Fan the files out to worker processes; results arrive in completion
        # order, not in input order.
        with multiprocessing.Pool(processes=args.num_processes) as pool:
            results = list(
                tqdm(
                    pool.imap_unordered(adjust_path_and_remove_silence, files),
                    total=len(files),
                    desc="Processing audio files",
                )
            )
    else:
        results = [adjust_path_and_remove_silence(audio_file) for audio_file in tqdm(files)]

    # Keep only the outputs reported as not containing speech.
    filtered_files = [output_path for output_path, is_speech in results if not is_speech]

    # Write the list of files that do not have speech.
    with open(os.path.join(args.output_dir, "filtered_files.txt"), "w", encoding="utf-8") as f:
        f.writelines(f"{path}\n" for path in filtered_files)
| |
|
| |
|
def str_to_bool(value):
    """Convert a command-line token such as ``"True"`` or ``"false"`` to a bool.

    ``type=bool`` in argparse treats every non-empty string as ``True``, so
    ``--use_cuda False`` would silently enable CUDA. This converter parses the
    text instead.

    Args:
        value: The raw command-line string, or an actual ``bool``.

    Returns:
        The parsed boolean. An empty string maps to ``False``, which matches
        what ``bool("")`` returned before.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "1"}:
        return True
    if normalized in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value (True/False), got {value!r}")


if __name__ == "__main__":
    # The description doubles as a usage example.
    parser = argparse.ArgumentParser(
        description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True"
    )
    parser.add_argument("-i", "--input_dir", type=str, help="Dataset root dir", required=True)
    parser.add_argument("-o", "--output_dir", type=str, help="Output Dataset dir", default="")
    parser.add_argument("-f", "--force", default=False, action="store_true", help="Force the replace of exists files")
    parser.add_argument(
        "-g",
        "--glob",
        type=str,
        default="**/*.wav",
        help="path in glob format for access wavs from input_dir. ex: wav48/*/*.wav",
    )
    # The boolean options keep their "--flag True/False" syntax but are parsed
    # with str_to_bool, so that "False" really means False.
    parser.add_argument(
        "-t",
        "--trim_just_beginning_and_end",
        type=str_to_bool,
        default=True,
        help="If True this script will trim just the beginning and end nonspeech parts. If False all nonspeech parts will be trim. Default True",
    )
    parser.add_argument(
        "-c",
        "--use_cuda",
        type=str_to_bool,
        default=False,
        help="If True use cuda",
    )
    parser.add_argument(
        "--use_onnx",
        type=str_to_bool,
        default=False,
        help="If True use onnx",
    )
    parser.add_argument(
        "--num_processes",
        type=int,
        default=1,
        help="Number of processes to use",
    )
    # `args` and `model_and_utils` are deliberately module-level globals: the
    # processing functions read them directly.
    args = parser.parse_args()

    # With no output directory given, write the results into the input directory.
    if args.output_dir == "":
        args.output_dir = args.input_dir

    # Load the VAD model and its helper utilities once, before processing.
    model_and_utils = get_vad_model_and_utils(use_cuda=args.use_cuda, use_onnx=args.use_onnx)
    preprocess_audios()
| |
|