"""Build an scp list of noise wav files, deleting files with negligible energy.

Every ``*.wav`` directly under ``train_dir`` is read. Files whose summed
squared samples exceed 1e-5 have their path written (one per line) to
``train_scp_file``; all other files are reported and removed from disk.
"""
import glob
import os
import random

import soundfile as sf
from tqdm import tqdm

train_dir = "/data4/liandong/datasets/Noises/DNS_16k/DNS-Challenge-NOISE-16k"
train_scp_file = "/data4/liandong/datasets/Noises/DNS_16k/DNS_16k.scp"

train_file_list = glob.glob(f'{train_dir}/*.wav')

# When shuffling is disabled the list is sorted so the output order is
# deterministic; otherwise the scp lines are written in random order.
shuffle_flag = True
if not shuffle_flag:
    train_file_list.sort()
else:
    random.shuffle(train_file_list)

train_file_num = len(train_file_list)
print('The number of files: {}'.format(train_file_num))

# The context manager guarantees the scp file is flushed and closed even if
# reading or deleting one of the wav files raises part-way through.
with open(train_scp_file, 'w') as train_scp:
    for train_path in tqdm(train_file_list):
        sig, _ = sf.read(train_path)
        # Total signal energy; files at or below the threshold are treated
        # as unusable and are not listed.
        if (sig ** 2).sum() > 1e-5:
            train_scp.write(train_path + '\n')
        else:
            print(f"The filename {os.path.split(train_path)[-1]} should be filtered.")
            # NOTE: destructive — the rejected wav file is permanently
            # deleted from train_dir.
            os.remove(train_path)
    print('Write the scp...')