"""Build an ``.scp`` path list for the noise ``.wav`` files in ``train_dir``.

Every ``*.wav`` file directly inside ``train_dir`` is read with ``soundfile``.
Files whose summed squared sample values exceed ``energy_threshold`` have
their path written (one per line) to ``train_scp_file``; all other files are
reported on stdout and DELETED from disk.
"""
import glob
import os
import random

import soundfile as sf
from tqdm import tqdm


train_dir = "/data4/liandong/datasets/Noises/DNS_16k/DNS-Challenge-NOISE-16k"
train_scp_file = "/data4/liandong/datasets/Noises/DNS_16k/DNS_16k.scp"

# A file is kept only if the sum of its squared samples is strictly greater
# than this value; anything at or below it is treated as (near-)silent.
energy_threshold = 1e-5

# Non-recursive: only .wav files directly inside train_dir are picked up.
train_file_list = glob.glob(f'{train_dir}/*.wav')

# Either randomise the order of the list or make it deterministic by sorting.
shuffle_flag = True
if shuffle_flag:
    random.shuffle(train_file_list)
else:
    train_file_list.sort()

train_file_num = len(train_file_list)
print('The number of files: {}'.format(train_file_num))

# The context manager guarantees the scp file is flushed and closed even if
# reading or deleting one of the audio files raises part-way through.
with open(train_scp_file, 'w') as train_scp:
    for train_path in tqdm(train_file_list):
        sig, _ = sf.read(train_path)  # the returned sample rate is not needed
        if (sig ** 2).sum() > energy_threshold:
            train_scp.write(train_path + '\n')
        else:
            print(f"The filename {os.path.split(train_path)[-1]} should be filtered.")
            # NOTE: destructive -- the rejected file is removed from the dataset.
            os.remove(train_path)

    print('Write the scp...')