"""Filter the LRS2 split lists down to the entries listed as valid.

For each of ``train.txt``, ``test.txt`` and ``val.txt`` under
``lrs2_dirpath``, take the text before the first space on every line, keep it
if it also appears in the valid-pairs file, and write the sorted survivors to
``../data/LRS2_CTC{CTC_SCALE}_{filename}``. A short summary (kept count and
total line count) is printed per split file.
"""
import os

CTC_SCALE = 2
lrs2_dirpath = '/media/milselarch/47FC4BC577667AAD/LRS2'
valid_lrs2_filepath = f'../data/LRS2-CTC{CTC_SCALE}-valid-pairs.txt'
filenames = ['train.txt', 'test.txt', 'val.txt']

# Load the valid entries once into a set so each membership test in the loop
# below is O(1). The context manager closes the handle deterministically.
with open(valid_lrs2_filepath) as valid_file:
    valid_lrs2_pairs = {line.strip() for line in valid_file}

for filename in filenames:
    filepath = os.path.join(lrs2_dirpath, filename)
    with open(filepath, 'r') as split_file:
        lines = split_file.readlines()

    # Only the text before the first space is used as the lookup key; anything
    # after it on the line is dropped. partition() returns the whole line when
    # there is no space, and strip() then removes the trailing newline.
    valid_lines = sorted(
        key
        for key in (line.partition(' ')[0].strip() for line in lines)
        if key in valid_lrs2_pairs
    )

    export_filename = f'../data/LRS2_CTC{CTC_SCALE}_{filename}'
    # Entries are newline-separated with no trailing newline after the last.
    # Closing via ``with`` guarantees the data is flushed to disk.
    with open(export_filename, 'w') as export_file:
        export_file.write('\n'.join(valid_lines))

    print(f'<<< {filename} >>>')
    print(f'VALID: {len(valid_lines)}')
    print(f'TOTAL: {len(lines)}')