import glob
import os
import random
import shutil
import subprocess

import jsonlines


librispeech_dir = "./dataset/LibriSpeech/*"
enroll_path = "./prompt_enroll_audio/all"

# Substrings that mark a LibriSpeech split directory (e.g. "train-clean-100").
SPLIT_MARKERS = ("train-", "test-", "dev-")
# Utterances sampled per speaker, and the duration (seconds) kept from each.
SAMPLES_PER_SPEAKER = 5
CLIP_SECONDS = 3


def is_split_speaker_dir(speaker_dir):
    """Return True if *speaker_dir* contains a train-/test-/dev- marker."""
    return any(marker in speaker_dir for marker in SPLIT_MARKERS)


def sample_flac_files(speaker_dir, count=SAMPLES_PER_SPEAKER):
    """Pick up to *count* random ``.flac`` files from ``speaker_dir/*/``.

    Args:
        speaker_dir: Directory whose immediate sub-directories hold the
            speaker's ``.flac`` utterances.
        count: Number of utterances to sample.

    Returns:
        A list of ``count`` distinct paths, or every available path when the
        speaker has fewer than ``count`` utterances (possibly an empty list).
    """
    pattern = os.path.join(glob.escape(speaker_dir), "*", "*.flac")
    flac_files = glob.glob(pattern)
    if len(flac_files) < count:
        # Not enough utterances to sample from: report the speaker and fall
        # back to using everything that is there.
        print()
        print(flac_files)
        print(speaker_dir)
        return flac_files
    return random.sample(flac_files, count)


def convert_pending_clips(enroll_speaker_dir, speaker_id, seconds=CLIP_SECONDS):
    """Turn every non-WAV file in *enroll_speaker_dir* into a trimmed WAV clip.

    Each pending file is passed to ``ffmpeg -i <src> -t <seconds> <dst>`` with
    ``<dst>`` named ``<speaker_id>_<index>.wav``, and the source copy is then
    removed whether or not the conversion succeeded. Files that already end
    in ``.wav`` are left untouched, so re-running the script does not feed an
    enrolled clip back into ffmpeg and then delete it.

    Args:
        enroll_speaker_dir: Enrollment directory of one speaker.
        speaker_id: Prefix used for the generated WAV file names.
        seconds: Value passed to ffmpeg's ``-t`` option.
    """
    candidates = glob.glob(os.path.join(glob.escape(enroll_speaker_dir), "*"))
    pending = [
        path
        for path in candidates
        if os.path.isfile(path) and not path.endswith(".wav")
    ]
    for index, source in enumerate(pending):
        wav_file = os.path.join(enroll_speaker_dir, f"{speaker_id}_{index}.wav")
        command = ["ffmpeg", "-i", source, "-t", str(seconds), wav_file]
        try:
            # An argument list (no shell) keeps paths with spaces or shell
            # metacharacters intact.
            succeeded = subprocess.run(command, check=False).returncode == 0
        except OSError as err:
            # ffmpeg missing or not executable; keep going like a failed run.
            print(f"could not run ffmpeg: {err}")
            succeeded = False
        if not succeeded:
            print(f"ffmpeg failed for {source}")
        os.remove(source)


def enroll_speakers(speaker_dirs, enroll_root):
    """Create enrollment clips for every speaker directory of a known split.

    For each speaker whose enrollment directory holds no ``.wav`` yet, a
    random sample of its ``.flac`` utterances is copied there and converted
    to short WAV clips. Speakers that already have WAV clips are only
    reported (their clips are printed).

    Args:
        speaker_dirs: Candidate speaker directories; those without a
            train-/test-/dev- marker in their path are skipped.
        enroll_root: Directory receiving one sub-directory per speaker id.

    Returns:
        The source ``.flac`` paths that were sampled during this call.
    """
    new_files = []
    for speaker_dir in speaker_dirs:
        if not is_split_speaker_dir(speaker_dir):
            continue
        speaker_id = os.path.basename(speaker_dir)
        enroll_speaker_dir = os.path.join(enroll_root, speaker_id)
        os.makedirs(enroll_speaker_dir, exist_ok=True)

        existing_wavs = glob.glob(
            os.path.join(glob.escape(enroll_speaker_dir), "*.wav")
        )
        if existing_wavs:
            print(existing_wavs)
        else:
            for flac_file in sample_flac_files(speaker_dir):
                target = os.path.join(
                    enroll_speaker_dir, os.path.basename(flac_file)
                )
                shutil.copy(flac_file, target)
                new_files.append(flac_file)

        convert_pending_clips(enroll_speaker_dir, speaker_id)
    return new_files


def main():
    """Enroll all speakers under ``librispeech_dir`` and write the manifest."""
    os.makedirs(enroll_path, exist_ok=True)
    new_files = enroll_speakers(glob.glob(f"{librispeech_dir}/*"), enroll_path)
    # NOTE(review): the manifest is rewritten from scratch on every run, so a
    # run that enrolls nothing leaves it empty - confirm that is intended.
    manifest = os.path.join(enroll_path, "enrolled_wavs.txt")
    with open(manifest, "w", encoding="utf-8") as f:
        f.write("\n".join(new_files))


if __name__ == "__main__":
    main()