import os
import shutil
from pathlib import Path

import librosa
import soundfile as sf
# Default locations and sample rate used when this file is run as a script.
input_dir = "/u01/colombo/hungnt/hieuld/tts/data_2"
output_dir = "/u01/colombo/hungnt/hieuld/tts/data_2_24k"
target_sr = 24000


def _copy_label(wav_path, dst_dir):
    """Copy the ``.lab`` file sitting next to *wav_path* into *dst_dir*, if one exists."""
    lab_path = wav_path.with_suffix(".lab")
    if lab_path.exists():
        shutil.copy(lab_path, dst_dir / lab_path.name)


def resample_directory(src_dir, dst_dir, sample_rate):
    """Resample every ``*.wav`` directly inside *src_dir* and write it to *dst_dir*.

    Each file is loaded with ``librosa.load(..., sr=None)``, resampled only
    when its rate differs from *sample_rate*, and written under the same file
    name in *dst_dir*. A same-stem ``.lab`` file, when present, is copied
    alongside the audio.

    Args:
        src_dir: Directory containing the source ``.wav`` files (not searched
            recursively). Anything accepted by ``pathlib.Path``.
        dst_dir: Directory that receives the output; created if missing.
        sample_rate: Sample rate, in Hz, passed to the resampler and writer.

    Returns:
        The number of ``.wav`` files processed.

    Raises:
        FileNotFoundError: If *src_dir* is not an existing directory.
    """
    src = Path(src_dir)
    dst = Path(dst_dir)
    # Fail loudly instead of reporting "Found 0 wav files" for a mistyped path.
    if not src.is_dir():
        raise FileNotFoundError(f"Input directory not found: {src}")
    os.makedirs(dst, exist_ok=True)

    # Sorted so the progress log is reproducible across filesystems.
    wav_files = sorted(src.glob("*.wav"))
    print(f"Found {len(wav_files)} wav files")

    for wav_path in wav_files:
        print(f"Processing {wav_path.name}...")
        # sr=None is passed so the rate reported back is the file's own rate
        # (per the librosa.load documentation) rather than a default.
        audio, sr = librosa.load(wav_path, sr=None)
        if sr != sample_rate:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate)
        sf.write(dst / wav_path.name, audio, sample_rate)
        _copy_label(wav_path, dst)

    return len(wav_files)


def main():
    """Resample the default input directory and print the follow-up hint."""
    resample_directory(input_dir, output_dir, target_sr)
    print(f"✓ Done! Audio saved to {output_dir}")
    print("Now update your config to use this directory")


if __name__ == "__main__":
    main()