| import argparse | |
| from pathlib import Path | |
| import librosa | |
| import soundfile as sf | |
def main(argv=None):
    """Resample, downmix and silence-trim a reference clip for XTTS cloning.

    The input is loaded as mono at ``--target_sr``, leading and trailing
    silence is removed with ``librosa.effects.trim`` using ``--top_db``,
    and the result is written to ``--output`` with ``soundfile.write``.
    Missing parent directories of the output path are created. No
    amplitude scaling is applied.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, so running the
            script from the shell is unchanged.

    Raises:
        SystemExit: With status 2 when an argument is invalid (missing
            input file, non-positive ``--target_sr`` or ``--top_db``), or
            with an error message when there is no audio left to write.
    """
    parser = argparse.ArgumentParser(
        description="Normalize/trim a reference file for better XTTS cloning quality."
    )
    parser.add_argument("--input", required=True, help="Input audio path.")
    parser.add_argument(
        "--output",
        default="data/reference_clean.wav",
        help="Output cleaned WAV path.",
    )
    parser.add_argument(
        "--target_sr",
        type=int,
        default=24000,
        help="Output sample rate.",
    )
    parser.add_argument(
        "--top_db",
        type=int,
        default=30,
        help="Silence trimming aggressiveness.",
    )
    args = parser.parse_args(argv)

    # Validate up front so the user gets a short CLI error instead of a
    # traceback from deep inside the audio libraries.
    if not Path(args.input).is_file():
        parser.error(f"input file not found: {args.input}")
    if args.target_sr <= 0:
        parser.error("--target_sr must be a positive integer")
    if args.top_db <= 0:
        # A non-positive threshold would classify every frame as silence.
        parser.error("--top_db must be a positive integer")

    y, _ = librosa.load(args.input, sr=args.target_sr, mono=True)
    if len(y) == 0:
        raise SystemExit(f"error: no audio samples found in {args.input}")

    y_trimmed, _ = librosa.effects.trim(y, top_db=args.top_db)
    if len(y_trimmed) == 0:
        # Refuse to write an empty file that would be useless as a reference.
        raise SystemExit(
            f"error: nothing left after trimming silence from {args.input}"
        )

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    sf.write(str(output_path), y_trimmed, args.target_sr)
    print(f"Saved cleaned reference: {output_path}")
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    raise SystemExit(main())