chronisai / prepare_reference.py
RiishabhSinghal
Add XTTS voice clone Gradio app
21409cb
import argparse
from pathlib import Path
import librosa
import soundfile as sf
def main(argv=None) -> None:
    """Clean up a reference recording from the command line.

    Loads ``--input`` as mono audio resampled to ``--target_sr``, trims
    leading and trailing silence using ``--top_db`` as the threshold, and
    writes the result to ``--output`` (missing parent directories are
    created).

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so calling
            ``main()`` with no arguments behaves as before.

    Raises:
        SystemExit: On invalid arguments, a missing input file, or when
            there are no audio samples to write.
    """
    parser = argparse.ArgumentParser(
        description="Normalize/trim a reference file for better XTTS cloning quality."
    )
    parser.add_argument("--input", required=True, help="Input audio path.")
    parser.add_argument(
        "--output",
        default="data/reference_clean.wav",
        help="Output cleaned WAV path.",
    )
    parser.add_argument(
        "--target_sr",
        type=int,
        default=24000,
        help="Output sample rate.",
    )
    parser.add_argument(
        "--top_db",
        type=int,
        default=30,
        help="Silence trimming aggressiveness.",
    )
    args = parser.parse_args(argv)

    # Reject bad values up front with a usage error instead of letting them
    # fail somewhere inside the audio libraries.
    if args.target_sr <= 0:
        parser.error("--target_sr must be a positive integer")
    if args.top_db <= 0:
        parser.error("--top_db must be a positive integer")

    input_path = Path(args.input)
    if not input_path.is_file():
        parser.error(f"input file not found: {input_path}")

    # The loaded sample rate always equals args.target_sr because librosa
    # resamples on load, so the returned rate is not needed.
    y, _ = librosa.load(args.input, sr=args.target_sr, mono=True)
    if y.size == 0:
        raise SystemExit(f"Input contains no audio samples: {input_path}")

    y_trimmed, _ = librosa.effects.trim(y, top_db=args.top_db)
    if y_trimmed.size == 0:
        # Refuse to write an empty WAV and report it as a success.
        raise SystemExit(
            f"No audio left after trimming {input_path}; nothing was written."
        )

    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    sf.write(str(output_path), y_trimmed, args.target_sr)
    print(f"Saved cleaned reference: {output_path}")
# Run the CLI only when this file is executed as a script, so importing the
# module does not trigger argument parsing or any audio processing.
if __name__ == "__main__":
    main()