# khmer-tts/examples/inference.py
# Uploaded by khmerttsopensource
# Commit message: Add Khmer TTS model release
# Commit: c9380a9 (verified)
#!/usr/bin/env python3
import argparse
from pathlib import Path
import torch
from scipy.io.wavfile import write
from transformers import AutoTokenizer, VitsModel
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options for the Khmer TTS example.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default), argparse reads ``sys.argv[1:]``, which is
            the behavior of the original zero-argument call.

    Returns:
        A namespace with three string attributes: ``model``, ``text`` and
        ``output``.

    Raises:
        SystemExit: Raised by argparse when ``--text`` is missing, an unknown
            option is given, or ``--help`` is requested.
    """
    # The default model location is the directory one level above the
    # directory containing this script.
    default_model_dir = Path(__file__).resolve().parents[1]

    parser = argparse.ArgumentParser(
        description="Generate Khmer speech with the local MMS TTS model.",
    )
    parser.add_argument(
        "--model",
        default=str(default_model_dir),
        help="model path or identifier passed to from_pretrained "
        "(default: the directory above this script's directory)",
    )
    parser.add_argument(
        "--text",
        required=True,
        help="text to synthesize",
    )
    parser.add_argument(
        "--output",
        default="khmer_tts.wav",
        help="path of the WAV file to write (default: %(default)s)",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Synthesize speech for ``--text`` and write it to ``--output`` as WAV.

    Loads the tokenizer and VITS model from ``--model``, runs one forward
    pass without gradient tracking, writes the resulting waveform with the
    sampling rate taken from the model config, and prints a confirmation.

    Raises:
        SystemExit: If ``--text`` is empty or whitespace-only (or, via
            ``parse_args``, if the command line is invalid).
    """
    args = parse_args()

    # Fail fast: reject unusable input before paying for the model load.
    if not args.text.strip():
        raise SystemExit("error: --text must contain at least one non-whitespace character")

    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = VitsModel.from_pretrained(args.model)
    model.eval()

    inputs = tokenizer(args.text, return_tensors="pt")
    with torch.no_grad():
        output = model(**inputs)

    # A single text gives a batch of one; index the batch axis explicitly
    # rather than squeeze(), which would also drop a length-1 sample axis.
    waveform = output.waveform[0].cpu().numpy()

    # Create the destination directory so a nested --output path does not
    # fail after synthesis has already completed.
    Path(args.output).parent.mkdir(parents=True, exist_ok=True)

    sampling_rate = model.config.sampling_rate
    write(args.output, rate=sampling_rate, data=waveform)
    print(f"Wrote {args.output} at {sampling_rate} Hz")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()