Instructions to use khmerttsopensource/khmer-tts with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use khmerttsopensource/khmer-tts with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-to-audio", model="khmerttsopensource/khmer-tts")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForPreTraining

tokenizer = AutoTokenizer.from_pretrained("khmerttsopensource/khmer-tts")
model = AutoModelForPreTraining.from_pretrained("khmerttsopensource/khmer-tts")
```
- Notebooks
- Google Colab
- Kaggle
#!/usr/bin/env python3
import argparse
from pathlib import Path

import torch
from scipy.io.wavfile import write
from transformers import AutoTokenizer, VitsModel
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the command-line options of the Khmer TTS script.

    Args:
        argv: Argument strings to parse. When ``None`` (the default),
            argparse reads ``sys.argv[1:]``, which is what the script did
            before this parameter existed.

    Returns:
        A namespace with three string attributes:

        * ``model`` -- model location handed to ``from_pretrained``;
          defaults to the parent of the directory containing this file.
        * ``text`` -- the text to synthesize (required).
        * ``output`` -- destination WAV path; defaults to
          ``khmer_tts.wav``.
    """
    parser = argparse.ArgumentParser(
        description="Generate Khmer speech with the local MMS TTS model."
    )
    # parents[0] is this file's directory, parents[1] is the directory above it.
    parser.add_argument(
        "--model",
        default=str(Path(__file__).resolve().parents[1]),
        help="model directory or identifier (default: %(default)s)",
    )
    parser.add_argument("--text", required=True, help="text to synthesize")
    parser.add_argument(
        "--output",
        default="khmer_tts.wav",
        help="output WAV path (default: %(default)s)",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Synthesize speech for the ``--text`` argument and save it as a WAV file.

    Loads the tokenizer and ``VitsModel`` from ``--model``, runs one forward
    pass without gradient tracking, writes the resulting waveform to
    ``--output`` at the model's configured sampling rate, and prints a
    one-line confirmation.
    """
    args = parse_args()

    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = VitsModel.from_pretrained(args.model)
    model.eval()  # inference only: switch the module to evaluation mode

    inputs = tokenizer(args.text, return_tensors="pt")
    with torch.no_grad():
        # squeeze() drops singleton dimensions before the NumPy conversion;
        # presumably this leaves a 1-D sample array for a single input text
        # -- TODO confirm against the model's output shape.
        waveform = model(**inputs).waveform.squeeze().cpu().numpy()

    sampling_rate = model.config.sampling_rate

    # Create the destination directory up front so a nested --output path
    # does not fail after the model has already run.
    Path(args.output).parent.mkdir(parents=True, exist_ok=True)
    write(args.output, rate=sampling_rate, data=waveform)
    print(f"Wrote {args.output} at {sampling_rate} Hz")
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()