Instructions to use khmerttsopensource/khmer-tts with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use khmerttsopensource/khmer-tts with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-to-audio", model="khmerttsopensource/khmer-tts")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForPreTraining

tokenizer = AutoTokenizer.from_pretrained("khmerttsopensource/khmer-tts")
model = AutoModelForPreTraining.from_pretrained("khmerttsopensource/khmer-tts")
```
- Notebooks
- Google Colab
- Kaggle
File size: 1,037 Bytes
c9380a9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | #!/usr/bin/env python3
import argparse
from pathlib import Path
import torch
from scipy.io.wavfile import write
from transformers import AutoTokenizer, VitsModel
def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the speech-generation script.

    Options:
        --model:  Model location. Defaults to the parent of the directory
                  that contains this file.
        --text:   Text to synthesize (required).
        --output: Path of the WAV file to write (default ``khmer_tts.wav``).

    Returns:
        The populated ``argparse.Namespace`` with ``model``, ``text`` and
        ``output`` attributes.
    """
    # parents[0] is this file's directory; parents[1] is one level above it.
    default_model_dir = Path(__file__).resolve().parents[1]

    cli = argparse.ArgumentParser(
        description="Generate Khmer speech with the local MMS TTS model.",
    )
    cli.add_argument("--model", default=str(default_model_dir))
    cli.add_argument("--text", required=True)
    cli.add_argument("--output", default="khmer_tts.wav")
    return cli.parse_args()
def main() -> None:
    """Synthesize speech for the requested text and save it as a WAV file.

    Reads the CLI options, loads the tokenizer and the VITS model from the
    ``--model`` location, runs a single forward pass without gradient
    tracking, writes the resulting waveform to ``--output`` at the model's
    configured sampling rate, and prints a confirmation line.
    """
    options = parse_args()

    text_tokenizer = AutoTokenizer.from_pretrained(options.model)
    tts_model = VitsModel.from_pretrained(options.model)
    tts_model.eval()

    encoded = text_tokenizer(options.text, return_tensors="pt")

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        result = tts_model(**encoded)
        # Drop singleton dimensions and convert to a NumPy array for scipy.
        audio = result.waveform.squeeze().cpu().numpy()

    sample_rate = tts_model.config.sampling_rate
    write(options.output, rate=sample_rate, data=audio)
    print(f"Wrote {options.output} at {sample_rate} Hz")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|