# datasetsANDmodels's picture
# Upload usage.py
# 6742ac7 verified
from transformers import VitsModel, AutoTokenizer
import torch
import scipy.io.wavfile as wavfile
import numpy as np
# Usage example: synthesize one Vietnamese sentence with a VITS checkpoint and
# save the resulting audio as a WAV file.

# Load the model and its tokenizer from the same identifier.
# NOTE(review): "male-vietnamese-tts" has no namespace prefix, so it is
# presumably a local directory (or needs a hub namespace) — confirm.
model = VitsModel.from_pretrained("male-vietnamese-tts")
tokenizer = AutoTokenizer.from_pretrained("male-vietnamese-tts")

# Text to synthesize, tokenized into PyTorch tensors ("pt").
text = "Xin chào, đây là một cuộc chạy thử nghiệm."
inputs = tokenizer(text, return_tensors="pt")

# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    output = model(**inputs).waveform

# Drop singleton dimensions (presumably the batch axis for this single input)
# and move the result to a CPU NumPy array so SciPy can write it.
wav = output.squeeze().cpu().numpy()

# Use the sampling rate stored in the model config so playback speed is right.
rate = model.config.sampling_rate
output_file = "vietnamese.wav"
wavfile.write(output_file, rate, wav)
print(f"Audio saved to {output_file}")