File size: 581 Bytes
ea60ee0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
from transformers import VitsModel, AutoTokenizer
import torch
import scipy.io.wavfile as wavfile
# Synthesize one Spanish sentence to speech and save it as "spanish.wav".

# Load the model and its tokenizer from the same "spanish-tts" identifier.
model = VitsModel.from_pretrained("spanish-tts")
tokenizer = AutoTokenizer.from_pretrained("spanish-tts")

text = "Estoy llamando desde la oficina de impuestos."
inputs = tokenizer(text, return_tensors="pt")

# Inference only: disable gradient tracking for the forward pass.
with torch.no_grad():
    # Output is a tensor of shape [batch_size, waveform_length]
    output = model(**inputs).waveform

# Remove batch dimension (a single input sentence, so presumably batch size 1)
# and convert to a NumPy array for the WAV writer.
output_audio = output.squeeze(0).numpy()

# Write the audio at the sampling rate declared in the model's config.
wavfile.write(
    "spanish.wav",
    rate=model.config.sampling_rate,
    data=output_audio,
)
|