File size: 393 Bytes
5347141
f99ee34
5347141
f99ee34
 
5347141
f99ee34
5347141
 
 
f99ee34
5347141
1
2
3
4
5
6
7
8
9
10
11
12
from transformers import AutoProcessor, VitsModel
import torch
import soundfile as sf

# Checkpoint id shared by the text front-end and the acoustic model, so the
# two can never drift apart when the script is pointed at another checkpoint.
MODEL_ID = "facebook/mms-tts-hmn"

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = VitsModel.from_pretrained(MODEL_ID)

# Turn the input sentence into PyTorch tensors the model accepts.
inputs = processor(text="Kuv hlub koj", return_tensors="pt")

# Inference only: skip autograd bookkeeping while synthesising.
with torch.no_grad():
    speech = model(**inputs).waveform

# Write the first waveform of the output (presumably the only item in the
# batch, since a single sentence was passed in).  The sample rate is read
# from the model config rather than hard-coded, so the WAV header always
# matches what the checkpoint actually generates.
sf.write("output.wav", speech.numpy()[0], samplerate=model.config.sampling_rate)