# VITSmodel / app.py
# ollui's picture
# Create app.py
# a5ae415 verified
# raw
# history blame contribute delete
# 636 Bytes
from transformers import AutoProcessor, VitsModel
import torch
import gradio as gr
import scipy.io.wavfile
# Hub checkpoint used for both the text processor and the VITS model.
model_id = "facebook/mms-tts-bod"

# Text front-end that turns the input string into model-ready tensors.
processor = AutoProcessor.from_pretrained(model_id)

# Load the weights and switch to evaluation mode in one step; eval()
# returns the module itself, so `model` is still the VitsModel instance.
model = VitsModel.from_pretrained(model_id).eval()
def tts_fn(text):
    """Synthesize speech for *text* with the module-level VITS model.

    Args:
        text: The string to synthesize. ``None``, an empty string, or a
            whitespace-only string is treated as "nothing to say".

    Returns:
        A ``(sample_rate, audio)`` tuple, where ``sample_rate`` is read from
        ``model.config.sampling_rate`` and ``audio`` is the generated
        waveform as a NumPy array, or ``None`` when there is no text to
        synthesize (the model is not invoked in that case).
    """
    # Guard first: blank input carries nothing to synthesize, so skip the
    # tokenizer and the model entirely instead of running them on nothing.
    if text is None or not text.strip():
        return None

    inputs = processor(text=text, return_tensors="pt")

    # Inference only -- no autograd bookkeeping is needed.
    with torch.no_grad():
        output = model(**inputs)

    # Drop singleton dimensions and hand the samples over as a NumPy array.
    audio = output.waveform.squeeze().numpy()
    sample_rate = model.config.sampling_rate
    return (sample_rate, audio)
# Web UI: a single text box feeding tts_fn, with the result played as audio.
# The textbox label is Vietnamese for "Enter Tibetan text".
demo = gr.Interface(
    fn=tts_fn,
    inputs=gr.Textbox(label="Nhập văn bản tiếng Tây Tạng"),
    outputs="audio",
)

# Start serving the interface.
demo.launch()