# app.py — Hugging Face Space file (uploaded by ollui, "Create app.py").
# Commit 7e230df (verified), 898 bytes.
import torch
from transformers import AutoTokenizer, VitsModel
import scipy.io.wavfile
import gradio as gr
# Load the tokenizer and the VITS model once at import time so every request
# served by the UI reuses the same objects. Both come from the same checkpoint.
_CHECKPOINT = "facebook/mms-tts-chv"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = VitsModel.from_pretrained(_CHECKPOINT)
def tts_chuvash(text):
    """Synthesize speech for *text* with the module-level VITS model.

    Args:
        text: The text to speak, as entered in the UI textbox.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``waveform`` is the
        model's output squeezed and converted to a NumPy array, or ``None``
        when there is nothing to synthesize (blank input, or input that
        produces no tokens).
    """
    # Blank input gives the model nothing to work with; report "no audio"
    # instead of pushing an empty sequence through the network.
    if not text or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt")

    # NOTE(review): presumably the tokenizer drops characters outside its
    # vocabulary, so non-blank text can still yield zero tokens — confirm.
    # Either way, an empty id tensor cannot be synthesized.
    if inputs["input_ids"].numel() == 0:
        return None

    # Inference only: no gradients are needed.
    with torch.no_grad():
        speech = model(**inputs).waveform

    audio_np = speech.squeeze().cpu().numpy()
    sampling_rate = model.config.sampling_rate
    return (sampling_rate, audio_np)
# Build the web UI: one text box in, one audio player out, wired to
# tts_chuvash. The components are created in input-then-output order.
_text_input = gr.Textbox(label="Текст чăвашла чĕлхи (Chuvash Cyrillic)")
_audio_output = gr.Audio(label="Аудио")

demo = gr.Interface(
    fn=tts_chuvash,
    inputs=_text_input,
    outputs=_audio_output,
    title="Чăваш TTS (Facebook VITS)",
    description="Модель TTS на чăваш чĕлхи с помощью MMS-TTS от Facebook (поддерживает только кириллицу)",
)

# Start serving the interface.
demo.launch()