"""Gradio demo: Chuvash text-to-speech using the facebook/mms-tts-chv VITS model."""

import torch
from transformers import AutoTokenizer, VitsModel
import scipy.io.wavfile
import gradio as gr

# Single source of truth for the checkpoint so the model and the tokenizer
# can never be loaded from two different repositories by accident.
MODEL_ID = "facebook/mms-tts-chv"

# Loaded once at import time; every request reuses the same objects.
model = VitsModel.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)


def tts_chuvash(text):
    """Synthesize speech for ``text`` with the loaded VITS model.

    Args:
        text: Text to synthesize, as entered in the Gradio textbox.

    Returns:
        A ``(sampling_rate, waveform)`` tuple, where ``waveform`` is the
        model output squeezed and converted to a NumPy array, or ``None``
        when ``text`` is empty or contains only whitespace.
    """
    # Nothing to synthesize: return None instead of pushing an empty
    # sequence through the model.
    if not text or not text.strip():
        return None

    inputs = tokenizer(text, return_tensors="pt")

    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        speech = model(**inputs).waveform

    audio_np = speech.squeeze().cpu().numpy()
    sampling_rate = model.config.sampling_rate
    return (sampling_rate, audio_np)


demo = gr.Interface(
    fn=tts_chuvash,
    inputs=gr.Textbox(label="Текст чăвашла чĕлхи (Chuvash Cyrillic)"),
    outputs=gr.Audio(label="Аудио"),
    title="Чăваш TTS (Facebook VITS)",
    description="Модель TTS на чăваш чĕлхи с помощью MMS-TTS от Facebook (поддерживает только кириллицу)",
)

# Start the web server only when executed as a script, so that importing
# this module (e.g. to reuse ``tts_chuvash`` or ``demo``) does not block.
if __name__ == "__main__":
    demo.launch()