"""Gradio demo: Yakut (Sakha) text-to-speech using facebook/mms-tts-sah."""

import torch
import torchaudio
from transformers import AutoProcessor, AutoModelForTextToWaveform
import gradio as gr

# Run on the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_id = "facebook/mms-tts-sah"

# Loaded once at start-up so every request reuses the same objects.
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForTextToWaveform.from_pretrained(model_id).to(device)


def yakut_tts(text: str) -> tuple:
    """Synthesize speech for *text* and return it in Gradio's numpy audio form.

    Args:
        text: The Yakut text to speak.

    Returns:
        A ``(sampling_rate, samples)`` tuple, where ``sampling_rate`` is
        ``model.config.sampling_rate`` and ``samples`` is the generated
        waveform as a NumPy array.

    Raises:
        gr.Error: If *text* is ``None``, empty, or only whitespace.
    """
    # Reject blank input up front: there is nothing to synthesize, and a
    # gr.Error is reported to the user in the UI instead of a raw traceback.
    if text is None or not text.strip():
        raise gr.Error("Please enter some Yakut text to synthesize.")

    inputs = processor(text=text, return_tensors="pt").to(device)

    # Inference only; no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # A single text is passed in, so take the first (only) batch item
    # explicitly. Assumes waveform is (batch, samples) -- TODO confirm.
    # A bare squeeze() would also collapse a one-sample waveform to 0-d.
    waveform = outputs.waveform[0].cpu()
    return (model.config.sampling_rate, waveform.numpy())


demo = gr.Interface(
    fn=yakut_tts,
    inputs=gr.Textbox(label="Yakut Text", placeholder="Саха тыла"),
    outputs=gr.Audio(label="Generated Audio", type="numpy"),
    title="Yakut Text-to-Speech",
    description="Enter Yakut (Sakha) text and generate speech using facebook/mms-tts-sah model.",
)

# Start the web server only when executed as a script, so importing this
# module (e.g. to reuse yakut_tts) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()