"""Gradio demo: Chuvash text-to-speech with the Facebook MMS-TTS checkpoint.

The ``facebook/mms-tts-*`` checkpoints are VITS models, so they are loaded
with ``VitsModel`` and a tokenizer, and audio is read from the ``waveform``
field of the forward-pass output.
"""

import tempfile

import gradio as gr
import soundfile as sf
import torch
# AutoModelForSpeechSeq2Seq / AutoProcessor are kept from the original import
# list; the VITS path below does not use them.
from transformers import (
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    AutoTokenizer,
    VitsModel,
)

MODEL_ID = "facebook/mms-tts-chv"

device = "cuda" if torch.cuda.is_available() else "cpu"

model = VitsModel.from_pretrained(MODEL_ID, torch_dtype=torch.float32).to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Backward-compatible alias for the module-level name the script used before.
processor = tokenizer


def tts_fn(text: str) -> str:
    """Synthesize speech for *text* and return the path of the WAV file.

    Args:
        text: Text to be spoken.

    Returns:
        Filesystem path of a newly written WAV file containing the audio.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    inputs = tokenizer(text, return_tensors="pt").to(device)
    with torch.no_grad():
        # The forward pass returns a batch of waveforms; one input -> index 0.
        waveform = model(**inputs).waveform[0]
    audio = waveform.cpu().numpy()

    # A unique file per call so simultaneous requests cannot overwrite each
    # other's audio. delete=False because Gradio reads the file after return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_path = handle.name
    # Use the checkpoint's own sample rate instead of a hard-coded value.
    sf.write(output_path, audio, model.config.sampling_rate)
    return output_path


demo = gr.Interface(
    fn=tts_fn,
    inputs="text",
    outputs="audio",
    title="TTS Chuvash - Facebook MMS",
)

demo.launch()