"""Gradio demo: Fon (Fongbe) text-to-speech with the facebook/mms-tts-fon VITS model."""

import tempfile

import gradio as gr
import scipy.io.wavfile
import torch
from transformers import AutoProcessor, VitsModel

# Loaded once at import time so every request reuses the same weights.
model = VitsModel.from_pretrained("facebook/mms-tts-fon")
processor = AutoProcessor.from_pretrained("facebook/mms-tts-fon")
sampling_rate = model.config.sampling_rate


def tts_fon(text):
    """Synthesize speech for *text* and return the path of a WAV file.

    Args:
        text: The sentence to synthesize, as typed into the Gradio textbox.

    Returns:
        Filesystem path of a newly written ``.wav`` file containing the first
        waveform produced by the model, at the model's configured sampling rate.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    # Reject blank input up front with a readable message instead of handing
    # the model an empty prompt.
    if not text or not text.strip():
        raise gr.Error("Please enter some text in Fongbe to synthesize.")

    inputs = processor(text, return_tensors="pt")

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        audio = model(**inputs).waveform[0].numpy()

    # The temporary file only reserves a unique path; its handle is closed
    # before scipy opens the same path to write the audio.
    # NOTE: delete=False means the file persists after this call; nothing in
    # this script removes it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=audio)
    return wav_path


title = "🔊 Speak Fongbe with Meta’s Multilingual TTS Model"

# NOTE(review): the text below is one continuous paragraph (no line breaks
# between the sections) — confirm that is the intended Markdown layout.
description = (
    " [Fongbe] Gbè elɔ xlɛ́ kpɔ́ndéwú nǔwlanwlán tɔn e è sɔ́ ɖ’ayǐ é ɖé nú Fongbe, gbè taji e è nɔ dó ɖò Benin kpo Togo sín akpáxwé ɖé lɛ kpo mɛ é. "
    "🔍 **Lee è nɔ zán gbɔn é** Wlan xógbe klewun ɖé dó **Fon** mɛ bo zín “Submit” bo na dó sè gbè e è xò kplé é. "
    "Modèle ɔ nɔ ɖè xóɖiɖɔ tɔ́n ɖò hwenu e é jɛ é, bo nɔ zán vocoder gbè gègě tɔn e è ko kplɔ́n ɖ’ayǐ é ɖé. "
    "🌍 **Gbè ɔ wú** Fon ɔ, Niger-Congo gbè wɛ bɔ gbɛtɔ́ livi 2 jɛji wɛ nɔ dó. "
    "È nɔ zán ɖò wemaxɔmɛ, xójlajla sín nǔ lɛ kpo xóɖɔɖókpɔ́ ayihɔngbe ayihɔngbe tɔn lɛ kpo mɛ, amɔ̌, è kpó ɖò xóɖiɖɔ sín nǔnywɛ xwitixwiti sín nǔ lɛ zán wɛ ganji ǎ. "
    "Demo enɛ ɔ nɔ d’alɔ bɔ è nɔ sú dò enɛ gbɔn TTS ɖiɖó bɔ é nɔ bɔkun nú gbè Aflika tɔn e mɛ nǔɖokan lɛ ma sukpɔ́ ɖè ǎ lɛ é gblamɛ. "
    "[English] This Space showcases a cutting-edge text-to-speech model for Fongbe, a major language spoken in Benin and parts of Togo. "
    "🔍 **How to Use** Type a short sentence in **Fon** and press “Submit” to hear the synthesized voice. "
    "The model outputs audio in real-time using a pretrained multilingual vocoder. "
    "🌍 **About the Language** Fon is a Niger-Congo language with over 2 million speakers. "
    "It is used in education, media, and daily communication, yet remains underrepresented in speech technology. "
    "This demo helps close that gap by making TTS more accessible for low-resource African languages. \n"
)

# Gradio interface
gradiofontts = gr.Interface(
    fn=tts_fon,
    inputs=gr.Textbox(
        label="Enter your text in Fongbe here | Wlan xo mitɔn do Fɔngbé mɛ ɖo fi.",
        placeholder="Mi do gbe nu mi, un nɔ nyi Bonaventure Dossou. Un nyí Benin-nu goyitɔ́ ɖé. Mi ma ɖi xɛsi bo kplɔn Fon kpodo kpɔndewu elɔ kpo.",
        lines=3,
        max_lines=50,
    ),
    outputs=gr.Audio(
        label="VITS Fon Synthetized Speech | VITS Fɔngbé Xóɖiɖɔ Kplékplé.",
        type="filepath",
    ),
    title=title,
    description=description,
    theme="default",
)

# Started at module level, as in the original script.
gradiofontts.launch()