Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoProcessor, VitsModel | |
| import torch | |
| import scipy.io.wavfile | |
| import tempfile | |
# Hub checkpoint id shared by the model weights and its text processor.
MMS_TTS_FON_CHECKPOINT = "facebook/mms-tts-fon"

# Loaded once at import time so every request reuses the same objects.
model = VitsModel.from_pretrained(MMS_TTS_FON_CHECKPOINT)
processor = AutoProcessor.from_pretrained(MMS_TTS_FON_CHECKPOINT)

# Sample rate the checkpoint declares in its config; used when writing WAVs.
sampling_rate = model.config.sampling_rate
def tts_fon(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    The text is run through the module-level ``processor`` and ``model``,
    the first waveform of the model output is converted to a NumPy array,
    and the samples are written to a fresh temporary ``.wav`` file at the
    module-level ``sampling_rate``.

    Args:
        text: The sentence to synthesize.

    Returns:
        The filesystem path of the generated WAV file, or ``None`` when
        ``text`` is empty, whitespace-only or ``None`` (nothing to speak).

    Note:
        The temporary file is created with ``delete=False`` and is not
        removed by this function; the caller owns its cleanup.
    """
    # Nothing to synthesize: skip the model entirely instead of feeding it
    # an empty sequence (model behavior on empty input is not verified here).
    if not text or not text.strip():
        return None

    inputs = processor(text, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        audio = model(**inputs).waveform[0].numpy()

    # Reserve a unique path, then close the handle *before* writing: a
    # NamedTemporaryFile cannot be reopened by name while it is still open
    # on Windows. delete=False keeps the file on disk after the handle closes.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name
    scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=audio)
    return wav_path
# Heading shown at the top of the demo page.
title = "🔊 Speak Fongbe with Meta’s Multilingual TTS Model"

# Fon-language half of the page description (Markdown).
_DESCRIPTION_FON = """
[Fongbe]
Gbè elɔ xlɛ́ kpɔ́ndéwú nǔwlanwlán tɔn e è sɔ́ ɖ’ayǐ é ɖé nú Fongbe, gbè taji e è nɔ dó ɖò Benin kpo Togo sín akpáxwé ɖé lɛ kpo mɛ é.
🔍 **Lee è nɔ zán gbɔn é**
Wlan xógbe klewun ɖé dó **Fon** mɛ bo zín “Submit” bo na dó sè gbè e è xò kplé é. Modèle ɔ nɔ ɖè xóɖiɖɔ tɔ́n ɖò hwenu e é jɛ é, bo nɔ zán vocoder gbè gègě tɔn e è ko kplɔ́n ɖ’ayǐ é ɖé.
🌍 **Gbè ɔ wú**
Fon ɔ, Niger-Congo gbè wɛ bɔ gbɛtɔ́ livi 2 jɛji wɛ nɔ dó. È nɔ zán ɖò wemaxɔmɛ, xójlajla sín nǔ lɛ kpo xóɖɔɖókpɔ́ ayihɔngbe ayihɔngbe tɔn lɛ kpo mɛ, amɔ̌, è kpó ɖò xóɖiɖɔ sín nǔnywɛ xwitixwiti sín nǔ lɛ zán wɛ ganji ǎ. Demo enɛ ɔ nɔ d’alɔ bɔ è nɔ sú dò enɛ gbɔn TTS ɖiɖó bɔ é nɔ bɔkun nú gbè Aflika tɔn e mɛ nǔɖokan lɛ ma sukpɔ́ ɖè ǎ lɛ é gblamɛ.
"""

# English half of the page description (Markdown).
_DESCRIPTION_EN = """[English]
This Space showcases a cutting-edge text-to-speech model for Fongbe, a major language spoken in Benin and parts of Togo.
🔍 **How to Use**
Type a short sentence in **Fon** and press “Submit” to hear the synthesized voice. The model outputs audio in real-time using a pretrained multilingual vocoder.
🌍 **About the Language**
Fon is a Niger-Congo language with over 2 million speakers. It is used in education, media, and daily communication, yet remains underrepresented in speech technology. This demo helps close that gap by making TTS more accessible for low-resource African languages.
"""

# Full bilingual description: the Fon section followed by the English one.
description = _DESCRIPTION_FON + _DESCRIPTION_EN
# Gradio interface: one multi-line text box in, one audio clip out.

# Text input; starts at 3 visible lines and may grow to 50.
_fon_text_input = gr.Textbox(
    label="Enter your text in Fongbe here | Wlan xo mitɔn do Fɔngbé mɛ ɖo fi.",
    placeholder="Mi do gbe nu mi, un nɔ nyi Bonaventure Dossou. Un nyí Benin-nu goyitɔ́ ɖé. Mi ma ɖi xɛsi bo kplɔn Fon kpodo kpɔndewu elɔ kpo.",
    lines=3,
    max_lines=50,
)

# Audio output; "filepath" matches the file path that tts_fon returns.
_fon_audio_output = gr.Audio(
    label="VITS Fon Synthetized Speech | VITS Fɔngbé Xóɖiɖɔ Kplékplé.",
    type="filepath",
)

gradiofontts = gr.Interface(
    fn=tts_fon,
    inputs=_fon_text_input,
    outputs=_fon_audio_output,
    title=title,
    description=description,
    theme="default",
)

# Start the web app as soon as the script runs.
gradiofontts.launch()