# FonTTS / app.py
# bonadossou's picture
# MMS TTS FON
# 570ddc0
import gradio as gr
from transformers import AutoProcessor, VitsModel
import torch
import scipy.io.wavfile
import tempfile
# Hub checkpoint shared by the tokenizer/processor and the VITS model.
_MODEL_ID = "facebook/mms-tts-fon"

# Both objects are created once, at import time, and reused by every call.
processor = AutoProcessor.from_pretrained(_MODEL_ID)
model = VitsModel.from_pretrained(_MODEL_ID)

# Rate taken from the model config; used when the waveform is written to WAV.
sampling_rate = model.config.sampling_rate
def tts_fon(text):
    """Synthesise speech for ``text`` and return the path of a WAV file.

    The text is run through the module-level ``processor`` and ``model``;
    the resulting waveform is written to a newly created temporary ``.wav``
    file at the module-level ``sampling_rate``.

    Args:
        text: The sentence to synthesise.

    Returns:
        The filesystem path (``str``) of the temporary WAV file. The file is
        created with ``delete=False``, so it still exists after this function
        returns; nothing in this function removes it.
    """
    encoded = processor(text, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        result = model(**encoded)
        # First item of the returned batch, converted to a NumPy array.
        samples = result.waveform[0].numpy()

    # delete=False keeps the file on disk once the handle is closed, so the
    # returned path remains valid for the caller.
    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with wav_file:
        wav_path = wav_file.name
        scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=samples)
    return wav_path
# Heading text; passed to gr.Interface as ``title``.
title = "🔊 Speak Fongbe with Meta’s Multilingual TTS Model"
# Markdown text; passed to gr.Interface as ``description``.
# It has two parts, tagged [Fongbe] and [English], each with an introduction,
# a "how to use" section and an "about the language" section.
# NOTE(review): the Fongbe part appears to mirror the English part — confirm
# with a Fon speaker before editing either one independently.
description = """
[Fongbe]
Gbè elɔ xlɛ́ kpɔ́ndéwú nǔwlanwlán tɔn e è sɔ́ ɖ’ayǐ é ɖé nú Fongbe, gbè taji e è nɔ dó ɖò Benin kpo Togo sín akpáxwé ɖé lɛ kpo mɛ é.
🔍 **Lee è nɔ zán gbɔn é**
Wlan xógbe klewun ɖé dó **Fon** mɛ bo zín “Submit” bo na dó sè gbè e è xò kplé é. Modèle ɔ nɔ ɖè xóɖiɖɔ tɔ́n ɖò hwenu e é jɛ é, bo nɔ zán vocoder gbè gègě tɔn e è ko kplɔ́n ɖ’ayǐ é ɖé.
🌍 **Gbè ɔ wú**
Fon ɔ, Niger-Congo gbè wɛ bɔ gbɛtɔ́ livi 2 jɛji wɛ nɔ dó. È nɔ zán ɖò wemaxɔmɛ, xójlajla sín nǔ lɛ kpo xóɖɔɖókpɔ́ ayihɔngbe ayihɔngbe tɔn lɛ kpo mɛ, amɔ̌, è kpó ɖò xóɖiɖɔ sín nǔnywɛ xwitixwiti sín nǔ lɛ zán wɛ ganji ǎ. Demo enɛ ɔ nɔ d’alɔ bɔ è nɔ sú dò enɛ gbɔn TTS ɖiɖó bɔ é nɔ bɔkun nú gbè Aflika tɔn e mɛ nǔɖokan lɛ ma sukpɔ́ ɖè ǎ lɛ é gblamɛ.
[English]
This Space showcases a cutting-edge text-to-speech model for Fongbe, a major language spoken in Benin and parts of Togo.
🔍 **How to Use**
Type a short sentence in **Fon** and press “Submit” to hear the synthesized voice. The model outputs audio in real-time using a pretrained multilingual vocoder.
🌍 **About the Language**
Fon is a Niger-Congo language with over 2 million speakers. It is used in education, media, and daily communication, yet remains underrepresented in speech technology. This demo helps close that gap by making TTS more accessible for low-resource African languages.
"""
# Gradio interface

# Input component: a text box (3 visible lines, growing up to 50) whose value
# is handed to tts_fon.
_text_input = gr.Textbox(
    label="Enter your text in Fongbe here | Wlan xo mitɔn do Fɔngbé mɛ ɖo fi.",
    placeholder="Mi do gbe nu mi, un nɔ nyi Bonaventure Dossou. Un nyí Benin-nu goyitɔ́ ɖé. Mi ma ɖi xɛsi bo kplɔn Fon kpodo kpɔndewu elɔ kpo.",
    lines=3,
    max_lines=50,
)

# Output component: an audio widget fed with the file path tts_fon returns.
_audio_output = gr.Audio(
    label="VITS Fon Synthetized Speech | VITS Fɔngbé Xóɖiɖɔ Kplékplé.",
    type="filepath",
)

# Wire the synthesis function to the two components and the page texts.
gradiofontts = gr.Interface(
    fn=tts_fon,
    inputs=_text_input,
    outputs=_audio_output,
    title=title,
    description=description,
    theme="default",
)

# Start the web server as soon as the module is executed.
gradiofontts.launch()