|
|
import gradio as gr |
|
|
from transformers import AutoProcessor, VitsModel |
|
|
import torch |
|
|
import scipy.io.wavfile |
|
|
import tempfile |
|
|
|
|
|
|
|
|
# Hub checkpoint id shared by the model and its processor, kept in one place
# so the two loads cannot drift apart.
_CHECKPOINT = "facebook/mms-tts-fon"

# Both objects are loaded once at import time and reused by every request.
model = VitsModel.from_pretrained(_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CHECKPOINT)

# Sample rate taken from the model config; used when writing the WAV output.
sampling_rate = model.config.sampling_rate
|
|
|
|
|
|
|
|
def tts_fon(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    The text is tokenized with the module-level ``processor``, run through the
    module-level ``model`` with gradient tracking disabled, and the first
    waveform of the output is written to a temporary ``.wav`` file using
    ``sampling_rate``.

    Args:
        text: The sentence to synthesize.

    Returns:
        The filesystem path of the generated WAV file, or ``None`` when
        ``text`` is empty or contains only whitespace.
    """
    # Nothing to synthesize: skip the model call rather than feeding it an
    # empty prompt.
    if not text or not text.strip():
        return None

    inputs = processor(text, return_tensors="pt")
    with torch.no_grad():
        audio = model(**inputs).waveform[0].numpy()

    # Only reserve a unique path inside the ``with``. The handle is closed
    # before the WAV is written because reopening a still-open
    # NamedTemporaryFile by name is platform-dependent (it fails on Windows).
    # delete=False keeps the file on disk after the handle is closed.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name
    scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=audio)
    return wav_path
|
|
|
|
|
|
|
|
# Heading passed to the Gradio interface as its title.
title = "🗣️ Fon Text-to-Speech (TTS) with Meta MMS"

# Markdown passed to the Gradio interface as its description: a short intro,
# usage steps, and usage/licensing notes.
description = """
This Space uses Meta AI's `facebook/mms-tts-fon` model to synthesize speech in the Fon language.
The model is part of the [Massively Multilingual Speech (MMS)](https://huggingface.co/facebook/mms-tts-fon) project.

Fon is a Gbe language spoken in Benin and Togo. This demo allows you to input Fon text and hear the synthesized audio output.

---

### 🔧 How to Use:
1. Type a sentence in **Fon** (Latin script, tone markers optional).
2. Press **Submit** or hit **Enter**.
3. Wait a few seconds for audio synthesis.
4. Listen or download the audio.

---

### 📜 Rules & Notes:
1. Input should be in **Fon** only (avoid English or other languages).
2. You may enter as much text as you want, but long inputs may slow processing. Short to medium sentences are recommended.
3. Use correct Unicode characters (ɛ, ɔ, etc.) if tones are important.
4. Tone marks like `à, é, ǒ, ê` are supported but optional.
5. Output uses a single female voice (pretrained by Meta).
6. Audio is generated at the model’s default sampling rate (may vary by version).
7. Model is intended for **research and demonstration** only.
8. Do **not** use for commercial purposes without permission.
9. Underlying model licensed under **CC-BY-NC 4.0**.
10. Please be respectful — offensive or inappropriate input is not allowed.

---

✨ Powered by Meta AI's MMS-TTS and Hugging Face 🤗
"""
|
|
|
|
|
|
|
|
# Multi-line text box where the user types the sentence to synthesize.
_text_input = gr.Textbox(
    label="Enter Fon text here",
    placeholder="e.g. Fɔ̀ngbè sɔ̀ wá kpɔ́ nù.",
    lines=3,
)

# tts_fon returns a path to a WAV file, hence type="filepath".
_audio_output = gr.Audio(label="Synthesized Fon Speech", type="filepath")

# Wire the synthesis function to its input and output widgets.
iface = gr.Interface(
    fn=tts_fon,
    inputs=_text_input,
    outputs=_audio_output,
    title=title,
    description=description,
    theme="default",
)

# Start the web app as soon as the script is executed.
iface.launch()