Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from transformers import AutoProcessor, VitsModel
|
| 3 |
+
import torch
|
| 4 |
+
import scipy.io.wavfile
|
| 5 |
+
import tempfile
|
| 6 |
+
|
| 7 |
+
# Load the Fon TTS model from Meta AI.
# The checkpoint id lives in one place so the model and its text processor
# are always loaded from the same repository.
_CHECKPOINT = "facebook/mms-tts-fon"
model = VitsModel.from_pretrained(_CHECKPOINT)
processor = AutoProcessor.from_pretrained(_CHECKPOINT)

# Read the output sampling rate from the model config instead of hard-coding it.
sampling_rate = model.config.sampling_rate
|
| 13 |
+
|
| 14 |
+
# Function to synthesize Fon audio from text
|
| 15 |
+
def tts_fon(text):
    """Synthesize Fon speech for ``text`` and return the path of a WAV file.

    Args:
        text: Fon text to speak.

    Returns:
        Path of a temporary ``.wav`` file containing the generated waveform,
        written at the module-level ``sampling_rate``. The file is created
        with ``delete=False`` and is not removed by this function.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    # Reject blank input up front so the user gets a readable message
    # instead of whatever the model does with nothing to say.
    if not text or not text.strip():
        raise gr.Error("Please enter some Fon text to synthesize.")

    inputs = processor(text, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        audio = model(**inputs).waveform[0].numpy()

    # Reserve a unique path, then close the handle *before* writing.
    # Re-opening a NamedTemporaryFile by name while it is still open is
    # not portable (it fails on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name

    # Save the WAV file using the model's sampling rate.
    scipy.io.wavfile.write(wav_path, rate=sampling_rate, data=audio)
    return wav_path
|
| 24 |
+
|
| 25 |
+
# Page heading shown at the top of the demo.
title = "🗣️ Fon Text-to-Speech (TTS) with Meta MMS"

# Markdown body shown under the heading: what the demo does, how to use it,
# and the usage/licensing notes. This is user-facing text.
description = """
This Space uses Meta AI's `facebook/mms-tts-fon` model to synthesize speech in the Fon language.
The model is part of the [Massively Multilingual Speech (MMS)](https://huggingface.co/facebook/mms-tts-fon) project.

Fon is a Gbe language spoken in Benin and Togo. This demo allows you to input Fon text and hear the synthesized audio output.

---

### 🔧 How to Use:
1. Type a sentence in **Fon** (Latin script, tone markers optional).
2. Press **Submit** or hit **Enter**.
3. Wait a few seconds for audio synthesis.
4. Listen or download the audio.

---

### 📜 Rules & Notes:
1. Input should be in **Fon** only (avoid English or other languages).
2. You may enter as much text as you want, but long inputs may slow processing. Short to medium sentences are recommended.
3. Use correct Unicode characters (ɛ, ɔ, etc.) if tones are important.
4. Tone marks like `à, é, ǒ, ê` are supported but optional.
5. Output uses a single female voice (pretrained by Meta).
6. Audio is generated at the model’s default sampling rate (may vary by version).
7. Model is intended for **research and demonstration** only.
8. Do **not** use for commercial purposes without permission.
9. Underlying model licensed under **CC-BY-NC 4.0**.
10. Please be respectful — offensive or inappropriate input is not allowed.

---

✨ Powered by Meta AI's MMS-TTS and Hugging Face 🤗
"""
|
| 59 |
+
|
| 60 |
+
# Gradio interface.
# The input and output components are built first so the Interface call
# below reads as plain wiring.
_text_input = gr.Textbox(
    label="Enter Fon text here",
    placeholder="e.g. Fɔ̀ngbè sɔ̀ wá kpɔ́ nù.",
    lines=3,
)
# type="filepath": the Audio component works with a file path.
_audio_output = gr.Audio(label="Synthesized Fon Speech", type="filepath")

iface = gr.Interface(
    fn=tts_fon,
    inputs=_text_input,
    outputs=_audio_output,
    title=title,
    description=description,
    theme="default",
)

# Start the web app.
iface.launch()
|