|
|
|
|
|
|
|
|
|
|
|
import gradio as gr
|
|
|
from tts_infer import FonTTS
|
|
|
import os
|
|
|
|
|
|
# Model identifier handed to FonTTS; taken from the FON_TTS_MODEL environment
# variable when it is set, otherwise "facebook/mms-tts-fon".
MODEL_NAME = os.environ.get("FON_TTS_MODEL", "facebook/mms-tts-fon")
|
|
|
|
|
|
|
|
|
# Module-level slot for the shared FonTTS instance; stays None until
# get_backend() creates the instance on its first call.
backend = None
|
|
|
|
|
|
def get_backend():
    """Return the shared FonTTS instance, constructing it on the first call.

    The instance is built with ``model_name=MODEL_NAME`` and stored in the
    module-level ``backend`` variable, so later calls return the same object.
    """
    global backend

    # Fast path: the instance was already created by an earlier call.
    if backend is not None:
        return backend

    backend = FonTTS(model_name=MODEL_NAME)
    return backend
|
|
|
|
|
|
|
|
|
def synthesize_text(text, seed=42):
    """Synthesize speech for ``text`` and return the path of the saved WAV file.

    Args:
        text: Text to synthesize. ``None``, an empty string, or a
            whitespace-only string short-circuits: nothing is synthesized.
        seed: Value forwarded to the backend's ``synthesize`` call as an
            ``int``. ``None`` falls back to 42, the parameter's default.

    Returns:
        ``"last_output.wav"`` once the audio has been passed to the backend's
        ``save_wav`` with that path, or ``None`` when there was no text.
    """
    if not text or not text.strip():
        return None

    # The UI labels the seed field "optional"; a cleared number field is
    # delivered as None, and int(None) would raise TypeError. Use the default
    # seed in that case instead of failing the request.
    seed_value = 42 if seed is None else int(seed)

    tts = get_backend()
    audio, sr = tts.synthesize(text, seed=seed_value)

    # NOTE(review): fixed relative path, so every call targets the same file.
    out_path = "last_output.wav"
    tts.save_wav(audio, sr, out_path)
    return out_path
|
|
|
|
|
|
|
|
|
def create_ui():
    """Assemble the Gradio Blocks interface and return it without launching.

    The page has a Markdown heading, a row with the text box and the seed
    field, a "Synthesize" button, and an audio output. Clicking the button
    calls ``synthesize_text`` with the text and seed values and routes the
    returned file path to the audio component.
    """
    # NOTE(review): the source's indentation was lost; the Row is assumed to
    # contain only the two input widgets -- confirm against the original layout.
    with gr.Blocks(title="Fon — Text to Speech (MMS-TTS)") as demo:
        gr.Markdown(
            "# Fon (Fongbé) — Text → Speech\n"
            "Paste Fon text (use standard orthography) and press Synthesize."
        )

        with gr.Row():
            text_input = gr.Textbox(
                lines=4,
                label="Fon text",
                placeholder="e.g. e nɔ do fɔngbe ganji",
            )
            seed_input = gr.Number(value=42, label="Random seed (optional)")

        run_button = gr.Button("Synthesize")
        audio_player = gr.Audio(label="Output audio", type="filepath")

        # Registered inside the Blocks context so the listener belongs to `demo`.
        run_button.click(
            fn=synthesize_text,
            inputs=[text_input, seed_input],
            outputs=audio_player,
        )

    return demo
|
|
|
|
|
|
|
|
|
def main():
    """Build the UI and serve it on host 0.0.0.0, port 7860, with sharing off."""
    create_ui().launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )
|
|
|
|
|
|
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()