import torch
from transformers import pipeline
import gradio as gr

# Select GPU when available; transformers pipelines expect device=0 (first GPU) or -1 (CPU).
device = "cuda" if torch.cuda.is_available() else "cpu"

# Supported multilingual TTS models (Meta MMS checkpoints, keyed by ISO-639-3 code).
tts_lang_model = {
    "yor": "facebook/mms-tts-yor",
    "ibo": "facebook/mms-tts-ibo",
    "hau": "facebook/mms-tts-hau",
}

# Cache of loaded TTS pipelines, keyed by language code, so each model loads only once.
tts_pipelines = {}


def get_tts_pipeline(lang_code):
    """Return a cached text-to-speech pipeline for *lang_code*, loading it on first use.

    Args:
        lang_code: one of the keys of ``tts_lang_model`` ("yor", "ibo", "hau").

    Raises:
        ValueError: if *lang_code* is not a supported language code
            (previously this surfaced as an opaque KeyError).
    """
    if lang_code not in tts_lang_model:
        raise ValueError(
            f"Unsupported language code: {lang_code!r}. "
            f"Choose one of {sorted(tts_lang_model)}."
        )
    if lang_code not in tts_pipelines:
        print(f"Loading TTS model for language: {lang_code}")
        tts_pipelines[lang_code] = pipeline(
            "text-to-speech",
            model=tts_lang_model[lang_code],
            device=0 if device == "cuda" else -1,
        )
    return tts_pipelines[lang_code]


def synthesize(text, language):
    """Synthesize *text* in the selected *language*.

    Args:
        text: input string; surrounding whitespace is stripped before synthesis.
        language: supported language code (see ``tts_lang_model``).

    Returns:
        A ``(sampling_rate, waveform)`` tuple for a Gradio ``numpy`` Audio
        output, or ``None`` when the input text is empty/whitespace-only.
    """
    # Strip once and reuse, so the model never sees leading/trailing whitespace.
    text = text.strip()
    if not text:
        return None
    tts = get_tts_pipeline(language)
    result = tts(text)
    # Fix: the sampling rate was hard-coded to 16000. Use the rate the pipeline
    # reports so swapping in a checkpoint with a different rate cannot silently
    # produce pitch-shifted audio.
    audio = result["audio"]
    # The pipeline may return a (channels, samples) array; Gradio's numpy Audio
    # expects 1-D mono, so flatten single-channel output.
    if getattr(audio, "ndim", 1) > 1:
        audio = audio.squeeze()
    return (result["sampling_rate"], audio)


# Gradio UI: language selector + text input on one row, audio output below.
with gr.Blocks() as demo:
    gr.Markdown("## 🗣️ Multilingual TTS for Nigerian Languages (Yoruba, Igbo, Hausa)")
    with gr.Row():
        lang = gr.Radio(
            choices=["yor", "ibo", "hau"],
            label="Select Language",
            value="yor",
        )
        input_text = gr.Textbox(
            label="Enter text to synthesize",
            placeholder="Type something in the selected language",
        )
    output_audio = gr.Audio(label="Generated Speech", type="numpy")
    generate_btn = gr.Button("🔊 Generate")
    generate_btn.click(fn=synthesize, inputs=[input_text, lang], outputs=output_audio)

# Launch interface (bind to all interfaces so the demo is reachable in a container).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)