#!/usr/bin/env python3
"""
Nigerian TTS API - YarnGPT-based TTS for Nigerian languages.
Runs on HuggingFace Spaces with T4 GPU.
"""

import io
import logging
import os
import tempfile
import time
from typing import Optional, Tuple

import gradio as gr
import torch
import torchaudio

logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Check GPU
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
log.info("Device: %s", DEVICE)
if DEVICE == "cuda":
    log.info("GPU: %s", torch.cuda.get_device_name(0))

# Sample rate written into the output WAV header.
# NOTE(review): presumably the model's native output rate - confirm.
SAMPLE_RATE = 24000


def get_system_info() -> str:
    """Return a short multi-line description of the runtime.

    Always reports the selected device and the PyTorch version; when the
    device is CUDA it also reports the GPU name and its total memory in GB.
    """
    info = f"Device: {DEVICE}\nPyTorch: {torch.__version__}\n"
    if DEVICE == "cuda":
        mem = torch.cuda.get_device_properties(0).total_memory / 1e9
        info += f"GPU: {torch.cuda.get_device_name(0)}\nMemory: {mem:.1f} GB"
    return info


def tts_synthesize(
    text: str, speaker: str, language: str
) -> Tuple[Optional[str], Optional[str]]:
    """Synthesize speech for *text* with YarnGPT and save it as a WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected without calling the model.
        speaker: Speaker name forwarded to ``generate_speech``.
        language: Language name forwarded to ``generate_speech``.

    Returns:
        ``(wav_path, None)`` on success, where ``wav_path`` is a temporary
        ``.wav`` file that is not deleted automatically, or
        ``(None, message)`` when the input is empty or synthesis fails.
        Exceptions are never propagated to the caller.
    """
    # Guard against None as well as blank input.
    if not text or not text.strip():
        return None, "Empty text"

    log.info("TTS: text='%s...', speaker=%s, lang=%s", text[:50], speaker, language)
    start = time.time()

    wav_path = None
    try:
        # Imported lazily so that a missing or broken yarngpt install is
        # reported through the returned error message rather than at startup.
        from yarngpt import generate_speech

        # Generate speech
        audio_tensor = generate_speech(
            text=text,
            speaker=speaker,
            language=language,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4000,
        )

        elapsed = time.time() - start
        log.info("TTS done in %.1fs", elapsed)

        # torchaudio.save expects a 2-D tensor; move the result to the CPU and
        # add a leading dimension if the model returned a flat waveform.
        audio = torch.as_tensor(audio_tensor).detach().cpu()
        if audio.dim() == 1:
            audio = audio.unsqueeze(0)

        # Reserve a unique path, then close the handle before writing so the
        # file is not opened twice (which fails on some platforms).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            wav_path = tmp.name
        torchaudio.save(wav_path, audio, sample_rate=SAMPLE_RATE)

        return wav_path, None

    except Exception as e:
        # UI boundary: log the full traceback and surface the message.
        log.exception("TTS error: %s", e)
        if wav_path is not None:
            # Do not leave an empty or partial temp file behind on failure.
            try:
                os.unlink(wav_path)
            except OSError:
                pass
        return None, str(e)


# Available speakers by language
SPEAKERS = {
    "english": ["idera", "chinenye", "jude", "emma", "umar", "joke", "zainab", "osagie", "remi", "tayo"],
    "yoruba": ["abayomi", "aisha", "folake"],
    "igbo": ["chioma", "obinna", "adanna"],
    "hausa": ["amina", "fatima", "ibrahim", "yusuf"],
}

# Flat list of every speaker, in the order the languages are declared above.
ALL_SPEAKERS = [name for names in SPEAKERS.values() for name in names]


def update_speakers(language):
    """Update speaker dropdown based on language.

    Returns a ``gr.Dropdown`` whose choices are the speakers for *language*
    (falling back to the English speakers for an unknown language) and whose
    value is the first of those speakers.
    """
    speakers = SPEAKERS.get(language, SPEAKERS["english"])
    return gr.Dropdown(choices=speakers, value=speakers[0])


# Gradio UI
with gr.Blocks(title="Nigerian TTS API") as demo:
    gr.Markdown("# 🎙️ Nigerian TTS API")
    gr.Markdown("YarnGPT-based Text-to-Speech for Nigerian languages")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text",
                placeholder="Enter text to synthesize...",
                lines=3,
            )
            language = gr.Dropdown(
                label="Language",
                # Derived from SPEAKERS so the two cannot drift apart.
                choices=list(SPEAKERS),
                value="english",
            )
            speaker = gr.Dropdown(
                label="Speaker",
                choices=SPEAKERS["english"],
                value=SPEAKERS["english"][0],
            )
            submit_btn = gr.Button("🔊 Synthesize", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(label="Output Audio", type="filepath")
            error_output = gr.Textbox(label="Status", visible=True)
            sys_info = gr.Textbox(label="System Info", value=get_system_info(), lines=4)

    # Update speakers when language changes
    language.change(fn=update_speakers, inputs=[language], outputs=[speaker])

    # Generate speech on button click
    submit_btn.click(
        fn=tts_synthesize,
        inputs=[text_input, speaker, language],
        outputs=[audio_output, error_output],
    )

    # Example inputs
    gr.Examples(
        examples=[
            ["Hello, how are you today?", "idera", "english"],
            ["The weather in Lagos is beautiful.", "emma", "english"],
            ["Bawo ni, e ku ojo.", "abayomi", "yoruba"],
            ["How you dey, my brother?", "jude", "english"],
        ],
        inputs=[text_input, speaker, language],
    )


if __name__ == "__main__":
    demo.launch()