| |
"""
Nigerian TTS API - YarnGPT-based text-to-speech (TTS) for Nigerian languages.

Intended to run on Hugging Face Spaces with a T4 GPU; falls back to CPU when
CUDA is unavailable.
"""
|
|
import io
import logging
import os
import tempfile
import time

import gradio as gr
import torch
import torchaudio
|
|
# Configure logging once at import time so the messages below are emitted.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Select the compute device once; the rest of the module reads DEVICE.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

log.info(f"Device: {DEVICE}")
if DEVICE == "cuda":
    log.info(f"GPU: {torch.cuda.get_device_name(0)}")
|
|
|
|
def get_system_info() -> str:
    """Return a short, human-readable summary of the runtime environment.

    The report always lists the module-level ``DEVICE`` and the PyTorch
    version. When ``DEVICE`` is ``"cuda"`` it also lists the name of GPU 0
    and its total memory in decimal gigabytes (bytes / 1e9).

    Returns:
        The report as newline-separated lines, with no trailing newline on
        either the CPU or the CUDA path.
    """
    lines = [f"Device: {DEVICE}", f"PyTorch: {torch.__version__}"]
    if DEVICE == "cuda":
        lines.append(f"GPU: {torch.cuda.get_device_name(0)}")
        mem = torch.cuda.get_device_properties(0).total_memory / 1e9
        lines.append(f"Memory: {mem:.1f} GB")
    # Joining avoids the dangling "\n" the CPU path used to end with.
    return "\n".join(lines)
|
|
|
|
def tts_synthesize(text: str, speaker: str, language: str):
    """Synthesize speech with YarnGPT and save it to a temporary WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected without invoking the model.
        speaker: Voice name forwarded to ``yarngpt.generate_speech``.
        language: Language name forwarded to ``yarngpt.generate_speech``.

    Returns:
        A ``(wav_path, error)`` pair. On success ``wav_path`` is the path of
        the written WAV file and ``error`` is ``None``. On failure
        ``wav_path`` is ``None`` and ``error`` is a message string.
        Exceptions raised while importing the model, synthesizing, or saving
        are caught, logged with a traceback, and reported through ``error``.
    """
    if text is None or not text.strip():
        return None, "Empty text"

    log.info(f"TTS: text='{text[:50]}...', speaker={speaker}, lang={language}")
    start = time.time()

    wav_path = None
    try:
        # Imported lazily so a missing or broken yarngpt install surfaces as
        # a status message rather than failing when this module is imported.
        from yarngpt import generate_speech

        audio_tensor = generate_speech(
            text=text,
            speaker=speaker,
            language=language,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4000,
        )

        elapsed = time.time() - start
        log.info(f"TTS done in {elapsed:.1f}s")

        # NOTE(review): the layout/device of the tensor returned by
        # generate_speech is not visible here. torchaudio.save expects a
        # 2-D CPU tensor, so normalize defensively (1-D is assumed to be
        # mono) -- confirm against yarngpt.
        if isinstance(audio_tensor, torch.Tensor):
            audio_tensor = audio_tensor.detach().cpu()
            if audio_tensor.dim() == 1:
                audio_tensor = audio_tensor.unsqueeze(0)

        # Reserve a unique path, then close the handle before torchaudio
        # writes to it. The file is kept (delete=False) because the caller
        # receives its path.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            wav_path = f.name
        torchaudio.save(wav_path, audio_tensor, sample_rate=24000)
        return wav_path, None

    except Exception as e:
        # log.exception records the message and the traceback together.
        log.exception(f"TTS error: {e}")
        # Do not leave an empty or partial WAV file behind after a failure.
        if wav_path is not None:
            try:
                os.remove(wav_path)
            except OSError:
                log.warning("Could not remove temp file %s", wav_path)
        return None, str(e)
|
|
|
|
| |
# Voices offered for each supported language, keyed by the lowercase language
# name used as the language dropdown value.
SPEAKERS = {
    "english": [
        "idera",
        "chinenye",
        "jude",
        "emma",
        "umar",
        "joke",
        "zainab",
        "osagie",
        "remi",
        "tayo",
    ],
    "yoruba": ["abayomi", "aisha", "folake"],
    "igbo": ["chioma", "obinna", "adanna"],
    "hausa": ["amina", "fatima", "ibrahim", "yusuf"],
}

# Flat list of every voice, in SPEAKERS insertion order. A comprehension keeps
# the loop variables out of the module namespace.
ALL_SPEAKERS = [name for voices in SPEAKERS.values() for name in voices]
|
|
|
|
def update_speakers(language):
    """Build the speaker dropdown that matches the chosen language.

    A language with no entry in ``SPEAKERS`` falls back to the English
    voices. The first voice in the chosen list becomes the selected value.
    """
    if language in SPEAKERS:
        voices = SPEAKERS[language]
    else:
        voices = SPEAKERS["english"]
    return gr.Dropdown(choices=voices, value=voices[0])
|
|
|
|
| |
# Gradio UI: inputs in the first column, outputs and system info in the second.
with gr.Blocks(title="Nigerian TTS API") as demo:
    gr.Markdown("# 🎙️ Nigerian TTS API")
    gr.Markdown("YarnGPT-based Text-to-Speech for Nigerian languages")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text",
                placeholder="Enter text to synthesize...",
                lines=3,
            )
            # Choices and the default voice are derived from SPEAKERS so the
            # dropdowns cannot drift from the table update_speakers() reads.
            language = gr.Dropdown(
                label="Language",
                choices=list(SPEAKERS),
                value="english",
            )
            speaker = gr.Dropdown(
                label="Speaker",
                choices=SPEAKERS["english"],
                value=SPEAKERS["english"][0],
            )
            submit_btn = gr.Button("🔊 Synthesize", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(label="Output Audio", type="filepath")
            error_output = gr.Textbox(label="Status", visible=True)
            sys_info = gr.Textbox(
                label="System Info",
                value=get_system_info(),
                lines=4,
            )

    # Refresh the speaker list whenever the language selection changes.
    language.change(fn=update_speakers, inputs=[language], outputs=[speaker])

    # tts_synthesize returns (wav_path, error), mapped onto the audio player
    # and the status box respectively.
    submit_btn.click(
        fn=tts_synthesize,
        inputs=[text_input, speaker, language],
        outputs=[audio_output, error_output],
    )

    # Example rows are ordered to match `inputs`: text, speaker, language.
    gr.Examples(
        examples=[
            ["Hello, how are you today?", "idera", "english"],
            ["The weather in Lagos is beautiful.", "emma", "english"],
            ["Bawo ni, e ku ojo.", "abayomi", "yoruba"],
            ["How you dey, my brother?", "jude", "english"],
        ],
        inputs=[text_input, speaker, language],
    )


if __name__ == "__main__":
    demo.launch()
|
|