# Imakandi-Labs's picture
# Upload folder using huggingface_hub
# 9b498f2 verified
#!/usr/bin/env python3
"""
Nigerian TTS API - YarnGPT-based TTS for Nigerian languages.
Runs on HuggingFace Spaces with T4 GPU.
"""
import io
import logging
import time
import tempfile
import torch
import torchaudio
import gradio as gr
# Install a default root handler at INFO level and create this module's logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
log.info("Device: %s", DEVICE)
if DEVICE == "cuda":
    # Index 0 is the first visible CUDA device.
    log.info("GPU: %s", torch.cuda.get_device_name(0))
def get_system_info():
    """Build a multi-line, human-readable summary of the runtime.

    Returns:
        A string with the selected device and the PyTorch version, each
        terminated by a newline. When the device is CUDA, the name of GPU 0
        and its total memory (bytes / 1e9, shown as GB) are appended.
    """
    report = [f"Device: {DEVICE}\n", f"PyTorch: {torch.__version__}\n"]
    if DEVICE == "cuda":
        gpu_name = torch.cuda.get_device_name(0)
        total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        report.append(f"GPU: {gpu_name}\n")
        report.append(f"Memory: {total_gb:.1f} GB")
    return "".join(report)
def tts_synthesize(text: str, speaker: str, language: str):
    """Synthesize speech for ``text`` with YarnGPT and write it to a WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected without invoking the model.
        speaker: Voice name forwarded to ``yarngpt.generate_speech``.
        language: Language name forwarded to ``yarngpt.generate_speech``.

    Returns:
        A ``(wav_path, error)`` tuple: ``(path, None)`` on success, or
        ``(None, message)`` when the input is empty or synthesis fails.
        Exceptions are not propagated; they are logged with a traceback and
        reported through ``message``.
    """
    # Guard against None as well as blank strings; None has no .strip().
    if not text or not text.strip():
        return None, "Empty text"

    log.info("TTS: text='%s...', speaker=%s, lang=%s", text[:50], speaker, language)
    start = time.perf_counter()
    wav_path = None
    try:
        # Imported lazily, inside the try, so a missing or broken model
        # package is reported to the caller instead of failing at import time.
        from yarngpt import generate_speech

        audio_tensor = generate_speech(
            text=text,
            speaker=speaker,
            language=language,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4000,
        )
        elapsed = time.perf_counter() - start
        log.info("TTS done in %.1fs", elapsed)

        # torchaudio.save expects a 2-D tensor. The shape/device returned by
        # generate_speech is not visible here, so normalise defensively.
        # NOTE(review): confirm against the yarngpt API.
        if isinstance(audio_tensor, torch.Tensor):
            audio_tensor = audio_tensor.detach().cpu()
            if audio_tensor.dim() == 1:
                audio_tensor = audio_tensor.unsqueeze(0)

        # Reserve a unique path, then close the handle before torchaudio
        # reopens the file by name (writing to an open temp file by name
        # fails on some platforms). delete=False keeps the file for the caller.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            wav_path = f.name
        # NOTE(review): 24000 Hz is presumably the model's output rate; confirm.
        torchaudio.save(wav_path, audio_tensor, sample_rate=24000)
        return wav_path, None
    except Exception as e:  # top-level boundary: report every failure to the UI
        log.exception("TTS error: %s", e)
        if wav_path is not None:
            # Do not leave an empty or partial WAV behind when saving failed.
            import os

            try:
                os.remove(wav_path)
            except OSError:
                pass
        return None, str(e)
# Voices available for each supported language, keyed by the lowercase
# language name offered in the UI's language dropdown.
SPEAKERS = {
    "english": ["idera", "chinenye", "jude", "emma", "umar", "joke", "zainab", "osagie", "remi", "tayo"],
    "yoruba": ["abayomi", "aisha", "folake"],
    "igbo": ["chioma", "obinna", "adanna"],
    "hausa": ["amina", "fatima", "ibrahim", "yusuf"],
}

# Every voice across all languages, flattened in SPEAKERS insertion order.
# A comprehension avoids leaking a loop variable into the module namespace.
ALL_SPEAKERS = [name for voices in SPEAKERS.values() for name in voices]
def update_speakers(language):
    """Return a speaker dropdown populated for the chosen language.

    Languages that are not keys of ``SPEAKERS`` fall back to the English
    voices. The first voice in the list is preselected.
    """
    key = language if language in SPEAKERS else "english"
    voices = SPEAKERS[key]
    return gr.Dropdown(choices=voices, value=voices[0])
# Gradio UI: inputs on the left, outputs on the right, examples underneath.
with gr.Blocks(title="Nigerian TTS API") as demo:
    gr.Markdown("# 🎙️ Nigerian TTS API")
    gr.Markdown("YarnGPT-based Text-to-Speech for Nigerian languages")

    with gr.Row():
        # Left column: synthesis inputs.
        with gr.Column():
            text_input = gr.Textbox(
                label="Text",
                placeholder="Enter text to synthesize...",
                lines=3,
            )
            language = gr.Dropdown(
                label="Language",
                # Derived from SPEAKERS so the language list cannot drift
                # from the voice table.
                choices=list(SPEAKERS),
                value="english",
            )
            speaker = gr.Dropdown(
                label="Speaker",
                choices=SPEAKERS["english"],
                # Default to the first English voice rather than a
                # hard-coded name.
                value=SPEAKERS["english"][0],
            )
            submit_btn = gr.Button("🔊 Synthesize", variant="primary")

        # Right column: synthesis result, status message, environment summary.
        # NOTE(review): sys_info is assumed to belong to this column; confirm
        # the intended layout.
        with gr.Column():
            audio_output = gr.Audio(label="Output Audio", type="filepath")
            error_output = gr.Textbox(label="Status", visible=True)
            sys_info = gr.Textbox(label="System Info", value=get_system_info(), lines=4)

    # Update speakers when language changes
    language.change(fn=update_speakers, inputs=[language], outputs=[speaker])

    # Generate speech on button click; tts_synthesize returns
    # (wav_path, error), matching the two outputs in order.
    submit_btn.click(
        fn=tts_synthesize,
        inputs=[text_input, speaker, language],
        outputs=[audio_output, error_output],
    )

    # Example inputs, in the same (text, speaker, language) order as `inputs`.
    gr.Examples(
        examples=[
            ["Hello, how are you today?", "idera", "english"],
            ["The weather in Lagos is beautiful.", "emma", "english"],
            ["Bawo ni, e ku ojo.", "abayomi", "yoruba"],
            ["How you dey, my brother?", "jude", "english"],
        ],
        inputs=[text_input, speaker, language],
    )

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()