Imakandi-Labs committed on
Commit
9b498f2
·
verified ·
1 Parent(s): a0dd7fb

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +36 -6
  2. app.py +142 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,12 +1,42 @@
1
  ---
2
- title: Nigerian Tts Api
3
- emoji: 😻
4
- colorFrom: yellow
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 6.13.0
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Nigerian TTS API
3
+ emoji: 🎙️
4
+ colorFrom: green
5
+ colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 5.0.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ hardware: t4-small
12
  ---
13
 
14
+ # Nigerian TTS API
15
+
16
+ YarnGPT-based TTS service for Nigerian languages (English, Yoruba, Igbo, Hausa, Pidgin).
17
+
18
+ ## API Usage
19
+
20
+ ```python
21
+ import httpx
22
+
23
+ response = httpx.post(
24
+ "https://ubuntufarms-nigerian-tts-api.hf.space/api/tts",
25
+ json={"text": "Hello, how are you?", "speaker": "idera", "language": "english"}
26
+ )
27
+ audio = response.content # WAV audio bytes
28
+ ```
29
+
30
+ ## Available Speakers
31
+
32
+ - **English/Pidgin**: idera, chinenye, jude, emma, umar, joke, zainab, osagie, remi, tayo
33
+ - **Yoruba**: abayomi, aisha, folake
34
+ - **Igbo**: chioma, obinna, adanna
35
+ - **Hausa**: amina, fatima, ibrahim, yusuf
36
+
37
+ ## Languages
38
+
39
+ - english
40
+ - yoruba
41
+ - igbo
42
+ - hausa
app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Nigerian TTS API - YarnGPT-based TTS for Nigerian languages.
4
+ Runs on HuggingFace Spaces with T4 GPU.
5
+ """
6
+
7
+ import io
8
+ import logging
9
+ import time
10
+ import tempfile
11
+ import torch
12
+ import torchaudio
13
+ import gradio as gr
14
+
15
# Configure root logging once at import time and create this module's logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Select the compute device: CUDA when torch reports it available, else CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
log.info("Device: %s", DEVICE)
if DEVICE == "cuda":
    # Only query the device name when a CUDA device is actually present.
    log.info("GPU: %s", torch.cuda.get_device_name(0))
23
+
24
+
25
def get_system_info():
    """Return a multi-line, human-readable summary of the runtime.

    The summary always contains the selected device and the PyTorch version.
    When the device is CUDA it also lists the name of GPU 0 and its total
    memory in GB (bytes divided by 1e9).

    Returns:
        str: The summary lines joined with newlines, without a trailing
        newline on either the CPU or the CUDA path.
    """
    lines = [
        f"Device: {DEVICE}",
        f"PyTorch: {torch.__version__}",
    ]
    if DEVICE == "cuda":
        # CUDA queries are only valid when a GPU is present.
        lines.append(f"GPU: {torch.cuda.get_device_name(0)}")
        total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        lines.append(f"Memory: {total_gb:.1f} GB")
    return "\n".join(lines)
34
+
35
+
36
def tts_synthesize(text: str, speaker: str, language: str):
    """Synthesize speech with YarnGPT and write it to a temporary WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text is
            rejected without calling the model.
        speaker: Speaker name passed through to ``generate_speech``.
        language: Language name passed through to ``generate_speech``.

    Returns:
        A ``(path, error)`` pair. On success ``path`` is the WAV file path
        and ``error`` is ``None``; on failure ``path`` is ``None`` and
        ``error`` is a message string. Exceptions are not propagated.
    """
    import os

    # Guard against None as well as blank input so a null value does not
    # raise AttributeError on .strip().
    if text is None or not text.strip():
        return None, "Empty text"

    # Only show an ellipsis when the logged text was actually truncated.
    preview = text[:50] + ("..." if len(text) > 50 else "")
    log.info("TTS: text='%s', speaker=%s, lang=%s", preview, speaker, language)
    start = time.time()

    wav_path = None
    try:
        # Imported inside the try block so that an import failure is reported
        # through the returned error message like any other failure.
        from yarngpt import generate_speech

        audio_tensor = generate_speech(
            text=text,
            speaker=speaker,
            language=language,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4000,
        )

        elapsed = time.time() - start
        log.info("TTS done in %.1fs", elapsed)

        # torchaudio.save expects a 2-D tensor; move the result to
        # CPU and add a channel dimension to mono 1-D output.
        # NOTE(review): assumes generate_speech returns a torch.Tensor - confirm.
        if isinstance(audio_tensor, torch.Tensor):
            audio_tensor = audio_tensor.detach().cpu()
            if audio_tensor.dim() == 1:
                audio_tensor = audio_tensor.unsqueeze(0)

        # Reserve a unique path, then close the handle before writing so the
        # file is not opened twice at once.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            wav_path = f.name
        # NOTE(review): 24000 Hz is assumed to match the model output - confirm.
        torchaudio.save(wav_path, audio_tensor, sample_rate=24000)
        return wav_path, None

    except Exception as e:
        # Boundary handler: log with traceback and surface the message to the UI.
        log.exception("TTS error: %s", e)
        if wav_path is not None:
            # Do not leave an empty or partial temp file behind on failure.
            try:
                os.remove(wav_path)
            except OSError:
                pass
        return None, str(e)
70
+
71
+
72
# Available speakers by language. Insertion order matters: the first speaker
# of each list is used as that language's default selection in the UI.
SPEAKERS = {
    "english": ["idera", "chinenye", "jude", "emma", "umar", "joke", "zainab", "osagie", "remi", "tayo"],
    "yoruba": ["abayomi", "aisha", "folake"],
    "igbo": ["chioma", "obinna", "adanna"],
    "hausa": ["amina", "fatima", "ibrahim", "yusuf"],
}

# Flat list of every speaker, in language order then per-language order.
ALL_SPEAKERS = [name for names in SPEAKERS.values() for name in names]
83
+
84
+
85
def update_speakers(language):
    """Return a speaker dropdown populated for the chosen language.

    Args:
        language: Key into ``SPEAKERS``. An unknown language falls back to
            the English speaker list.

    Returns:
        A ``gr.Dropdown`` whose choices are that language's speakers and
        whose value is the first speaker in the list.
    """
    speakers = SPEAKERS.get(language, SPEAKERS["english"])
    return gr.Dropdown(choices=speakers, value=speakers[0])
89
+
90
+
91
# Gradio UI: inputs in the left column, audio/status/system info in the right.
with gr.Blocks(title="Nigerian TTS API") as demo:
    gr.Markdown("# 🎙️ Nigerian TTS API")
    gr.Markdown("YarnGPT-based Text-to-Speech for Nigerian languages")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Text",
                placeholder="Enter text to synthesize...",
                lines=3,
            )
            # Choices and defaults come from SPEAKERS so the UI cannot drift
            # from the speaker table.
            language = gr.Dropdown(
                label="Language",
                choices=list(SPEAKERS),
                value="english",
            )
            speaker = gr.Dropdown(
                label="Speaker",
                choices=SPEAKERS["english"],
                value=SPEAKERS["english"][0],
            )
            submit_btn = gr.Button("🔊 Synthesize", variant="primary")

        with gr.Column():
            audio_output = gr.Audio(label="Output Audio", type="filepath")
            # Receives the second element returned by tts_synthesize.
            error_output = gr.Textbox(label="Status", visible=True)
            # System info is computed once, when the UI is built.
            sys_info = gr.Textbox(label="System Info", value=get_system_info(), lines=4)

    # Update speakers when language changes
    language.change(fn=update_speakers, inputs=[language], outputs=[speaker])

    # Generate speech on button click
    submit_btn.click(
        fn=tts_synthesize,
        inputs=[text_input, speaker, language],
        outputs=[audio_output, error_output],
    )

    # Example inputs; each row is [text, speaker, language].
    gr.Examples(
        examples=[
            ["Hello, how are you today?", "idera", "english"],
            ["The weather in Lagos is beautiful.", "emma", "english"],
            ["Bawo ni, e ku ojo.", "abayomi", "yoruba"],
            ["How you dey, my brother?", "jude", "english"],
        ],
        inputs=[text_input, speaker, language],
    )
140
+
141
# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ yarngpt>=0.2.0
2
+ torch>=2.0.0
3
+ torchaudio>=2.0.0
4
+ gradio>=5.0.0
5
+ spaces