Files changed (1) hide show
  1. app.py +48 -202
app.py CHANGED
@@ -1,184 +1,19 @@
1
- # # app.py
2
- # import gradio as gr
3
- # import tempfile
4
- # import soundfile as sf
5
- # import numpy as np
6
-
7
- # from kokoro import KPipeline # correct import
8
-
9
- # # Initialize pipeline once on startup.
10
- # # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
11
- # pipeline = KPipeline(lang_code="a") # choose lang_code that matches the voice prefix
12
-
13
- # # Example voices (prefix letter indicates language family)
14
- # VOICES = [
15
- # "af_heart", "af_bella", "af_nicole", # a* = american-ish voices
16
- # "am_adam", "am_michael",
17
- # "bf_emma", "bm_george" # b* = british-ish voices
18
- # ]
19
-
20
-
21
- # def synthesize_to_file(text: str, voice: str = "af_heart"):
22
- # """Run kokoro pipeline and write first generated audio to a temporary wav file."""
23
- # text = (text or "").strip()
24
- # if not text:
25
- # return None, "Please enter text."
26
-
27
- # try:
28
- # gen = pipeline(text, voice=voice) # generator yielding (gs, ps, audio)
29
- # # take the first item produced
30
- # item = next(gen, None)
31
- # if item is None:
32
- # return None, "Kokoro returned no audio."
33
-
34
- # gs, ps, audio = item # gs: generation metadata, ps: phonemes, audio: numpy float32
35
- # # Kokoro audio sample rate is 24000
36
- # sr = 24000
37
-
38
- # # Ensure numpy array dtype is float32
39
- # audio = np.asarray(audio, dtype=np.float32)
40
-
41
- # # Write to temporary wav file and return its path (Gradio can serve file paths)
42
- # tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
43
- # sf.write(tmp.name, audio, sr, format="WAV")
44
- # return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."
45
-
46
- # except Exception as e:
47
- # return None, f"Error: {e}"
48
-
49
-
50
- # with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
51
- # gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
52
- # with gr.Row():
53
- # txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
54
- # voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")
55
-
56
- # out_audio = gr.Audio(label="Generated audio (wav file)")
57
- # status = gr.Textbox(label="Status", interactive=False)
58
-
59
- # btn = gr.Button("Generate")
60
- # btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])
61
-
62
- # if __name__ == "__main__":
63
- # demo.launch(server_name="0.0.0.0", server_port=7860)
64
-
65
-
66
-
67
-
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
-
77
-
78
-
79
-
80
-
81
-
82
-
83
-
84
-
85
-
86
-
87
-
88
- # import gradio as gr
89
- # import tempfile
90
- # import soundfile as sf
91
- # import numpy as np
92
- # from kokoro import KPipeline
93
-
94
- # pipeline = KPipeline(lang_code="a")
95
-
96
- # VOICES = [
97
- # "af_heart", "af_bella", "af_nicole",
98
- # "am_adam", "am_michael",
99
- # "bf_emma", "bm_george"
100
- # ]
101
-
102
- # SR = 24000 # Kokoro standard sample rate
103
-
104
-
105
- # def generate_full_audio(text, voice):
106
- # text = (text or "").strip()
107
- # if not text:
108
- # return None, None, "Please enter text."
109
-
110
- # try:
111
- # # Kokoro returns a generator over chunks
112
- # gen = pipeline(text, voice=voice)
113
-
114
- # audio_chunks = []
115
-
116
- # # Collect *all* audio chunks (fixes 6-second problem)
117
- # for (gs, ps, audio) in gen:
118
- # audio_chunks.append(np.asarray(audio, dtype=np.float32))
119
-
120
- # if not audio_chunks:
121
- # return None, None, "No audio produced."
122
-
123
- # # Concatenate all chunks into one continuous waveform
124
- # final_audio = np.concatenate(audio_chunks)
125
-
126
- # # Save to WAV for download
127
- # tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
128
- # sf.write(tmp.name, final_audio, SR)
129
-
130
- # return (SR, final_audio), tmp.name, f"Generated {len(final_audio)/SR:.2f} seconds of audio."
131
-
132
- # except Exception as e:
133
- # return None, None, f"Error: {e}"
134
-
135
-
136
- # with gr.Blocks(title="Kokoro Unlimited TTS") as demo:
137
- # gr.Markdown("## 🎧 Kokoro TTS — Unlimited Text, Downloadable Audio")
138
-
139
- # with gr.Row():
140
- # txt = gr.Textbox(
141
- # lines=10,
142
- # label="Input Text (no length limit)",
143
- # placeholder="Paste long text here...",
144
- # )
145
- # voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
146
-
147
- # audio_out = gr.Audio(label="Generated Audio")
148
- # download_out = gr.File(label="Download Audio (.wav)")
149
- # status = gr.Textbox(label="Status", interactive=False)
150
-
151
- # generate_btn = gr.Button("Generate")
152
-
153
- # generate_btn.click(
154
- # fn=generate_full_audio,
155
- # inputs=[txt, voice],
156
- # outputs=[audio_out, download_out, status]
157
- # )
158
-
159
- # demo.launch()
160
-
161
-
162
-
163
-
164
-
165
-
166
-
167
-
168
-
169
-
170
-
171
-
172
-
173
-
174
-
175
-
176
  import gradio as gr
177
  import tempfile
178
  import soundfile as sf
179
  import numpy as np
180
  from kokoro import KPipeline
181
  import time
 
 
 
 
 
 
 
 
 
 
182
 
183
  pipeline = KPipeline(lang_code="a")
184
 
@@ -190,58 +25,71 @@ VOICES = [
190
 
191
  SR = 24000
192
 
193
-
194
  def tts_stream(text, voice):
195
  text = (text or "").strip()
196
  if not text:
197
  yield None, None, 0, "Please enter text."
198
  return
199
 
200
- # Split text into smaller chunks for progress-based streaming
201
- # Helps prevent 60–90s stall timeout
202
- sentences = text.split(". ")
 
 
203
  total = len(sentences)
204
  audio_chunks = []
 
 
 
 
 
205
 
206
  for i, sentence in enumerate(sentences):
207
  if not sentence.strip():
208
  continue
209
 
210
- # Run Kokoro on the chunk
211
  gen = pipeline(sentence, voice=voice)
212
 
 
213
  for (gs, ps, audio) in gen:
214
  audio = np.asarray(audio, dtype=np.float32)
215
  audio_chunks.append(audio)
216
-
217
- # Progress streaming to UI every chunk
218
  progress = int((i + 1) / total * 100)
219
- yield None, None, progress, f"Processing chunk {i+1}/{total}..."
220
 
221
- # HuggingFace anti-timeout heartbeat
222
- time.sleep(0.1)
223
 
224
- # Combine all audio into one file
225
- final_audio = np.concatenate(audio_chunks)
 
 
226
 
 
227
  tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
228
  sf.write(tmp.name, final_audio, SR)
229
 
 
230
  yield (SR, final_audio), tmp.name, 100, "Completed!"
231
 
232
 
233
- with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
234
- gr.Markdown("## ⚡ Kokoro TTS – Unlimited Length + Safe From Timeout + Progress Bar")
235
-
236
- text = gr.Textbox(lines=12, label="Input text")
237
- voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
238
-
239
- audio_output = gr.Audio(label="Audio Output")
240
- file_download = gr.File(label="Download WAV")
241
- progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
242
- status = gr.Textbox(label="Status", interactive=False)
243
-
244
- run_btn = gr.Button("Generate")
 
 
245
 
246
  run_btn.click(
247
  fn=tts_stream,
@@ -249,6 +97,4 @@ with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
249
  outputs=[audio_output, file_download, progress, status],
250
  )
251
 
252
- demo.launch()
253
-
254
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import tempfile
3
  import soundfile as sf
4
  import numpy as np
5
  from kokoro import KPipeline
6
  import time
7
+ import nltk
8
+
9
+ # Download the necessary NLTK data for sentence splitting
10
# Make sure the Punkt sentence-tokenizer data used by sent_tokenize is
# available locally. The lookup raises LookupError when the resource is
# missing; only then are the tokenizer packages downloaded.
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    for _nltk_resource in ('punkt_tab', 'punkt'):
        nltk.download(_nltk_resource)
15
+
16
+ from nltk.tokenize import sent_tokenize
17
 
18
  pipeline = KPipeline(lang_code="a")
19
 
 
25
 
26
  SR = 24000
27
 
 
28
  def tts_stream(text, voice):
29
  text = (text or "").strip()
30
  if not text:
31
  yield None, None, 0, "Please enter text."
32
  return
33
 
34
+ # --- IMPROVEMENT HERE ---
35
+ # Use NLTK to split text into linguistically correct sentences.
36
+ # This handles "Dr.", "Mr.", "?", "!", and quotes correctly.
37
+ sentences = sent_tokenize(text)
38
+
39
  total = len(sentences)
40
  audio_chunks = []
41
+
42
+ # Initialize an empty array for the concatenated audio
43
+ full_audio = np.array([], dtype=np.float32)
44
+
45
+ print(f"Split into {total} sentences.")
46
 
47
  for i, sentence in enumerate(sentences):
48
  if not sentence.strip():
49
  continue
50
 
51
+ # Run Kokoro on the specific sentence
52
  gen = pipeline(sentence, voice=voice)
53
 
54
+ # Kokoro returns a generator, we grab the audio from it
55
  for (gs, ps, audio) in gen:
56
  audio = np.asarray(audio, dtype=np.float32)
57
  audio_chunks.append(audio)
58
+
59
+ # Progress streaming to UI
60
  progress = int((i + 1) / total * 100)
61
+ yield None, None, progress, f"Processing sentence {i+1}/{total}..."
62
 
63
+ # Anti-timeout heartbeat
64
+ time.sleep(0.05)
65
 
66
+ if audio_chunks:
67
+ final_audio = np.concatenate(audio_chunks)
68
+ else:
69
+ final_audio = np.array([], dtype=np.float32)
70
 
71
+ # Write to a temp file for the download button
72
  tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
73
  sf.write(tmp.name, final_audio, SR)
74
 
75
+ # Return the audio to the player and the file for download
76
  yield (SR, final_audio), tmp.name, 100, "Completed!"
77
 
78
 
79
+ with gr.Blocks(title="Kokoro TTS (Smart Split)") as demo:
80
+ gr.Markdown("## ⚡ Kokoro TTS – Smart Sentence Splitting")
81
+
82
+ with gr.Row():
83
+ with gr.Column():
84
+ text = gr.Textbox(lines=12, label="Input text", placeholder="Paste long text here...")
85
+ voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
86
+ run_btn = gr.Button("Generate", variant="primary")
87
+
88
+ with gr.Column():
89
+ audio_output = gr.Audio(label="Audio Output", interactive=False)
90
+ file_download = gr.File(label="Download WAV")
91
+ progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
92
+ status = gr.Textbox(label="Status", interactive=False)
93
 
94
  run_btn.click(
95
  fn=tts_stream,
 
97
  outputs=[audio_output, file_download, progress, status],
98
  )
99
 
100
+ demo.queue().launch()