File size: 6,680 Bytes
b3f30bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bb4d23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77dc7d0
1e48f34
 
 
b3f30bd
3bb4d23
77dc7d0
b3f30bd
77dc7d0
1e48f34
b3f30bd
1e48f34
b3f30bd
1e48f34
77dc7d0
3bb4d23
77dc7d0
b3f30bd
3bb4d23
1e48f34
 
3bb4d23
 
77dc7d0
3bb4d23
 
 
 
 
b3f30bd
3bb4d23
 
 
 
 
 
77dc7d0
b3f30bd
3bb4d23
 
77dc7d0
3bb4d23
 
 
77dc7d0
3bb4d23
 
b3f30bd
3bb4d23
 
b3f30bd
3bb4d23
 
dff9996
3bb4d23
77dc7d0
dff9996
3bb4d23
 
b3f30bd
3bb4d23
 
77dc7d0
3bb4d23
 
 
1e48f34
77dc7d0
3bb4d23
b3f30bd
3bb4d23
 
 
 
b3f30bd
 
 
77dc7d0
3bb4d23
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# # app.py
# import gradio as gr
# import tempfile
# import soundfile as sf
# import numpy as np

# from kokoro import KPipeline  # correct import

# # Initialize pipeline once on startup.
# # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
# pipeline = KPipeline(lang_code="a")  # choose lang_code that matches the voice prefix

# # Example voices (prefix letter indicates language family)
# VOICES = [
#     "af_heart", "af_bella", "af_nicole",     # a* = american-ish voices
#     "am_adam", "am_michael",
#     "bf_emma", "bm_george"                  # b* = british-ish voices
# ]


# def synthesize_to_file(text: str, voice: str = "af_heart"):
#     """Run kokoro pipeline and write first generated audio to a temporary wav file."""
#     text = (text or "").strip()
#     if not text:
#         return None, "Please enter text."

#     try:
#         gen = pipeline(text, voice=voice)  # generator yielding (gs, ps, audio)
#         # take the first item produced
#         item = next(gen, None)
#         if item is None:
#             return None, "Kokoro returned no audio."

#         gs, ps, audio = item  # gs: generation metadata, ps: phonemes, audio: numpy float32
#         # Kokoro audio sample rate is 24000
#         sr = 24000

#         # Ensure numpy array dtype is float32
#         audio = np.asarray(audio, dtype=np.float32)

#         # Write to temporary wav file and return its path (Gradio can serve file paths)
#         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
#         sf.write(tmp.name, audio, sr, format="WAV")
#         return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."

#     except Exception as e:
#         return None, f"Error: {e}"


# with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
#     gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
#     with gr.Row():
#         txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
#         voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")

#     out_audio = gr.Audio(label="Generated audio (wav file)")
#     status = gr.Textbox(label="Status", interactive=False)

#     btn = gr.Button("Generate")
#     btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])

# if __name__ == "__main__":
#     demo.launch(server_name="0.0.0.0", server_port=7860)
























# import gradio as gr
# import tempfile
# import soundfile as sf
# import numpy as np
# from kokoro import KPipeline

# pipeline = KPipeline(lang_code="a")

# VOICES = [
#     "af_heart", "af_bella", "af_nicole",
#     "am_adam", "am_michael",
#     "bf_emma", "bm_george"
# ]

# SR = 24000  # Kokoro standard sample rate


# def generate_full_audio(text, voice):
#     text = (text or "").strip()
#     if not text:
#         return None, None, "Please enter text."

#     try:
#         # Kokoro returns a generator over chunks
#         gen = pipeline(text, voice=voice)

#         audio_chunks = []

#         # Collect *all* audio chunks (fixes 6-second problem)
#         for (gs, ps, audio) in gen:
#             audio_chunks.append(np.asarray(audio, dtype=np.float32))

#         if not audio_chunks:
#             return None, None, "No audio produced."

#         # Concatenate all chunks into one continuous waveform
#         final_audio = np.concatenate(audio_chunks)

#         # Save to WAV for download
#         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
#         sf.write(tmp.name, final_audio, SR)

#         return (SR, final_audio), tmp.name, f"Generated {len(final_audio)/SR:.2f} seconds of audio."

#     except Exception as e:
#         return None, None, f"Error: {e}"


# with gr.Blocks(title="Kokoro Unlimited TTS") as demo:
#     gr.Markdown("## 🎧 Kokoro TTS — Unlimited Text, Downloadable Audio")

#     with gr.Row():
#         txt = gr.Textbox(
#             lines=10,
#             label="Input Text (no length limit)",
#             placeholder="Paste long text here...",
#         )
#         voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")

#     audio_out = gr.Audio(label="Generated Audio")
#     download_out = gr.File(label="Download Audio (.wav)")
#     status = gr.Textbox(label="Status", interactive=False)

#     generate_btn = gr.Button("Generate")

#     generate_btn.click(
#         fn=generate_full_audio,
#         inputs=[txt, voice],
#         outputs=[audio_out, download_out, status]
#     )

# demo.launch()
















import gradio as gr
import tempfile
import soundfile as sf
import numpy as np
from kokoro import KPipeline
import time

# Single Kokoro pipeline, constructed once at import time and shared by every
# synthesis request.
# NOTE(review): earlier revisions of this file describe lang_code "a" as
# American English and advise matching it to the voice prefix; the "b*" voices
# below are still routed through this "a" pipeline -- confirm that is intended.
pipeline = KPipeline(lang_code="a")

# Voice identifiers offered in the UI dropdown, in display order.
VOICES = [
    "af_heart",
    "af_bella",
    "af_nicole",
    "am_adam",
    "am_michael",
    "bf_emma",
    "bm_george",
]

# Sample rate (Hz) used both for the WAV file and for the in-browser audio.
SR = 24_000


def tts_stream(text, voice):
    """Synthesize *text* sentence by sentence, streaming progress to the UI.

    This is a generator. Every item it yields is a 4-tuple
    ``(audio, wav_path, progress, status)``:

    * ``audio`` -- ``None`` while working; ``(SR, samples)`` on the final
      item, where ``samples`` is the float32 concatenation of every chunk.
    * ``wav_path`` -- ``None`` while working; path of the written WAV file on
      the final item.
    * ``progress`` -- integer percentage in the range 0-100.
    * ``status`` -- human-readable status message.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only input yields
            a single "Please enter text." item and stops.
        voice: Voice identifier, forwarded to ``pipeline`` unchanged.

    Yields:
        One progress item per synthesized sentence, then either the finished
        audio or a "No audio produced." item when the pipeline returned
        nothing at all.
    """
    text = (text or "").strip()
    if not text:
        yield None, None, 0, "Please enter text."
        return

    # Split the text into sentence-sized chunks so a progress update can be
    # emitted after each one (helps prevent the 60–90s stall timeout).
    # str.split removes the ". " separator, so the period is put back on every
    # fragment except the last, which keeps whatever ending the user typed.
    # Blank fragments are dropped up front so the chunk count and the progress
    # percentage only reflect work that is actually performed.
    pieces = text.split(". ")
    last = len(pieces) - 1
    sentences = [
        piece if idx == last else f"{piece}."
        for idx, piece in enumerate(pieces)
        if piece.strip()
    ]
    total = len(sentences)
    audio_chunks = []

    for done, sentence in enumerate(sentences, start=1):
        # The pipeline is iterated as a generator of (gs, ps, audio) items;
        # only the audio part is kept, normalised to float32.
        for (_gs, _ps, audio) in pipeline(sentence, voice=voice):
            audio_chunks.append(np.asarray(audio, dtype=np.float32))

        # Stream progress to the UI after every chunk.
        percent = int(done / total * 100)
        yield None, None, percent, f"Processing chunk {done}/{total}..."

        # HuggingFace anti-timeout heartbeat (kept from the original code).
        time.sleep(0.1)

    # np.concatenate raises ValueError on an empty list, so report the
    # "nothing was generated" case explicitly instead of crashing.
    if not audio_chunks:
        yield None, None, 0, "No audio produced."
        return

    # Combine all audio into one continuous waveform.
    final_audio = np.concatenate(audio_chunks)

    # Reserve a persistent temp path and close our handle before writing, so
    # no file descriptor is leaked and the writer is not competing with an
    # already-open handle on the same file.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, final_audio, SR)

    yield (SR, final_audio), wav_path, 100, "Completed!"


# Single-page UI: a text box and voice picker as inputs, and four outputs that
# tts_stream fills in on every yield (audio player, downloadable WAV,
# progress slider, status line).
with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
    # Heading text repaired: the emoji and dash were mis-encoded.
    gr.Markdown("## ⚡ Kokoro TTS – Unlimited Length + Safe From Timeout + Progress Bar")

    # Inputs.
    text = gr.Textbox(lines=12, label="Input text")
    voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")

    # Outputs. The slider is read-only and serves purely as a progress bar.
    audio_output = gr.Audio(label="Audio Output")
    file_download = gr.File(label="Download WAV")
    progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
    status = gr.Textbox(label="Status", interactive=False)

    run_btn = gr.Button("Generate")

    # The order of `outputs` must match the order of the values in each tuple
    # yielded by tts_stream: (audio, wav_path, progress, status).
    run_btn.click(
        fn=tts_stream,
        inputs=[text, voice],
        outputs=[audio_output, file_download, progress, status],
    )

# Start the web server with Gradio's default settings.
demo.launch()