Spaces:
Sleeping
Sleeping
File size: 6,680 Bytes
b3f30bd 3bb4d23 77dc7d0 1e48f34 b3f30bd 3bb4d23 77dc7d0 b3f30bd 77dc7d0 1e48f34 b3f30bd 1e48f34 b3f30bd 1e48f34 77dc7d0 3bb4d23 77dc7d0 b3f30bd 3bb4d23 1e48f34 3bb4d23 77dc7d0 3bb4d23 b3f30bd 3bb4d23 77dc7d0 b3f30bd 3bb4d23 77dc7d0 3bb4d23 77dc7d0 3bb4d23 b3f30bd 3bb4d23 b3f30bd 3bb4d23 dff9996 3bb4d23 77dc7d0 dff9996 3bb4d23 b3f30bd 3bb4d23 77dc7d0 3bb4d23 1e48f34 77dc7d0 3bb4d23 b3f30bd 3bb4d23 b3f30bd 77dc7d0 3bb4d23 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
# # app.py
# import gradio as gr
# import tempfile
# import soundfile as sf
# import numpy as np
# from kokoro import KPipeline # correct import
# # Initialize pipeline once on startup.
# # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
# pipeline = KPipeline(lang_code="a") # choose lang_code that matches the voice prefix
# # Example voices (prefix letter indicates language family)
# VOICES = [
# "af_heart", "af_bella", "af_nicole", # a* = american-ish voices
# "am_adam", "am_michael",
# "bf_emma", "bm_george" # b* = british-ish voices
# ]
# def synthesize_to_file(text: str, voice: str = "af_heart"):
# """Run kokoro pipeline and write first generated audio to a temporary wav file."""
# text = (text or "").strip()
# if not text:
# return None, "Please enter text."
# try:
# gen = pipeline(text, voice=voice) # generator yielding (gs, ps, audio)
# # take the first item produced
# item = next(gen, None)
# if item is None:
# return None, "Kokoro returned no audio."
# gs, ps, audio = item # gs: generation metadata, ps: phonemes, audio: numpy float32
# # Kokoro audio sample rate is 24000
# sr = 24000
# # Ensure numpy array dtype is float32
# audio = np.asarray(audio, dtype=np.float32)
# # Write to temporary wav file and return its path (Gradio can serve file paths)
# tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
# sf.write(tmp.name, audio, sr, format="WAV")
# return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."
# except Exception as e:
# return None, f"Error: {e}"
# with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
# gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
# with gr.Row():
# txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
# voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")
# out_audio = gr.Audio(label="Generated audio (wav file)")
# status = gr.Textbox(label="Status", interactive=False)
# btn = gr.Button("Generate")
# btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])
# if __name__ == "__main__":
# demo.launch(server_name="0.0.0.0", server_port=7860)
# import gradio as gr
# import tempfile
# import soundfile as sf
# import numpy as np
# from kokoro import KPipeline
# pipeline = KPipeline(lang_code="a")
# VOICES = [
# "af_heart", "af_bella", "af_nicole",
# "am_adam", "am_michael",
# "bf_emma", "bm_george"
# ]
# SR = 24000 # Kokoro standard sample rate
# def generate_full_audio(text, voice):
# text = (text or "").strip()
# if not text:
# return None, None, "Please enter text."
# try:
# # Kokoro returns a generator over chunks
# gen = pipeline(text, voice=voice)
# audio_chunks = []
# # Collect *all* audio chunks (fixes 6-second problem)
# for (gs, ps, audio) in gen:
# audio_chunks.append(np.asarray(audio, dtype=np.float32))
# if not audio_chunks:
# return None, None, "No audio produced."
# # Concatenate all chunks into one continuous waveform
# final_audio = np.concatenate(audio_chunks)
# # Save to WAV for download
# tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
# sf.write(tmp.name, final_audio, SR)
# return (SR, final_audio), tmp.name, f"Generated {len(final_audio)/SR:.2f} seconds of audio."
# except Exception as e:
# return None, None, f"Error: {e}"
# with gr.Blocks(title="Kokoro Unlimited TTS") as demo:
# gr.Markdown("## 🎧 Kokoro TTS — Unlimited Text, Downloadable Audio")
# with gr.Row():
# txt = gr.Textbox(
# lines=10,
# label="Input Text (no length limit)",
# placeholder="Paste long text here...",
# )
# voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
# audio_out = gr.Audio(label="Generated Audio")
# download_out = gr.File(label="Download Audio (.wav)")
# status = gr.Textbox(label="Status", interactive=False)
# generate_btn = gr.Button("Generate")
# generate_btn.click(
# fn=generate_full_audio,
# inputs=[txt, voice],
# outputs=[audio_out, download_out, status]
# )
# demo.launch()
import re
import tempfile
import time

import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline
# Sample rate in Hz used both for the WAV file written to disk and for the
# (rate, samples) tuple handed back to the audio widget.
SR = 24000

# Voice identifiers offered in the UI dropdown.
VOICES = [
    "af_heart",
    "af_bella",
    "af_nicole",
    "am_adam",
    "am_michael",
    "bf_emma",
    "bm_george",
]

# One shared pipeline, constructed once when the module is loaded.
# NOTE(review): lang_code="a" is used for every voice, including the "b*"
# entries above; earlier notes in this file say the lang_code should match
# the voice prefix -- confirm this is intended.
pipeline = KPipeline(lang_code="a")
def tts_stream(text, voice):
    """Synthesize ``text`` sentence by sentence, streaming progress updates.

    The input is split into sentences and each sentence is run through the
    module-level ``pipeline`` separately, so a status update can be yielded
    after every sentence instead of leaving the UI silent until the whole
    text has been processed.

    Args:
        text: Text to synthesize. ``None``, empty and whitespace-only values
            produce a single "Please enter text." update and nothing else.
        voice: Voice identifier forwarded to the pipeline unchanged.

    Yields:
        ``(audio, wav_path, progress, status)`` tuples. ``audio`` and
        ``wav_path`` are ``None`` on every intermediate update. The last
        tuple carries ``(SR, samples)`` plus the path of the WAV file that
        was written, or ``None, None`` with an explanatory status when the
        pipeline produced no audio at all.
    """
    text = (text or "").strip()
    if not text:
        yield None, None, 0, "Please enter text."
        return

    # Split *after* sentence-ending punctuation so each piece keeps its own
    # terminator (a plain split on ". " would strip the period before
    # synthesis and ignore "?" / "!"). Blank pieces are dropped up front so
    # that `total` matches the number of chunks actually processed.
    sentences = [
        piece for piece in re.split(r"(?<=[.!?])\s+", text) if piece.strip()
    ]
    total = len(sentences)
    audio_chunks = []

    for index, sentence in enumerate(sentences, start=1):
        # The pipeline is iterated as (gs, ps, audio) triples; only the audio
        # is needed here.
        for _gs, _ps, audio in pipeline(sentence, voice=voice):
            audio_chunks.append(np.asarray(audio, dtype=np.float32))

        # One progress update per sentence keeps the request visibly alive
        # (intended to avoid the 60-90 s stall timeout).
        progress = int(index / total * 100)
        yield None, None, progress, f"Processing chunk {index}/{total}..."
        # Short pause between updates, kept as the anti-timeout heartbeat.
        time.sleep(0.1)

    # np.concatenate raises ValueError on an empty list, so report the
    # "nothing generated" case explicitly instead of crashing.
    if not audio_chunks:
        yield None, None, 100, "No audio produced."
        return

    final_audio = np.concatenate(audio_chunks)

    # Reserve a persistent file name, then close the handle before soundfile
    # opens the path itself; this avoids leaking the descriptor and writing
    # to a file that is still held open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, final_audio, SR)

    yield (SR, final_audio), wav_path, 100, "Completed!"
# Gradio UI: a text box and voice picker feed `tts_stream`, whose streamed
# tuples update the audio player, the download link, the progress slider and
# the status line in that order.
with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
    gr.Markdown(
        "## ⚡ Kokoro TTS — Unlimited Length + Safe From Timeout + Progress Bar"
    )

    text = gr.Textbox(lines=12, label="Input text")
    voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")

    audio_output = gr.Audio(label="Audio Output")
    file_download = gr.File(label="Download WAV")
    # A non-interactive slider is used as a simple progress indicator.
    progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
    status = gr.Textbox(label="Status", interactive=False)

    run_btn = gr.Button("Generate")
    # The order of `outputs` must match the tuple order yielded by tts_stream.
    run_btn.click(
        fn=tts_stream,
        inputs=[text, voice],
        outputs=[audio_output, file_download, progress, status],
    )

# Only start the server when executed as a script, so importing this module
# (for example to reuse `demo` or `tts_stream`) does not launch it.
if __name__ == "__main__":
    demo.launch()
|