Spaces:
Sleeping
Sleeping
| # # app.py | |
| # import gradio as gr | |
| # import tempfile | |
| # import soundfile as sf | |
| # import numpy as np | |
| # from kokoro import KPipeline # correct import | |
| # # Initialize pipeline once on startup. | |
| # # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping. | |
| # pipeline = KPipeline(lang_code="a") # choose lang_code that matches the voice prefix | |
| # # Example voices (prefix letter indicates language family) | |
| # VOICES = [ | |
| # "af_heart", "af_bella", "af_nicole", # a* = american-ish voices | |
| # "am_adam", "am_michael", | |
| # "bf_emma", "bm_george" # b* = british-ish voices | |
| # ] | |
| # def synthesize_to_file(text: str, voice: str = "af_heart"): | |
| # """Run kokoro pipeline and write first generated audio to a temporary wav file.""" | |
| # text = (text or "").strip() | |
| # if not text: | |
| # return None, "Please enter text." | |
| # try: | |
| # gen = pipeline(text, voice=voice) # generator yielding (gs, ps, audio) | |
| # # take the first item produced | |
| # item = next(gen, None) | |
| # if item is None: | |
| # return None, "Kokoro returned no audio." | |
| # gs, ps, audio = item # gs: generation metadata, ps: phonemes, audio: numpy float32 | |
| # # Kokoro audio sample rate is 24000 | |
| # sr = 24000 | |
| # # Ensure numpy array dtype is float32 | |
| # audio = np.asarray(audio, dtype=np.float32) | |
| # # Write to temporary wav file and return its path (Gradio can serve file paths) | |
| # tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) | |
| # sf.write(tmp.name, audio, sr, format="WAV") | |
| # return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz." | |
| # except Exception as e: | |
| # return None, f"Error: {e}" | |
| # with gr.Blocks(title="Kokoro TTS (Gradio)") as demo: | |
| # gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)") | |
| # with gr.Row(): | |
| # txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text") | |
| # voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice") | |
| # out_audio = gr.Audio(label="Generated audio (wav file)") | |
| # status = gr.Textbox(label="Status", interactive=False) | |
| # btn = gr.Button("Generate") | |
| # btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status]) | |
| # if __name__ == "__main__": | |
| # demo.launch(server_name="0.0.0.0", server_port=7860) | |
| # import gradio as gr | |
| # import tempfile | |
| # import soundfile as sf | |
| # import numpy as np | |
| # from kokoro import KPipeline | |
| # pipeline = KPipeline(lang_code="a") | |
| # VOICES = [ | |
| # "af_heart", "af_bella", "af_nicole", | |
| # "am_adam", "am_michael", | |
| # "bf_emma", "bm_george" | |
| # ] | |
| # SR = 24000 # Kokoro standard sample rate | |
| # def generate_full_audio(text, voice): | |
| # text = (text or "").strip() | |
| # if not text: | |
| # return None, None, "Please enter text." | |
| # try: | |
| # # Kokoro returns a generator over chunks | |
| # gen = pipeline(text, voice=voice) | |
| # audio_chunks = [] | |
| # # Collect *all* audio chunks (fixes 6-second problem) | |
| # for (gs, ps, audio) in gen: | |
| # audio_chunks.append(np.asarray(audio, dtype=np.float32)) | |
| # if not audio_chunks: | |
| # return None, None, "No audio produced." | |
| # # Concatenate all chunks into one continuous waveform | |
| # final_audio = np.concatenate(audio_chunks) | |
| # # Save to WAV for download | |
| # tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) | |
| # sf.write(tmp.name, final_audio, SR) | |
| # return (SR, final_audio), tmp.name, f"Generated {len(final_audio)/SR:.2f} seconds of audio." | |
| # except Exception as e: | |
| # return None, None, f"Error: {e}" | |
| # with gr.Blocks(title="Kokoro Unlimited TTS") as demo: | |
| # gr.Markdown("## 🎧 Kokoro TTS — Unlimited Text, Downloadable Audio") | |
| # with gr.Row(): | |
| # txt = gr.Textbox( | |
| # lines=10, | |
| # label="Input Text (no length limit)", | |
| # placeholder="Paste long text here...", | |
| # ) | |
| # voice = gr.Dropdown(VOICES, value="af_heart", label="Voice") | |
| # audio_out = gr.Audio(label="Generated Audio") | |
| # download_out = gr.File(label="Download Audio (.wav)") | |
| # status = gr.Textbox(label="Status", interactive=False) | |
| # generate_btn = gr.Button("Generate") | |
| # generate_btn.click( | |
| # fn=generate_full_audio, | |
| # inputs=[txt, voice], | |
| # outputs=[audio_out, download_out, status] | |
| # ) | |
| # demo.launch() | |
| import gradio as gr | |
| import tempfile | |
| import soundfile as sf | |
| import numpy as np | |
| from kokoro import KPipeline | |
| import time | |
# One Kokoro pipeline is created when the module loads and reused by every
# synthesis request.
# NOTE(review): the "bf_*"/"bm_*" voices below are also served through this
# lang_code="a" pipeline — confirm that is intended.
pipeline = KPipeline(lang_code="a")

# Voice identifiers offered in the UI dropdown.
VOICES = [
    "af_heart",
    "af_bella",
    "af_nicole",
    "am_adam",
    "am_michael",
    "bf_emma",
    "bm_george",
]

# Sample rate (Hz) used both when writing the WAV file and when handing the
# waveform back to the audio component.
SR = 24000
def _split_sentences(text):
    """Split *text* on ". " and return the non-blank sentences.

    ``str.split`` discards the separator, so the period is re-attached to
    every piece except the last one (which never had the separator after it).
    Blank pieces are dropped so callers can use ``len()`` as the real amount
    of work to do.
    """
    pieces = text.split(". ")
    last_index = len(pieces) - 1
    sentences = []
    for index, piece in enumerate(pieces):
        piece = piece.strip()
        if not piece:
            continue
        sentences.append(piece if index == last_index else piece + ".")
    return sentences


def tts_stream(text, voice):
    """Synthesize *text* sentence by sentence, streaming progress updates.

    This is a generator. Every item it yields is a 4-tuple
    ``(audio, wav_path, progress, status)``:

    * while working: ``(None, None, <0-100>, "Processing chunk i/total...")``
      once per sentence;
    * on success: ``((SR, waveform), <path to .wav>, 100, "Completed!")``;
    * on blank input or when no audio was produced: ``(None, None, 0, <msg>)``.

    Args:
        text: Text to speak. ``None`` and whitespace-only input are treated
            as empty.
        voice: Voice identifier forwarded to the pipeline as ``voice=``.

    Exceptions raised by the pipeline or by the WAV writer are not caught
    here and propagate to the caller.
    """
    text = (text or "").strip()
    if not text:
        yield None, None, 0, "Please enter text."
        return

    # Work in sentence-sized pieces so a progress update can be emitted
    # regularly; intended to avoid the 60-90s stall timeout on long inputs.
    sentences = _split_sentences(text)
    total = len(sentences)  # >= 1: stripped, non-empty text has a last piece
    audio_chunks = []

    for index, sentence in enumerate(sentences, start=1):
        # The pipeline yields 3-tuples; only the audio element is needed.
        for _gs, _ps, audio in pipeline(sentence, voice=voice):
            audio_chunks.append(np.asarray(audio, dtype=np.float32))

        progress = int(index / total * 100)
        yield None, None, progress, f"Processing chunk {index}/{total}..."

        # Short pause kept from the original as an anti-timeout heartbeat.
        time.sleep(0.1)

    # np.concatenate raises ValueError on an empty list, so report the
    # situation instead of crashing the request.
    if not audio_chunks:
        yield None, None, 0, "No audio produced."
        return

    final_audio = np.concatenate(audio_chunks)

    # Reserve a path that outlives this call (delete=False), but close our
    # own handle before the writer opens the file by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, final_audio, SR)

    yield (SR, final_audio), wav_path, 100, "Completed!"
# Gradio UI: a text box and voice picker feed tts_stream, whose yielded
# 4-tuples update the audio player, download link, progress slider and
# status box (in that order).
with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
    # Heading text repaired: the emoji and dash were mis-encoded in the source.
    gr.Markdown("## ⚡ Kokoro TTS — Unlimited Length + Safe From Timeout + Progress Bar")

    # Inputs
    text = gr.Textbox(lines=12, label="Input text")
    voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")

    # Outputs
    audio_output = gr.Audio(label="Audio Output")
    file_download = gr.File(label="Download WAV")
    # A read-only slider doubles as the progress bar (0-100 %).
    progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
    status = gr.Textbox(label="Status", interactive=False)

    run_btn = gr.Button("Generate")
    run_btn.click(
        fn=tts_stream,
        inputs=[text, voice],
        outputs=[audio_output, file_download, progress, status],
    )

demo.launch()