Spaces:

heerjtdev
/

koko

Sleeping

File size: 6,680 Bytes

b3f30bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3bb4d23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77dc7d0
1e48f34
 
 
b3f30bd
3bb4d23
77dc7d0
b3f30bd
77dc7d0
1e48f34
b3f30bd
1e48f34
b3f30bd
1e48f34
77dc7d0
3bb4d23
77dc7d0
b3f30bd
3bb4d23
1e48f34
 
3bb4d23
 
77dc7d0
3bb4d23
 
 
 
 
b3f30bd
3bb4d23
 
 
 
 
 
77dc7d0
b3f30bd
3bb4d23
 
77dc7d0
3bb4d23
 
 
77dc7d0
3bb4d23
 
b3f30bd
3bb4d23
 
b3f30bd
3bb4d23
 
dff9996
3bb4d23
77dc7d0
dff9996
3bb4d23
 
b3f30bd
3bb4d23
 
77dc7d0
3bb4d23
 
 
1e48f34
77dc7d0
3bb4d23
b3f30bd
3bb4d23
 
 
 
b3f30bd
 
 
77dc7d0
3bb4d23

# # app.py
# import gradio as gr
# import tempfile
# import soundfile as sf
# import numpy as np

# from kokoro import KPipeline  # correct import

# # Initialize pipeline once on startup.
# # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
# pipeline = KPipeline(lang_code="a")  # choose lang_code that matches the voice prefix

# # Example voices (prefix letter indicates language family)
# VOICES = [
#     "af_heart", "af_bella", "af_nicole",     # a* = american-ish voices
#     "am_adam", "am_michael",
#     "bf_emma", "bm_george"                  # b* = british-ish voices
# ]


# def synthesize_to_file(text: str, voice: str = "af_heart"):
#     """Run kokoro pipeline and write first generated audio to a temporary wav file."""
#     text = (text or "").strip()
#     if not text:
#         return None, "Please enter text."

#     try:
#         gen = pipeline(text, voice=voice)  # generator yielding (gs, ps, audio)
#         # take the first item produced
#         item = next(gen, None)
#         if item is None:
#             return None, "Kokoro returned no audio."

#         gs, ps, audio = item  # gs: generation metadata, ps: phonemes, audio: numpy float32
#         # Kokoro audio sample rate is 24000
#         sr = 24000

#         # Ensure numpy array dtype is float32
#         audio = np.asarray(audio, dtype=np.float32)

#         # Write to temporary wav file and return its path (Gradio can serve file paths)
#         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
#         sf.write(tmp.name, audio, sr, format="WAV")
#         return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."

#     except Exception as e:
#         return None, f"Error: {e}"


# with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
#     gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
#     with gr.Row():
#         txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
#         voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")

#     out_audio = gr.Audio(label="Generated audio (wav file)")
#     status = gr.Textbox(label="Status", interactive=False)

#     btn = gr.Button("Generate")
#     btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])

# if __name__ == "__main__":
#     demo.launch(server_name="0.0.0.0", server_port=7860)
























# import gradio as gr
# import tempfile
# import soundfile as sf
# import numpy as np
# from kokoro import KPipeline

# pipeline = KPipeline(lang_code="a")

# VOICES = [
#     "af_heart", "af_bella", "af_nicole",
#     "am_adam", "am_michael",
#     "bf_emma", "bm_george"
# ]

# SR = 24000  # Kokoro standard sample rate


# def generate_full_audio(text, voice):
#     text = (text or "").strip()
#     if not text:
#         return None, None, "Please enter text."

#     try:
#         # Kokoro returns a generator over chunks
#         gen = pipeline(text, voice=voice)

#         audio_chunks = []

#         # Collect *all* audio chunks (fixes 6-second problem)
#         for (gs, ps, audio) in gen:
#             audio_chunks.append(np.asarray(audio, dtype=np.float32))

#         if not audio_chunks:
#             return None, None, "No audio produced."

#         # Concatenate all chunks into one continuous waveform
#         final_audio = np.concatenate(audio_chunks)

#         # Save to WAV for download
#         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
#         sf.write(tmp.name, final_audio, SR)

#         return (SR, final_audio), tmp.name, f"Generated {len(final_audio)/SR:.2f} seconds of audio."

#     except Exception as e:
#         return None, None, f"Error: {e}"


# with gr.Blocks(title="Kokoro Unlimited TTS") as demo:
#     gr.Markdown("## 🎧 Kokoro TTS — Unlimited Text, Downloadable Audio")

#     with gr.Row():
#         txt = gr.Textbox(
#             lines=10,
#             label="Input Text (no length limit)",
#             placeholder="Paste long text here...",
#         )
#         voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")

#     audio_out = gr.Audio(label="Generated Audio")
#     download_out = gr.File(label="Download Audio (.wav)")
#     status = gr.Textbox(label="Status", interactive=False)

#     generate_btn = gr.Button("Generate")

#     generate_btn.click(
#         fn=generate_full_audio,
#         inputs=[txt, voice],
#         outputs=[audio_out, download_out, status]
#     )

# demo.launch()
















import gradio as gr
import tempfile
import soundfile as sf
import numpy as np
from kokoro import KPipeline
import time

# Single Kokoro pipeline instance, created once at import time and reused by
# every synthesis request.
pipeline = KPipeline(lang_code="a")

# Voice identifiers offered in the UI's voice dropdown.
VOICES = [
    "af_heart",
    "af_bella",
    "af_nicole",
    "am_adam",
    "am_michael",
    "bf_emma",
    "bm_george",
]

# Sample rate (Hz) used when writing the WAV file and when handing the
# waveform back to the audio component.
SR = 24_000


def tts_stream(text, voice):
    """Synthesize ``text`` piece by piece, yielding progress updates as it goes.

    This is a generator intended to be used as an event handler: every value
    it yields is a 4-tuple ``(audio, wav_path, progress, status)``.

    Args:
        text: The text to synthesize. ``None``, empty, or whitespace-only
            input produces a single "Please enter text." update.
        voice: Voice identifier forwarded to the pipeline call.

    Yields:
        tuple: ``(audio, wav_path, progress, status)`` where

        * ``audio`` is ``None`` while processing and ``(SR, waveform)`` with a
          float32 NumPy waveform on the final update,
        * ``wav_path`` is ``None`` while processing and the path of the
          written temporary ``.wav`` file on the final update,
        * ``progress`` is an integer percentage in ``0..100``,
        * ``status`` is a human-readable status message.

    Note:
        The temporary WAV file is created with ``delete=False`` and is not
        removed by this function.
    """
    text = (text or "").strip()
    if not text:
        yield None, None, 0, "Please enter text."
        return

    # Split the text into smaller pieces so a progress update can be emitted
    # after each one. Blank pieces are dropped *before* counting so the chunk
    # numbering has no gaps.
    sentences = [piece for piece in text.split(". ") if piece.strip()]
    total = len(sentences)
    audio_chunks = []

    for index, sentence in enumerate(sentences, start=1):
        # The pipeline call is iterated as a stream of (gs, ps, audio)
        # triples; only the audio part is needed here.
        for _gs, _ps, audio in pipeline(sentence, voice=voice):
            audio_chunks.append(np.asarray(audio, dtype=np.float32))

        progress = int(index / total * 100)
        yield None, None, progress, f"Processing chunk {index}/{total}..."

        # Short pause between chunks, kept from the original implementation,
        # where it was described as an anti-timeout heartbeat.
        time.sleep(0.1)

    # np.concatenate raises ValueError on an empty list, so report the
    # "nothing was generated" case explicitly instead of crashing.
    if not audio_chunks:
        yield None, None, 100, "No audio produced."
        return

    final_audio = np.concatenate(audio_chunks)

    # Reserve a unique path, then close the handle before writing: this avoids
    # leaking the file descriptor and avoids writing to a file that is still
    # open under another handle.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, final_audio, SR)

    yield (SR, final_audio), wav_path, 100, "Completed!"


# Assemble the page: text + voice inputs, then the output widgets, then the
# button that starts synthesis.
with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
    gr.Markdown(
        value="## ⚡ Kokoro TTS – Unlimited Length + Safe From Timeout + Progress Bar"
    )

    # --- inputs ---
    text = gr.Textbox(label="Input text", lines=12)
    voice = gr.Dropdown(choices=VOICES, value="af_heart", label="Voice")

    # --- outputs ---
    audio_output = gr.Audio(label="Audio Output")
    file_download = gr.File(label="Download WAV")
    progress = gr.Slider(
        minimum=0,
        maximum=100,
        step=1,
        label="Progress",
        interactive=False,
    )
    status = gr.Textbox(label="Status", interactive=False)

    run_btn = gr.Button(value="Generate")

    # The outputs are listed in the same order as the 4-tuples yielded by
    # tts_stream: (audio, wav path, progress, status).
    run_btn.click(
        tts_stream,
        [text, voice],
        [audio_output, file_download, progress, status],
    )

# Start the app as soon as the module is executed.
demo.launch()