Spaces:

heerjtdev
/

koko

Sleeping

App Files Files Community

Update app.py

by iammraat - opened 25 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+48

-202

Files changed (1) hide show

app.py +48 -202

app.py CHANGED Viewed

@@ -1,184 +1,19 @@
-# # app.py
-# import gradio as gr
-# import tempfile
-# import soundfile as sf
-# import numpy as np
-# from kokoro import KPipeline  # correct import
-# # Initialize pipeline once on startup.
-# # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
-# pipeline = KPipeline(lang_code="a")  # choose lang_code that matches the voice prefix
-# # Example voices (prefix letter indicates language family)
-# VOICES = [
-#     "af_heart", "af_bella", "af_nicole",     # a* = american-ish voices
-#     "am_adam", "am_michael",
-#     "bf_emma", "bm_george"                  # b* = british-ish voices
-# ]
-# def synthesize_to_file(text: str, voice: str = "af_heart"):
-#     """Run kokoro pipeline and write first generated audio to a temporary wav file."""
-#     text = (text or "").strip()
-#     if not text:
-#         return None, "Please enter text."
-#     try:
-#         gen = pipeline(text, voice=voice)  # generator yielding (gs, ps, audio)
-#         # take the first item produced
-#         item = next(gen, None)
-#         if item is None:
-#             return None, "Kokoro returned no audio."
-#         gs, ps, audio = item  # gs: generation metadata, ps: phonemes, audio: numpy float32
-#         # Kokoro audio sample rate is 24000
-#         sr = 24000
-#         # Ensure numpy array dtype is float32
-#         audio = np.asarray(audio, dtype=np.float32)
-#         # Write to temporary wav file and return its path (Gradio can serve file paths)
-#         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-#         sf.write(tmp.name, audio, sr, format="WAV")
-#         return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."
-#     except Exception as e:
-#         return None, f"Error: {e}"
-# with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
-#     gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
-#     with gr.Row():
-#         txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
-#         voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")
-#     out_audio = gr.Audio(label="Generated audio (wav file)")
-#     status = gr.Textbox(label="Status", interactive=False)
-#     btn = gr.Button("Generate")
-#     btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])
-# if __name__ == "__main__":
-#     demo.launch(server_name="0.0.0.0", server_port=7860)
-# import gradio as gr
-# import tempfile
-# import soundfile as sf
-# import numpy as np
-# from kokoro import KPipeline
-# pipeline = KPipeline(lang_code="a")
-# VOICES = [
-#     "af_heart", "af_bella", "af_nicole",
-#     "am_adam", "am_michael",
-#     "bf_emma", "bm_george"
-# ]
-# SR = 24000  # Kokoro standard sample rate
-# def generate_full_audio(text, voice):
-#     text = (text or "").strip()
-#     if not text:
-#         return None, None, "Please enter text."
-#     try:
-#         # Kokoro returns a generator over chunks
-#         gen = pipeline(text, voice=voice)
-#         audio_chunks = []
-#         # Collect *all* audio chunks (fixes 6-second problem)
-#         for (gs, ps, audio) in gen:
-#             audio_chunks.append(np.asarray(audio, dtype=np.float32))
-#         if not audio_chunks:
-#             return None, None, "No audio produced."
-#         # Concatenate all chunks into one continuous waveform
-#         final_audio = np.concatenate(audio_chunks)
-#         # Save to WAV for download
-#         tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
-#         sf.write(tmp.name, final_audio, SR)
-#         return (SR, final_audio), tmp.name, f"Generated {len(final_audio)/SR:.2f} seconds of audio."
-#     except Exception as e:
-#         return None, None, f"Error: {e}"
-# with gr.Blocks(title="Kokoro Unlimited TTS") as demo:
-#     gr.Markdown("## 🎧 Kokoro TTS — Unlimited Text, Downloadable Audio")
-#     with gr.Row():
-#         txt = gr.Textbox(
-#             lines=10,
-#             label="Input Text (no length limit)",
-#             placeholder="Paste long text here...",
-#         )
-#         voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
-#     audio_out = gr.Audio(label="Generated Audio")
-#     download_out = gr.File(label="Download Audio (.wav)")
-#     status = gr.Textbox(label="Status", interactive=False)
-#     generate_btn = gr.Button("Generate")
-#     generate_btn.click(
-#         fn=generate_full_audio,
-#         inputs=[txt, voice],
-#         outputs=[audio_out, download_out, status]
-#     )
-# demo.launch()
 import gradio as gr
 import tempfile
 import soundfile as sf
 import numpy as np
 from kokoro import KPipeline
 import time
 pipeline = KPipeline(lang_code="a")
@@ -190,58 +25,71 @@ VOICES = [
 SR = 24000
 def tts_stream(text, voice):
     text = (text or "").strip()
     if not text:
         yield None, None, 0, "Please enter text."
         return
-    # Split text into smaller chunks for progress-based streaming
-    # Helps prevent 60–90s stall timeout
-    sentences = text.split(". ")
     total = len(sentences)
     audio_chunks = []
     for i, sentence in enumerate(sentences):
         if not sentence.strip():
             continue
-        # Run Kokoro on the chunk
         gen = pipeline(sentence, voice=voice)
         for (gs, ps, audio) in gen:
             audio = np.asarray(audio, dtype=np.float32)
             audio_chunks.append(audio)
-        # Progress streaming to UI every chunk
         progress = int((i + 1) / total * 100)
-        yield None, None, progress, f"Processing chunk {i+1}/{total}..."
-        # HuggingFace anti-timeout heartbeat
-        time.sleep(0.1)
-    # Combine all audio into one file
-    final_audio = np.concatenate(audio_chunks)
     tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
     sf.write(tmp.name, final_audio, SR)
     yield (SR, final_audio), tmp.name, 100, "Completed!"
-with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
-    gr.Markdown("## ⚡ Kokoro TTS – Unlimited Length + Safe From Timeout + Progress Bar")
-    text = gr.Textbox(lines=12, label="Input text")
-    voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
-    audio_output = gr.Audio(label="Audio Output")
-    file_download = gr.File(label="Download WAV")
-    progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
-    status = gr.Textbox(label="Status", interactive=False)
-    run_btn = gr.Button("Generate")
     run_btn.click(
         fn=tts_stream,
@@ -249,6 +97,4 @@ with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
         outputs=[audio_output, file_download, progress, status],
     )
-demo.launch()

 import gradio as gr
 import tempfile
 import soundfile as sf
 import numpy as np
 from kokoro import KPipeline
 import time
+import nltk
+# Download the necessary NLTK data for sentence splitting
+try:
+    nltk.data.find('tokenizers/punkt_tab')
+except LookupError:
+    nltk.download('punkt_tab')
+    nltk.download('punkt')
+from nltk.tokenize import sent_tokenize
 pipeline = KPipeline(lang_code="a")
 SR = 24000
 def tts_stream(text, voice):
     """Stream synthesis progress, then the finished audio, to the Gradio UI.

     The text is split into sentences with NLTK's ``sent_tokenize`` and each
     sentence is run through the module-level ``pipeline``. One progress
     update is yielded per sentence so the client keeps receiving events
     while a long text is being processed.

     Args:
         text: Text to synthesize. ``None`` or whitespace-only input is
             rejected with a status message.
         voice: Voice identifier forwarded to ``pipeline``.

     Yields:
         ``(audio, wav_path, progress, status)`` tuples. ``audio`` and
         ``wav_path`` are ``None`` on every update except a successful final
         one, where they are ``(SR, samples)`` and the path of a temporary
         WAV file. ``progress`` is an integer percentage.
     """
     text = (text or "").strip()
     if not text:
         yield None, None, 0, "Please enter text."
         return
     # Sentence-aware splitting instead of a naive split on ". ".
     sentences = sent_tokenize(text)
     total = len(sentences)
     audio_chunks = []
     print(f"Split into {total} sentences.")
     for i, sentence in enumerate(sentences):
         if not sentence.strip():
             continue
         # The pipeline yields 3-tuples; only the audio element is kept.
         for _gs, _ps, audio in pipeline(sentence, voice=voice):
             audio_chunks.append(np.asarray(audio, dtype=np.float32))
         progress = int((i + 1) / total * 100)
         yield None, None, progress, f"Processing sentence {i+1}/{total}..."
         # Anti-timeout heartbeat
         time.sleep(0.05)
     if not audio_chunks:
         # Nothing was synthesized: report it instead of writing an empty WAV
         # and handing an empty waveform to the audio player.
         yield None, None, 100, "No audio produced."
         return
     final_audio = np.concatenate(audio_chunks)
     # Reserve a path and close the handle before soundfile reopens the file by
     # name; delete=False keeps it on disk for the download component.
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
         wav_path = tmp.name
     sf.write(wav_path, final_audio, SR)
     yield (SR, final_audio), wav_path, 100, "Completed!"
+with gr.Blocks(title="Kokoro TTS (Smart Split)") as demo:
+    gr.Markdown("## ⚡ Kokoro TTS – Smart Sentence Splitting")
+    with gr.Row():
+        with gr.Column():
+            text = gr.Textbox(lines=12, label="Input text", placeholder="Paste long text here...")
+            voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
+            run_btn = gr.Button("Generate", variant="primary")
+        with gr.Column():
+            audio_output = gr.Audio(label="Audio Output", interactive=False)
+            file_download = gr.File(label="Download WAV")
+            progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
+            status = gr.Textbox(label="Status", interactive=False)
     run_btn.click(
         fn=tts_stream,
         outputs=[audio_output, file_download, progress, status],
     )
+demo.queue().launch()