Spaces:

heerjtdev
/

koko

Sleeping

App Files Files Community

heerjtdev committed on Dec 4, 2025

Commit

1e48f34

verified ·

1 Parent(s): dff9996

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -37

app.py CHANGED Viewed

@@ -1,54 +1,63 @@
 import gradio as gr
-from kokoro import Kokoro
-import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print("🔄 Loading Kokoro model...")
-model = Kokoro(device=device)
-print("✅ Kokoro loaded!")
-def generate_audio(text, voice):
-    if not text.strip():
-        return None, "Please enter some text."
     try:
-        print("🎤 Generating...")
-        audio = model.tts(text, voice=voice)     # numpy float32 array
-        return (24000, audio), "Success!"
-    except Exception as e:
-        return None, f"Error: {str(e)}"
-voices = [
-    "af_heart",
-    "af_bella",
-    "af_nicole",
-    "am_adam",
-    "am_michael",
-    "bf_emma",
-    "bm_george"
-]
-with gr.Blocks(title="Kokoro-TTS") as demo:
-    gr.Markdown("## 🎧 Kokoro Text → Speech")
-    with gr.Row():
-        txt = gr.Textbox(lines=4, label="Input Text")
-        voice_select = gr.Dropdown(voices, value="af_heart", label="Voice")
-    audio_out = gr.Audio(label="Generated Audio")
-    status = gr.Textbox(label="Status")
-    btn = gr.Button("Generate Audio")
-    btn.click(
-        fn=generate_audio,
-        inputs=[txt, voice_select],
-        outputs=[audio_out, status]
-    )
-demo.launch()

+# app.py
 import gradio as gr
+import tempfile
+import soundfile as sf
+import numpy as np
+from kokoro import KPipeline  # correct import
+# Initialize pipeline once on startup.
+# lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
+pipeline = KPipeline(lang_code="a")  # choose lang_code that matches the voice prefix
+# Example voices (prefix letter indicates language family)
+VOICES = [
+    "af_heart", "af_bella", "af_nicole",     # a* = american-ish voices
+    "am_adam", "am_michael",
+    "bf_emma", "bm_george"                  # b* = british-ish voices
+]
+def synthesize_to_file(text: str, voice: str = "af_heart"):
+    """Run kokoro pipeline and write first generated audio to a temporary wav file."""
+    text = (text or "").strip()
+    if not text:
+        return None, "Please enter text."
     try:
+        gen = pipeline(text, voice=voice)  # generator yielding (gs, ps, audio)
+        # take the first item produced
+        item = next(gen, None)
+        if item is None:
+            return None, "Kokoro returned no audio."
+        gs, ps, audio = item  # gs: generation metadata, ps: phonemes, audio: numpy float32
+        # Kokoro audio sample rate is 24000
+        sr = 24000
+        # Ensure numpy array dtype is float32
+        audio = np.asarray(audio, dtype=np.float32)
+        # Write to temporary wav file and return its path (Gradio can serve file paths)
+        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+        sf.write(tmp.name, audio, sr, format="WAV")
+        return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."
+    except Exception as e:
+        return None, f"Error: {e}"
+with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
+    gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
+    with gr.Row():
+        txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
+        voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")
+    out_audio = gr.Audio(label="Generated audio (wav file)")
+    status = gr.Textbox(label="Status", interactive=False)
+    btn = gr.Button("Generate")
+    btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)