Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,54 +1,63 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
-
|
| 3 |
-
import
|
|
|
|
| 4 |
|
|
|
|
| 5 |
|
| 6 |
-
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
-
def
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
| 16 |
|
| 17 |
try:
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"af_bella",
|
| 28 |
-
"af_nicole",
|
| 29 |
-
"am_adam",
|
| 30 |
-
"am_michael",
|
| 31 |
-
"bf_emma",
|
| 32 |
-
"bm_george"
|
| 33 |
-
]
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
|
| 39 |
-
with gr.Row():
|
| 40 |
-
txt = gr.Textbox(lines=4, label="Input Text")
|
| 41 |
-
voice_select = gr.Dropdown(voices, value="af_heart", label="Voice")
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
-
|
|
|
|
| 47 |
|
| 48 |
-
btn.
|
| 49 |
-
|
| 50 |
-
inputs=[txt, voice_select],
|
| 51 |
-
outputs=[audio_out, status]
|
| 52 |
-
)
|
| 53 |
|
| 54 |
-
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
import gradio as gr
|
| 3 |
+
import tempfile
|
| 4 |
+
import soundfile as sf
|
| 5 |
+
import numpy as np
|
| 6 |
|
| 7 |
+
from kokoro import KPipeline # correct import
|
| 8 |
|
| 9 |
+
# Build the Kokoro pipeline a single time, at import, so requests reuse it.
# lang_code 'a' selects American English and 'b' British English; the Kokoro
# README lists the remaining codes.
# NOTE(review): British voices are offered below while this one pipeline is
# built with lang_code="a" — confirm that mixing the two is intended.
pipeline = KPipeline(lang_code="a")

# Voices shown in the UI; the leading letter of each id marks its language
# family (a* = american-ish, b* = british-ish).
VOICES = [
    "af_heart",
    "af_bella",
    "af_nicole",
    "am_adam",
    "am_michael",
    "bf_emma",
    "bm_george",
]
|
| 19 |
|
| 20 |
|
| 21 |
+
def synthesize_to_file(text: str, voice: str = "af_heart"):
    """Synthesize *text* with Kokoro and save the audio to a temporary WAV file.

    Args:
        text: Text to speak. ``None`` and whitespace-only input are rejected.
        voice: Kokoro voice identifier, passed straight to the pipeline.

    Returns:
        A ``(path, status)`` tuple. ``path`` is the path of the written WAV
        file on success and ``None`` otherwise; ``status`` is a message for
        the user. Failures are reported through ``status`` instead of being
        raised, so the UI callback always gets a displayable result.
    """
    text = (text or "").strip()
    if not text:
        return None, "Please enter text."

    try:
        # The pipeline is a generator of (gs, ps, audio) items. Consume all of
        # them instead of only the first, so longer input is not cut short.
        # NOTE(review): assumes one item per text segment, as in the Kokoro
        # README usage example — confirm against the installed version.
        chunks = [
            np.asarray(audio, dtype=np.float32)
            for _gs, _ps, audio in pipeline(text, voice=voice)
            if audio is not None
        ]
        if not chunks:
            return None, "Kokoro returned no audio."

        audio = np.concatenate(chunks)
        # Kokoro audio sample rate is 24000
        sr = 24000

        # Reserve a unique path, then close the handle before soundfile opens
        # the file by name. delete=False keeps the file on disk so Gradio can
        # serve it after this function returns.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            wav_path = tmp.name
        sf.write(wav_path, audio, sr, format="WAV")
        return wav_path, f"Success — generated {len(audio)} samples @ {sr}Hz."

    except Exception as e:
        # UI boundary: surface any failure as a status message, not a crash.
        return None, f"Error: {e}"
|
| 48 |
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
# Gradio UI: a text box and voice picker feed synthesize_to_file, whose
# (audio path, status message) result fills the two output widgets.
with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
    # Heading restored from mis-encoded characters (dash and arrow).
    gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
    with gr.Row():
        txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
        voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")

    out_audio = gr.Audio(label="Generated audio (wav file)")
    status = gr.Textbox(label="Status", interactive=False)

    btn = gr.Button("Generate")
    btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
+
if __name__ == "__main__":
    # Start the app only when run as a script, bound to 0.0.0.0:7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|