koko / app.py
heerjtdev's picture
Update app.py
3bb4d23 verified
# # app.py
# import gradio as gr
# import tempfile
# import soundfile as sf
# import numpy as np
# from kokoro import KPipeline # correct import
# # Initialize pipeline once on startup.
# # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
# pipeline = KPipeline(lang_code="a") # choose lang_code that matches the voice prefix
# # Example voices (prefix letter indicates language family)
# VOICES = [
# "af_heart", "af_bella", "af_nicole", # a* = american-ish voices
# "am_adam", "am_michael",
# "bf_emma", "bm_george" # b* = british-ish voices
# ]
# def synthesize_to_file(text: str, voice: str = "af_heart"):
# """Run kokoro pipeline and write first generated audio to a temporary wav file."""
# text = (text or "").strip()
# if not text:
# return None, "Please enter text."
# try:
# gen = pipeline(text, voice=voice) # generator yielding (gs, ps, audio)
# # take the first item produced
# item = next(gen, None)
# if item is None:
# return None, "Kokoro returned no audio."
# gs, ps, audio = item # gs: generation metadata, ps: phonemes, audio: numpy float32
# # Kokoro audio sample rate is 24000
# sr = 24000
# # Ensure numpy array dtype is float32
# audio = np.asarray(audio, dtype=np.float32)
# # Write to temporary wav file and return its path (Gradio can serve file paths)
# tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
# sf.write(tmp.name, audio, sr, format="WAV")
# return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."
# except Exception as e:
# return None, f"Error: {e}"
# with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
# gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
# with gr.Row():
# txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
# voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")
# out_audio = gr.Audio(label="Generated audio (wav file)")
# status = gr.Textbox(label="Status", interactive=False)
# btn = gr.Button("Generate")
# btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])
# if __name__ == "__main__":
# demo.launch(server_name="0.0.0.0", server_port=7860)
# import gradio as gr
# import tempfile
# import soundfile as sf
# import numpy as np
# from kokoro import KPipeline
# pipeline = KPipeline(lang_code="a")
# VOICES = [
# "af_heart", "af_bella", "af_nicole",
# "am_adam", "am_michael",
# "bf_emma", "bm_george"
# ]
# SR = 24000 # Kokoro standard sample rate
# def generate_full_audio(text, voice):
# text = (text or "").strip()
# if not text:
# return None, None, "Please enter text."
# try:
# # Kokoro returns a generator over chunks
# gen = pipeline(text, voice=voice)
# audio_chunks = []
# # Collect *all* audio chunks (fixes 6-second problem)
# for (gs, ps, audio) in gen:
# audio_chunks.append(np.asarray(audio, dtype=np.float32))
# if not audio_chunks:
# return None, None, "No audio produced."
# # Concatenate all chunks into one continuous waveform
# final_audio = np.concatenate(audio_chunks)
# # Save to WAV for download
# tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
# sf.write(tmp.name, final_audio, SR)
# return (SR, final_audio), tmp.name, f"Generated {len(final_audio)/SR:.2f} seconds of audio."
# except Exception as e:
# return None, None, f"Error: {e}"
# with gr.Blocks(title="Kokoro Unlimited TTS") as demo:
# gr.Markdown("## 🎧 Kokoro TTS — Unlimited Text, Downloadable Audio")
# with gr.Row():
# txt = gr.Textbox(
# lines=10,
# label="Input Text (no length limit)",
# placeholder="Paste long text here...",
# )
# voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
# audio_out = gr.Audio(label="Generated Audio")
# download_out = gr.File(label="Download Audio (.wav)")
# status = gr.Textbox(label="Status", interactive=False)
# generate_btn = gr.Button("Generate")
# generate_btn.click(
# fn=generate_full_audio,
# inputs=[txt, voice],
# outputs=[audio_out, download_out, status]
# )
# demo.launch()
import re
import tempfile
import time

import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline
# Sample rate in Hz used for the returned audio tuple and the written WAV file.
SR = 24000

# Voice identifiers offered in the UI dropdown.
VOICES = [
    "af_heart",
    "af_bella",
    "af_nicole",
    "am_adam",
    "am_michael",
    "bf_emma",
    "bm_george",
]

# Single Kokoro pipeline instance, created once when the module is loaded.
pipeline = KPipeline(lang_code="a")
def _split_sentences(text):
    """Split *text* into non-empty sentence-sized pieces, keeping punctuation."""
    # Split on the whitespace that follows ".", "!" or "?" so every piece
    # keeps its own terminator instead of having it stripped off.
    pieces = (piece.strip() for piece in re.split(r"(?<=[.!?])\s+", text))
    return [piece for piece in pieces if piece]


def tts_stream(text, voice):
    """Synthesize *text* with the Kokoro pipeline, streaming progress updates.

    The text is processed sentence by sentence so the UI receives a status
    update after every generated audio chunk instead of stalling until the
    whole input has been synthesized (helps prevent a 60–90s stall timeout).

    Args:
        text: Input text; ``None`` or whitespace-only input is rejected.
        voice: Voice identifier forwarded to the pipeline.

    Yields:
        ``(audio, wav_path, progress, status)`` tuples. While generating,
        ``audio`` and ``wav_path`` are ``None``. The final tuple carries
        ``(SR, waveform)``, the path of the written WAV file, ``100`` and
        ``"Completed!"``. If the input is empty or no audio was produced, a
        single tuple with an explanatory status is yielded instead.
    """
    text = (text or "").strip()
    if not text:
        yield None, None, 0, "Please enter text."
        return

    sentences = _split_sentences(text)
    total = len(sentences)
    audio_chunks = []

    for index, sentence in enumerate(sentences, start=1):
        progress = int(index / total * 100)
        # The pipeline yields (gs, ps, audio) triples; only the audio is used.
        for _gs, _ps, audio in pipeline(sentence, voice=voice):
            audio_chunks.append(np.asarray(audio, dtype=np.float32))
            # Report progress to the UI after every generated chunk.
            yield None, None, progress, f"Processing chunk {index}/{total}..."
            # Short pause kept from the original anti-timeout heartbeat.
            time.sleep(0.1)

    # np.concatenate raises ValueError on an empty list, so report it instead.
    if not audio_chunks:
        yield None, None, 0, "No audio produced."
        return

    final_audio = np.concatenate(audio_chunks)

    # Reserve a temp path and close the handle before writing, so the file
    # descriptor is not leaked and the path can be reopened on every platform.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, final_audio, SR)

    yield (SR, final_audio), wav_path, 100, "Completed!"
# Build the Gradio interface: text and voice inputs, audio/file/progress/status
# outputs, and a button that runs the streaming generator tts_stream.
with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
    gr.Markdown("## ⚡ Kokoro TTS – Unlimited Length + Safe From Timeout + Progress Bar")

    text = gr.Textbox(lines=12, label="Input text")
    voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")

    audio_output = gr.Audio(label="Audio Output")
    file_download = gr.File(label="Download WAV")
    progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
    status = gr.Textbox(label="Status", interactive=False)

    run_btn = gr.Button("Generate")
    # Each tuple yielded by tts_stream maps positionally onto these outputs.
    run_btn.click(
        fn=tts_stream,
        inputs=[text, voice],
        outputs=[audio_output, file_download, progress, status],
    )

# Only start the server when executed as a script, so importing this module
# (e.g. for tests or tooling) does not block on a running web server.
if __name__ == "__main__":
    demo.launch()