# Kokoro_TTS / app.py
# Author: codewithjarair
# Commit: "Update app.py" (22bf7a3, verified)
import gradio as gr
import os
import tempfile
from kokoro_engine import KokoroEngine
from processor import ScriptProcessor
import numpy as np
# Initialize components once at import time: a single shared synthesis engine,
# and a script processor constructed around that same engine instance.
engine = KokoroEngine()
processor = ScriptProcessor(engine)
def tts_process(text, voice, speed, lang, long_script_mode):
    """Synthesize *text* to a WAV file and return the file's path.

    Args:
        text: Script to speak.
        voice: Voice identifier chosen in the UI.
        speed: Pacing multiplier chosen in the UI.
        lang: Language code chosen in the UI.
        long_script_mode: When truthy, synthesize through
            ``processor.process_long_script``; otherwise call
            ``engine.generate`` directly.

    Returns:
        Path of a temporary ``.wav`` file containing the generated audio.

    Raises:
        gr.Error: If *text* is empty or synthesis/saving fails. The output
            component expects a file path, so failures are surfaced as a
            Gradio error instead of being returned as a string that would be
            mistaken for a path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        if long_script_mode:
            audio, sr = processor.process_long_script(text, voice, speed, lang)
        else:
            audio, sr = engine.generate(text, voice, speed, lang)

        # Reserve a unique path, then close the handle *before* writing:
        # an open NamedTemporaryFile cannot be reopened by name on Windows.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            wav_path = tmp.name

        try:
            processor.save_audio(audio, sr, wav_path)
        except Exception:
            # delete=False means nobody else cleans this up; don't leave an
            # empty or partial file behind when saving fails.
            if os.path.exists(wav_path):
                os.remove(wav_path)
            raise

        return wav_path
    except Exception as e:
        # Top-level UI boundary: convert any failure into a user-visible error.
        raise gr.Error(str(e)) from e
def clone_process(audio_ref):
    """Return a status message for a voice-cloning request.

    When a reference is supplied it is passed to
    ``engine.clone_voice_placeholder`` and the returned id is echoed back in
    the message; when *audio_ref* is ``None`` the user is asked to upload one.
    """
    if audio_ref is not None:
        # Cloning itself is delegated to the engine's placeholder hook.
        voice_id = engine.clone_voice_placeholder(audio_ref)
        return f"Voice cloned successfully! Reference ID: {voice_id}. You can now use this voice (currently defaults to {voice_id})."

    return "Please upload an audio file for cloning."
# Collapse the per-category voice lists into one flat list for the dropdown.
# Only the lists themselves are needed, so the category keys are skipped.
all_voices = []
for category_voices in engine.voices.values():
    all_voices.extend(category_voices)
# Custom stylesheet for the app; it is handed to Gradio via
# demo.launch(css=custom_css) at the bottom of this file.
# The .container, .header and .input-group selectors match the elem_classes
# set on the layout columns of the Blocks UI. .footer and button.primary are
# not assigned anywhere in this file, so they presumably target Gradio's own
# markup — TODO confirm they still match the installed Gradio version.
custom_css = """
.container {
max-width: 900px !important;
margin: auto !important;
padding-top: 2rem !important;
}
.header {
text-align: center;
margin-bottom: 2rem;
}
.header h1 {
font-size: 3rem !important;
font-weight: 800 !important;
background: linear-gradient(90deg, #ff00cc, #3333ff);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
margin-bottom: 0.5rem !important;
}
.header p {
font-size: 1.1rem !important;
color: #888;
}
.input-group {
border-radius: 12px !important;
border: 1px solid #333 !important;
background: #111 !important;
padding: 1rem !important;
margin-bottom: 1.5rem !important;
}
.footer {
visibility: hidden;
}
button.primary {
background: linear-gradient(90deg, #ff00cc, #3333ff) !important;
border: none !important;
font-weight: bold !important;
border-radius: 8px !important;
}
button.primary:hover {
transform: translateY(-2px);
box-shadow: 0 4px 15px rgba(255, 0, 204, 0.4);
}
"""
# Build the single-page UI: a header, an input panel (script text, voice,
# speed, advanced engine options), a generate button and an audio player.
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting below (in particular whether the button and output column sit
# inside the "input-group" column) is reconstructed — confirm against the
# rendered layout.
with gr.Blocks(title="Kokoro TTS Premium") as demo:
    # Outer wrapper styled by the ".container" rule in custom_css.
    with gr.Column(elem_classes="container"):
        # Title and tagline, styled by the ".header" rules.
        with gr.Column(elem_classes="header"):
            gr.Markdown("# 🌸 Kokoro TTS")
            gr.Markdown("High-fidelity neural speech synthesis powered by Kokoro-82M")
        # Input panel, styled by the ".input-group" rule.
        with gr.Column(elem_classes="input-group"):
            text_input = gr.Textbox(
                label="Script Content",
                placeholder="Paste your story, script, or text here...",
                lines=10,
                elem_id="text-input"
            )
            with gr.Row():
                # Choices come from the flattened engine.voices list.
                # NOTE(review): the default "af_heart" is assumed to be
                # present in all_voices — verify against the engine.
                voice_select = gr.Dropdown(
                    choices=all_voices,
                    value="af_heart",
                    label="Voice Archetype",
                    scale=2
                )
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="Pacing (Speed)",
                    scale=1
                )
            # Advanced options, collapsed by default.
            with gr.Accordion("⚙️ Engine Configurations", open=False):
                with gr.Row():
                    # (display label, value) pairs; the value is what gets
                    # passed to tts_process as `lang`.
                    lang_select = gr.Dropdown(
                        choices=[
                            ("🇺🇸 English (US)", "en-us"),
                            ("🇬🇧 English (UK)", "en-gb"),
                            ("🇨🇳 Chinese", "zh"),
                            ("🇮🇳 Hindi", "hi"),
                            ("🇯🇵 Japanese", "ja"),
                            ("🇪🇸 Spanish", "es"),
                            ("🇫🇷 French", "fr"),
                            ("🇮🇹 Italian", "it"),
                            ("🇵🇹 Portuguese", "pt")
                        ],
                        value="en-us",
                        label="Linguistic Context"
                    )
                    # Passed to tts_process as `long_script_mode`, which
                    # selects processor.process_long_script over
                    # engine.generate.
                    long_script_toggle = gr.Checkbox(
                        label="Optimize for Long Duration (Safe Chunking)",
                        value=False
                    )
        generate_btn = gr.Button("⚡ Generate Neural Audio", variant="primary", size="lg")
        with gr.Column(variant="compact"):
            # type="filepath": the handler supplies a path to an audio file.
            audio_output = gr.Audio(
                label="Master Audio Output",
                type="filepath"
            )
    # Wire the button to the synthesis handler; the input order must match
    # the parameter order of tts_process(text, voice, speed, lang,
    # long_script_mode).
    generate_btn.click(
        tts_process,
        inputs=[text_input, voice_select, speed_slider, lang_select, long_script_toggle],
        outputs=audio_output
    )
# Start the Gradio server only when this file is run as a script, not when
# it is imported.
if __name__ == "__main__":
    # Server-side rendering is explicitly turned off and the custom
    # stylesheet is supplied at launch time.
    # NOTE(review): whether launch() accepts `css` depends on the installed
    # Gradio version — confirm against the pinned version.
    demo.launch(ssr_mode=False, css=custom_css)