Spaces:

codewithjarair
/

Kokoro_TTS

Running

App Files Files Community

Kokoro_TTS / app.py

codewithjarair

Update app.py

22bf7a3 verified 2 days ago

raw

history blame contribute delete

5.01 kB

	import gradio as gr
	import os
	import tempfile
	from kokoro_engine import KokoroEngine
	from processor import ScriptProcessor
	import numpy as np

	# Initialize components once at import time; the request handlers below
	# reuse these module-level instances. The processor is constructed around
	# the same engine instance that handles direct (non-chunked) generation.
	engine = KokoroEngine()
	processor = ScriptProcessor(engine)

	def tts_process(text, voice, speed, lang, long_script_mode):
	    """Synthesize speech for ``text`` and return the path of a WAV file.

	    This is the click handler for the generate button; its return value
	    feeds an audio output configured with ``type="filepath"``.

	    Args:
	        text: Script content to synthesize.
	        voice: Voice identifier selected in the UI.
	        speed: Pacing multiplier selected in the UI.
	        lang: Language code selected in the UI.
	        long_script_mode: When truthy, route the text through
	            ``processor.process_long_script`` instead of calling
	            ``engine.generate`` directly.

	    Returns:
	        Path (str) of a temporary ``.wav`` file holding the audio.

	    Raises:
	        gr.Error: If the text is empty, or if synthesis or saving fails.
	            The output component expects a file path, so an error must be
	            raised rather than returned as a string (a returned message
	            would be treated as a path to a non-existent file).
	    """
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    tmp_path = None
	    try:
	        if long_script_mode:
	            audio, sr = processor.process_long_script(text, voice, speed, lang)
	        else:
	            audio, sr = engine.generate(text, voice, speed, lang)

	        # Reserve a unique file name, then close the handle *before*
	        # writing: re-opening a still-open NamedTemporaryFile by name is
	        # not portable (it fails on Windows).
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
	            tmp_path = tmp.name
	        processor.save_audio(audio, sr, tmp_path)
	        return tmp_path
	    except Exception as e:
	        # Do not leave an orphaned temp file behind when saving failed.
	        if tmp_path is not None:
	            try:
	                os.remove(tmp_path)
	            except OSError:
	                pass
	        # Surface the failure in the UI instead of returning a bogus "path".
	        raise gr.Error(f"Error: {e}") from e

	def clone_process(audio_ref):
	    """Return a status message for a voice-cloning request.

	    Args:
	        audio_ref: Reference audio supplied by the user, or ``None`` when
	            nothing was uploaded.

	    Returns:
	        A prompt to upload audio when ``audio_ref`` is ``None``; otherwise a
	        success message embedding the ID returned by
	        ``engine.clone_voice_placeholder``.
	    """
	    if audio_ref is not None:
	        # Placeholder for actual cloning logic: the engine only hands back
	        # a reference ID at this point.
	        voice_id = engine.clone_voice_placeholder(audio_ref)
	        return (
	            f"Voice cloned successfully! Reference ID: {voice_id}. "
	            f"You can now use this voice (currently defaults to {voice_id})."
	        )
	    return "Please upload an audio file for cloning."

	# Flatten the per-category voice lists into a single list for the dropdown.
	# Only the lists are needed, so iterate the mapping's values and skip the
	# category keys.
	all_voices = [
	    voice
	    for category_voices in engine.voices.values()
	    for voice in category_voices
	]

	# Stylesheet handed to Gradio at launch time. The selectors target the
	# elem_classes used in the layout below ("container", "header",
	# "input-group") plus the primary button. The string is consumed at
	# runtime, so treat every character of it as behavior.
	custom_css = """
	.container {
	max-width: 900px !important;
	margin: auto !important;
	padding-top: 2rem !important;
	}
	.header {
	text-align: center;
	margin-bottom: 2rem;
	}
	.header h1 {
	font-size: 3rem !important;
	font-weight: 800 !important;
	background: linear-gradient(90deg, #ff00cc, #3333ff);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	margin-bottom: 0.5rem !important;
	}
	.header p {
	font-size: 1.1rem !important;
	color: #888;
	}
	.input-group {
	border-radius: 12px !important;
	border: 1px solid #333 !important;
	background: #111 !important;
	padding: 1rem !important;
	margin-bottom: 1.5rem !important;
	}
	.footer {
	visibility: hidden;
	}
	button.primary {
	background: linear-gradient(90deg, #ff00cc, #3333ff) !important;
	border: none !important;
	font-weight: bold !important;
	border-radius: 8px !important;
	}
	button.primary:hover {
	transform: translateY(-2px);
	box-shadow: 0 4px 15px rgba(255, 0, 204, 0.4);
	}
	"""

	# Assemble the Gradio interface and bind it to `demo`.
	# NOTE(review): this copy of the file has lost its indentation, so the
	# nesting of the layout containers below cannot be confirmed from here —
	# check the original app.py before restructuring anything.
	with gr.Blocks(title="Kokoro TTS Premium") as demo:
	with gr.Column(elem_classes="container"):
	with gr.Column(elem_classes="header"):
	gr.Markdown("# 🌸 Kokoro TTS")
	gr.Markdown("High-fidelity neural speech synthesis powered by Kokoro-82M")

	with gr.Column(elem_classes="input-group"):
	# Script text; supplied to tts_process as its first argument.
	text_input = gr.Textbox(
	label="Script Content",
	placeholder="Paste your story, script, or text here...",
	lines=10,
	elem_id="text-input"
	)

	with gr.Row():
	# Choices are the flattened all_voices list. The preselected value
	# "af_heart" is presumably one of them — confirm against engine.voices.
	voice_select = gr.Dropdown(
	choices=all_voices,
	value="af_heart",
	label="Voice Archetype",
	scale=2
	)
	speed_slider = gr.Slider(
	minimum=0.5,
	maximum=2.0,
	value=1.0,
	step=0.1,
	label="Pacing (Speed)",
	scale=1
	)

	with gr.Accordion("⚙️ Engine Configurations", open=False):
	with gr.Row():
	# Each choice is a (label, code) tuple; per Gradio's Dropdown
	# convention the second element is the value passed to the handler.
	lang_select = gr.Dropdown(
	choices=[
	("🇺🇸 English (US)", "en-us"),
	("🇬🇧 English (UK)", "en-gb"),
	("🇨🇳 Chinese", "zh"),
	("🇮🇳 Hindi", "hi"),
	("🇯🇵 Japanese", "ja"),
	("🇪🇸 Spanish", "es"),
	("🇫🇷 French", "fr"),
	("🇮🇹 Italian", "it"),
	("🇵🇹 Portuguese", "pt")
	],
	value="en-us",
	label="Linguistic Context"
	)
	# Selects the chunked long-script path inside tts_process.
	long_script_toggle = gr.Checkbox(
	label="Optimize for Long Duration (Safe Chunking)",
	value=False
	)

	generate_btn = gr.Button("⚡ Generate Neural Audio", variant="primary", size="lg")

	with gr.Column(variant="compact"):
	# type="filepath": the handler is expected to return a path to an
	# audio file rather than raw samples.
	audio_output = gr.Audio(
	label="Master Audio Output",
	type="filepath"
	)

	# The order of `inputs` must match tts_process's positional parameters:
	# (text, voice, speed, lang, long_script_mode).
	generate_btn.click(
	tts_process,
	inputs=[text_input, voice_select, speed_slider, lang_select, long_script_toggle],
	outputs=audio_output
	)

	# Start the app only when this file is run as a script, not on import.
	# The custom stylesheet is supplied here, at launch time, and server-side
	# rendering is explicitly turned off.
	if __name__ == "__main__":
	demo.launch(ssr_mode=False, css=custom_css)