Spaces:

garyuzair
/

TTS

Runtime error

App Files Files Community

TTS / app.py

garyuzair

Upload 2 files

979c57e verified 8 months ago

raw

history blame contribute delete

6.02 kB

	import gradio as gr
	import os
	import numpy as np
	from tts_core import KokoroTTS
	import time

	# Initialize the TTS engine once at import time. This single module-level
	# instance is shared by text_to_speech() and by the voice list built below.
	tts_engine = KokoroTTS()

	# Custom stylesheet handed to gr.Blocks(css=...) in create_demo().
	# The class names below are the ones referenced there, either via
	# elem_classes on components (settings-block, voice-selector,
	# advanced-settings, output-block) or via class attributes in the raw
	# HTML header/footer (container, title, subtitle, footer).
	css = """
	.container {
	max-width: 900px;
	margin: auto;
	padding-top: 1.5rem;
	}
	.title {
	text-align: center;
	color: #2C3E50;
	}
	.subtitle {
	text-align: center;
	color: #7F8C8D;
	margin-bottom: 2rem;
	}
	.footer {
	text-align: center;
	margin-top: 2rem;
	color: #7F8C8D;
	font-size: 0.9rem;
	}
	.settings-block {
	padding: 1rem;
	border-radius: 8px;
	background-color: #f8f9fa;
	margin-bottom: 1rem;
	}
	.voice-selector {
	margin-bottom: 1rem;
	}
	.advanced-settings {
	margin-top: 1rem;
	}
	.output-block {
	margin-top: 1.5rem;
	}
	"""

	# Dropdown choices as (display name, voice id) pairs, derived from the
	# engine's voice-id -> display-name mapping.
	voice_options = [
	    (display_name, voice_id)
	    for voice_id, display_name in tts_engine.us_english_voices.items()
	]

	def text_to_speech(text, voice, speed, add_pronunciation_guide):
	    """Convert text to speech using the selected voice and settings.

	    Args:
	        text: Text to synthesize. ``None``, empty, or whitespace-only
	            input is rejected with a user-facing message.
	        voice: Voice id forwarded to ``tts_engine.generate_speech``.
	        speed: Speech speed; converted to ``float`` before being
	            forwarded to the engine.
	        add_pronunciation_guide: When truthy, every occurrence of
	            "Kokoro" in the text is replaced with an inline
	            pronunciation markup before synthesis.

	    Returns:
	        A 2-tuple ``(audio, info)``. On success ``audio`` is
	        ``(sample_rate, audio_data)`` and ``info`` is a status line with
	        the audio duration, generation time, and voice name. On empty
	        input or on any engine failure ``audio`` is ``None`` and ``info``
	        is the message to display.
	    """
	    # The textbox may deliver None (e.g. a cleared component), not only "".
	    if not text or not text.strip():
	        return None, "Please enter some text to convert to speech."

	    # Add pronunciation guide if requested
	    if add_pronunciation_guide:
	        # Add simple pronunciation guide for demonstration
	        text = text.replace("Kokoro", "[Kokoro](/kˈOkəɹO/)")

	    # Generate speech. This is the UI boundary, so any failure is reported
	    # to the user as a message instead of propagating.
	    try:
	        # perf_counter is monotonic, so the elapsed time cannot be skewed
	        # by wall-clock adjustments.
	        start_time = time.perf_counter()
	        _output_file, sample_rate, audio_data = tts_engine.generate_speech(
	            text=text,
	            voice=voice,
	            speed=float(speed),
	        )
	        generation_time = time.perf_counter() - start_time

	        # Fall back to the raw voice id so that an unknown id cannot turn
	        # a successful generation into an error message.
	        voice_name = tts_engine.us_english_voices.get(voice, voice)
	        duration = len(audio_data) / sample_rate

	        # Create info message
	        info = f"✅ Generated audio ({duration:.2f}s) in {generation_time:.2f}s using voice: {voice_name}"

	        return (sample_rate, audio_data), info
	    except Exception as e:
	        return None, f"❌ Error generating speech: {str(e)}"

	def create_demo():
	    """Create the Gradio interface.

	    Builds a ``gr.Blocks`` app with a text box, voice dropdown, speed
	    slider and pronunciation checkbox on the left, and the audio player
	    plus a status line on the right. The "Generate Speech" button and
	    the example rows both call ``text_to_speech``.

	    Returns:
	        The assembled ``gr.Blocks`` instance (not yet launched).
	    """

	    with gr.Blocks(css=css) as demo:
	        gr.HTML("""
	<div class="container">
	<h1 class="title">Kokoro82m Text-to-Speech</h1>
	<p class="subtitle">A CPU-optimized TTS application with all US English voices</p>
	</div>
	""")

	        with gr.Row():
	            with gr.Column(scale=2):
	                # Text input area
	                text_input = gr.Textbox(
	                    label="Text to convert to speech",
	                    placeholder="Enter text here...",
	                    lines=10,
	                    value="Kokoro is an open-weight TTS model with 82 million parameters. Despite its lightweight architecture, it delivers comparable quality to larger models while being significantly faster and more cost-efficient."
	                )

	                # Settings. gr.Box was removed in Gradio 4; gr.Group is
	                # the supported container and also exists in 3.x.
	                with gr.Group(elem_classes=["settings-block"]):
	                    gr.Markdown("### Voice Settings")

	                    # Voice selection; choices are (label, value) pairs,
	                    # so the default is given as a voice id.
	                    voice_selector = gr.Dropdown(
	                        choices=voice_options,
	                        value="af_heart",  # Default voice
	                        label="Select Voice",
	                        elem_classes=["voice-selector"]
	                    )

	                    with gr.Accordion("Advanced Settings", open=False, elem_classes=["advanced-settings"]):
	                        speed_slider = gr.Slider(
	                            minimum=0.5,
	                            maximum=2.0,
	                            value=1.0,
	                            step=0.05,
	                            label="Speech Speed"
	                        )

	                        pronunciation_checkbox = gr.Checkbox(
	                            label="Add pronunciation guides for better quality",
	                            value=False
	                        )

	                # Generate button
	                generate_btn = gr.Button("Generate Speech", variant="primary")

	            with gr.Column(scale=1):
	                # Output audio
	                audio_output = gr.Audio(
	                    label="Generated Speech",
	                    type="numpy",
	                    elem_classes=["output-block"]
	                )

	                # Info message
	                info_message = gr.Markdown("")

	        # Set up event handlers
	        generate_btn.click(
	            fn=text_to_speech,
	            inputs=[text_input, voice_selector, speed_slider, pronunciation_checkbox],
	            outputs=[audio_output, info_message]
	        )

	        # Examples: each row is [text, voice id, speed, add pronunciation guide]
	        examples = [
	            ["Hello, my name is Kokoro. I am a text-to-speech model with 82 million parameters.", "af_heart", 1.0, True],
	            ["The quick brown fox jumps over the lazy dog. This is a sample of my voice.", "af_bella", 1.0, False],
	            ["Welcome to the world of artificial intelligence and text-to-speech technology.", "am_fenrir", 1.0, False],
	            ["This is an example of a slower speaking rate for more deliberate speech.", "af_nicole", 0.8, False],
	            ["This is an example of a faster speaking rate for more energetic speech.", "am_michael", 1.3, False]
	        ]

	        gr.Examples(
	            examples=examples,
	            inputs=[text_input, voice_selector, speed_slider, pronunciation_checkbox],
	            outputs=[audio_output, info_message],
	            fn=text_to_speech,
	            cache_examples=True
	        )

	        gr.HTML("""
	<div class="footer">
	<p>Powered by Kokoro82m TTS - An open-weight TTS model with 82 million parameters</p>
	<p>CPU-optimized for efficient inference on limited resources</p>
	</div>
	""")

	    return demo

	# Build the interface at import time so the module exposes a ready `demo`.
	demo = create_demo()

	# For Hugging Face Spaces: start the server only when run as a script.
	if __name__ == "__main__":
	    demo.launch()