Spaces:

heerjtdev
/

koko

Sleeping

App Files Files Community

koko / app.py

iammraat

Update app.py

106ac57 verified about 1 month ago

raw

history blame

3.01 kB

	import gradio as gr
	import tempfile
	import soundfile as sf
	import numpy as np
	from kokoro import KPipeline
	import time
	import nltk

	# Sentence splitting below relies on NLTK tokenizer data. Probe for the
	# "punkt_tab" resource and request downloads only when the lookup fails.
	try:
	    nltk.data.find("tokenizers/punkt_tab")
	except LookupError:
	    # Both resource names are requested; presumably this keeps old and new
	    # NLTK releases working -- TODO confirm which one the runtime needs.
	    for _resource in ("punkt_tab", "punkt"):
	        nltk.download(_resource)

	from nltk.tokenize import sent_tokenize

	# Build the Kokoro pipeline once at import time; tts_stream() reuses this
	# single module-level instance for every request.
	# NOTE(review): lang_code "a" presumably selects American English -- confirm
	# against the Kokoro documentation.
	pipeline = KPipeline(lang_code="a")

	# Voice identifiers offered in the "Voice" dropdown and passed through to
	# the pipeline unchanged.
	VOICES = [
	    "af_heart",
	    "af_bella",
	    "af_nicole",
	    "am_adam",
	    "am_michael",
	    "bf_emma",
	    "bm_george",
	]

	# Sample rate handed to the WAV writer and to the audio player.
	SR = 24_000

	def tts_stream(text, voice):
	    """Synthesize *text* sentence by sentence, streaming progress updates.

	    This is a generator. Every item it yields is a 4-tuple
	    ``(audio, wav_path, progress, status)``:

	    * empty input:   ``(None, None, 0, "Please enter text.")``
	    * while working: ``(None, None, percent, "Processing sentence i/n...")``
	    * no audio:      ``(None, None, 100, "No audio was generated.")``
	    * on success:    ``((SR, samples), wav_path, 100, "Completed!")``

	    Args:
	        text: Raw input text. ``None`` and whitespace-only strings are
	            treated as empty.
	        voice: Voice identifier forwarded to the module-level ``pipeline``.

	    Yields:
	        Tuples as described above. ``samples`` is a float32 NumPy array and
	        ``wav_path`` is the path of a temporary ``.wav`` file that is not
	        deleted automatically.
	    """
	    text = (text or "").strip()
	    if not text:
	        yield None, None, 0, "Please enter text."
	        return

	    # Let NLTK find the sentence boundaries instead of splitting on
	    # punctuation by hand.
	    sentences = sent_tokenize(text)
	    total = len(sentences)
	    print(f"Split into {total} sentences.")

	    audio_chunks = []
	    for position, sentence in enumerate(sentences, start=1):
	        if not sentence.strip():
	            continue

	        # The pipeline call is iterated for 3-tuples; only the last element
	        # (the audio) is kept, normalised to a float32 array.
	        for _gs, _ps, audio in pipeline(sentence, voice=voice):
	            audio_chunks.append(np.asarray(audio, dtype=np.float32))

	        # One progress update per processed sentence.
	        progress = int(position / total * 100)
	        yield None, None, progress, f"Processing sentence {position}/{total}..."

	        # Anti-timeout heartbeat
	        time.sleep(0.05)

	    if audio_chunks:
	        final_audio = np.concatenate(audio_chunks)
	    else:
	        final_audio = np.array([], dtype=np.float32)

	    # Nothing to play or download: finish with a status message instead of
	    # writing an empty WAV and handing a zero-length array to the player.
	    if final_audio.size == 0:
	        yield None, None, 100, "No audio was generated."
	        return

	    # Reserve a temp file name and close the handle right away so it is not
	    # leaked and the file is not held open while it is rewritten by name.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        wav_path = tmp.name
	    sf.write(wav_path, final_audio, SR)

	    # Final update: audio for the player plus the file for download.
	    yield (SR, final_audio), wav_path, 100, "Completed!"


	# Assemble the interface: one row holding an input column and a results
	# column, with the button wired to the streaming generator.
	with gr.Blocks(title="Kokoro TTS (Smart Split)") as demo:
	    gr.Markdown("## ⚡ Kokoro TTS – Smart Sentence Splitting")

	    with gr.Row():
	        # First column: text to speak, voice choice, and the trigger.
	        with gr.Column():
	            text = gr.Textbox(
	                label="Input text",
	                placeholder="Paste long text here...",
	                lines=12,
	            )
	            voice = gr.Dropdown(choices=VOICES, value="af_heart", label="Voice")
	            run_btn = gr.Button(value="Generate", variant="primary")

	        # Second column: playback, download, and read-only progress widgets.
	        with gr.Column():
	            audio_output = gr.Audio(label="Audio Output", interactive=False)
	            file_download = gr.File(label="Download WAV")
	            progress = gr.Slider(
	                minimum=0,
	                maximum=100,
	                step=1,
	                label="Progress",
	                interactive=False,
	            )
	            status = gr.Textbox(label="Status", interactive=False)

	    # Each tuple yielded by tts_stream fills these four outputs in order.
	    run_btn.click(
	        tts_stream,
	        inputs=[text, voice],
	        outputs=[audio_output, file_download, progress, status],
	    )

	# Enable the request queue, then start serving.
	demo.queue().launch()