Spaces:

heerjtdev
/

koko

Sleeping

App Files Files Community

koko / app.py

heerjtdev

Update app.py

3bb4d23 verified 24 days ago

raw

history blame contribute delete

6.68 kB

	# # app.py
	# import gradio as gr
	# import tempfile
	# import soundfile as sf
	# import numpy as np

	# from kokoro import KPipeline # correct import

	# # Initialize pipeline once on startup.
	# # lang_code: 'a' => American English, 'b' => British English, etc. See README for mapping.
	# pipeline = KPipeline(lang_code="a") # choose lang_code that matches the voice prefix

	# # Example voices (prefix letter indicates language family)
	# VOICES = [
	# "af_heart", "af_bella", "af_nicole", # a* = american-ish voices
	# "am_adam", "am_michael",
	# "bf_emma", "bm_george" # b* = british-ish voices
	# ]


	# def synthesize_to_file(text: str, voice: str = "af_heart"):
	# """Run kokoro pipeline and write first generated audio to a temporary wav file."""
	# text = (text or "").strip()
	# if not text:
	# return None, "Please enter text."

	# try:
	# gen = pipeline(text, voice=voice) # generator yielding (gs, ps, audio)
	# # take the first item produced
	# item = next(gen, None)
	# if item is None:
	# return None, "Kokoro returned no audio."

	# gs, ps, audio = item # gs: generation metadata, ps: phonemes, audio: numpy float32
	# # Kokoro audio sample rate is 24000
	# sr = 24000

	# # Ensure numpy array dtype is float32
	# audio = np.asarray(audio, dtype=np.float32)

	# # Write to temporary wav file and return its path (Gradio can serve file paths)
	# tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
	# sf.write(tmp.name, audio, sr, format="WAV")
	# return tmp.name, f"Success — generated {len(audio)} samples @ {sr}Hz."

	# except Exception as e:
	# return None, f"Error: {e}"


	# with gr.Blocks(title="Kokoro TTS (Gradio)") as demo:
	# gr.Markdown("## Kokoro-82M — Text → Speech (Gradio)")
	# with gr.Row():
	# txt = gr.Textbox(lines=4, placeholder="Type text to synthesize...", label="Input text")
	# voice = gr.Dropdown(choices=VOICES, value=VOICES[0], label="Voice")

	# out_audio = gr.Audio(label="Generated audio (wav file)")
	# status = gr.Textbox(label="Status", interactive=False)

	# btn = gr.Button("Generate")
	# btn.click(fn=synthesize_to_file, inputs=[txt, voice], outputs=[out_audio, status])

	# if __name__ == "__main__":
	# demo.launch(server_name="0.0.0.0", server_port=7860)
























	# import gradio as gr
	# import tempfile
	# import soundfile as sf
	# import numpy as np
	# from kokoro import KPipeline

	# pipeline = KPipeline(lang_code="a")

	# VOICES = [
	# "af_heart", "af_bella", "af_nicole",
	# "am_adam", "am_michael",
	# "bf_emma", "bm_george"
	# ]

	# SR = 24000 # Kokoro standard sample rate


	# def generate_full_audio(text, voice):
	# text = (text or "").strip()
	# if not text:
	# return None, None, "Please enter text."

	# try:
	# # Kokoro returns a generator over chunks
	# gen = pipeline(text, voice=voice)

	# audio_chunks = []

	# # Collect all audio chunks (fixes 6-second problem)
	# for (gs, ps, audio) in gen:
	# audio_chunks.append(np.asarray(audio, dtype=np.float32))

	# if not audio_chunks:
	# return None, None, "No audio produced."

	# # Concatenate all chunks into one continuous waveform
	# final_audio = np.concatenate(audio_chunks)

	# # Save to WAV for download
	# tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
	# sf.write(tmp.name, final_audio, SR)

	# return (SR, final_audio), tmp.name, f"Generated {len(final_audio)/SR:.2f} seconds of audio."

	# except Exception as e:
	# return None, None, f"Error: {e}"


	# with gr.Blocks(title="Kokoro Unlimited TTS") as demo:
	# gr.Markdown("## 🎧 Kokoro TTS — Unlimited Text, Downloadable Audio")

	# with gr.Row():
	# txt = gr.Textbox(
	# lines=10,
	# label="Input Text (no length limit)",
	# placeholder="Paste long text here...",
	# )
	# voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")

	# audio_out = gr.Audio(label="Generated Audio")
	# download_out = gr.File(label="Download Audio (.wav)")
	# status = gr.Textbox(label="Status", interactive=False)

	# generate_btn = gr.Button("Generate")

	# generate_btn.click(
	# fn=generate_full_audio,
	# inputs=[txt, voice],
	# outputs=[audio_out, download_out, status]
	# )

	# demo.launch()
















	import gradio as gr
	import tempfile
	import soundfile as sf
	import numpy as np
	from kokoro import KPipeline
	import time

	# Build the Kokoro pipeline once at import time; tts_stream reuses this single
	# instance for every request instead of constructing one per call.
	# NOTE(review): an earlier draft kept in this file describes lang_code "a" as
	# American English and says the code should match the voice prefix, yet VOICES
	# also offers "bf_"/"bm_" voices — confirm those render acceptably through an
	# "a" pipeline.
	pipeline = KPipeline(lang_code="a")

	# Voice identifiers offered in the UI dropdown, one per line so additions
	# produce minimal diffs.
	VOICES = [
	    "af_heart",
	    "af_bella",
	    "af_nicole",
	    "am_adam",
	    "am_michael",
	    "bf_emma",
	    "bm_george",
	]

	# Sample rate (Hz) used both when writing the WAV file and when handing the
	# waveform to the audio player.
	SR = 24_000


	def tts_stream(text, voice):
	    """Synthesize *text* sentence by sentence, streaming progress to the UI.

	    Generator meant to be used as a Gradio event handler. Every yielded item
	    is a 4-tuple ``(audio, wav_path, progress_percent, status_message)``.

	    Args:
	        text: Raw input text; ``None`` is treated as empty.
	        voice: Voice identifier forwarded unchanged to ``pipeline``.

	    Yields:
	        ``(None, None, percent, message)`` after each processed sentence,
	        then a final ``((SR, waveform), wav_path, 100, "Completed!")`` where
	        ``waveform`` is a float32 NumPy array and ``wav_path`` names a
	        temporary ``.wav`` file that is intentionally left on disk so it can
	        be downloaded. If there is no text, or the pipeline produced no
	        audio, a single ``(None, None, 0, message)`` explains why instead.
	    """
	    text = (text or "").strip()
	    if not text:
	        yield None, None, 0, "Please enter text."
	        return

	    # Work in sentence-sized pieces so a status update can be pushed between
	    # pieces instead of the UI sitting silent until the whole text is done.
	    pieces = text.split(". ")
	    total = len(pieces)
	    last_index = total - 1
	    audio_chunks = []

	    for i, piece in enumerate(pieces):
	        if not piece.strip():
	            continue

	        # str.split() consumed the ". " separator; restore the full stop on
	        # every piece except the last so the model still sees the sentence
	        # ending it was given.
	        sentence = piece if i == last_index else piece + "."

	        # The pipeline yields (gs, ps, audio) tuples; only the audio is kept.
	        for _gs, _ps, audio in pipeline(sentence, voice=voice):
	            audio_chunks.append(np.asarray(audio, dtype=np.float32))

	        # Report progress once per sentence.
	        progress = int((i + 1) / total * 100)
	        yield None, None, progress, f"Processing chunk {i+1}/{total}..."

	        # Short pause kept from the original as an anti-timeout heartbeat.
	        time.sleep(0.1)

	    # np.concatenate raises ValueError on an empty list, so report the
	    # problem through the status field instead of crashing the handler.
	    if not audio_chunks:
	        yield None, None, 0, "No audio produced."
	        return

	    final_audio = np.concatenate(audio_chunks)

	    # Reserve a unique path, then close our handle before soundfile opens the
	    # path itself; delete=False keeps the file available for download.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        wav_path = tmp.name
	    sf.write(wav_path, final_audio, SR)

	    yield (SR, final_audio), wav_path, 100, "Completed!"


	# Assemble the single-page UI: text and voice inputs, four outputs that
	# tts_stream updates as it runs, and one button that starts the job.
	with gr.Blocks(title="Kokoro TTS (No Timeout)") as demo:
	    gr.Markdown(
	        value="## ⚡ Kokoro TTS – Unlimited Length + Safe From Timeout + Progress Bar"
	    )

	    # Inputs.
	    text = gr.Textbox(label="Input text", lines=12)
	    voice = gr.Dropdown(choices=VOICES, value="af_heart", label="Voice")

	    # Outputs, declared in the same order as the tuples tts_stream yields.
	    audio_output = gr.Audio(label="Audio Output")
	    file_download = gr.File(label="Download WAV")
	    progress = gr.Slider(
	        minimum=0,
	        maximum=100,
	        step=1,
	        label="Progress",
	        interactive=False,
	    )
	    status = gr.Textbox(label="Status", interactive=False)

	    run_btn = gr.Button(value="Generate")

	    # Each tuple yielded by the generator refreshes all four outputs at once.
	    run_btn.click(
	        fn=tts_stream,
	        inputs=[text, voice],
	        outputs=[audio_output, file_download, progress, status],
	    )

	# Start the web server for the interface built above.
	demo.launch()