Spaces:

chiauho
/

T2V

Sleeping

App Files Files Community

T2V / app.py

chiauho

Update app.py

b64d774 verified 9 months ago

raw

history blame contribute delete

2.32 kB

	# Gradio app for a Hugging Face Space.
	# Converts pasted or uploaded text to speech using Kokoro.
	import gradio as gr
	from kokoro import KPipeline
	import soundfile as sf
	import tempfile
	import os
	import numpy as np

	# Voice identifiers offered in the dropdown (an abbreviated selection, not
	# the full catalogue). Each identifier is "<prefix>_<name>"; the prefix is
	# what generate_tts inspects to pick the pipeline language code.
	_VOICE_NAMES_BY_PREFIX = {
	    "af": (
	        "heart", "alloy", "aoede", "bella", "jessica", "kore",
	        "nicole", "nova", "river", "sarah", "sky",
	    ),
	    "am": ("adam", "echo", "eric", "fenrir", "liam", "michael"),
	    "bf": ("alice", "emma", "isabella", "lily"),
	    "bm": ("daniel", "fable", "george", "lewis"),
	}

	# Flattened in table order, so voices[0] is "af_heart".
	voices = [
	    f"{prefix}_{name}"
	    for prefix, names in _VOICE_NAMES_BY_PREFIX.items()
	    for name in names
	]

	# Sample rate handed to soundfile when writing the WAV.
	# NOTE(review): presumably the rate Kokoro synthesizes at -- confirm.
	_SAMPLE_RATE = 24000

	# KPipeline instances keyed by language code, created on first use so the
	# pipeline is not rebuilt for every request.
	# NOTE(review): assumes a KPipeline can be reused across calls -- confirm.
	_PIPELINES = {}


	def _read_uploaded_text(file):
	    """Return the decoded text of an uploaded file.

	    ``file`` may be a filesystem path (``str`` / ``os.PathLike``) or any
	    object exposing the path as ``.name``.
	    """
	    path = file if isinstance(file, (str, os.PathLike)) else file.name
	    # 'utf-8-sig' decodes plain UTF-8 too, but drops a leading BOM so it
	    # neither defeats the empty-input check nor reaches the synthesizer.
	    with open(path, 'r', encoding='utf-8-sig') as f:
	        return f.read()


	def _get_pipeline(lang_code):
	    """Return the cached KPipeline for ``lang_code``, creating it if needed."""
	    pipeline = _PIPELINES.get(lang_code)
	    if pipeline is None:
	        pipeline = _PIPELINES[lang_code] = KPipeline(lang_code=lang_code)
	    return pipeline


	def generate_tts(text, file, voice, speed):
	    """Synthesize speech for the given text and return the WAV file path.

	    Args:
	        text: Text pasted by the user; ignored when ``file`` is provided.
	        file: Optional uploaded text file (a path, or an object with a
	            ``.name`` path attribute), or ``None``.
	        voice: Voice identifier. Voices prefixed ``af_`` or ``am_`` use
	            language code ``'a'``; every other voice uses ``'b'``.
	        speed: Speed value forwarded to the pipeline.

	    Returns:
	        Path of a newly written temporary ``.wav`` file, or ``None`` when
	        there is no non-blank input or the pipeline yields no audio. The
	        temporary file is not deleted by this function.
	    """
	    # An uploaded file takes precedence over pasted text.
	    if file is not None:
	        text = _read_uploaded_text(file)
	    if not text or not text.strip():
	        return None  # No input to process

	    # Choose language code based on voice prefix
	    lang_code = 'a' if voice.startswith(('af_', 'am_')) else 'b'
	    pipeline = _get_pipeline(lang_code)

	    # The pipeline yields 3-tuples; only the last item (the audio) is kept.
	    # Input is split on runs of newlines.
	    audio_segments = [
	        audio
	        for _, _, audio in pipeline(
	            text, voice=voice, speed=speed, split_pattern=r'\n+'
	        )
	    ]
	    if not audio_segments:
	        return None

	    # Concatenate all audio segments
	    combined_audio = np.concatenate(audio_segments)

	    # Reserve a temp path, close the handle, then let soundfile write to it.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
	        audio_path = tmpfile.name
	    sf.write(audio_path, combined_audio, _SAMPLE_RATE, format='WAV')

	    return audio_path  # Gradio will handle playback and download

	# Input widgets, created in the positional order generate_tts expects:
	# (text, file, voice, speed).
	text_input = gr.Textbox(label="Paste text here (ignored if file uploaded)", lines=5)
	file_input = gr.File(label="Or upload a .txt file")
	voice_input = gr.Dropdown(choices=voices, label="Select Voice", value=voices[0])
	speed_input = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed")

	# generate_tts returns a file path, hence type="filepath".
	speech_output = gr.Audio(label="Generated Speech", type="filepath")

	demo = gr.Interface(
	    fn=generate_tts,
	    inputs=[text_input, file_input, voice_input, speed_input],
	    outputs=speech_output,
	    title="Kokoro Text-to-Speech",
	    description="Paste text or upload a .txt file, select a voice, and generate speech. You can play and download the generated audio.",
	)

	# Start the web UI as soon as this module is executed.
	demo.launch(share=True)