T2V / app.py
chiauho's picture
Update app.py
b64d774 verified
# Gradio app for a Hugging Face Space.
# Converts pasted or uploaded text to speech (text-to-voice).
import gradio as gr
from kokoro import KPipeline
import soundfile as sf
import tempfile
import os
import numpy as np
# Voice identifiers offered in the UI dropdown. This is a shortened
# selection rather than an exhaustive list. Entries are grouped by their
# three-character prefix; the first entry is used as the dropdown default.
voices = [
    # "af_" voices
    "af_heart",
    "af_alloy",
    "af_aoede",
    "af_bella",
    "af_jessica",
    "af_kore",
    "af_nicole",
    "af_nova",
    "af_river",
    "af_sarah",
    "af_sky",
    # "am_" voices
    "am_adam",
    "am_echo",
    "am_eric",
    "am_fenrir",
    "am_liam",
    "am_michael",
    # "bf_" voices
    "bf_alice",
    "bf_emma",
    "bf_isabella",
    "bf_lily",
    # "bm_" voices
    "bm_daniel",
    "bm_fable",
    "bm_george",
    "bm_lewis",
]
# KPipeline instances keyed by language code. Reusing them avoids rebuilding
# the pipeline on every request.
_PIPELINES = {}


def generate_tts(text, file, voice, speed):
    """Synthesize speech for the given text and return the path of a WAV file.

    Args:
        text: Text pasted by the user. Ignored when ``file`` is provided.
        file: Optional uploaded text file. May be a path (``str`` or
            ``os.PathLike``) or any object exposing the path as ``.name``.
        voice: Voice identifier; its prefix selects the pipeline language
            code ('a' for ``af_``/``am_`` voices, 'b' otherwise).
        speed: Speech speed factor forwarded to the pipeline.

    Returns:
        Path to a temporary ``.wav`` file containing all generated segments
        concatenated, or ``None`` when there is no non-blank input text or
        the pipeline produced no audio.
    """
    # Read text from the file if one was uploaded, else use the pasted text.
    if file is not None:
        # The upload may arrive as a plain path string rather than an object
        # with a ``.name`` attribute, so accept both shapes.
        if isinstance(file, (str, os.PathLike)):
            path = file
        else:
            path = file.name
        # "utf-8-sig" decodes ordinary UTF-8 and also drops a leading BOM,
        # which would otherwise be handed to the synthesizer as text.
        with open(path, 'r', encoding='utf-8-sig') as f:
            text = f.read()
    if not text or not text.strip():
        return None  # No input to process

    # Choose language code based on voice prefix.
    lang_code = 'a' if voice.startswith(('af_', 'am_')) else 'b'
    pipeline = _PIPELINES.get(lang_code)
    if pipeline is None:
        pipeline = _PIPELINES[lang_code] = KPipeline(lang_code=lang_code)

    # Split on blank-line/newline runs; each yielded item unpacks to three
    # values, of which only the audio is needed here.
    audio_segments = [
        audio
        for _gs, _ps, audio in pipeline(
            text, voice=voice, speed=speed, split_pattern=r'\n+'
        )
    ]
    if not audio_segments:
        return None

    # Concatenate all audio segments into one waveform.
    combined_audio = np.concatenate(audio_segments)

    # Reserve a temp file name, then close the handle *before* writing:
    # reopening a still-open NamedTemporaryFile by name fails on Windows.
    # delete=False keeps the file so the caller can serve it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
        audio_path = tmpfile.name
    # 24000 is the sample rate written to the WAV header; presumably the
    # pipeline's output rate -- confirm against the Kokoro documentation.
    sf.write(audio_path, combined_audio, 24000, format='WAV')
    return audio_path  # Gradio will handle playback and download
# Assemble the web UI. The four input components are listed in the same
# order as generate_tts's parameters (text, file, voice, speed); the single
# output is an audio player fed with the returned file path.
demo = gr.Interface(
    fn=generate_tts,
    inputs=[
        gr.Textbox(label="Paste text here (ignored if file uploaded)", lines=5),
        gr.File(label="Or upload a .txt file"),
        gr.Dropdown(choices=voices, label="Select Voice", value=voices[0]),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
    ],
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
    title="Kokoro Text-to-Speech",
    description="Paste text or upload a .txt file, select a voice, and generate speech. You can play and download the generated audio.",
)

# Start serving the interface as soon as the module runs, with sharing enabled.
demo.launch(share=True)