sky1 / app.py
triflix's picture
Create app.py
58646cc verified
import os
import uuid
import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline
# Generated WAV files are written into this folder; create it up front so
# later writes cannot fail on a missing directory.
AUDIO_DIR = "audio_files"
os.makedirs(AUDIO_DIR, exist_ok=True)

# One Kokoro TTS pipeline is built at import time and shared by every request.
# lang_code "a" selects American English.
pipeline = KPipeline(lang_code="a")
def generate_tts(text, voice="af_sky", speed=1.0):
    """Synthesize speech for *text*, save it as a WAV file and return its location.

    Args:
        text: The text to convert to speech. ``None``, empty and
            whitespace-only input is rejected with a message.
        voice: Voice identifier forwarded to the Kokoro pipeline.
        speed: Speed multiplier forwarded to the Kokoro pipeline.

    Returns:
        Always a 2-tuple, because the Gradio click handler is wired to two
        output components (URL textbox, audio player):

        * ``(audio_url, filepath)`` on success, where ``filepath`` is the WAV
          file written under ``AUDIO_DIR`` and ``audio_url`` is
          ``"/file=" + filepath``.
        * ``(message, None)`` when the input is empty or synthesis fails.
    """
    # Return a pair here as well: a bare string would not match the two
    # outputs the handler is connected to.
    if not text or not text.strip():
        return "Please enter some text to convert to speech.", None

    try:
        # Generate audio without segmentation.
        segments = pipeline(
            text,
            voice=voice,
            speed=speed,
            split_pattern=r'$^',  # Regex that never matches → full audio as one segment
        )
        # Each yielded item unpacks to a 3-tuple whose last element is the audio.
        chunks = [audio for (_, _, audio) in segments]
        if not chunks:
            # np.concatenate raises on an empty list; report it explicitly.
            return "Error generating speech: no audio was produced.", None

        # Concatenate all segments into one NumPy array.
        audio_full = np.concatenate(chunks)

        # Short random prefix so successive requests do not overwrite each other.
        session_id = uuid.uuid4().hex[:8]
        filename = f"{session_id}_audio.wav"
        filepath = os.path.join(AUDIO_DIR, filename)

        # Save the audio to a WAV file.
        # 24000 is the sample rate in Hz — presumably Kokoro's output rate; confirm.
        sf.write(filepath, audio_full, 24000)

        # Build a URL for the saved file using the "/file=" route prefix.
        audio_url = f"/file={filepath}"
        return audio_url, filepath
    except Exception as e:
        # UI boundary: surface any failure as a message instead of crashing the handler.
        return f"Error generating speech: {str(e)}", None
# Build the Gradio UI: inputs on the left, results on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Kokoro Text-to-Speech Generator")

    with gr.Row():
        # Left column: text, voice and speed controls plus the trigger button.
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to speech",
                placeholder="Type your text here...",
                lines=5,
            )
            voice_selector = gr.Dropdown(
                choices=["af_sky", "af_breeze", "af_rays", "af_glow", "af_ember"],
                value="af_sky",
                label="Voice",
            )
            speed_slider = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speech Speed",
            )
            submit_btn = gr.Button("Generate Speech")

        # Right column: audio player and the URL of the generated file.
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech", type="filepath")
            url_output = gr.Textbox(label="Audio URL")

    # Wire the button to the TTS function. generate_tts returns
    # (url, filepath), so the outputs are listed in that order.
    submit_btn.click(
        fn=generate_tts,
        inputs=[text_input, voice_selector, speed_slider],
        outputs=[url_output, audio_output],
    )

    # Usage instructions shown below the controls.
    gr.Markdown("""
## How to Use
1. Enter the text you want to convert to speech in the text box
2. Select a voice from the dropdown menu
3. Adjust the speech speed if needed
4. Click "Generate Speech" to create the audio
5. The audio will play automatically and a URL will be provided for download
""")

# Start the app; share=True requests a public share link.
demo.launch(share=True)