"""Gradio web UI that converts text to speech with the Kokoro TTS pipeline."""
import logging
import os
import uuid

import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline

logger = logging.getLogger(__name__)

# Create a directory to store audio files if it doesn't exist
AUDIO_DIR = "audio_files"
os.makedirs(AUDIO_DIR, exist_ok=True)

# Sample rate (Hz) used when writing the generated WAV files
SAMPLE_RATE = 24000

# Initialize the Kokoro TTS pipeline for American English
pipeline = KPipeline(lang_code='a')


def generate_tts(text, voice="af_sky", speed=1.0):
    """Generate TTS audio from input text and save it as a WAV file.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is rejected with a message instead of being sent to the pipeline.
        voice: Kokoro voice identifier forwarded to the pipeline.
        speed: Speech speed multiplier forwarded to the pipeline.

    Returns:
        A 2-tuple matching the two Gradio outputs wired to this handler:
        ``(audio_url, filepath)`` on success, or ``(message, None)`` when
        the input is empty or synthesis fails. The tuple shape is the same
        on every path so Gradio always receives one value per output.
    """
    if not text or not text.strip():
        # Must still return two values: the click handler has two outputs.
        return "Please enter some text to convert to speech.", None

    try:
        # Generate audio without segmentation
        segments = list(
            pipeline(
                text,
                voice=voice,
                speed=speed,
                split_pattern=r'$^'  # Never matches non-empty text → one segment
            )
        )

        # Keep only segments that actually carry audio
        chunks = [audio for (_, _, audio) in segments if audio is not None]
        if not chunks:
            # np.concatenate raises an opaque error on an empty list
            return "Error generating speech: no audio was produced.", None

        # Concatenate all segments into one NumPy array
        audio_full = np.concatenate(chunks)

        # Create a unique filename for the audio
        session_id = uuid.uuid4().hex[:8]
        filename = f"{session_id}_audio.wav"
        filepath = os.path.join(AUDIO_DIR, filename)

        # Save the audio to a WAV file
        sf.write(filepath, audio_full, SAMPLE_RATE)

        # Generate a URL for the audio file
        audio_url = f"/file={filepath}"

        return audio_url, filepath
    except Exception as e:
        # UI boundary: report the failure to the user, but keep the traceback
        # in the logs instead of discarding it.
        logger.exception("TTS generation failed")
        return f"Error generating speech: {str(e)}", None


# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Kokoro Text-to-Speech Generator")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to speech",
                placeholder="Type your text here...",
                lines=5
            )
            # NOTE(review): confirm every voice id below is available to the
            # installed Kokoro pipeline; presumably an unknown id makes the
            # pipeline raise, which generate_tts reports as an error message.
            voice_selector = gr.Dropdown(
                choices=["af_sky", "af_breeze", "af_rays", "af_glow", "af_ember"],
                value="af_sky",
                label="Voice"
            )
            speed_slider = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speech Speed"
            )
            submit_btn = gr.Button("Generate Speech")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech", type="filepath")
            url_output = gr.Textbox(label="Audio URL")

    # Set up the event handler; generate_tts returns (url/message, filepath)
    submit_btn.click(
        fn=generate_tts,
        inputs=[text_input, voice_selector, speed_slider],
        outputs=[url_output, audio_output]
    )

    gr.Markdown("""
    ## How to Use
    1. Enter the text you want to convert to speech in the text box
    2. Select a voice from the dropdown menu
    3. Adjust the speech speed if needed
    4. Click "Generate Speech" to create the audio
    5. The audio will play automatically and a URL will be provided for download
    """)

# Launch the app with file serving capability: explicitly allow the audio
# directory so the returned /file= URLs can be resolved by the server.
demo.launch(share=True, allowed_paths=[os.path.abspath(AUDIO_DIR)])