import os
import uuid
import gradio as gr
import numpy as np
import soundfile as sf
from kokoro import KPipeline

# Generated WAV files are written into this directory; it is created at
# import time when it is not already present.
AUDIO_DIR = "audio_files"
os.makedirs(AUDIO_DIR, exist_ok=True)

# One shared Kokoro TTS pipeline for the whole app, configured for
# American English (lang_code "a").
pipeline = KPipeline(lang_code="a")

def generate_tts(text, voice="af_sky", speed=1.0):
    """Synthesize *text* with the Kokoro pipeline and save it as a WAV file.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is
            rejected with a message instead of being synthesized.
        voice: Voice name forwarded to the pipeline.
        speed: Speed value forwarded to the pipeline.

    Returns:
        Always a 2-tuple, so the result lines up with the two output
        components this handler is wired to: ``(audio_url, filepath)`` on
        success, or ``(message, None)`` when the input is empty or
        synthesis fails.
    """
    # Guard against None as well as blank text; return a pair (not a bare
    # string) because the click handler maps the result onto two outputs.
    if not text or not text.strip():
        return "Please enter some text to convert to speech.", None

    try:
        segments = pipeline(
            text,
            voice=voice,
            speed=speed,
            split_pattern=r'$^',  # Never matches inside non-empty text → no splitting
        )

        # Each yielded item unpacks to three fields; only the audio (third)
        # is needed. Skip items that carry no audio.
        chunks = [audio for _, _, audio in segments if audio is not None]
        if not chunks:
            # np.concatenate([]) would raise an opaque ValueError; report it clearly.
            return "Error generating speech: no audio was produced.", None

        audio_full = np.concatenate(chunks)

        # Short random prefix keeps filenames from different requests distinct.
        session_id = uuid.uuid4().hex[:8]
        filepath = os.path.join(AUDIO_DIR, f"{session_id}_audio.wav")

        # NOTE(review): 24000 is the sample rate written to the file —
        # presumably Kokoro's output rate; confirm against the model docs.
        sf.write(filepath, audio_full, 24000)

        # NOTE(review): the "/file=" route prefix depends on the installed
        # Gradio version — confirm it resolves in the deployed environment.
        audio_url = f"/file={filepath}"

        return audio_url, filepath

    except Exception as e:
        # UI boundary: surface the failure in the URL textbox rather than
        # crashing the handler, and leave the audio component empty.
        return f"Error generating speech: {e}", None

# Define the Gradio interface: inputs (text, voice, speed) on the left,
# outputs (audio player, file URL) on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Kokoro Text-to-Speech Generator")
    
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter text to convert to speech",
                placeholder="Type your text here...",
                lines=5
            )
            # Only list voices that are published with the Kokoro model
            # (see its VOICES.md); an unknown name makes synthesis fail when
            # the pipeline tries to load the voice.
            voice_selector = gr.Dropdown(
                choices=["af_sky", "af_heart", "af_bella", "af_nicole", "af_sarah"],
                value="af_sky",
                label="Voice"
            )
            speed_slider = gr.Slider(
                minimum=0.5,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Speech Speed"
            )
            submit_btn = gr.Button("Generate Speech")
        
        with gr.Column():
            audio_output = gr.Audio(label="Generated Speech", type="filepath")
            url_output = gr.Textbox(label="Audio URL")
    
    # generate_tts returns (url_or_message, filepath_or_None); the order of
    # `outputs` must match that pair.
    submit_btn.click(
        fn=generate_tts,
        inputs=[text_input, voice_selector, speed_slider],
        outputs=[url_output, audio_output]
    )
    
    # Step 5 describes what the UI actually does: the player is not
    # configured to autoplay, so the user starts playback.
    gr.Markdown("""
    ## How to Use
    1. Enter the text you want to convert to speech in the text box
    2. Select a voice from the dropdown menu
    3. Adjust the speech speed if needed
    4. Click "Generate Speech" to create the audio
    5. Play the generated audio in the player; a URL to the audio file is shown alongside it
    """)

# Launch the app. `allowed_paths` explicitly permits Gradio to serve files
# from the audio directory (needed for the "/file=" URLs to resolve on
# versions that do not serve the working directory by default);
# share=True additionally requests a public share link.
demo.launch(share=True, allowed_paths=[os.path.abspath(AUDIO_DIR)])