import gradio as gr
import tempfile
import soundfile as sf
import numpy as np
from kokoro import KPipeline
import time
import nltk

# Make sure the NLTK sentence-tokenizer data is available before the
# tokenizer is imported and used for sentence splitting.
# The lookup below is treated as a cache check: the downloads only run when
# nltk.data.find() raises LookupError for 'tokenizers/punkt_tab'.
try:
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    nltk.download('punkt_tab')
    # NOTE(review): 'punkt' is fetched as well, presumably as a fallback for
    # NLTK releases that still read the older resource name - confirm.
    nltk.download('punkt')

# Imported after the resource check above rather than with the other imports
# at the top of the file.
from nltk.tokenize import sent_tokenize

# Single Kokoro pipeline instance, created once at import time and shared by
# every tts_stream() call.
# NOTE(review): lang_code="a" presumably selects American English - confirm
# against the Kokoro documentation.
pipeline = KPipeline(lang_code="a")

# Voice identifiers offered in the UI dropdown; the selected one is passed to
# the pipeline as `voice=`.
VOICES = [
    "af_heart", "af_bella", "af_nicole",
    "am_adam", "am_michael",
    "bf_emma", "bm_george"
]

# Sample rate (Hz) used both when writing the WAV file and when handing the
# audio array to the Gradio audio player.
SR = 24000

def tts_stream(text, voice):
    """Synthesize *text* one sentence at a time, streaming progress updates.

    This is a generator. Every yielded item is a 4-tuple
    ``(audio, wav_path, progress, status)``:

    * ``audio`` -- ``None`` while work is in progress, or ``(SR, samples)``
      with ``samples`` a 1-D ``float32`` array once synthesis is finished.
    * ``wav_path`` -- ``None`` while in progress, or the path of the WAV file
      written for download.
    * ``progress`` -- integer percentage in the range 0-100.
    * ``status`` -- human-readable status message.

    Args:
        text: Text to speak. ``None`` and whitespace-only input are treated
            as empty and produce a single "Please enter text." update.
        voice: Voice identifier forwarded to the Kokoro pipeline as
            ``voice=``.

    Yields:
        One progress tuple per processed sentence, followed by a final tuple
        carrying the audio and the WAV path. If no audio was produced at
        all, the final tuple carries ``None`` for both and an explanatory
        status instead.
    """
    text = (text or "").strip()
    if not text:
        yield None, None, 0, "Please enter text."
        return

    # Split with NLTK's sentence tokenizer rather than on raw punctuation so
    # each pipeline call receives one complete sentence.
    sentences = sent_tokenize(text)
    total = len(sentences)
    audio_chunks = []

    print(f"Split into {total} sentences.")

    for index, sentence in enumerate(sentences, start=1):
        if not sentence.strip():
            continue

        # The pipeline call is iterated as a generator of (gs, ps, audio)
        # triples; only the audio part is kept.
        for _gs, _ps, audio in pipeline(sentence, voice=voice):
            audio_chunks.append(np.asarray(audio, dtype=np.float32))

        # Progress streaming to the UI; audio and file stay empty until the
        # whole text has been synthesized.
        progress = int(index / total * 100)
        yield None, None, progress, f"Processing sentence {index}/{total}..."

        # Anti-timeout heartbeat: brief pause between sentences.
        time.sleep(0.05)

    if not audio_chunks:
        # Nothing to play or download: report it instead of writing a
        # zero-length WAV and handing an empty array to the audio player.
        yield None, None, 100, "No audio was generated."
        return

    final_audio = np.concatenate(audio_chunks)

    # Reserve a unique path and close the handle before writing to it by
    # name: the original left the handle open for the life of the process.
    # delete=False keeps the file on disk so it can be served for download.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, final_audio, SR)

    # Final update: audio for the player, file path for the download widget.
    yield (SR, final_audio), wav_path, 100, "Completed!"


# Build the web UI: a two-column layout with inputs on the left and results
# on the right. Components are laid out in the order they are created inside
# the nested Row/Column contexts, so statement order here defines the page.
with gr.Blocks(title="Kokoro TTS (Smart Split)") as demo:
    gr.Markdown("## ⚡ Kokoro TTS – Smart Sentence Splitting")
    
    with gr.Row():
        # Left column: text to speak, voice selection, and the trigger button.
        with gr.Column():
            text = gr.Textbox(lines=12, label="Input text", placeholder="Paste long text here...")
            voice = gr.Dropdown(VOICES, value="af_heart", label="Voice")
            run_btn = gr.Button("Generate", variant="primary")
        
        # Right column: read-only outputs filled in by tts_stream().
        with gr.Column():
            audio_output = gr.Audio(label="Audio Output", interactive=False)
            file_download = gr.File(label="Download WAV")
            progress = gr.Slider(0, 100, step=1, label="Progress", interactive=False)
            status = gr.Textbox(label="Status", interactive=False)

    # tts_stream is a generator; each 4-tuple it yields maps positionally
    # onto the four output components listed here.
    run_btn.click(
        fn=tts_stream,
        inputs=[text, voice],
        outputs=[audio_output, file_download, progress, status],
    )

# Starts the app at import time (there is no __main__ guard).
# NOTE(review): queue() is presumably what lets the generator's intermediate
# yields stream to the browser - confirm against the installed Gradio version.
demo.queue().launch()