# Gradio app for a Hugging Face Space.
# Converts pasted or uploaded text to speech with Kokoro.
import gradio as gr
from kokoro import KPipeline
import soundfile as sf
import tempfile
import os
import numpy as np

# Voice identifiers offered in the dropdown (abbreviated list).
# Grouped by prefix; the first entry is the UI default.
voices = [
    # af_*
    "af_heart", "af_alloy", "af_aoede", "af_bella", "af_jessica", "af_kore",
    "af_nicole", "af_nova", "af_river", "af_sarah", "af_sky",
    # am_*
    "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam", "am_michael",
    # bf_*
    "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
    # bm_*
    "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
]

# Sample rate (Hz) handed to soundfile when writing the WAV output.
_SAMPLE_RATE = 24000

# KPipeline instances keyed by language code, so each one is constructed once
# per process instead of once per request.
_PIPELINES = {}


def _get_pipeline(lang_code):
    """Return the cached KPipeline for *lang_code*, creating it on first use."""
    pipeline = _PIPELINES.get(lang_code)
    if pipeline is None:
        pipeline = _PIPELINES[lang_code] = KPipeline(lang_code=lang_code)
    return pipeline


def generate_tts(text, file, voice, speed):
    """Synthesize speech for the given text and return the path to a WAV file.

    Args:
        text: Text pasted into the textbox; ignored when ``file`` is given.
        file: Optional uploaded text file. Accepted either as a filesystem
            path (``str`` / ``os.PathLike``) or as an object whose ``.name``
            attribute holds the path.
        voice: Voice identifier; its prefix selects the pipeline language code.
        speed: Speech speed forwarded to the pipeline.

    Returns:
        Path of a temporary ``.wav`` file containing all generated segments
        concatenated, or ``None`` when there is no text to speak or the
        pipeline produced no audio.
    """
    # An uploaded file takes precedence over pasted text.
    if file is not None:
        # Depending on the Gradio version / File `type`, the upload arrives as
        # a plain path or as a file-like wrapper exposing the path via `.name`.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        with open(path, encoding='utf-8') as f:
            text = f.read()
    if not text or not text.strip():
        return None  # No input to process

    # 'af_'/'am_' voices use language code 'a'; every other voice uses 'b'.
    lang_code = 'a' if voice.startswith(('af_', 'am_')) else 'b'
    pipeline = _get_pipeline(lang_code)

    # One segment per chunk; the pipeline splits the text on blank-line runs.
    # Only the audio element of each yielded triple is needed.
    audio_segments = [
        audio
        for _, _, audio in pipeline(
            text, voice=voice, speed=speed, split_pattern=r'\n+'
        )
        if audio is not None  # defensive: np.concatenate cannot take None
    ]
    if not audio_segments:
        return None

    combined_audio = np.concatenate(audio_segments)

    # Reserve a unique path, then close the handle before soundfile writes to
    # it. delete=False keeps the file so Gradio can serve it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile:
        audio_path = tmpfile.name
    sf.write(audio_path, combined_audio, _SAMPLE_RATE, format='WAV')

    return audio_path  # Gradio will handle playback and download

# Build the UI and bind it to a module-level name so it can be imported or
# inspected without starting a server.
# Inputs map positionally onto generate_tts(text, file, voice, speed).
demo = gr.Interface(
    fn=generate_tts,
    inputs=[
        gr.Textbox(label="Paste text here (ignored if file uploaded)", lines=5),
        gr.File(label="Or upload a .txt file"),
        gr.Dropdown(choices=voices, label="Select Voice", value=voices[0]),
        gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speech Speed"),
    ],
    # generate_tts returns a path to a WAV file, hence type="filepath".
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
    title="Kokoro Text-to-Speech",
    description="Paste text or upload a .txt file, select a voice, and generate speech. You can play and download the generated audio.",
)

if __name__ == "__main__":
    # Launch only when executed as a script, so importing this module does not
    # start a web server as a side effect.
    # NOTE(review): share=True requests a public link for local runs; it is
    # reportedly ignored (with a warning) on Hugging Face Spaces — confirm.
    demo.launch(share=True)