import gradio as gr
import edge_tts
import asyncio
import tempfile
import random
import re
from pydub import AudioSegment
from pydub.effects import normalize

# 🎙️ Best voices
# Maps the label shown in the "Voice" dropdown to the voice identifier
# handed to edge-tts. Insertion order is the order the dropdown lists them.
VOICES = {
    # US English
    "US Female (Best)": "en-US-JennyNeural",
    # Indian English
    "Indian Female": "en-IN-NeerjaNeural",
    "Indian Male": "en-IN-PrabhatNeural",
}

# 🧠 Emotion + intensity (natural)
def add_emotion(text, mood, intensity):
    """Prepend a bracketed delivery cue for *mood* to *text*.

    Each supported mood ("Happy", "Sad", "Angry") has a mild cue, used when
    *intensity* is 6 or lower, and a strong cue, used above 6. Any other
    mood adds no cue. The combined string is stripped of leading and
    trailing whitespace before it is returned.
    """
    # mood -> (cue for intensity <= 6, cue for intensity > 6)
    cues = {
        "Happy": ("(smiling)", "(excited)"),
        "Sad": ("(softly)", "(low tone)"),
        "Angry": ("(firm)", "(serious)"),
    }

    if mood not in cues:
        # Unrecognised mood: nothing is prepended, the text is only stripped.
        return f" {text}".strip()

    mild, strong = cues[mood]
    cue = mild if intensity <= 6 else strong
    return f"{cue} {text}".strip()

# 🧠 Smart pauses (clean, not overdone)
# Matches a '.', ',', '?' or '!' that is directly followed by a character
# which should be separated from it by a space. Not matched:
#   * '.' or ',' sitting between two digits (decimals, thousands separators)
#   * punctuation followed by whitespace, more punctuation, a closing
#     quote/bracket, or the end of the string
_PAUSE_GAP = re.compile(
    r"""(?:(?<!\d)[.,]|[.,](?!\d)|[?!])(?=[^\s.,?!"')\]])"""
)


def smart_pause(text):
    """Insert a missing space after sentence punctuation in *text*.

    A single space is added after '.', ',', '?' and '!' only where the next
    character is glued directly to the punctuation mark. Spacing that is
    already present is left alone, numbers such as "3.14" or "1,000" are not
    split, runs such as "..." or "?!" stay together, and no trailing space is
    appended at the end of the string.

    Returns the adjusted string.
    """
    # \g<0> is the matched punctuation mark itself.
    return _PAUSE_GAP.sub(r"\g<0> ", text)

# 🧠 Humanizer (light, not forced)
def humanize(text):
    """Return *text* with pause spacing applied and, sometimes, a filler.

    The text is first passed through ``smart_pause``. Then a random draw
    decides whether the filler "hmm... " is put in front: it is added when
    ``random.random()`` exceeds 0.6, otherwise the text is returned as is.
    """
    paused = smart_pause(text)

    # Drawn after the pause pass so the order of operations stays the same.
    wants_filler = random.random() > 0.6

    return "hmm... " + paused if wants_filler else paused

# 🔊 Audio enhancement (clean & natural)
def enhance_audio(file_path):
    """Post-process the audio file at *file_path* and export it as WAV.

    Processing steps, in order: normalize, high-pass filter at 100,
    low-pass filter at 4000, overlay a copy of the signal attenuated by 35
    on top of itself, then raise the gain by 1.

    The result is written next to the input as ``<name>_final.wav``, where
    ``<name>`` is *file_path* without its final extension. Only the last
    extension is replaced, so directory names containing ".mp3" are left
    untouched and an input that does not end in ".mp3" is never overwritten.

    Returns the path of the exported WAV file.
    """
    import os  # function-scope import: only needed for the output path

    audio = AudioSegment.from_file(file_path)

    # Level the loudness first so the later fixed-size gain steps are
    # applied to a predictable signal.
    audio = normalize(audio)

    # Band-limit the signal (cutoffs 100 and 4000).
    audio = audio.high_pass_filter(100)
    audio = audio.low_pass_filter(4000)

    # Mix a much quieter copy (-35) over the signal. No position offset is
    # given, so the copy is laid directly on top of the original.
    quiet_copy = audio - 35
    audio = audio.overlay(quiet_copy)

    # Slight overall gain.
    audio = audio + 1

    # Swap only the trailing extension; str.replace would also rewrite any
    # ".mp3" occurring earlier in the path.
    stem, _ext = os.path.splitext(file_path)
    out = f"{stem}_final.wav"
    audio.export(out, format="wav")
    return out

# 🔁 TTS generator
async def tts(text, voice):
    """Synthesize *text* with edge-tts voice *voice* into a temporary MP3.

    A rate ("-5%", "-6%" or "-7%") and a pitch ("+1Hz" or "+2Hz") are picked
    at random for each call and passed to ``edge_tts.Communicate``.

    Returns the path of the saved ".mp3" file. The file is created with
    ``delete=False``, so the caller is responsible for removing it. If
    synthesis raises, the temporary file is removed and the exception is
    re-raised.
    """
    import os  # function-scope import: only needed for failure cleanup

    # Reserve a unique path. The context manager closes the handle right
    # away, so the descriptor is not leaked and the path can be reopened
    # for writing; delete=False keeps the file on disk after closing.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        file_path = tmp.name

    # controlled variation (natural range only)
    rate = random.choice(["-5%", "-6%", "-7%"])
    pitch = random.choice(["+1Hz", "+2Hz"])

    try:
        communicate = edge_tts.Communicate(
            text=text,
            voice=voice,
            rate=rate,
            pitch=pitch,
        )
        await communicate.save(file_path)
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        try:
            os.remove(file_path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise

    return file_path

# 🎯 Main function
def generate(text, mood, intensity, voice_name):
    """Turn *text* into an enhanced WAV file and return its path.

    Parameters:
        text: the text to speak; ``None``, empty or whitespace-only input
            produces no audio.
        mood: emotion label passed to ``add_emotion``.
        intensity: emotion intensity passed to ``add_emotion``.
        voice_name: a key of ``VOICES`` selecting the edge-tts voice.

    Returns:
        The path returned by ``enhance_audio``, or ``None`` when there is
        no text to speak.

    Raises:
        KeyError: if *voice_name* is not a key of ``VOICES``.
    """
    import os  # function-scope import: only needed for temp-file cleanup

    # Guard against None as well as blank strings.
    if not text or not text.strip():
        return None

    voice = VOICES[voice_name]

    text = add_emotion(text, mood, intensity)
    text = humanize(text)

    mp3_file = asyncio.run(tts(text, voice))

    final_audio = None
    try:
        final_audio = enhance_audio(mp3_file)
    finally:
        # The intermediate MP3 is no longer needed once enhancement has run
        # (or failed). Skip removal if the enhanced output was written to
        # the very same path.
        if final_audio != mp3_file:
            try:
                os.remove(mp3_file)
            except OSError:
                pass  # best-effort cleanup of a temp file

    return final_audio

# 🎨 UI
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown(value="## 🔥 Pro Human-like AI Voice (Free)")

    # --- Inputs (created in the order they appear on the page) ---
    text_input = gr.Textbox(
        label="Enter Text",
        placeholder="Example: kya tum theek ho?",
    )

    mood = gr.Dropdown(
        choices=["Normal", "Happy", "Sad", "Angry"],
        value="Normal",
        label="Emotion",
    )

    intensity = gr.Slider(
        minimum=1,
        maximum=10,
        value=5,
        label="Emotion Intensity",
    )

    voice_select = gr.Dropdown(
        choices=list(VOICES),
        value="US Female (Best)",
        label="Voice",
    )

    # --- Output ---
    output_audio = gr.Audio(label="Generated Voice")

    # Clicking the button runs generate() with the four inputs above and
    # sends its return value to the audio player.
    btn = gr.Button(value="Generate")
    btn.click(
        fn=generate,
        inputs=[text_input, mood, intensity, voice_select],
        outputs=output_audio,
    )

# Start the web app.
demo.launch()