import gradio as gr
import torch
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
import soundfile as sf
from pydub import AudioSegment
import os
import requests

# Load the SpeechT5 stack once, at import time: the text processor, the
# text-to-speech model, and the HiFi-GAN vocoder.
_TTS_CHECKPOINT = "microsoft/speecht5_tts"
_VOCODER_CHECKPOINT = "microsoft/speecht5_hifigan"

processor = SpeechT5Processor.from_pretrained(_TTS_CHECKPOINT)
model = SpeechT5ForTextToSpeech.from_pretrained(_TTS_CHECKPOINT)
vocoder = SpeechT5HifiGan.from_pretrained(_VOCODER_CHECKPOINT)

# Speaker embedding passed to the TTS model on every request. The values are
# random, but they come from a dedicated, explicitly seeded generator so the
# same embedding is produced on every run (an unseeded torch.rand would yield
# a different one after each restart) and the global torch RNG state is left
# untouched.
_speaker_rng = torch.Generator().manual_seed(0)
speaker_embeddings = torch.rand(1, 512, generator=_speaker_rng)

# Rain background sound
DEFAULT_RAIN = "rain.mp3"
RAIN_URL = "https://cdn.pixabay.com/download/audio/2022/03/15/audio_7e9f0b47b6.mp3?filename=gentle-rain-ambient-11022.mp3"

# Fetch the default rain track only when it is not already on disk.
# This is best-effort: on failure a message is printed and startup continues.
if not os.path.exists(DEFAULT_RAIN):
    try:
        # A timeout keeps a stalled connection from blocking startup forever.
        r = requests.get(RAIN_URL, timeout=30)
        # Reject HTTP error responses so an error page is never saved as the
        # rain track; the exists-check above would otherwise keep reusing it.
        r.raise_for_status()
        with open(DEFAULT_RAIN, "wb") as f:
            f.write(r.content)
    except (requests.RequestException, OSError) as e:
        print(f"Error downloading rain: {e}")

def generate_audio(prompt, emotion, speed, background_audio):
    """Synthesize speech for ``prompt`` and mix it over a background track.

    Args:
        prompt: Text to speak. Must contain at least one non-whitespace
            character.
        emotion: When equal to ``"calm"``, the text is prefixed with an
            ellipsis and every period is expanded into one.
        speed: Playback-rate multiplier; ``1.0`` leaves the voice untouched.
            The change is made by reinterpreting the sample rate, so pitch
            shifts together with tempo.
        background_audio: Path of an audio file to use as the background, or
            a falsy value to fall back to ``DEFAULT_RAIN``.

    Returns:
        A ``(path, status)`` tuple: the path of the exported MP3 file and a
        status message. The message carries a warning when the background
        could not be mixed in (the voice-only audio is still returned).

    Raises:
        gr.Error: If ``prompt`` is empty or contains only whitespace.
    """
    import tempfile

    # Whitespace-only text is treated as empty as well.
    if not prompt or not prompt.strip():
        raise gr.Error("Text cannot be empty.")

    # Add ASMR effect for calm emotion
    if emotion == "calm":
        prompt = "... " + prompt.replace(".", "... ")

    # NOTE(review): the model presumably has a maximum input length, so very
    # long scripts may need to be split into chunks -- confirm against the
    # model configuration.
    inputs = processor(text=prompt, return_tensors="pt")
    with torch.no_grad():
        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)

    # Use a unique scratch file per call so overlapping calls (or several app
    # instances sharing a working directory) cannot overwrite each other's
    # audio, and remove it as soon as it has been loaded.
    fd, temp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        sf.write(temp_wav, speech.numpy(), samplerate=16000)
        final_audio = AudioSegment.from_file(temp_wav)
    finally:
        try:
            os.remove(temp_wav)
        except OSError:
            pass  # Cleanup is best-effort; the audio is already in memory.

    # Adjust speed for ASMR: declare a scaled frame rate for the same raw
    # samples, then resample back to the original rate.
    if speed != 1.0:
        final_audio = final_audio._spawn(final_audio.raw_data, overrides={
            "frame_rate": int(final_audio.frame_rate * speed)
        }).set_frame_rate(final_audio.frame_rate)

    status_message = "✅ Audio generated successfully!"

    # Add background rain or user-uploaded audio. This step is best-effort:
    # any failure falls back to voice-only output, but is reported to the user
    # instead of being hidden behind a plain success message.
    try:
        if background_audio:
            bg = AudioSegment.from_file(background_audio).apply_gain(-20)
        else:
            bg = AudioSegment.from_file(DEFAULT_RAIN).apply_gain(-25)
        # A zero-length track can never fill the narration; reject it before
        # asking overlay() to loop it.
        if len(bg) == 0:
            raise ValueError("background track is empty")
        # loop=True repeats a short background until the narration ends. The
        # overlay result keeps the narration's length, so no manual trimming
        # of the background is needed.
        final_audio = final_audio.overlay(bg, loop=True)
    except Exception as e:
        print(f"Background merge failed: {e}")
        status_message = "⚠️ Audio generated, but the background could not be added."

    # Unique output path for the same reason as the scratch WAV. The file is
    # intentionally not deleted here because the caller still has to read it.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    final_audio.export(output_path, format="mp3")
    return output_path, status_message

# Gradio UI: the first column collects the inputs, the second shows the
# generated audio and the status message.
with gr.Blocks() as app:
    gr.Markdown("# 🎧 Midnight History ASMR TTS")
    gr.Markdown("Convert your text into soothing ASMR audio with background rain.")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Enter Text",
                placeholder="Paste your script...",
                lines=8,
            )
            emotion_choice = gr.Dropdown(
                choices=["calm", "neutral"],
                value="calm",
                label="Emotion",
            )
            speed_slider = gr.Slider(
                minimum=0.7,
                maximum=1.3,
                value=0.9,
                step=0.05,
                label="Speed",
            )
            bg_audio = gr.Audio(label="Upload Background (Optional)", type="filepath")
            btn = gr.Button("Generate")
        with gr.Column():
            audio_out = gr.Audio(label="Output", type="filepath")
            status = gr.Textbox(label="Status")

    # Wire the button to the generator; the outputs follow the order of the
    # values returned by generate_audio.
    btn.click(
        fn=generate_audio,
        inputs=[text_input, emotion_choice, speed_slider, bg_audio],
        outputs=[audio_out, status],
    )

app.launch(share=True)