import os
import tempfile
import gradio as gr
import librosa
import soundfile as sf
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# This will delete cached model to force a clean download
import subprocess

# Clear the cached XTTS model so that the model load further down has to
# download a fresh copy.
# NOTE(review): this runs on every start of the script, not just once, so the
# model is fetched again on each launch. Remove this block once the cache is
# known to be good.
import shutil

model_cache_path = os.path.expanduser("~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2")
if os.path.exists(model_cache_path):
    # Use the standard library instead of shelling out to `rm -rf`, which is
    # not available on every platform (e.g. Windows).
    if os.path.isdir(model_cache_path) and not os.path.islink(model_cache_path):
        shutil.rmtree(model_cache_path)
    else:
        # A plain file or a symlink: remove just that entry, as `rm -rf` would.
        os.remove(model_cache_path)


# ===== Step 1: Allowlist Required Classes for PyTorch >= 2.6 =====
# The XTTS configuration classes below are registered with torch's
# serialization allowlist before the model is loaded.
from torch.serialization import add_safe_globals
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
from TTS.config.shared_configs import BaseDatasetConfig

# Classes to register, kept in one place so the list is easy to extend.
_xtts_checkpoint_classes = (
    XttsConfig,
    XttsAudioConfig,
    XttsArgs,
    BaseDatasetConfig,
)
add_safe_globals(list(_xtts_checkpoint_classes))

# ===== Step 2: Agree to Coqui TTS Terms of Service =====
# Set before the TTS package is imported and the model is constructed below.
# NOTE(review): presumably this lets the library skip its interactive
# terms-of-service prompt during download - confirm against the Coqui docs.
os.environ.update(COQUI_TOS_AGREED="1")

# ===== Step 3: Load the Coqui XTTS Model =====
from TTS.api import TTS

_XTTS_MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"

# Module-level model instance shared by every inference call.
# gpu=False keeps inference on the CPU; set it to True if using CUDA.
tts = TTS(model_name=_XTTS_MODEL_NAME, progress_bar=True, gpu=False)

# ===== Step 4: Define Voice Cloning Inference Function =====
def _reserve_temp_wav():
    """Create an empty, closed temporary ``.wav`` file and return its path."""
    # The descriptor is closed right away so other libraries can reopen the
    # path for writing on any platform (an open handle blocks this on Windows).
    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    return path


def _remove_quietly(path):
    """Delete ``path`` if possible; cleanup is best-effort and never raises."""
    try:
        os.remove(path)
    except OSError:
        # Already gone or not removable; nothing useful can be done here.
        pass


def text_to_speech_clone(text, voice_sample):
    """Generate Hindi speech for ``text`` in the voice of ``voice_sample``.

    Args:
        text: The text to synthesize (spoken with language code ``"hi"``).
        voice_sample: Path to the reference voice recording, or ``None`` when
            the user did not provide one.

    Returns:
        Path to a temporary WAV file containing the generated speech. The
        file is not deleted here because the caller needs it after this
        function returns.

    Raises:
        gr.Error: If the voice sample or the text is missing. Gradio shows
            the message to the user. The previous ``(message, None)`` return
            value did not match the interface's single audio output.
    """
    if voice_sample is None:
        raise gr.Error("Please provide a voice sample audio.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    # Decode the uploaded sample and resample it to 22050 Hz.
    sample_wav, sample_rate = librosa.load(voice_sample, sr=22050)

    voice_sample_path = _reserve_temp_wav()
    output_path = _reserve_temp_wav()
    try:
        # Re-save the reference voice as WAV for the TTS call.
        sf.write(voice_sample_path, sample_wav, sample_rate)

        # Generate cloned Hindi speech into the output file.
        tts.tts_to_file(
            text=text,
            file_path=output_path,
            speaker_wav=voice_sample_path,
            language="hi",
        )
    except Exception:
        # Do not leave a useless output file behind when synthesis fails.
        _remove_quietly(output_path)
        raise
    finally:
        # The converted sample is only needed during synthesis.
        _remove_quietly(voice_sample_path)

    return output_path

# ===== Step 5: Gradio UI Interface =====
# Components are created up front and then wired together. The input order
# matches the positional parameters of text_to_speech_clone(text, voice_sample).
_text_input = gr.Textbox(
    label="Text",
    placeholder="हिंदी में टेक्स्ट दर्ज करें...",
    lines=5,
)
_voice_input = gr.Audio(label="Voice Sample (Hindi speech)", type="filepath")
_speech_output = gr.Audio(label="Generated Cloned Speech", type="filepath")

# Hindi description shown under the title in the web page.
_app_description = (
    "यह ऐप हिंदी टेक्स्ट से वॉयस क्लोनिंग के साथ स्पीच जेनरेट करता है।\n"
    "एक छोटी सी हिंदी आवाज़ की रिकॉर्डिंग (5-10 सेकंड) अपलोड करें, और यह उसी आवाज़ में टेक्स्ट पढ़कर सुनाएगा।"
)

iface = gr.Interface(
    fn=text_to_speech_clone,
    inputs=[_text_input, _voice_input],
    outputs=_speech_output,
    title="Hindi Text-to-Speech with Voice Cloning",
    description=_app_description,
)

# ===== Step 6: Launch the Web App =====
# Starts the Gradio server as soon as this script is executed.
iface.launch()