import os
import json
from pathlib import Path
import gradio as gr

# Setup paths (relative, so they resolve against the current working directory)
MODEL_PATH = Path("G_777.pth")
CONFIG_PATH = Path("config.json")
BANNER_PATH = Path("assets/banner.png")

# Speaker list used when the config is missing, unreadable, or names no speakers
DEFAULT_SPEAKERS = ("quevedo",)


def load_speakers(config_path, default=DEFAULT_SPEAKERS):
    """Return the speaker names stored under the ``"spk"`` key of a JSON config.

    Args:
        config_path: Path of the JSON config file to read.
        default: Names returned when no usable speaker list can be read.

    Returns:
        A new, non-empty list with the keys of the ``"spk"`` mapping, or a
        copy of ``default`` when the file does not exist, cannot be read or
        parsed, has no ``"spk"`` entry, or that entry is empty or not a mapping.
    """
    fallback = list(default)
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            config_data = json.load(f)
    except FileNotFoundError:
        # A missing config is an expected situation: use the default silently.
        return fallback
    except (OSError, ValueError) as e:
        # ValueError covers both invalid JSON and undecodable bytes.
        print(f"Error loading speakers from config: {e}")
        return fallback

    if not isinstance(config_data, dict) or "spk" not in config_data:
        return fallback

    spk = config_data["spk"]
    if not isinstance(spk, dict) or not spk:
        # An empty list would break the UI, which preselects the first speaker.
        print(f"Error loading speakers from config: no speakers listed under 'spk' in {config_path}")
        return fallback
    return list(spk)


# Dynamic speaker loader
speakers = load_speakers(CONFIG_PATH)

# Inference function
def convert_voice(input_audio, speaker, transpose, auto_predict_f0, f0_method, noise_scale):
    """Run so-vits-svc-fork inference on one audio file and report the outcome.

    Args:
        input_audio: Filesystem path of the source audio, or None when no
            audio was provided.
        speaker: Speaker name forwarded to the inference call.
        transpose: Pitch shift; converted with ``int()`` before the call.
        auto_predict_f0: Converted with ``bool()`` before the call.
        f0_method: F0 method name, forwarded unchanged.
        noise_scale: Converted with ``float()`` before the call.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is the
        string path of the generated ``<input stem>_quevedo.wav`` file next to
        the input on success, and None on any failure; failures are reported
        through ``status_message`` rather than raised.
    """
    if input_audio is None:
        return None, "Please upload an audio file or use the microphone."

    input_path = Path(input_audio)
    output_path = input_path.parent / f"{input_path.stem}_quevedo.wav"

    # Lazy import to avoid startup errors if so-vits-svc-fork is not yet installed
    try:
        from so_vits_svc_fork.inference.main import infer
    except ImportError:
        return None, (
            "Error: 'so-vits-svc-fork' is not installed in this environment.\n"
            "Please run: pip install so-vits-svc-fork"
        )

    if not MODEL_PATH.exists():
        return None, f"Error: Model file {MODEL_PATH} not found."
    if not CONFIG_PATH.exists():
        return None, f"Error: Config file {CONFIG_PATH} not found."

    try:
        # Success is detected purely by the output file existing, so a file
        # left over from an earlier run on the same input path must be removed
        # first; otherwise it would be returned as a fresh result whenever
        # infer() comes back without having written anything.
        output_path.unlink(missing_ok=True)

        # Perform inference using the fork's main infer function
        infer(
            input_path=input_path,
            output_path=output_path,
            model_path=MODEL_PATH,
            config_path=CONFIG_PATH,
            recursive=False,
            speaker=speaker,
            transpose=int(transpose),
            auto_predict_f0=bool(auto_predict_f0),
            noise_scale=float(noise_scale),
            f0_method=f0_method,
        )

        if output_path.exists():
            return str(output_path), "Conversion completed successfully!"
        return None, "Error: Output file was not generated."
    except Exception as e:
        # UI boundary: report every failure as a status message instead of raising.
        return None, f"Error during inference: {e}"

# Custom CSS for premium styling matching the blue-purple theme.
# Passed to gr.Blocks(css=...) when the UI is built. The class selectors are
# the hooks the UI attaches: .header-area, .main-box and .convert-btn through
# elem_classes, and .header-title / .header-desc through the header's inline HTML.
custom_css = """
body {
    background-color: #0b0c10;
}
.gradio-container {
    background-color: #0b0c10 !important;
    font-family: 'Outfit', 'Inter', sans-serif !important;
    max-width: 900px !important;
    margin: 0 auto !important;
    border-radius: 12px;
}
.header-area {
    text-align: center;
    padding: 20px 0;
}
.header-title {
    color: #4f46e5;
    background: linear-gradient(90deg, #818cf8 0%, #c084fc 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 800 !important;
    font-size: 2.5rem !important;
    margin-bottom: 0.5rem;
}
.header-desc {
    color: #9ca3af;
    font-size: 1.1rem;
    margin-bottom: 20px;
}
.main-box {
    background: rgba(17, 24, 39, 0.7);
    border: 1px solid rgba(255, 255, 255, 0.1);
    backdrop-filter: blur(10px);
    border-radius: 16px;
    padding: 20px;
    margin-bottom: 20px;
}
.convert-btn {
    background: linear-gradient(135deg, #6366f1 0%, #a855f7 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: bold !important;
    transition: all 0.3s ease !important;
}
.convert-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 20px rgba(139, 92, 246, 0.4);
}
"""

# Build Gradio UI: banner, header, a two-column conversion panel (inputs and
# parameters on the left, result and status on the right) and a footer.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    # Banner/Header
    with gr.Row():
        # Fallback to CDN URL if local banner is missing or is just a small Git LFS pointer file
        if BANNER_PATH.exists() and BANNER_PATH.stat().st_size > 5000:
            banner_source = str(BANNER_PATH)
        else:
            banner_source = "https://huggingface.co/lagosproject/quevedo/resolve/main/assets/banner.png"
        gr.Image(banner_source, show_label=False, container=False, interactive=False)

    with gr.Row(elem_classes=["header-area"]):
        gr.HTML(
            "<h1 class='header-title'>🗣️ Quevedo Voice Model (so-vits-svc-fork)</h1>"
            "<p class='header-desc'>Convert any voice or singing file into the voice of the Spanish singer Quevedo.</p>"
        )

    # Main conversion section
    with gr.Row(elem_classes=["main-box"]):
        with gr.Column(scale=1):
            gr.Markdown("### 📥 1. Audio Input")
            input_audio = gr.Audio(
                label="Audio to Convert (Clean Vocals / Acapella)",
                type="filepath",
                sources=["upload", "microphone"]
            )

            gr.Markdown("### ⚙️ 2. Conversion Parameters")
            speaker = gr.Dropdown(
                choices=speakers,
                # Guard the preselection so an empty speaker list cannot raise
                # IndexError while the UI is being built.
                value=speakers[0] if speakers else None,
                label="Speaker Name"
            )

            transpose = gr.Slider(
                minimum=-12,
                maximum=12,
                value=0,
                step=1,
                label="Pitch Shift (Semitones)",
                # Negative values lower the pitch, so the help text says
                # "decrease" for the negative example range.
                info="Decrease for female-to-male voices (e.g. -5 to -12), or increase for male-to-female."
            )

            with gr.Accordion("Advanced Options", open=False):
                auto_predict_f0 = gr.Checkbox(
                    value=False,
                    label="Auto Predict F0",
                    info="Recommended for speech/narration. UNCHECK for singing to preserve notes."
                )

                f0_method = gr.Dropdown(
                    choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
                    value="crepe",
                    label="F0 Predictor Algorithm",
                    info="crepe offers the best quality but is slower; dio is the fastest."
                )

                noise_scale = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.4,
                    step=0.05,
                    label="Noise Scale",
                    info="Controls pitch variance and expressiveness (0.4 is standard)."
                )

        with gr.Column(scale=1):
            gr.Markdown("### 📤 3. Output Audio")
            output_audio = gr.Audio(
                label="Converted Audio",
                type="filepath"
            )

            status_output = gr.Textbox(
                label="Status",
                value="Ready",
                interactive=False
            )

            submit_btn = gr.Button(
                "Convert Voice 🚀",
                variant="primary",
                elem_classes=["convert-btn"]
            )

            # Input order must match convert_voice's positional parameters;
            # its (path, message) result fills the audio player and status box.
            submit_btn.click(
                fn=convert_voice,
                inputs=[input_audio, speaker, transpose, auto_predict_f0, f0_method, noise_scale],
                outputs=[output_audio, status_output]
            )

    # Footer
    gr.HTML(
        "<div style='text-align: center; color: #4b5563; font-size: 0.85rem; padding: 20px 0;'>"
        "This model is for artistic demonstration and research purposes only. "
        "Uses so-vits-svc-fork for inference.<br>"
        "Developed with 💜 for the open voice community.</div>"
    )

if __name__ == "__main__":
    # Start the web server only when executed as a script: listen on all
    # network interfaces, port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)