"""
RVC Voice Conversion – HuggingFace Space

Simple, fast, GPU/CPU auto-detected.
"""
from __future__ import annotations

import os
import subprocess
import tempfile
import shutil
from pathlib import Path

import gradio as gr
import numpy as np

from lib.config import (
    BUILTIN_MODELS,
    CSS,
    DEVICE_LABEL,
    MAX_INPUT_DURATION,
    logger,
)
from lib.jobs import (
    get_jobs_table,
    get_queue_info,
    poll_job,
    submit_job,
)
from lib.models import list_models, startup_downloads
from lib.ui import refresh_models, toggle_autotune, upload_model

# ── Startup ───────────────────────────────────────────────────────────────────
startup_status = ""
default_model = ""
try:
    default_model = startup_downloads()
    startup_status = f"✅ Ready &nbsp;·&nbsp; {DEVICE_LABEL}"
except Exception as e:
    startup_status = f"⚠️ Some assets unavailable: {e} &nbsp;·&nbsp; {DEVICE_LABEL}"
    logger.warning("Startup download issue: %s", e)

initial_models = list_models()
initial_value = default_model if default_model in initial_models else (
    initial_models[0] if initial_models else None
)


# ── Função para processar vídeo ───────────────────────────────────────────────
def process_video(video_file, model, pitch, f0_method, index_rate, protect, vol_env,
                  clean, clean_strength, split, autotune, autotune_strength,
                  filter_radius, output_format, reverb, reverb_room, reverb_damp, reverb_wet):
    """Processa vídeo e retorna ZIP com 5 outputs."""
    if video_file is None:
        return None, "No video file provided"
    
    # Criar diretório temporário
    output_dir = Path(tempfile.mkdtemp())
    
    # Extrair áudio
    audio_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
    cmd = ["ffmpeg", "-i", video_file, "-q:a", "0", "-map", "a", "-y", audio_path]
    subprocess.run(cmd, check=True, capture_output=True)
    
    # Salvar entrada.wav
    entrada_wav = output_dir / "entrada.wav"
    shutil.copy2(audio_path, entrada_wav)
    
    # Criar versões (simplificado)
    entrada_acapella = output_dir / "entrada_acapella.wav"
    entrada_instrumental = output_dir / "entrada_instrumental.wav"
    shutil.copy2(audio_path, entrada_acapella)
    
    import soundfile as sf
    data, sr = sf.read(audio_path)
    silent = np.zeros_like(data)
    sf.write(entrada_instrumental, silent, sr)
    
    # Converter com RVC
    status, converted = submit_job(
        None, str(entrada_acapella), model, pitch, f0_method,
        index_rate, protect, vol_env, clean, clean_strength,
        split, autotune, autotune_strength, filter_radius,
        output_format, reverb, reverb_room, reverb_damp, reverb_wet
    )
    
    if not converted:
        return None, f"Conversion failed: {status}"
    
    # Salvar outputs
    saida_acapella = output_dir / "saida_acapella.wav"
    shutil.copy2(converted, saida_acapella)
    
    # Mixar
    rvc_audio, rvc_sr = sf.read(saida_acapella)
    inst_audio, inst_sr = sf.read(entrada_instrumental)
    if rvc_sr != inst_sr:
        from scipy import signal
        inst_audio = signal.resample(inst_audio, int(len(inst_audio) * rvc_sr / inst_sr))
    min_len = min(len(rvc_audio), len(inst_audio))
    mixed = rvc_audio[:min_len] + inst_audio[:min_len]
    mixed = mixed * (0.95 / max(np.abs(mixed))) if max(np.abs(mixed)) > 0.95 else mixed
    saida_wav = output_dir / "saida.wav"
    sf.write(saida_wav, mixed, rvc_sr)
    
    # Criar ZIP
    zip_path = output_dir / "outputs.zip"
    import zipfile
    with zipfile.ZipFile(zip_path, 'w') as z:
        z.write(saida_acapella, "saida_acapella.wav")
        z.write(saida_wav, "saida.wav")
        z.write(entrada_acapella, "entrada_acapella.wav")
        z.write(entrada_wav, "entrada.wav")
        z.write(entrada_instrumental, "entrada_instrumental.wav")
    
    return str(zip_path), "✅ Conversion complete! ZIP with 5 files ready."


# ── Função para refresh completo dos modelos ──────────────────────────────────
def refresh_all_models():
    """Atualiza todos os dropdowns de modelos."""
    models = list_models()
    models_table_data = [[m] for m in models]
    first_model = models[0] if models else None
    return models_table_data, gr.Dropdown(choices=models, value=first_model), gr.Dropdown(choices=models, value=first_model)


# ── Gradio UI ─────────────────────────────────────────────────────────────────
with gr.Blocks(title="RVC Voice Conversion", delete_cache=(3600, 3600)) as demo:

    gr.HTML(f"""
    <div id="header">
        <h1>🎙️ RVC Voice Conversion</h1>
        <p>Retrieval-Based Voice Conversion · record or upload · custom models · GPU/CPU auto</p>
    </div>
    <p id="status">{startup_status}</p>
    """)

    with gr.Tabs():

        # ── TAB 1: Convert ────────────────────────────────────────────────────
        with gr.Tab("🎤 Convert"):
            with gr.Row():

                with gr.Column(scale=1):
                    gr.Markdown("### 🔊 Input Audio")
                    with gr.Tabs():
                        with gr.Tab("🎙️ Microphone"):
                            inp_mic = gr.Audio(
                                sources=["microphone"],
                                type="filepath",
                                label="Record",
                            )
                        with gr.Tab("📁 Upload File"):
                            inp_file = gr.Audio(
                                sources=["upload"],
                                type="filepath",
                                label="Upload audio (wav / mp3 / flac / ogg …)",
                            )

                    gr.Markdown("### 🤖 Model")
                    model_dd = gr.Dropdown(
                        choices=initial_models,
                        value=initial_value,
                        label="Active Voice Model",
                        interactive=True,
                    )

                    gr.Markdown("### 🎚️ Basic Settings")
                    pitch_sl = gr.Slider(
                        minimum=-24, maximum=24, value=0, step=1,
                        label="Pitch Shift (semitones)",
                        info="0 = unchanged · positive = higher · negative = lower",
                    )
                    f0_radio = gr.Radio(
                        choices=["rmvpe", "fcpe", "crepe", "crepe-tiny"],
                        value="rmvpe",
                        label="Pitch Extraction Method",
                        info="rmvpe = fastest & accurate · crepe = highest quality (slower)",
                    )

                with gr.Column(scale=1):
                    gr.Markdown("### ⚙️ Advanced Settings")
                    with gr.Accordion("Expand advanced options", open=False):
                        index_rate_sl = gr.Slider(
                            0.0, 1.0, value=0.75, step=0.05,
                            label="Index Rate",
                            info="How strongly the FAISS index influences timbre (0 = off)",
                        )
                        protect_sl = gr.Slider(
                            0.0, 0.5, value=0.5, step=0.01,
                            label="Protect Consonants",
                            info="Protects unvoiced consonants — 0.5 = max protection",
                        )
                        filter_radius_sl = gr.Slider(
                            0, 7, value=3, step=1,
                            label="Respiration Filter Radius",
                            info="Median filter on pitch — higher = smoother, reduces breath noise",
                        )
                        vol_env_sl = gr.Slider(
                            0.0, 1.0, value=0.25, step=0.05,
                            label="Volume Envelope Mix",
                            info="0.25 = natural blend · 1 = preserve input loudness · 0 = model output",
                        )
                        with gr.Row():
                            clean_cb = gr.Checkbox(value=False, label="Noise Reduction")
                            clean_sl = gr.Slider(
                                0.0, 1.0, value=0.5, step=0.05,
                                label="Reduction Strength",
                            )
                        with gr.Row():
                            split_cb = gr.Checkbox(value=False, label="Split Long Audio")
                            autotune_cb = gr.Checkbox(value=False, label="Autotune")
                            autotune_sl = gr.Slider(
                                0.0, 1.0, value=1.0, step=0.05,
                                label="Autotune Strength",
                                visible=False,
                            )
                            autotune_cb.change(
                                fn=toggle_autotune,
                                inputs=autotune_cb,
                                outputs=autotune_sl,
                            )

                    gr.Markdown("**🎛️ Reverb**")
                    reverb_cb = gr.Checkbox(value=False, label="Enable Reverb")
                    with gr.Group(visible=False) as reverb_group:
                        reverb_room_sl = gr.Slider(
                            0.0, 1.0, value=0.15, step=0.05,
                            label="Room Size",
                            info="Larger = bigger sounding space",
                        )
                        reverb_damp_sl = gr.Slider(
                            0.0, 1.0, value=0.7, step=0.05,
                            label="Damping",
                            info="Higher = more absorption, less echo tail",
                        )
                        reverb_wet_sl = gr.Slider(
                            0.0, 1.0, value=0.15, step=0.05,
                            label="Wet Level",
                            info="How much reverb is mixed in (0.15 = subtle)",
                        )
                    reverb_cb.change(
                        fn=lambda v: gr.update(visible=v),
                        inputs=reverb_cb,
                        outputs=reverb_group,
                    )

                    fmt_radio = gr.Radio(
                        choices=["WAV", "MP3", "FLAC", "OPUS"],
                        value="WAV",
                        label="Output Format",
                        info="OPUS = small file (~64 kbps, Telegram/Discord quality)",
                    )
                    convert_btn = gr.Button(
                        "🚀 Convert Voice",
                        variant="primary",
                    )

                    gr.Markdown("### 🎧 Output")
                    out_status = gr.Markdown(value="")
                    out_audio = gr.Audio(label="Result (if still on page)", type="filepath", interactive=False)

                    gr.Markdown("#### 🔍 Check Job Status")
                    with gr.Row():
                        job_id_box = gr.Textbox(
                            label="Job ID",
                            placeholder="e.g. a3f2b1c9",
                            scale=3,
                        )
                        poll_btn = gr.Button("🔄 Check", scale=1)
                    poll_status = gr.Markdown(value="")
                    poll_audio = gr.Audio(label="Result", type="filepath", interactive=False)

        # ── TAB 2: Video Convert ────────────────────────────────────────────────
        with gr.Tab("🎬 Video Convert"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 🎥 Input Video")
                    video_file = gr.Video(label="Upload MP4 Video", sources=["upload"], format="mp4")
                    
                    gr.Markdown("### 🤖 Model")
                    video_model = gr.Dropdown(
                        choices=initial_models,
                        value=initial_value,
                        label="Active Voice Model",
                        interactive=True,
                    )
                    
                    gr.Markdown("### 🎚️ Basic Settings")
                    video_pitch = gr.Slider(-24, 24, value=0, step=1, label="Pitch Shift")
                    video_f0 = gr.Radio(["rmvpe", "fcpe", "crepe", "crepe-tiny"], value="rmvpe", label="Pitch Method")
                    
                with gr.Column(scale=1):
                    gr.Markdown("### ⚙️ Advanced Settings")
                    with gr.Accordion("Expand", open=False):
                        video_index = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Index Rate")
                        video_protect = gr.Slider(0.0, 0.5, value=0.5, step=0.01, label="Protect Consonants")
                        video_filter = gr.Slider(0, 7, value=3, step=1, label="Filter Radius")
                        video_vol = gr.Slider(0.0, 1.0, value=0.25, step=0.05, label="Volume Envelope")
                        video_clean = gr.Checkbox(value=False, label="Noise Reduction")
                        video_clean_strength = gr.Slider(0.0, 1.0, value=0.5, step=0.05, label="Clean Strength")
                        video_split = gr.Checkbox(value=False, label="Split Long Audio")
                        video_autotune = gr.Checkbox(value=False, label="Autotune")
                        video_autotune_strength = gr.Slider(0.0, 1.0, value=1.0, step=0.05, label="Autotune Strength")
                    
                    gr.Markdown("**🎛️ Reverb**")
                    video_reverb = gr.Checkbox(value=False, label="Enable Reverb")
                    video_reverb_room = gr.Slider(0.0, 1.0, value=0.15, step=0.05, label="Room Size")
                    video_reverb_damp = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Damping")
                    video_reverb_wet = gr.Slider(0.0, 1.0, value=0.15, step=0.05, label="Wet Level")
                    
                    video_format = gr.Radio(["WAV", "MP3", "FLAC", "OPUS"], value="WAV", label="Output Format")
                    video_convert_btn = gr.Button("🎬 Convert Video", variant="primary")
                    
                    gr.Markdown("### 📦 Output")
                    video_status = gr.Markdown(value="")
                    video_output = gr.File(label="Download ZIP (5 audio files)", type="filepath")

        # ── TAB 3: Models ─────────────────────────────────────────────────────
        with gr.Tab("📦 Models"):
            gr.Markdown("""
            ### Upload a Custom RVC Model
            Provide a **`.zip`** containing:
            - **`model.pth`** — weights (required)
            - **`model.index`** — FAISS index (optional, improves voice matching)

            **Built-in models** (pre-downloaded on startup):
            Vestia Zeta v1 · Vestia Zeta v2 · Ayunda Risu · Gawr Gura
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    up_zip = gr.File(label="Model ZIP", file_types=[".zip"])
                    up_name = gr.Textbox(
                        label="Model Name",
                        placeholder="Leave blank to use zip filename",
                    )
                    up_btn = gr.Button("📤 Load Model", variant="primary")
                    up_status = gr.Textbox(label="Status", interactive=False, lines=2)
                with gr.Column(scale=1):
                    gr.Markdown("### Loaded Models")
                    models_table = gr.Dataframe(
                        col_count=(1, "fixed"),
                        value=[[m] for m in initial_models],
                        interactive=False,
                        label="",
                    )
                    refresh_btn = gr.Button("🔄 Refresh")

            up_btn.click(
                fn=upload_model,
                inputs=[up_zip, up_name],
                outputs=[up_status, model_dd, models_table],
            )
            
            # Refresh button atualiza todos os modelos (incluindo o video_model)
            refresh_btn.click(
                fn=refresh_all_models,
                outputs=[models_table, model_dd, video_model],
            )

        # ── TAB 4: Jobs ───────────────────────────────────────────────────────
        with gr.Tab("📋 Jobs"):
            gr.Markdown("All submitted jobs, newest first. Click **Refresh** to update.")
            queue_status = gr.Markdown(value=get_queue_info, every=10)
            jobs_table = gr.Dataframe(
                headers=["Job ID", "Model", "Status", "Time", "Download"],
                col_count=(5, "fixed"),
                value=get_jobs_table,
                interactive=False,
                wrap=True,
                datatype=["str", "str", "str", "str", "markdown"],
                every=10,
            )
            refresh_jobs_btn = gr.Button("🔄 Refresh")

            def _refresh_jobs():
                return get_queue_info(), get_jobs_table()

            refresh_jobs_btn.click(fn=_refresh_jobs, outputs=[queue_status, jobs_table])

        # ── TAB 5: Help ───────────────────────────────────────────────────────
        with gr.Tab("ℹ️ Help"):
            gr.Markdown(f"""
            ## How it works
            RVC (Retrieval-Based Voice Conversion) transforms a voice recording to sound
            like a target speaker using only that speaker's model file.

            ---

            ## Quick Guide
            1. Open the **Convert** tab
            2. **Record** via microphone or **upload** an audio file (wav, mp3, flac, ogg …)
            3. Choose a **model** from the dropdown — 4 models are pre-loaded on startup
            4. Set **Pitch Shift** if needed (e.g. male → female: try +12 semitones)
            5. Click **🚀 Convert Voice** and wait for the result

            ---

            ## Built-in Models
            | Model | Description |
            |---|---|
            | **Vestia Zeta v1** | Hololive ID VTuber, v1 model |
            | **Vestia Zeta v2** | Hololive ID VTuber, v2 model (recommended) |
            | **Ayunda Risu** | Hololive ID VTuber |
            | **Gawr Gura** | Hololive EN VTuber |

            ---

            ## Pitch Extraction Methods
            | Method | Speed | Quality | Best for |
            |---|---|---|---|
            | **rmvpe** | ⚡⚡⚡ | ★★★★ | General use (default) |
            | **fcpe** | ⚡⚡ | ★★★★ | Singing |
            | **crepe** | ⚡ | ★★★★★ | Highest quality, slow |
            | **crepe-tiny** | ⚡⚡ | ★★★ | Low resource |

            ---

            ## Advanced Settings
            | Setting | Description |
            |---|---|
            | **Index Rate** | Influence of FAISS index on output timbre (0.75 recommended) |
            | **Protect Consonants** | Prevents artefacts on consonants (0.5 = max) |
            | **Respiration Filter Radius** | Smooths pitch curve — higher reduces breath noise (0–7, default 3) |
            | **Volume Envelope Mix** | 0.25 = natural blend · 1 = preserve input loudness |
            | **Noise Reduction** | Removes background noise before conversion |
            | **Split Long Audio** | Chunks audio for recordings > 60 s |
            | **Autotune** | Snaps pitch to nearest musical note |

            ---

            ## Output Formats
            | Format | Size | Quality |
            |---|---|---|
            | **WAV** | Large | Lossless |
            | **FLAC** | Medium | Lossless compressed |
            | **MP3** | Small | Lossy |
            | **OPUS** | Tiny (~64 kbps) | Telegram/Discord quality |

            ---

            **Device:** `{DEVICE_LABEL}`
            **Max input duration:** {MAX_INPUT_DURATION // 60} minutes

            ---

            ## Credits
            Engine: [Ultimate RVC](https://github.com/JackismyShephard/ultimate-rvc)
            """)

    # Wire convert button after all tabs
    def _submit_and_extract_id(*args):
        import re
        status, audio = submit_job(*args)
        match = re.search(r"[a-f0-9]{8}", status or "")
        job_id = match.group(0) if match else ""
        return status, audio, job_id, get_queue_info(), get_jobs_table()

    convert_btn.click(
        fn=_submit_and_extract_id,
        inputs=[
            inp_mic, inp_file, model_dd,
            pitch_sl, f0_radio,
            index_rate_sl, protect_sl, vol_env_sl,
            clean_cb, clean_sl,
            split_cb, autotune_cb, autotune_sl,
            filter_radius_sl,
            fmt_radio,
            reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl,
        ],
        outputs=[out_status, out_audio, job_id_box, queue_status, jobs_table],
    )

    def _poll_and_refresh(job_id):
        status, file = poll_job(job_id)
        return status, file, get_queue_info(), get_jobs_table()

    poll_btn.click(
        fn=_poll_and_refresh,
        inputs=[job_id_box],
        outputs=[poll_status, poll_audio, queue_status, jobs_table],
    )
    
    # Video convert
    video_convert_btn.click(
        fn=process_video,
        inputs=[
            video_file, video_model, video_pitch, video_f0,
            video_index, video_protect, video_vol,
            video_clean, video_clean_strength,
            video_split, video_autotune, video_autotune_strength,
            video_filter, video_format,
            video_reverb, video_reverb_room, video_reverb_damp, video_reverb_wet
        ],
        outputs=[video_output, video_status]
    )


# ── Launch ────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    demo.queue(default_concurrency_limit=5)
    demo.launch(
        server_name="0.0.0.0",
        server_port=int(os.getenv("PORT", 7860)),
        max_threads=10,
        ssr_mode=False,
        css=CSS,
    )