"""Anima 2B Image Generation (ZeroGPU) — sd.cpp runtime build"""

import os, re, time, shutil, subprocess
import spaces
import gradio as gr
from huggingface_hub import hf_hub_download

# ---------------------------------------------------------------------------
# Compile sd.cpp with CUDA at runtime (first start only; the resulting binary
# is cached in the app directory)
# ---------------------------------------------------------------------------
SD_CLI = "/home/user/app/sd_bin"   # where the finished executable is cached
SD_SRC_DIR = "/tmp/sd_src"         # git checkout of stable-diffusion.cpp
SD_BLD_DIR = "/tmp/sd_build"       # CMake build tree

def build_sd_cli() -> str:
    """Build the stable-diffusion.cpp command-line binary and cache it.

    If an executable already exists at ``SD_CLI`` it is reused. Otherwise the
    upstream repository is cloned into ``SD_SRC_DIR``, configured and built
    with CMake in ``SD_BLD_DIR``, and the resulting binary is installed at
    ``SD_CLI``.

    Returns:
        The path of the cached executable (``SD_CLI``).

    Raises:
        subprocess.CalledProcessError: if git or cmake exits non-zero.
        RuntimeError: if the build produced no binary with a known name.
    """
    if os.path.isfile(SD_CLI) and os.access(SD_CLI, os.X_OK):
        print("[init] sd binary already exists, skipping build.")
        return SD_CLI

    # An interrupted earlier start may have left partial trees behind, and
    # `git clone` refuses to write into a non-empty directory. Start clean.
    shutil.rmtree(SD_SRC_DIR, ignore_errors=True)
    shutil.rmtree(SD_BLD_DIR, ignore_errors=True)

    print("[init] Cloning stable-diffusion.cpp (with submodules) ...")
    subprocess.run(
        ["git", "clone",
         "--recurse-submodules",
         "--depth=1",
         "--shallow-submodules",
         "https://github.com/leejet/stable-diffusion.cpp",
         SD_SRC_DIR],
        check=True,
    )

    print("[init] CMake configure (CUDA) ...")
    os.makedirs(SD_BLD_DIR, exist_ok=True)
    subprocess.run(
        ["cmake", "-B", SD_BLD_DIR,
         # NOTE(review): upstream appears to have renamed the CUDA switch from
         # SD_CUBLAS to SD_CUDA. Both are passed so either revision enables
         # the GPU backend; CMake only warns about the one it does not know.
         # Verify against the upstream CMakeLists.
         "-DSD_CUDA=ON",
         "-DSD_CUBLAS=ON",
         "-DSD_BUILD_EXAMPLES=ON",
         "-DCMAKE_BUILD_TYPE=Release",
         SD_SRC_DIR],
        check=True,
    )

    print("[init] Building ...")
    subprocess.run(
        ["cmake", "--build", SD_BLD_DIR,
         "--config", "Release",
         "-j", str(os.cpu_count() or 4)],
        check=True,
    )

    # Locate the produced binary by walking the build tree, because its
    # location inside the tree is not fixed. Regular, executable files only.
    candidates = []
    for root, _dirs, files in os.walk(SD_BLD_DIR):
        for name in files:
            path = os.path.join(root, name)
            if (os.path.isfile(path) and not os.path.islink(path)
                    and os.access(path, os.X_OK)):
                candidates.append(path)
    candidates.sort()  # deterministic choice when a name occurs more than once
    print(f"[init] Built executables: {candidates}")

    by_name = {}
    for path in candidates:
        by_name.setdefault(os.path.basename(path), path)

    # Prefer the CLI under either of its names ("sd", or "sd-cli" on newer
    # trees -- NOTE(review): confirm upstream naming); the server binary stays
    # as the last resort it always was.
    sd_bin = next(
        (by_name[name] for name in ("sd", "sd-cli", "sd-server") if name in by_name),
        None,
    )

    if not sd_bin:
        raise RuntimeError(f"sd binary not found.\nCandidates: {candidates}")

    # Stage next to the destination and rename, so an interrupted copy can
    # never leave a truncated file at SD_CLI for the next start to pick up.
    os.makedirs(os.path.dirname(SD_CLI), exist_ok=True)
    staging = SD_CLI + ".tmp"
    shutil.copy2(sd_bin, staging)
    os.chmod(staging, 0o755)
    os.replace(staging, SD_CLI)
    print(f"[init] Build complete: {SD_CLI}  (source: {sd_bin})")
    return SD_CLI


def detect_flags(sd_bin: str) -> dict:
    """Work out which option spellings this sd build accepts.

    Runs ``sd_bin --help``, logs the combined stdout/stderr, and for each
    logical option returns the first known spelling that appears in that text
    as a standalone token. When none appears, the first spelling in the list
    is used as the default.

    Args:
        sd_bin: path of the executable to probe.

    Returns:
        Mapping with the keys ``output``, ``prompt``, ``neg``, ``width``,
        ``height`` and ``seed``, each holding the flag string to use.
    """
    proc = subprocess.run([sd_bin, "--help"], capture_output=True, text=True)
    usage = proc.stdout + proc.stderr

    # Dump the complete help text to the log (debugging aid).
    print(f"[init] === sd --help ===\n{usage}\n===================")

    def pick(candidates: list) -> str:
        """Return the first candidate present in the help text, else candidates[0]."""
        hit = next(
            (
                option
                for option in candidates
                # The flag must start a line or follow whitespace, and be
                # followed by whitespace, a comma, "]" or the end of a line.
                if re.search(
                    r'(?:^|\s)' + re.escape(option) + r'(?:\s|,|\]|$)',
                    usage,
                    re.MULTILINE,
                )
            ),
            None,
        )
        # Nothing matched: fall back to the first spelling.
        return candidates[0] if hit is None else hit

    spellings = (
        ("output", ["--output", "-o", "--out"]),
        ("prompt", ["--prompt", "-p"]),
        ("neg", ["--negative-prompt", "-n", "--neg-prompt"]),
        ("width", ["--width", "-W"]),
        ("height", ["--height", "-H"]),
        ("seed", ["--seed", "-s"]),
    )
    flags = {key: pick(options) for key, options in spellings}
    print(f"[init] Detected flags: {flags}")
    return flags


# Build (or reuse) the CLI binary, then probe its option names.
# The --help output is parsed on every start-up.
sd_cli = build_sd_cli()
SD_FLAGS = detect_flags(sd_cli)

# ---------------------------------------------------------------------------
# Model download
# ---------------------------------------------------------------------------
MODELS_DIR = "/tmp/anima_models"   # diffusion model, text encoder and VAE
LORA_DIR = "/tmp/loras"            # directory handed to --lora-model-dir
os.makedirs(MODELS_DIR, exist_ok=True)
os.makedirs(LORA_DIR, exist_ok=True)

def ensure_model(repo_id: str, filename: str, subdir: str = "") -> str:
    """Make sure a model file is present under ``MODELS_DIR`` and return its path.

    If the file is already there it is returned as-is. Otherwise it is fetched
    with ``hf_hub_download`` and copied into ``MODELS_DIR``.

    Args:
        repo_id: Hugging Face repository to download from.
        filename: name of the file; also the name used inside ``MODELS_DIR``.
        subdir: optional directory inside the repository that holds the file.

    Returns:
        Path of the local copy inside ``MODELS_DIR``.
    """
    dest = os.path.join(MODELS_DIR, filename)
    if os.path.exists(dest):
        return dest
    print(f"[init] Downloading {repo_id}/{filename} ...")
    src = hf_hub_download(
        repo_id=repo_id,
        filename=f"{subdir}/{filename}" if subdir else filename,
    )
    # Copy under a temporary name and rename afterwards. An interrupted copy
    # must not leave a truncated file at `dest`, because the exists() check
    # above would treat it as a complete model on the next start.
    os.makedirs(os.path.dirname(dest), exist_ok=True)
    partial = dest + ".part"
    shutil.copy2(src, partial)
    os.replace(partial, dest)
    return dest

# Fetch every file the CLI needs before the UI starts, and time the whole step.
print("[init] Ensuring model files ...")
t0 = time.time()
diffusion_path = ensure_model("JusteLeo/Anima2-GGUF", "anima-preview2_q4_K_M.gguf")
llm_path = ensure_model(
    "circlestone-labs/Anima",
    "qwen_3_06b_base.safetensors",
    subdir="split_files/text_encoders",
)
vae_path = ensure_model(
    "circlestone-labs/Anima",
    "qwen_image_vae.safetensors",
    subdir="split_files/vae",
)
# The LoRA lives in its own directory under a shorter file name; that name is
# the one the "<lora:anima_turbo_8step:1.0>" prompt tag in generate() refers to.
_lora_src = hf_hub_download(
    repo_id="Einhorn/Anima-Preview2-Turbo-LoRA",
    filename="anima_preview2_turbo_8step.safetensors",
)
lora_path = os.path.join(LORA_DIR, "anima_turbo_8step.safetensors")
if not os.path.exists(lora_path):
    shutil.copy2(_lora_src, lora_path)
print(f"[init] Models ready in {time.time()-t0:.1f}s")

# ---------------------------------------------------------------------------
# ZeroGPU 推論（subprocess + CUDA sd binary）
# ---------------------------------------------------------------------------
from PIL import Image
import tempfile

# Choices offered in the resolution dropdown, written as "<width>x<height>".
RESOLUTIONS = [
    "512x512",
    "768x768",
    "1024x1024",
    "1024x768",
    "768x1024",
]

@spaces.GPU(duration=60)
def generate(prompt: str, negative_prompt: str, resolution: str,
             steps: int, cfg_scale: float, seed: int):
    """Render one image by running the sd CLI and return it with a status line.

    Args:
        prompt: positive prompt; must contain non-whitespace text.
        negative_prompt: negative prompt; may be empty or None.
        resolution: ``"<width>x<height>"`` string, e.g. ``"512x512"``.
        steps: number of sampling steps (converted with ``int``).
        cfg_scale: CFG scale (converted with ``float``).
        seed: RNG seed; ``None`` or any negative value requests a random seed.

    Returns:
        ``(image, status)``: the generated PIL image, fully loaded into
        memory, and a human-readable summary string.

    Raises:
        gr.Error: on an empty prompt, a non-zero CLI exit code, a missing or
            empty output file, a timeout, or any other unexpected failure.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("プロンプトを入力してください。")

    w, h = (int(x) for x in resolution.split("x"))
    # An emptied Seed box may arrive as None; treat it like a negative value.
    seed_val = -1 if seed is None else int(seed)
    if seed_val < 0:
        seed_val = -1

    # Reserve a unique output path; the CLI overwrites the empty placeholder.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        output_path = tmp.name

    flags = SD_FLAGS
    cmd = [
        sd_cli,
        "--diffusion-model",    diffusion_path,
        "--llm",                llm_path,
        "--vae",                vae_path,
        "--lora-model-dir",     LORA_DIR,
        flags["prompt"],        f"<lora:anima_turbo_8step:1.0> {prompt}",
        flags["neg"],           negative_prompt or "",
        flags["width"],         str(w),
        flags["height"],        str(h),
        "--steps",              str(int(steps)),
        "--cfg-scale",          str(float(cfg_scale)),
        "--sampling-method",    "euler",
        flags["output"],        output_path,
        # Always send the seed. When the flag is omitted the CLI uses its own
        # built-in default seed instead of a random one, so "random" has to be
        # requested explicitly with a negative value.
        # NOTE(review): per the sd.cpp CLI help (negative seed = random).
        flags["seed"],          str(seed_val),
        "--diffusion-fa",
        "--vae-tiling",
        "-v",
    ]

    print(f"[gen] cmd: {' '.join(cmd)}")
    t0 = time.time()
    try:
        # NOTE(review): this 1800 s timeout is far longer than the 60 s
        # requested in @spaces.GPU above -- confirm which limit is intended.
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=1800)
        elapsed = time.time() - t0

        if result.returncode != 0:
            # Log the complete stdout/stderr for debugging; the user only
            # sees the tail of stderr.
            print(f"[gen] stdout:\n{result.stdout}")
            print(f"[gen] stderr:\n{result.stderr}")
            err = result.stderr[-800:] if result.stderr else "Unknown error"
            raise gr.Error(f"sd failed (code {result.returncode}): {err}")

        # The placeholder created above is empty, so size 0 means the CLI
        # never wrote an image.
        if not os.path.exists(output_path) or os.path.getsize(output_path) == 0:
            raise gr.Error("画像の生成に失敗しました。")

        # Image.open is lazy; copy the pixels into memory so the temp file
        # can be deleted before the image is handed back to Gradio.
        with Image.open(output_path) as opened:
            img = opened.copy()
        status = f"Generated in {elapsed:.1f}s ({w}×{h}, {steps} steps, cfg {cfg_scale})"
        print(f"[gen] {status}")
        return img, status

    except subprocess.TimeoutExpired as exc:
        raise gr.Error("タイムアウト（30分制限）") from exc
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"エラー: {e}") from e
    finally:
        # Best-effort cleanup so requests do not pile up PNG files in the
        # temp directory; a file that is already gone is not an error.
        try:
            os.remove(output_path)
        except OSError:
            pass

# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
# Two-column layout: generation inputs on the left, result image and status
# text on the right. Components are created in the order they appear.
with gr.Blocks(title="Anima 2B (ZeroGPU)", theme="NoCrypt/miku") as demo:
    # Page header.
    gr.Markdown(
        "# Anima 2B Image Generation (ZeroGPU)\n"
        "Generate anime images with [Anima 2B](https://huggingface.co/circlestone-labs/Anima) "
        "+ [Turbo LoRA](https://huggingface.co/Einhorn/Anima-Preview2-Turbo-LoRA) (8 steps). "
        "Running on **ZeroGPU (H200)**."
    )
    with gr.Row():
        # Left column: inputs passed to generate().
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Prompt", lines=3,
                placeholder="anime girl with silver hair, fantasy armor, dramatic lighting",
            )
            neg_input = gr.Textbox(
                label="Negative Prompt", lines=2,
                value="lowres, bad anatomy, bad hands, text, error, worst quality, blurry, censored",
            )
            res_input   = gr.Dropdown(choices=RESOLUTIONS, value="512x512", label="Resolution")
            with gr.Row():
                steps_input = gr.Slider(minimum=4, maximum=30, value=8,   step=1,   label="Steps")
                cfg_input   = gr.Slider(minimum=1.0, maximum=10.0, value=1.0, step=0.5, label="CFG Scale")
                # Defaults to -1; generate() treats negative seeds as "no fixed seed".
                seed_input  = gr.Number(value=-1, label="Seed", precision=0)
            gen_btn = gr.Button("Generate", variant="primary", size="lg")
        # Right column: outputs filled in by generate().
        with gr.Column():
            output_img = gr.Image(type="pil", label="Output")
            status_box = gr.Textbox(label="Status", interactive=False)

    # The input order must match generate()'s parameter order, and the output
    # order must match its (image, status) return value.
    gen_btn.click(
        fn=generate,
        inputs=[prompt_input, neg_input, res_input, steps_input, cfg_input, seed_input],
        outputs=[output_img, status_box],
    )
    # Page footer with model and runtime links.
    gr.Markdown(
        "---\n"
        "Anima 2B Q4_K_M + Turbo LoRA (8 steps) | "
        "[Model](https://huggingface.co/circlestone-labs/Anima) | "
        "[sd.cpp](https://github.com/leejet/stable-diffusion.cpp)"
    )

# Start the server on all interfaces, port 7860, with errors shown in the UI.
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)