import os
import gc
import gradio as gr
import numpy as np
import spaces
import torch
import random
from PIL import Image
from typing import Iterable
from gradio.themes import Soft
from gradio.themes.utils import colors, fonts, sizes

# ═══════════════════════════════════════════════════════════════════════
#  THEME
# ═══════════════════════════════════════════════════════════════════════

# Custom orange-red palette. It is attached to gradio's `colors` module so
# it can be referenced as `colors.fire_red`, like the built-in hues.
colors.fire_red = colors.Color(
    name="fire_red",
    **{
        "c50": "#FFF5F0",
        "c100": "#FFE8DB",
        "c200": "#FFD0B5",
        "c300": "#FFB088",
        "c400": "#FF8C5A",
        "c500": "#FF6B35",
        "c600": "#E8531F",
        "c700": "#CC4317",
        "c800": "#A63812",
        "c900": "#80300F",
        "c950": "#5C220A",
    },
)


class FireRedTheme(Soft):
    """Gradio ``Soft`` theme variant with the ``fire_red`` accent palette.

    All constructor arguments are keyword-only and are forwarded unchanged
    to ``Soft.__init__``. A fixed set of style variables is then applied
    through ``set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.gray,
        secondary_hue: colors.Color | str = colors.fire_red,
        neutral_hue: colors.Color | str = colors.slate,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Inter"),
            "system-ui",
            "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("JetBrains Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )

        # The primary-button gradients are identical in light and dark mode,
        # so each is spelled out once and reused.
        accent_gradient = "linear-gradient(135deg, *secondary_500, *secondary_600)"
        accent_gradient_hover = "linear-gradient(135deg, *secondary_600, *secondary_700)"

        overrides = {
            # Page and block surfaces
            "body_background_fill": "#f0f2f6",
            "body_background_fill_dark": "*neutral_950",
            "background_fill_primary": "white",
            "background_fill_primary_dark": "*neutral_900",
            "block_background_fill": "white",
            "block_background_fill_dark": "*neutral_800",
            "block_border_width": "1px",
            "block_border_color": "*neutral_200",
            "block_border_color_dark": "*neutral_700",
            "block_shadow": "0 1px 4px rgba(0,0,0,0.05)",
            "block_shadow_dark": "0 1px 4px rgba(0,0,0,0.25)",
            "block_title_text_weight": "600",
            "block_label_background_fill": "*neutral_50",
            "block_label_background_fill_dark": "*neutral_800",
            # Primary buttons
            "button_primary_text_color": "white",
            "button_primary_text_color_hover": "white",
            "button_primary_background_fill": accent_gradient,
            "button_primary_background_fill_hover": accent_gradient_hover,
            "button_primary_background_fill_dark": accent_gradient,
            "button_primary_background_fill_hover_dark": accent_gradient_hover,
            "button_primary_shadow": "0 4px 14px rgba(232, 83, 31, 0.25)",
            # Secondary buttons
            "button_secondary_text_color": "*secondary_700",
            "button_secondary_text_color_dark": "*secondary_300",
            "button_secondary_background_fill": "*secondary_50",
            "button_secondary_background_fill_hover": "*secondary_100",
            "button_secondary_background_fill_dark": "rgba(255, 107, 53, 0.1)",
            "button_secondary_background_fill_hover_dark": "rgba(255, 107, 53, 0.2)",
            "button_large_padding": "12px 24px",
            # Sliders, inputs and accents
            "slider_color": "*secondary_500",
            "slider_color_dark": "*secondary_500",
            "input_border_color_focus": "*secondary_400",
            "input_border_color_focus_dark": "*secondary_500",
            "color_accent_soft": "*secondary_50",
            "color_accent_soft_dark": "rgba(255, 107, 53, 0.15)",
        }
        super().set(**overrides)


# Single theme instance used when the Blocks app is built.
theme = FireRedTheme()

# ═══════════════════════════════════════════════════════════════════════
#  GLOBAL CUDA OPTIMIZATIONS
# ═══════════════════════════════════════════════════════════════════════

# cuDNN: turn on the autotuner (benchmarks convolution algorithms for the
# shapes it sees) and allow TF32 inside cuDNN kernels.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.allow_tf32 = True

# Matmul: allow TF32 for float32 matrix multiplications. The pipeline
# itself runs in bf16, so this only touches the remaining float32 matmuls.
torch.backends.cuda.matmul.allow_tf32 = True
torch.set_float32_matmul_precision("high")

# ═══════════════════════════════════════════════════════════════════════
#  MODEL
# ═══════════════════════════════════════════════════════════════════════

# Prefer the GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Startup diagnostics.
print(f"CUDA_VISIBLE_DEVICES = {os.environ.get('CUDA_VISIBLE_DEVICES')}")
print(f"torch.__version__    = {torch.__version__}")
print(f"device               = {device}")

from diffusers import FlowMatchEulerDiscreteScheduler
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

dtype = torch.bfloat16

# Load the transformer separately so it can be wrapped by torch.compile
# before it is plugged into the pipeline. It is placed on the device chosen
# above instead of a hard-coded "cuda", so the CPU fallback of `device`
# is honoured (on a CUDA machine str(device) is still "cuda").
transformer = QwenImageTransformer2DModel.from_pretrained(
    "prithivMLmods/Qwen-Image-Edit-Rapid-AIO-V23",
    torch_dtype=dtype,
    device_map=str(device),
)

# Wrap the denoising backbone with torch.compile for fused kernels.
# NOTE: torch.compile is lazy. This call only wraps the module; the actual
# compilation happens on the first forward pass. The try/except therefore
# only guards against the wrapper itself being unavailable (unsupported
# platform / torch build). Compile errors raised later surface during
# the first pipeline call, not here.
try:
    transformer = torch.compile(transformer, mode="reduce-overhead")
    print("torch.compile applied to transformer (reduce-overhead).")
except Exception as e:
    print(f"torch.compile skipped: {e}")

# Build the editing pipeline around the (possibly compiled) transformer.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "FireRedTeam/FireRed-Image-Edit-1.1",
    transformer=transformer,
    torch_dtype=dtype,
).to(device)

# Flash Attention 3 processor — fastest path when available. Failure is
# non-fatal: the transformer keeps whatever attention processor it had.
try:
    pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
    print("Flash Attention 3 Processor set successfully.")
except Exception as e:
    print(f"Warning: Could not set FA3 processor: {e}")

# VAE optimisations — process large images in tiles / slices to reduce
# peak memory on the decode step. Both are best-effort, but a failure is
# reported instead of being silently swallowed, so a missing optimisation
# is visible in the startup log.
try:
    pipe.vae.enable_tiling()
    print("VAE tiling enabled.")
except Exception as e:
    print(f"Warning: Could not enable VAE tiling: {e}")

try:
    pipe.vae.enable_slicing()
    print("VAE slicing enabled.")
except Exception as e:
    print(f"Warning: Could not enable VAE slicing: {e}")

# ── Warmup pass ─────────────────────────────────────────────────────
# Pay the one-off start-up costs (CUDA context, cuDNN autotuner,
# torch.compile tracing) now, with a single-step 64×64 dummy edit, rather
# than on the first user request. Any failure is logged and start-up
# continues.
print("Running warmup inference …")
try:
    _warmup_gen = torch.Generator(device=device).manual_seed(0)
    _warmup_img = Image.new("RGB", (64, 64), color=(128, 128, 128))
    with torch.inference_mode():
        pipe(
            prompt="warmup",
            negative_prompt="",
            image=[_warmup_img],
            width=64,
            height=64,
            num_inference_steps=1,
            true_cfg_scale=1.0,
            generator=_warmup_gen,
        )
    # Drop the temporaries and hand cached GPU memory back.
    del _warmup_img
    del _warmup_gen
    gc.collect()
    torch.cuda.empty_cache()
    print("Warmup complete.")
except Exception as e:
    print(f"Warmup skipped: {e}")

# Largest seed offered by the UI: the maximum signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Comma-separated list of artefacts the model is asked to avoid by default.
DEFAULT_NEGATIVE_PROMPT = ", ".join(
    (
        "worst quality",
        "low quality",
        "bad anatomy",
        "bad hands",
        "text",
        "error",
        "missing fingers",
        "extra digit",
        "fewer digits",
        "cropped",
        "jpeg artifacts",
        "signature",
        "watermark",
        "username",
        "blurry",
    )
)

# ═══════════════════════════════════════════════════════════════════════
#  HELPERS
# ═══════════════════════════════════════════════════════════════════════

def update_dimensions_on_upload(image):
    """Return the ``(width, height)`` used for generation from *image*.

    The longer side is scaled to 1024 and the shorter side is scaled
    proportionally; both are then rounded down to a multiple of 8.

    Args:
        image: Object exposing a ``size`` attribute of ``(width, height)``
            (e.g. a PIL image), or ``None``.

    Returns:
        A ``(width, height)`` tuple. ``(1024, 1024)`` is returned when
        *image* is ``None`` or has a non-positive dimension. Neither value
        is ever smaller than 8, so extreme aspect ratios cannot produce a
        zero-sized output.
    """
    if image is None:
        return 1024, 1024

    w, h = image.size
    # A degenerate image would divide by zero below; use the default size.
    if w <= 0 or h <= 0:
        return 1024, 1024

    if w > h:
        new_w, new_h = 1024, int(1024 * h / w)
    else:
        new_w, new_h = int(1024 * w / h), 1024

    # Round down to a multiple of 8, but never below 8: a very elongated
    # image would otherwise round its short side down to 0.
    return max(8, (new_w // 8) * 8), max(8, (new_h // 8) * 8)


def format_seed(seed_val):
    """Return *seed_val* converted to an integer and rendered as a string."""
    return str(int(seed_val))


def format_info(seed_val, images):
    """Build the Markdown shown in the info box after a generation.

    Args:
        seed_val: Seed used for the run; rendered as an integer.
        images: Gallery value. Each entry may be a file path, a PIL image,
            an object with a ``name`` attribute holding a path, or a
            tuple/list whose first element is one of those.

    Returns:
        Markdown with the seed and, when the first image's size can be
        read, its original size and the computed output size. If the image
        cannot be inspected, only the seed line is returned.
    """
    seed_line = f"**Seed:** `{int(seed_val)}`"
    if not images:
        return seed_line

    opened = None  # file handle opened here, if any, so it can be closed
    try:
        first = images[0]
        source = first[0] if isinstance(first, (tuple, list)) else first
        if isinstance(source, Image.Image):
            im = source
        else:
            opened = Image.open(source if isinstance(source, str) else source.name)
            im = opened
        ow, oh = im.size
        nw, nh = update_dimensions_on_upload(im)
    except Exception as e:
        # Best effort: the size line is optional, so log and fall back to
        # the seed-only message instead of failing the UI update.
        print(f"Could not read image dimensions: {e}")
        return seed_line
    finally:
        # Only close what this function opened; a caller-supplied PIL image
        # is left untouched.
        if opened is not None:
            opened.close()

    return (
        f"**Seed:** `{int(seed_val)}`\n\n"
        f"**Original:** {ow}×{oh} → **Output:** {nw}×{nh}"
    )


# ═══════════════════════════════════════════════════════════════════════
#  INFERENCE
# ═══════════════════════════════════════════════════════════════════════

@spaces.GPU
def infer(
    images, prompt, negative_prompt,
    seed, randomize_seed, guidance_scale, steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Run one image edit through the pipeline.

    Args:
        images: Gallery value. Each entry may be a file path, a PIL image,
            an object with a ``name`` attribute holding a path, or a
            tuple/list whose first element is one of those.
        prompt: Edit instruction; must not be empty or whitespace-only.
        negative_prompt: Text passed to the pipeline as the negative prompt.
        seed: Seed to use when *randomize_seed* is false. Converted to
            ``int`` because UI sliders may deliver floats.
        randomize_seed: When true, a seed is drawn from ``[0, MAX_SEED]``.
        guidance_scale: Passed to the pipeline as ``true_cfg_scale``.
        steps: Number of inference steps; converted to ``int``.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        ``(image, seed)`` — the first image produced by the pipeline and
        the integer seed that was actually used.

    Raises:
        gr.Error: If no images were supplied, the prompt is empty, or none
            of the supplied images could be loaded.
    """
    # ── Input validation (cheap, do first) ──────────────────────────
    if not images:
        raise gr.Error("⚠️  Please upload at least one image.")
    if not prompt or not prompt.strip():
        raise gr.Error("⚠️  Please enter an edit prompt.")

    pil_images = []
    for item in images:
        try:
            source = item[0] if isinstance(item, (tuple, list)) else item
            if isinstance(source, Image.Image):
                pil_images.append(source.convert("RGB"))
                continue
            file_path = source if isinstance(source, str) else source.name
            # convert() returns a new, fully loaded image, so the file
            # handle can be closed as soon as the conversion is done.
            with Image.open(file_path) as opened:
                pil_images.append(opened.convert("RGB"))
        except Exception as e:
            # Unreadable entries are skipped; the check below errors out
            # only if nothing usable remains.
            print(f"Skipping invalid image: {e}")

    if not pil_images:
        raise gr.Error("⚠️  Could not process uploaded images.")

    # Slider values can arrive as floats; manual_seed() requires an int and
    # the step count must be integral as well.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    steps = int(steps)

    generator = torch.Generator(device=device).manual_seed(seed)
    # Output size is derived from the first usable image.
    width, height = update_dimensions_on_upload(pil_images[0])

    try:
        with torch.inference_mode():
            result = pipe(
                image=pil_images,
                prompt=prompt,
                negative_prompt=negative_prompt,
                height=height,
                width=width,
                num_inference_steps=steps,
                generator=generator,
                true_cfg_scale=guidance_scale,
            ).images[0]
        return result, seed
    finally:
        # Reclaim temporaries after inference, whether it succeeded or
        # failed. Deliberately not done before inference.
        gc.collect()
        torch.cuda.empty_cache()


@spaces.GPU
def infer_example(images, prompt):
    """Run ``infer`` with fixed example settings.

    A single path string is wrapped in a list. The call uses the default
    negative prompt, a randomized seed, guidance scale 1.0 and 4 steps.
    Returns ``(None, 0)`` when *images* is empty.
    """
    if not images:
        return None, 0
    if isinstance(images, str):
        images = [images]
    return infer(images, prompt, DEFAULT_NEGATIVE_PROMPT, 0, True, 1.0, 4)


# ═══════════════════════════════════════════════════════════════════════
#  PROMPT SUGGESTIONS
# ═══════════════════════════════════════════════════════════════════════

# One prompt suggestion per line; the UI renders the first four and the
# remaining four as two rows of clickable chips.
SUGGESTIONS = """\
Transform into anime style
Convert to oil painting
Add dramatic sunset lighting
Make it a pencil sketch
Apply cyberpunk neon aesthetic
Add snow and winter vibes
Turn into watercolor art
Make it look vintage 1970s
""".splitlines()

# ═══════════════════════════════════════════════════════════════════════
#  CSS
# ═══════════════════════════════════════════════════════════════════════

# Custom stylesheet passed to gr.Blocks(css=...). Selectors target the
# elem_id / elem_classes values assigned in the UI section (#col-container,
# #gen-btn, #clear-btn, #output-img, #info-box, .chip-row, .chip-btn) and
# the class names used inside the raw gr.HTML snippets (.hdr, .sub,
# .badges, .bdg, .stl, .tips, .ftr). Rules prefixed with `.dark` are the
# dark-mode variants.
css = """
/* ── Container ─────────────────────────────────────────────── */
#col-container {
    margin: 0 auto;
    max-width: 1120px;
}

/* ── Header ────────────────────────────────────────────────── */
.hdr {
    text-align: center;
    padding: 38px 28px 30px;
    background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
    border-radius: 20px;
    margin-bottom: 22px;
    border: 1px solid rgba(255,107,53,.15);
    box-shadow: 0 12px 44px rgba(0,0,0,.10);
    position: relative;
    overflow: hidden;
}
.hdr::before {
    content: "";
    position: absolute; inset: 0;
    background:
        radial-gradient(ellipse at 25% 50%, rgba(255,107,53,.07) 0%, transparent 60%),
        radial-gradient(ellipse at 80% 25%, rgba(255,140,90,.05) 0%, transparent 50%);
    pointer-events: none;
}
.hdr > * { position: relative; z-index: 1; }
.hdr h1 {
    font-size: 2.6em; font-weight: 800;
    background: linear-gradient(135deg, #FF8C5A, #FF6B35, #FF4500);
    -webkit-background-clip: text; -webkit-text-fill-color: transparent;
    background-clip: text;
    margin: 0 0 8px; letter-spacing: -.02em; line-height: 1.15;
}
.hdr .sub {
    color: #94a3b8; font-size: 1.05em; margin: 0 0 16px; line-height: 1.55;
}
.hdr .sub a {
    color: #FF8C5A; text-decoration: none;
    border-bottom: 1px solid rgba(255,140,90,.3);
    transition: border-color .2s;
}
.hdr .sub a:hover { border-bottom-color: #FF8C5A; }
.badges { display: flex; justify-content: center; gap: 8px; flex-wrap: wrap; }
.bdg {
    background: rgba(255,107,53,.12); color: #FFB088;
    padding: 5px 14px; border-radius: 100px;
    font-size: .82em; font-weight: 500;
    border: 1px solid rgba(255,107,53,.18);
}

/* ── Section Label ─────────────────────────────────────────── */
.stl {
    font-size: .92em; font-weight: 700; color: #475569;
    margin: 0 0 6px; display: flex; align-items: center; gap: 6px;
}
.dark .stl { color: #cbd5e1; }

/* ── Generate Button ───────────────────────────────────────── */
#gen-btn {
    margin-top: 14px !important;
    font-size: 1.1em !important; font-weight: 700 !important;
    padding: 14px 28px !important; border-radius: 14px !important;
    letter-spacing: .3px;
    transition: all .25s cubic-bezier(.4,0,.2,1) !important;
    min-height: 52px !important;
}
#gen-btn:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 8px 28px rgba(232,83,31,.40) !important;
}
#gen-btn:active { transform: translateY(0) !important; }

/* ── Clear Button ──────────────────────────────────────────── */
#clear-btn {
    min-height: 52px !important;
    margin-top: 14px !important;
    border-radius: 14px !important;
    font-weight: 600 !important;
}

/* ── Prompt Chip Row ───────────────────────────────────────── */
.chip-row { gap: 6px !important; margin-top: 2px !important; }
.chip-btn {
    font-size: .78em !important; padding: 5px 13px !important;
    border-radius: 100px !important; min-width: 0 !important;
    font-weight: 500 !important; white-space: nowrap !important;
    transition: all .2s ease !important;
}
.chip-btn:hover { transform: translateY(-1px) !important; }

/* ── Output Image ──────────────────────────────────────────── */
#output-img { border-radius: 14px !important; overflow: hidden; }

/* ── Info Box ──────────────────────────────────────────────── */
#info-box {
    margin-top: 6px !important;
    border-radius: 12px !important;
}
#info-box .prose {
    font-family: 'JetBrains Mono', monospace;
    font-size: .88em;
}

/* ── Tips ──────────────────────────────────────────────────── */
.tips {
    background: linear-gradient(135deg, #FFF5F0, #FFE8DB);
    border: 1px solid #FFD0B5; border-radius: 14px;
    padding: 18px 24px; margin-top: 14px;
}
.tips h4 { margin: 0 0 10px; font-size: .95em; color: #A63812; }
.tips ul {
    margin: 0; padding: 0 0 0 20px;
    color: #80300F; font-size: .85em; line-height: 1.75;
}
.tips li { margin-bottom: 2px; }
.tips li::marker { color: #FF6B35; }
.tips strong { color: #A63812; }

.dark .tips {
    background: linear-gradient(135deg, #2a1a10, #201510);
    border-color: rgba(255,107,53,.2);
}
.dark .tips h4 { color: #FFB088; }
.dark .tips ul { color: #FFD0B5; }
.dark .tips strong { color: #FFB088; }

/* ── Footer ────────────────────────────────────────────────── */
.ftr {
    text-align: center; padding: 18px; margin-top: 20px;
    color: #94a3b8; font-size: .82em;
    border-top: 1px solid #e2e8f0;
}
.dark .ftr { border-top-color: rgba(255,255,255,.08); }
.ftr a { color: #E8531F; text-decoration: none; font-weight: 500; }
.ftr a:hover { text-decoration: underline; }

/* ── Responsive ────────────────────────────────────────────── */
@media (max-width: 768px) {
    .hdr h1 { font-size: 1.8em; }
    .hdr { padding: 24px 16px 22px; }
    .bdg { font-size: .72em; padding: 4px 10px; }
    .chip-btn { font-size: .72em !important; padding: 4px 10px !important; }
}
"""

# ═══════════════════════════════════════════════════════════════════════
#  UI
# ═══════════════════════════════════════════════════════════════════════

# Build the Blocks app: header, a two-column input/output area, an
# "Advanced Settings" accordion, tips and footer, followed by event wiring.
with gr.Blocks(css=css, theme=theme, title="🔥 FireRed Image Edit") as demo:
    with gr.Column(elem_id="col-container"):

        # ── Header ──────────────────────────────────────────────────
        gr.HTML("""
        <div class="hdr">
            <h1>🔥 FireRed Image Edit</h1>
            <p class="sub">
                AI-powered image editing with blazing-fast <strong>4-step inference</strong><br>
                Powered by
                <a href="https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.1"
                   target="_blank">FireRed-Image-Edit-1.1</a>
                &amp;
                <a href="https://huggingface.co/prithivMLmods/Qwen-Image-Edit-Rapid-AIO-V23"
                   target="_blank">Rapid-AIO-V23</a>
            </p>
            <div class="badges">
                <span class="bdg">⚡ 4-Step Fast</span>
                <span class="bdg">🎨 Style Transfer</span>
                <span class="bdg">📐 Auto Resize</span>
                <span class="bdg">🖼️ Multi-Image</span>
                <span class="bdg">🔧 BF16 Precision</span>
            </div>
        </div>
        """)

        # ── Main two-column layout ─────────────────────────────────
        with gr.Row(equal_height=False):

            # ─── Left: inputs ───────────────────────────────────────
            with gr.Column(scale=1):
                gr.HTML('<p class="stl">📤&nbsp; Upload Image(s)</p>')

                images = gr.Gallery(
                    label="Upload Images",
                    type="filepath",
                    columns=2,
                    rows=1,
                    height=280,
                    allow_preview=True,
                    object_fit="contain",
                )

                gr.HTML('<p class="stl" style="margin-top:16px">✏️&nbsp; Describe Your Edit</p>')

                prompt = gr.Textbox(
                    show_label=False,
                    max_lines=3,
                    placeholder=(
                        "e.g.  'Transform into a Studio Ghibli anime scene "
                        "with warm golden-hour lighting'"
                    ),
                )

                # Suggestion chips
                gr.HTML(
                    '<p style="font-size:.78em;color:#94a3b8;margin:10px 0 4px;">'
                    "💡 Quick suggestions — click to fill prompt:</p>"
                )

                # Two rows of chips; each (button, text) pair is kept so the
                # click handlers can be attached in the wiring section.
                chip_data_1, chip_data_2 = [], []
                with gr.Row(elem_classes="chip-row"):
                    for t in SUGGESTIONS[:4]:
                        b = gr.Button(t, size="sm", variant="secondary",
                                      elem_classes="chip-btn")
                        chip_data_1.append((b, t))

                with gr.Row(elem_classes="chip-row"):
                    for t in SUGGESTIONS[4:]:
                        b = gr.Button(t, size="sm", variant="secondary",
                                      elem_classes="chip-btn")
                        chip_data_2.append((b, t))

                with gr.Row():
                    run_button = gr.Button(
                        "🎨  Generate Edit",
                        variant="primary", elem_id="gen-btn", size="lg", scale=3,
                    )
                    clear_button = gr.Button(
                        "🗑️  Clear",
                        variant="secondary", elem_id="clear-btn", size="lg", scale=1,
                    )

            # ─── Right: output ──────────────────────────────────────
            with gr.Column(scale=1):
                gr.HTML('<p class="stl">🖼️&nbsp; Result</p>')

                output_image = gr.Image(
                    show_label=False,
                    interactive=False,
                    format="png",
                    height=420,
                    elem_id="output-img",
                )

                info_box = gr.Markdown(
                    value="*Generate an edit to see details here.*",
                    elem_id="info-box",
                )

        # ── Advanced settings ───────────────────────────────────────
        with gr.Accordion("⚙️  Advanced Settings", open=False):
            with gr.Row():
                seed = gr.Slider(
                    label="Seed", minimum=0, maximum=MAX_SEED, step=1,
                    value=0, scale=3,
                )
                randomize_seed = gr.Checkbox(
                    label="🎲 Randomize seed", value=True, scale=1,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0, maximum=10.0, step=0.1, value=1.0,
                    info="Higher → stronger prompt adherence",
                )
                steps = gr.Slider(
                    label="Inference Steps",
                    minimum=1, maximum=50, step=1, value=4,
                    info="More steps → higher quality (slower)",
                )

            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value=DEFAULT_NEGATIVE_PROMPT,
                max_lines=3,
                info="Describe what to avoid in the output",
            )

        # ── Tips ────────────────────────────────────────────────────
        gr.HTML("""
        <div class="tips">
            <h4>💡 Tips for Best Results</h4>
            <ul>
                <li><strong>Be specific</strong> — clearly describe
                    the change you want</li>
                <li><strong>Style keywords</strong> — "anime", "oil painting",
                    "watercolor", "pixel art", "3D render"</li>
                <li><strong>Lighting</strong> — "golden hour", "dramatic shadows",
                    "soft diffused light", "neon glow"</li>
                <li><strong>Higher quality</strong> — increase steps to 8-12
                    for finer details (takes longer)</li>
                <li><strong>Multiple images</strong> — upload extra reference
                    images for richer context</li>
            </ul>
        </div>
        """)

        # ── Footer ──────────────────────────────────────────────────
        # The accelerator link matches the checkpoint that is actually
        # loaded (Rapid-AIO-V23), consistent with the header above.
        gr.HTML("""
        <div class="ftr">
            Model&nbsp;
            <a href="https://huggingface.co/FireRedTeam/FireRed-Image-Edit-1.1"
               target="_blank">FireRed-Image-Edit-1.1</a>
            &nbsp;·&nbsp; Accelerated&nbsp;
            <a href="https://huggingface.co/prithivMLmods/Qwen-Image-Edit-Rapid-AIO-V23"
               target="_blank">Rapid-AIO-V23</a>
        </div>
        """)

    # ═══════════════════════════════════════════════════════════════
    #  EVENT WIRING
    # ═══════════════════════════════════════════════════════════════

    # Suggestion chips → fill prompt. The default argument binds each
    # button's own text at definition time (avoids late-binding closures).
    for btn, text in chip_data_1 + chip_data_2:
        btn.click(fn=lambda t=text: t, inputs=[], outputs=[prompt])

    # Clear button: reset gallery, prompt, output image and info text.
    clear_button.click(
        fn=lambda: (None, "", None, "*Generate an edit to see details here.*"),
        inputs=[],
        outputs=[images, prompt, output_image, info_box],
    )

    # Generate — with a public api_name so the endpoint is discoverable.
    # The info box is refreshed with .success() rather than .then(), so a
    # failed run (e.g. a validation gr.Error) does not report a seed and
    # output size for an edit that was never produced.
    run_button.click(
        fn=infer,
        inputs=[
            images, prompt, negative_prompt,
            seed, randomize_seed, guidance_scale, steps,
        ],
        outputs=[output_image, seed],
        api_name="edit",
    ).success(
        fn=format_info,
        inputs=[seed, images],
        outputs=[info_box],
    )

# ═══════════════════════════════════════════════════════════════════════
#  LAUNCH
# ═══════════════════════════════════════════════════════════════════════

if __name__ == "__main__":
    # Every request goes through the single module-level `pipe`. The
    # pipeline and its scheduler keep per-run state, so two overlapping
    # calls can corrupt each other. GPU jobs are therefore run one at a
    # time; up to 30 further requests wait in the queue.
    demo.queue(
        max_size=30,
        default_concurrency_limit=1,
    ).launch(
        share=True,          # ← public shareable link
    )