import base64 import json import os import random import time from io import BytesIO from typing import Optional, Tuple import gradio as gr import numpy as np import requests import spaces import torch from PIL import Image from fibo_edit_pipeline import BriaFiboEditPipeline from utils import AngleInstruction # --- Configuration --- device = "cuda" if torch.cuda.is_available() else "cpu" # Run locally or on HuggingFace Spaces RUN_LOCAL = False # Model paths BASE_CHECKPOINT = "briaai/FIBO-Edit" # HuggingFace model ID LORA_CHECKPOINT = "briaai/fibo_edit_multi_angle_full_0121_full_1k" # HuggingFace LoRA model ID # BRIA API configuration BRIA_API_URL = "https://engine.prod.bria-api.com/v2/structured_prompt/generate/pro" BRIA_API_TOKEN = os.environ.get("BRIA_API_TOKEN") if not BRIA_API_TOKEN: raise ValueError( "BRIA_API_TOKEN environment variable is not set. " "Please add it as a HuggingFace Space secret." ) # Generation defaults DEFAULT_NUM_INFERENCE_STEPS = 50 DEFAULT_GUIDANCE_SCALE = 3.5 DEFAULT_SEED = 100050 MAX_SEED = np.iinfo(np.int32).max print("🚀 Starting Fibo Edit Multi-Angle LoRA Gradio App") print(f"Device: {device}") print(f"Base checkpoint: {BASE_CHECKPOINT}") print(f"LoRA checkpoint: {LORA_CHECKPOINT}") # --- Helper Functions --- def load_pipeline_fiboedit( checkpoint: str, lora_checkpoint: Optional[str] = None, lora_scale: Optional[float] = None, fuse_lora: bool = True, ): """ Load the Fibo Edit pipeline using BriaFiboEditPipeline with optional LoRA weights. 
Args: checkpoint: HuggingFace model ID for base model lora_checkpoint: Optional HuggingFace model ID for LoRA weights lora_scale: Scale for LoRA weights when fusing (default None = 1.0) fuse_lora: Whether to fuse LoRA into base weights (default True) Returns: Loaded BriaFiboEditPipeline """ print(f"Loading BriaFiboEditPipeline from {checkpoint}") if lora_checkpoint: print(f" with LoRA from {lora_checkpoint}") # Load pipeline from HuggingFace print("Loading pipeline...") pipe = BriaFiboEditPipeline.from_pretrained( checkpoint, torch_dtype=torch.bfloat16, ) pipe.to("cuda") print(f" Pipeline loaded from {checkpoint}") # Load LoRA weights if provided (PEFT format) if lora_checkpoint: print(f"Loading PEFT LoRA from {lora_checkpoint}...") from peft import PeftModel print(" Loading PEFT adapter onto transformer...") pipe.transformer = PeftModel.from_pretrained( pipe.transformer, lora_checkpoint, ) print(" PEFT adapter loaded successfully") if fuse_lora: print(" Merging LoRA into base weights...") if hasattr(pipe.transformer, "merge_and_unload"): pipe.transformer = pipe.transformer.merge_and_unload() print(" LoRA merged and unloaded") else: print(" [WARN] transformer.merge_and_unload() not available") print("✅ Pipeline loaded successfully!") return pipe def generate_structured_caption( image: Image.Image, prompt: str, seed: int = 1 ) -> Optional[dict]: """Generate structured caption using BRIA API.""" buffered = BytesIO() image.save(buffered, format="PNG") image_bytes = base64.b64encode(buffered.getvalue()).decode("utf-8") payload = { "seed": seed, "sync": True, "images": [image_bytes], "prompt": prompt, } headers = { "Content-Type": "application/json", "api_token": BRIA_API_TOKEN, } max_retries = 3 for attempt in range(max_retries): try: response = requests.post( BRIA_API_URL, json=payload, headers=headers, timeout=60 ) response.raise_for_status() data = response.json() structured_prompt_str = data["result"]["structured_prompt"] return json.loads(structured_prompt_str) 
except Exception as e: if attempt == max_retries - 1: print(f"Failed to generate structured caption: {e}") return None time.sleep(3) return None # --- Model Loading --- print("Loading Fibo Edit pipeline...") try: pipe = load_pipeline_fiboedit( checkpoint=BASE_CHECKPOINT, lora_checkpoint=LORA_CHECKPOINT, lora_scale=None, fuse_lora=True, ) if torch.cuda.is_available(): mem_allocated = torch.cuda.memory_allocated(0) / 1024**3 print(f" GPU memory allocated: {mem_allocated:.2f} GB") except Exception as e: print(f"❌ Error loading pipeline: {e}") import traceback traceback.print_exc() raise def build_camera_prompt( rotate_deg: float = 0.0, zoom: float = 0.0, vertical_tilt: float = 0.0 ) -> str: """Build a natural language camera instruction from parameters.""" # Create AngleInstruction from camera parameters angle_instruction = AngleInstruction.from_camera_params( rotation=rotate_deg, tilt=vertical_tilt, zoom=zoom ) # Generate natural language description view_map = { "back view": "view from the opposite side", "back-left quarter view": "rotate 135 degrees left", "back-right quarter view": "rotate 135 degrees right", "front view": "keep the front view", "front-left quarter view": "rotate 45 degrees left", "front-right quarter view": "rotate 45 degrees right", "left side view": "rotate 90 degrees left", "right side view": "rotate 90 degrees right", } shot_map = { "elevated shot": "with an elevated viewing angle", "eye-level shot": "with an eye-level viewing angle", "high-angle shot": "with a high-angle viewing angle", "low-angle shot": "with a low-angle viewing angle", } zoom_map = { "close-up": "and make it a close-up shot", "medium shot": "", # Omit medium shot "wide shot": "and make it a wide shot", } view_text = view_map[angle_instruction.view.value] shot_text = shot_map[angle_instruction.shot.value] zoom_text = zoom_map[angle_instruction.zoom.value] # Construct the natural language prompt starting with "Change the viewing angle" parts = [view_text, shot_text] if 
zoom_text: # Only add zoom if not empty (medium shot is omitted) parts.append(zoom_text) natural_prompt = "Change the viewing angle: " + ", ".join(parts) return natural_prompt, angle_instruction def fetch_structured_caption( image: Optional[Image.Image] = None, rotate_deg: float = 0.0, zoom: float = 0.0, vertical_tilt: float = 0.0, seed: int = 0, randomize_seed: bool = True, prev_output: Optional[Image.Image] = None, ) -> Tuple[int, str, dict, Image.Image]: """Fetch structured caption from BRIA API.""" # Build natural language prompt and angle instruction natural_prompt, angle_instruction = build_camera_prompt( rotate_deg, zoom, vertical_tilt ) print(f"Natural Language Prompt: {natural_prompt}") print(f"Angle Instruction: {str(angle_instruction)}") if randomize_seed: seed = random.randint(0, MAX_SEED) # Get input image if image is not None: if isinstance(image, Image.Image): input_image = image.convert("RGB") elif hasattr(image, "name"): input_image = Image.open(image.name).convert("RGB") else: input_image = image elif prev_output: input_image = prev_output.convert("RGB") else: raise gr.Error("Please upload an image first.") # Generate structured caption using BRIA API print("Generating structured caption from BRIA API...") structured_caption = generate_structured_caption( input_image, natural_prompt, seed=seed ) if structured_caption is None: raise gr.Error("Failed to generate structured caption from BRIA API") # Replace edit_instruction with angle instruction string structured_caption["edit_instruction"] = str(angle_instruction) print( f"Structured caption received: {json.dumps(structured_caption, ensure_ascii=False)}" ) return seed, natural_prompt, structured_caption, input_image @spaces.GPU(duration=240) def generate_image_from_caption( input_image: Image.Image, structured_caption: dict, seed: int, guidance_scale: float = 3.5, num_inference_steps: int = 50, ) -> Image.Image: """Generate image using Fibo Edit pipeline with structured caption.""" 
    # Serialize the structured caption to a JSON string prompt for the pipeline.
    structured_prompt = json.dumps(structured_caption, ensure_ascii=False)
    print("Generating image with structured prompt...")
    # Seeded generator so the result is reproducible for a given seed.
    generator = torch.Generator(device=device).manual_seed(seed)
    result = pipe(
        image=input_image,
        prompt=structured_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=generator,
        num_images_per_prompt=1,
    ).images[0]
    return result


# --- 3D Camera Control Component ---
# Using gr.HTML directly with templates (Gradio 6 style)
# NOTE(review): the HTML markup of this template appears truncated in this
# file — only the control labels remain. Confirm against the original source.
CAMERA_3D_HTML_TEMPLATE = """
Rotation (↔)
Vertical Tilt (↕)
Distance/Zoom
"""

# Client-side three.js scene: draggable rotation / tilt / zoom handles around
# a target image plane. Reads and writes `props.value` and fires 'change'
# once a drag is released (values snapped to the discrete steps below).
CAMERA_3D_JS = """
(() => {
    const wrapper = element.querySelector('#camera-control-wrapper');
    const promptOverlay = element.querySelector('#prompt-overlay');
    const initScene = () => {
        if (typeof THREE === 'undefined') { setTimeout(initScene, 100); return; }
        const scene = new THREE.Scene();
        scene.background = new THREE.Color(0x1a1a1a);
        const camera = new THREE.PerspectiveCamera(50, wrapper.clientWidth / wrapper.clientHeight, 0.1, 1000);
        camera.position.set(4, 3, 4);
        camera.lookAt(0, 0.75, 0);
        const renderer = new THREE.WebGLRenderer({ antialias: true });
        renderer.setSize(wrapper.clientWidth, wrapper.clientHeight);
        renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
        wrapper.insertBefore(renderer.domElement, wrapper.firstChild);
        scene.add(new THREE.AmbientLight(0xffffff, 0.6));
        const dirLight = new THREE.DirectionalLight(0xffffff, 0.6);
        dirLight.position.set(5, 10, 5);
        scene.add(dirLight);
        scene.add(new THREE.GridHelper(6, 12, 0x333333, 0x222222));
        const CENTER = new THREE.Vector3(0, 0.75, 0);
        const BASE_DISTANCE = 2.0;
        const ROTATION_RADIUS = 2.2;
        const TILT_RADIUS = 1.6;
        let rotateDeg = props.value?.rotate_deg || 0;
        let zoom = props.value?.zoom || 5.0;
        let verticalTilt = props.value?.vertical_tilt || 0;
        const rotateSteps = [-180, -135, -90, -45, 0, 45, 90, 135, 180];
        const zoomSteps = [0, 5, 10];
        const tiltSteps = [-1, -0.5, 0, 0.5, 1];
        function snapToNearest(value, steps) {
            return steps.reduce((prev, curr) => Math.abs(curr - value) < Math.abs(prev - value) ? curr : prev);
        }
        function createPlaceholderTexture() {
            const canvas = document.createElement('canvas');
            canvas.width = 256;
            canvas.height = 256;
            const ctx = canvas.getContext('2d');
            ctx.fillStyle = '#3a3a4a';
            ctx.fillRect(0, 0, 256, 256);
            ctx.fillStyle = '#ffcc99';
            ctx.beginPath();
            ctx.arc(128, 128, 80, 0, Math.PI * 2);
            ctx.fill();
            ctx.fillStyle = '#333';
            ctx.beginPath();
            ctx.arc(100, 110, 10, 0, Math.PI * 2);
            ctx.arc(156, 110, 10, 0, Math.PI * 2);
            ctx.fill();
            ctx.strokeStyle = '#333';
            ctx.lineWidth = 3;
            ctx.beginPath();
            ctx.arc(128, 130, 35, 0.2, Math.PI - 0.2);
            ctx.stroke();
            return new THREE.CanvasTexture(canvas);
        }
        let currentTexture = createPlaceholderTexture();
        const planeMaterial = new THREE.MeshBasicMaterial({ map: currentTexture, side: THREE.DoubleSide });
        let targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(1.2, 1.2), planeMaterial);
        targetPlane.position.copy(CENTER);
        scene.add(targetPlane);
        function updateTextureFromUrl(url) {
            if (!url) {
                planeMaterial.map = createPlaceholderTexture();
                planeMaterial.needsUpdate = true;
                scene.remove(targetPlane);
                targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(1.2, 1.2), planeMaterial);
                targetPlane.position.copy(CENTER);
                scene.add(targetPlane);
                return;
            }
            const loader = new THREE.TextureLoader();
            loader.crossOrigin = 'anonymous';
            loader.load(url, (texture) => {
                texture.minFilter = THREE.LinearFilter;
                texture.magFilter = THREE.LinearFilter;
                planeMaterial.map = texture;
                planeMaterial.needsUpdate = true;
                const img = texture.image;
                if (img && img.width && img.height) {
                    const aspect = img.width / img.height;
                    const maxSize = 1.4;
                    let planeWidth, planeHeight;
                    if (aspect > 1) { planeWidth = maxSize; planeHeight = maxSize / aspect; }
                    else { planeHeight = maxSize; planeWidth = maxSize * aspect; }
                    scene.remove(targetPlane);
                    targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(planeWidth, planeHeight), planeMaterial);
                    targetPlane.position.copy(CENTER);
                    scene.add(targetPlane);
                }
            });
        }
        if (props.imageUrl) {
            updateTextureFromUrl(props.imageUrl);
        }
        const cameraGroup = new THREE.Group();
        const bodyMat = new THREE.MeshStandardMaterial({ color: 0x6699cc, metalness: 0.5, roughness: 0.3 });
        const body = new THREE.Mesh(new THREE.BoxGeometry(0.28, 0.2, 0.35), bodyMat);
        cameraGroup.add(body);
        const lens = new THREE.Mesh(
            new THREE.CylinderGeometry(0.08, 0.1, 0.16, 16),
            new THREE.MeshStandardMaterial({ color: 0x6699cc, metalness: 0.5, roughness: 0.3 })
        );
        lens.rotation.x = Math.PI / 2;
        lens.position.z = 0.24;
        cameraGroup.add(lens);
        scene.add(cameraGroup);
        const rotationArcPoints = [];
        for (let i = 0; i <= 64; i++) {
            const angle = THREE.MathUtils.degToRad((360 * i / 64));
            rotationArcPoints.push(new THREE.Vector3(ROTATION_RADIUS * Math.sin(angle), 0.05, ROTATION_RADIUS * Math.cos(angle)));
        }
        const rotationCurve = new THREE.CatmullRomCurve3(rotationArcPoints);
        const rotationArc = new THREE.Mesh(
            new THREE.TubeGeometry(rotationCurve, 64, 0.035, 8, true),
            new THREE.MeshStandardMaterial({ color: 0x00ff88, emissive: 0x00ff88, emissiveIntensity: 0.3 })
        );
        scene.add(rotationArc);
        const rotationHandle = new THREE.Mesh(
            new THREE.SphereGeometry(0.16, 16, 16),
            new THREE.MeshStandardMaterial({ color: 0x00ff88, emissive: 0x00ff88, emissiveIntensity: 0.5 })
        );
        rotationHandle.userData.type = 'rotation';
        scene.add(rotationHandle);
        const tiltArcPoints = [];
        for (let i = 0; i <= 32; i++) {
            const angle = THREE.MathUtils.degToRad(-45 + (90 * i / 32));
            tiltArcPoints.push(new THREE.Vector3(-0.7, TILT_RADIUS * Math.sin(angle) + CENTER.y, TILT_RADIUS * Math.cos(angle)));
        }
        const tiltCurve = new THREE.CatmullRomCurve3(tiltArcPoints);
        const tiltArc = new THREE.Mesh(
            new THREE.TubeGeometry(tiltCurve, 32, 0.035, 8, false),
            new THREE.MeshStandardMaterial({ color: 0xff69b4, emissive: 0xff69b4, emissiveIntensity: 0.3 })
        );
        scene.add(tiltArc);
        const tiltHandle = new THREE.Mesh(
            new THREE.SphereGeometry(0.16, 16, 16),
            new THREE.MeshStandardMaterial({ color: 0xff69b4, emissive: 0xff69b4, emissiveIntensity: 0.5 })
        );
        tiltHandle.userData.type = 'tilt';
        scene.add(tiltHandle);
        const distanceLineGeo = new THREE.BufferGeometry();
        const distanceLine = new THREE.Line(distanceLineGeo, new THREE.LineBasicMaterial({ color: 0xffa500 }));
        scene.add(distanceLine);
        const distanceHandle = new THREE.Mesh(
            new THREE.SphereGeometry(0.16, 16, 16),
            new THREE.MeshStandardMaterial({ color: 0xffa500, emissive: 0xffa500, emissiveIntensity: 0.5 })
        );
        distanceHandle.userData.type = 'distance';
        scene.add(distanceHandle);
        function buildPromptText(rot, zoomVal, tilt) {
            const parts = [];
            if (rot !== 0) {
                const dir = rot > 0 ? 'right' : 'left';
                parts.push('Rotate ' + Math.abs(rot) + '° ' + dir);
            }
            if (zoomVal >= 6.66) parts.push('Close-up');
            else if (zoomVal >= 3.33) parts.push('Medium shot');
            else parts.push('Wide angle');
            if (tilt >= 0.66) parts.push("High angle");
            else if (tilt >= 0.33) parts.push("Elevated");
            else if (tilt <= -0.33) parts.push("Low angle");
            else parts.push("Eye level");
            return parts.length > 0 ? parts.join(' • ') : 'No camera movement';
        }
        function updatePositions() {
            const rotRad = THREE.MathUtils.degToRad(rotateDeg);
            // Map zoom 0-10 to distance: zoom 0 = far (3.0), zoom 10 = close (1.0)
            const distance = 3.0 - (zoom / 10) * 2.0;
            const tiltAngle = verticalTilt * 35;
            const tiltRad = THREE.MathUtils.degToRad(tiltAngle);
            const camX = distance * Math.sin(rotRad) * Math.cos(tiltRad);
            const camY = distance * Math.sin(tiltRad) + CENTER.y;
            const camZ = distance * Math.cos(rotRad) * Math.cos(tiltRad);
            cameraGroup.position.set(camX, camY, camZ);
            cameraGroup.lookAt(CENTER);
            rotationHandle.position.set(ROTATION_RADIUS * Math.sin(rotRad), 0.05, ROTATION_RADIUS * Math.cos(rotRad));
            const tiltHandleAngle = THREE.MathUtils.degToRad(tiltAngle);
            tiltHandle.position.set(-0.7, TILT_RADIUS * Math.sin(tiltHandleAngle) + CENTER.y, TILT_RADIUS * Math.cos(tiltHandleAngle));
            const handleDist = distance - 0.4;
            distanceHandle.position.set(
                handleDist * Math.sin(rotRad) * Math.cos(tiltRad),
                handleDist * Math.sin(tiltRad) + CENTER.y,
                handleDist * Math.cos(rotRad) * Math.cos(tiltRad)
            );
            distanceLineGeo.setFromPoints([cameraGroup.position.clone(), CENTER.clone()]);
            promptOverlay.textContent = buildPromptText(rotateDeg, zoom, verticalTilt);
        }
        function updatePropsAndTrigger() {
            const rotSnap = snapToNearest(rotateDeg, rotateSteps);
            const zoomSnap = snapToNearest(zoom, zoomSteps);
            const tiltSnap = snapToNearest(verticalTilt, tiltSteps);
            props.value = { rotate_deg: rotSnap, zoom: zoomSnap, vertical_tilt: tiltSnap };
            trigger('change', props.value);
        }
        const raycaster = new THREE.Raycaster();
        const mouse = new THREE.Vector2();
        let isDragging = false;
        let dragTarget = null;
        let dragStartMouse = new THREE.Vector2();
        let dragStartZoom = 0;
        const intersection = new THREE.Vector3();
        const canvas = renderer.domElement;
        canvas.addEventListener('mousedown', (e) => {
            const rect = canvas.getBoundingClientRect();
            mouse.x = ((e.clientX - rect.left) / rect.width) * 2 - 1;
            mouse.y = -((e.clientY - rect.top) / rect.height) * 2 + 1;
            raycaster.setFromCamera(mouse, camera);
            const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
            if (intersects.length > 0) {
                isDragging = true;
                dragTarget = intersects[0].object;
                dragTarget.material.emissiveIntensity = 1.0;
                dragTarget.scale.setScalar(1.3);
                dragStartMouse.copy(mouse);
                dragStartZoom = zoom;
                canvas.style.cursor = 'grabbing';
            }
        });
        canvas.addEventListener('mousemove', (e) => {
            const rect = canvas.getBoundingClientRect();
            mouse.x = ((e.clientX - rect.left) / rect.width) * 2 - 1;
            mouse.y = -((e.clientY - rect.top) / rect.height) * 2 + 1;
            if (isDragging && dragTarget) {
                raycaster.setFromCamera(mouse, camera);
                if (dragTarget.userData.type === 'rotation') {
                    const plane = new THREE.Plane(new THREE.Vector3(0, 1, 0), -0.05);
                    if (raycaster.ray.intersectPlane(plane, intersection)) {
                        let angle = THREE.MathUtils.radToDeg(Math.atan2(intersection.x, intersection.z));
                        rotateDeg = THREE.MathUtils.clamp(angle, -180, 180);
                    }
                }
                else if (dragTarget.userData.type === 'tilt') {
                    const plane = new THREE.Plane(new THREE.Vector3(1, 0, 0), 0.7);
                    if (raycaster.ray.intersectPlane(plane, intersection)) {
                        const relY = intersection.y - CENTER.y;
                        const relZ = intersection.z;
                        const angle = THREE.MathUtils.radToDeg(Math.atan2(relY, relZ));
                        verticalTilt = THREE.MathUtils.clamp(angle / 35, -1, 1);
                    }
                } else if (dragTarget.userData.type === 'distance') {
                    const deltaY = mouse.y - dragStartMouse.y;
                    zoom = THREE.MathUtils.clamp(dragStartZoom + deltaY * 20, 0, 10);
                }
                updatePositions();
            } else {
                raycaster.setFromCamera(mouse, camera);
                const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
                [rotationHandle, tiltHandle, distanceHandle].forEach(h => { h.material.emissiveIntensity = 0.5; h.scale.setScalar(1); });
                if (intersects.length > 0) {
                    intersects[0].object.material.emissiveIntensity = 0.8;
                    intersects[0].object.scale.setScalar(1.1);
                    canvas.style.cursor = 'grab';
                } else {
                    canvas.style.cursor = 'default';
                }
            }
        });
        const onMouseUp = () => {
            if (dragTarget) {
                dragTarget.material.emissiveIntensity = 0.5;
                dragTarget.scale.setScalar(1);
                const targetRot = snapToNearest(rotateDeg, rotateSteps);
                const targetZoom = snapToNearest(zoom, zoomSteps);
                const targetTilt = snapToNearest(verticalTilt, tiltSteps);
                const startRot = rotateDeg, startZoom = zoom, startTilt = verticalTilt;
                const startTime = Date.now();
                function animateSnap() {
                    const t = Math.min((Date.now() - startTime) / 200, 1);
                    const ease = 1 - Math.pow(1 - t, 3);
                    rotateDeg = startRot + (targetRot - startRot) * ease;
                    zoom = startZoom + (targetZoom - startZoom) * ease;
                    verticalTilt = startTilt + (targetTilt - startTilt) * ease;
                    updatePositions();
                    if (t < 1) requestAnimationFrame(animateSnap);
                    else updatePropsAndTrigger();
                }
                animateSnap();
            }
            isDragging = false;
            dragTarget = null;
            canvas.style.cursor = 'default';
        };
        canvas.addEventListener('mouseup', onMouseUp);
        canvas.addEventListener('mouseleave', onMouseUp);
        canvas.addEventListener('touchstart', (e) => {
            e.preventDefault();
            const touch = e.touches[0];
            const rect = canvas.getBoundingClientRect();
            mouse.x = ((touch.clientX - rect.left) / rect.width) * 2 - 1;
            mouse.y = -((touch.clientY - rect.top) / rect.height) * 2 + 1;
            raycaster.setFromCamera(mouse, camera);
            const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
            if (intersects.length > 0) {
                isDragging = true;
                dragTarget = intersects[0].object;
                dragTarget.material.emissiveIntensity = 1.0;
                dragTarget.scale.setScalar(1.3);
                dragStartMouse.copy(mouse);
                dragStartZoom = zoom;
            }
        }, { passive: false });
        canvas.addEventListener('touchmove', (e) => {
            e.preventDefault();
            const touch = e.touches[0];
            const rect = canvas.getBoundingClientRect();
            mouse.x = ((touch.clientX - rect.left) / rect.width) * 2 - 1;
            mouse.y = -((touch.clientY - rect.top) / rect.height) * 2 + 1;
            if (isDragging && dragTarget) {
                raycaster.setFromCamera(mouse, camera);
                if (dragTarget.userData.type === 'rotation') {
                    const plane = new THREE.Plane(new THREE.Vector3(0, 1, 0), -0.05);
                    if (raycaster.ray.intersectPlane(plane, intersection)) {
                        let angle = THREE.MathUtils.radToDeg(Math.atan2(intersection.x, intersection.z));
                        rotateDeg = THREE.MathUtils.clamp(angle, -180, 180);
                    }
                } else if (dragTarget.userData.type === 'tilt') {
                    const plane = new THREE.Plane(new THREE.Vector3(1, 0, 0), 0.7);
                    if (raycaster.ray.intersectPlane(plane, intersection)) {
                        const relY = intersection.y - CENTER.y;
                        const relZ = intersection.z;
                        const angle = THREE.MathUtils.radToDeg(Math.atan2(relY, relZ));
                        verticalTilt = THREE.MathUtils.clamp(angle / 35, -1, 1);
                    }
                } else if (dragTarget.userData.type === 'distance') {
                    const deltaY = mouse.y - dragStartMouse.y;
                    zoom = THREE.MathUtils.clamp(dragStartZoom + deltaY * 20, 0, 10);
                }
                updatePositions();
            }
        }, { passive: false });
        canvas.addEventListener('touchend', (e) => { e.preventDefault(); onMouseUp(); }, { passive: false });
        canvas.addEventListener('touchcancel', (e) =>
            { e.preventDefault(); onMouseUp(); }, { passive: false });
        updatePositions();
        function render() {
            requestAnimationFrame(render);
            renderer.render(scene, camera);
        }
        render();
        new ResizeObserver(() => {
            camera.aspect = wrapper.clientWidth / wrapper.clientHeight;
            camera.updateProjectionMatrix();
            renderer.setSize(wrapper.clientWidth, wrapper.clientHeight);
        }).observe(wrapper);
        wrapper._updateTexture = updateTextureFromUrl;
        let lastImageUrl = props.imageUrl;
        let lastValue = JSON.stringify(props.value);
        setInterval(() => {
            if (props.imageUrl !== lastImageUrl) {
                lastImageUrl = props.imageUrl;
                updateTextureFromUrl(props.imageUrl);
            }
            const currentValue = JSON.stringify(props.value);
            if (currentValue !== lastValue) {
                lastValue = currentValue;
                if (props.value && typeof props.value === 'object') {
                    rotateDeg = props.value.rotate_deg ?? rotateDeg;
                    zoom = props.value.zoom ?? zoom;
                    verticalTilt = props.value.vertical_tilt ?? verticalTilt;
                    updatePositions();
                }
            }
        }, 100);
    };
    initScene();
})();
"""


def create_camera_3d_component(value=None, imageUrl=None, **kwargs):
    """Create a 3D camera control component using gr.HTML."""
    # Default camera state mirrors the JS-side fallbacks (front view,
    # medium zoom, no tilt).
    if value is None:
        value = {"rotate_deg": 0, "zoom": 5.0, "vertical_tilt": 0}
    return gr.HTML(
        value=value,
        html_template=CAMERA_3D_HTML_TEMPLATE,
        js_on_load=CAMERA_3D_JS,
        imageUrl=imageUrl,
        **kwargs,
    )


# --- UI ---
# Citrus theme: CSS custom-property overrides for the Gradio app
# (light palette in :root, dark palette in :root.dark).
css = """
:root {
    --name: citrus;
    --primary-50: #fffbeb; --primary-100: #fef3c7; --primary-200: #fde68a;
    --primary-300: #fcd34d; --primary-400: #fbbf24; --primary-500: #f59e0b;
    --primary-600: #d97706; --primary-700: #b45309; --primary-800: #92400e;
    --primary-900: #78350f; --primary-950: #6c370f;
    --secondary-50: #fffbeb; --secondary-100: #fef3c7; --secondary-200: #fde68a;
    --secondary-300: #fcd34d; --secondary-400: #fbbf24; --secondary-500: #f59e0b;
    --secondary-600: #d97706; --secondary-700: #b45309; --secondary-800: #92400e;
    --secondary-900: #78350f; --secondary-950: #6c370f;
    --neutral-50: #fafaf9; --neutral-100: #f5f5f4; --neutral-200: #e7e5e4;
    --neutral-300: #d6d3d1; --neutral-400: #a8a29e; --neutral-500: #78716c;
    --neutral-600: #57534e; --neutral-700: #44403c; --neutral-800: #292524;
    --neutral-900: #1c1917; --neutral-950: #0f0e0d;
    --spacing-xxs: 2px; --spacing-xs: 4px; --spacing-sm: 6px; --spacing-md: 8px;
    --spacing-lg: 10px; --spacing-xl: 14px; --spacing-xxl: 28px;
    --radius-xxs: 1px; --radius-xs: 2px; --radius-sm: 4px; --radius-md: 6px;
    --radius-lg: 8px; --radius-xl: 12px; --radius-xxl: 22px;
    --text-xxs: 9px; --text-xs: 10px; --text-sm: 12px; --text-md: 14px;
    --text-lg: 16px; --text-xl: 22px; --text-xxl: 26px;
    --font: 'Ubuntu', ui-sans-serif, system-ui, sans-serif;
    --font-mono: 'Roboto Mono', ui-monospace, Consolas, monospace;
    --body-background-fill: var(--background-fill-primary);
    --body-text-color: var(--neutral-800);
    --body-text-size: var(--text-md);
    --body-text-weight: 400;
    --embed-radius: var(--radius-sm);
    --color-accent: var(--primary-500);
    --color-accent-soft: var(--primary-50);
    --background-fill-primary: var(--neutral-50);
    --background-fill-secondary: var(--neutral-50);
    --border-color-accent: var(--primary-300);
    --border-color-primary: var(--neutral-200);
    --link-text-color: var(--secondary-600);
    --link-text-color-active: var(--secondary-600);
    --link-text-color-hover: var(--secondary-700);
    --link-text-color-visited: var(--secondary-500);
    --body-text-color-subdued: var(--neutral-400);
    --accordion-text-color: var(--body-text-color);
    --table-text-color: var(--body-text-color);
    --shadow-drop: rgba(0,0,0,0.05) 0px 1px 2px 0px;
    --shadow-drop-lg: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
    --shadow-inset: rgba(0,0,0,0.05) 0px 2px 4px 0px inset;
    --shadow-spread: 3px;
    --block-background-fill: var(--neutral-100);
    --block-border-color: var(--neutral-300);
    --block-border-width: 1px;
    --block-info-text-color: var(--body-text-color-subdued);
    --block-info-text-size: var(--text-sm);
    --block-info-text-weight: 400;
    --block-label-background-fill: var(--background-fill-primary);
    --block-label-border-color: var(--border-color-primary);
    --block-label-border-width: 1px;
    --block-label-shadow: none;
    --block-label-text-color: var(--neutral-500);
    --block-label-margin: 0;
    --block-label-padding: var(--spacing-sm) var(--spacing-lg);
    --block-label-radius: calc(var(--radius-sm) - 1px) 0 calc(var(--radius-sm) - 1px) 0;
    --block-label-right-radius: 0 calc(var(--radius-sm) - 1px) 0 calc(var(--radius-sm) - 1px);
    --block-label-text-size: var(--text-sm);
    --block-label-text-weight: 400;
    --block-padding: var(--spacing-xl) calc(var(--spacing-xl) + 2px);
    --block-radius: var(--radius-sm);
    --block-shadow: 0px 3px 0px 0px var(--neutral-300);
    --block-title-background-fill: none;
    --block-title-border-color: none;
    --block-title-border-width: 0px;
    --block-title-text-color: var(--neutral-500);
    --block-title-padding: 0;
    --block-title-radius: none;
    --block-title-text-size: var(--text-md);
    --block-title-text-weight: 400;
    --container-radius: var(--radius-sm);
    --form-gap-width: 0px;
    --layout-gap: var(--spacing-xxl);
    --panel-background-fill: var(--background-fill-secondary);
    --panel-border-color: var(--border-color-primary);
    --panel-border-width: 1px;
    --section-header-text-size: var(--text-md);
    --section-header-text-weight: 400;
    --border-color-accent-subdued: var(--border-color-accent);
    --code-background-fill: var(--neutral-100);
    --chatbot-text-size: var(--text-lg);
    --checkbox-background-color: var(--background-fill-primary);
    --checkbox-background-color-focus: var(--checkbox-background-color);
    --checkbox-background-color-hover: var(--checkbox-background-color);
    --checkbox-background-color-selected: var(--color-accent);
    --checkbox-border-color: var(--neutral-300);
    --checkbox-border-color-focus: var(--color-accent);
    --checkbox-border-color-hover: var(--neutral-300);
    --checkbox-border-color-selected: var(--color-accent);
    --checkbox-border-radius: var(--radius-sm);
    --checkbox-border-width: var(--input-border-width);
    --checkbox-label-background-fill: var(--neutral-200);
    --checkbox-label-background-fill-hover: var(--checkbox-label-background-fill);
    --checkbox-label-background-fill-selected: var(--primary-400);
    --checkbox-label-border-color: var(--border-color-primary);
    --checkbox-label-border-color-hover: var(--checkbox-label-border-color);
    --checkbox-label-border-color-selected: var(--primary-300);
    --checkbox-label-border-width: 2px;
    --checkbox-label-gap: var(--spacing-lg);
    --checkbox-label-padding: var(--spacing-md) calc(2 * var(--spacing-md));
    --checkbox-label-shadow: none;
    --checkbox-label-text-size: var(--text-md);
    --checkbox-label-text-weight: 400;
    --checkbox-check: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e");
    --radio-circle: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
    --checkbox-shadow: none;
    --checkbox-label-text-color: var(--body-text-color);
    --checkbox-label-text-color-selected: var(--checkbox-label-text-color);
    --error-background-fill: #fef2f2;
    --error-border-color: #b91c1c;
    --error-border-width: 1px;
    --error-text-color: #b91c1c;
    --error-icon-color: #b91c1c;
    --input-background-fill: var(--neutral-50);
    --input-background-fill-focus: var(--primary-50);
    --input-background-fill-hover: var(--input-background-fill);
    --input-border-color: var(--border-color-primary);
    --input-border-color-focus: var(--secondary-300);
    --input-border-color-hover: var(--input-border-color);
    --input-border-width: 1px;
    --input-padding: var(--spacing-xl);
    --input-placeholder-color: var(--neutral-400);
    --input-radius: var(--radius-sm);
    --input-shadow: 0px -1px 0px 0px var(--neutral-300);
    --input-shadow-focus: 0px -1px 0px 0px var(--primary-300);
    --input-text-size: var(--text-md);
    --input-text-weight: 400;
    --loader-color: var(--color-accent);
    --prose-text-size: var(--text-md);
    --prose-text-weight: 400;
    --prose-header-text-weight: 600;
    --slider-color: var(--primary-400);
    --stat-background-fill: var(--primary-300);
    --table-border-color: var(--neutral-300);
    --table-even-background-fill: white;
    --table-odd-background-fill: var(--neutral-50);
    --table-radius: var(--radius-sm);
    --table-row-focus: var(--color-accent-soft);
    --button-border-width: 0px;
    --button-cancel-background-fill: #ef4444;
    --button-cancel-background-fill-hover: #dc2626;
    --button-cancel-border-color: var(--button-secondary-border-color);
    --button-cancel-border-color-hover: var(--button-secondary-border-color-hover);
    --button-cancel-text-color: white;
    --button-cancel-text-color-hover: white;
    --button-cancel-shadow: 0px 3px 0px 0px rgb(248 113 113);
    --button-cancel-shadow-hover: 0px 5px 0px 0px rgb(248 113 113);
    --button-cancel-shadow-active: 0px 2px 0px 0px rgb(248 113 113);
    --button-transform-hover: translateY(-2px);
    --button-transform-active: translateY(1px);
    --button-transition: all 0.1s;
    --button-large-padding: var(--spacing-lg) calc(2 * var(--spacing-lg));
    --button-large-radius: var(--radius-md);
    --button-large-text-size: var(--text-lg);
    --button-large-text-weight: 600;
    --button-primary-background-fill: var(--primary-500);
    --button-primary-background-fill-hover: var(--button-primary-background-fill);
    --button-primary-border-color: var(--primary-500);
    --button-primary-border-color-hover: var(--primary-500);
    --button-primary-text-color: var(--button-secondary-text-color);
    --button-primary-text-color-hover: var(--button-primary-text-color);
    --button-primary-shadow: 0px 3px 0px 0px var(--primary-400);
    --button-primary-shadow-hover: 0px 5px 0px 0px var(--primary-400);
    --button-primary-shadow-active: 0px 2px 0px 0px var(--primary-400);
    --button-secondary-background-fill: var(--primary-400);
    --button-secondary-background-fill-hover: var(--button-secondary-background-fill);
    --button-secondary-border-color: var(--neutral-200);
    --button-secondary-border-color-hover: var(--neutral-200);
    --button-secondary-text-color: black;
    --button-secondary-text-color-hover: var(--button-secondary-text-color);
    --button-secondary-shadow: 0px 3px 0px 0px var(--primary-300);
    --button-secondary-shadow-hover: 0px 5px 0px 0px var(--primary-300);
    --button-secondary-shadow-active: 0px 2px 0px 0px var(--primary-300);
    --button-small-padding: var(--spacing-sm) calc(1.5 * var(--spacing-sm));
    --button-small-radius: var(--radius-md);
    --button-small-text-size: var(--text-sm);
    --button-small-text-weight: 400;
    --button-medium-padding: var(--spacing-md) calc(2 * var(--spacing-md));
    --button-medium-radius: var(--radius-md);
    --button-medium-text-size: var(--text-md);
    --button-medium-text-weight: 600;
}
:root.dark, :root .dark {
    --body-background-fill: var(--background-fill-primary);
    --body-text-color: var(--neutral-100);
    --color-accent-soft: var(--neutral-700);
    --background-fill-primary: var(--neutral-950);
    --background-fill-secondary: var(--neutral-900);
    --border-color-accent: var(--neutral-600);
    --border-color-primary: var(--neutral-700);
    --link-text-color-active: var(--secondary-500);
    --link-text-color: var(--secondary-500);
    --link-text-color-hover: var(--secondary-400);
    --link-text-color-visited: var(--secondary-600);
    --body-text-color-subdued: var(--neutral-400);
    --accordion-text-color: var(--body-text-color);
    --table-text-color: var(--body-text-color);
    --shadow-spread: 1px;
    --block-background-fill: var(--neutral-800);
    --block-border-color: var(--border-color-primary);
    --block-info-text-color: var(--body-text-color-subdued);
    --block-label-background-fill: var(--background-fill-secondary);
    --block-label-border-color: var(--border-color-primary);
    --block-label-text-color: var(--neutral-200);
    --block-shadow: 0px 3px 0px 0px var(--neutral-700);
    --block-title-text-color: var(--neutral-200);
    --panel-background-fill: var(--background-fill-secondary);
    --panel-border-color: var(--border-color-primary);
    --border-color-accent-subdued: var(--border-color-accent);
    --code-background-fill: var(--neutral-800);
    --checkbox-background-color: var(--neutral-400);
    --checkbox-background-color-focus: var(--checkbox-background-color);
    --checkbox-background-color-hover: var(--checkbox-background-color);
    --checkbox-background-color-selected: var(--primary-600);
    --checkbox-border-color: var(--neutral-700);
    --checkbox-border-color-focus: var(--color-accent);
    --checkbox-border-color-hover: var(--neutral-600);
    --checkbox-border-color-selected: var(--color-accent);
    --checkbox-border-width: var(--input-border-width);
    --checkbox-label-background-fill: var(--neutral-700);
    --checkbox-label-background-fill-hover: var(--checkbox-label-background-fill);
    --checkbox-label-background-fill-selected: var(--primary-500);
    --checkbox-label-border-color: var(--border-color-primary);
    --checkbox-label-border-color-hover: var(--checkbox-label-border-color);
    --checkbox-label-border-color-selected: var(--primary-600);
    --checkbox-label-border-width: 2px;
    --checkbox-label-text-color: var(--body-text-color);
    --checkbox-label-text-color-selected: var(--button-primary-text-color);
    --error-background-fill: var(--background-fill-primary);
    --error-border-color: #ef4444;
    --error-text-color: #fef2f2;
    --error-icon-color: #ef4444;
    --input-background-fill: var(--neutral-900);
    --input-background-fill-focus: none;
    --input-background-fill-hover: var(--input-background-fill);
    --input-border-color: var(--border-color-primary);
    --input-border-color-focus: var(--neutral-700);
    --input-border-color-hover: var(--input-border-color);
    --input-placeholder-color: var(--neutral-500);
    --input-shadow: 0px -1px 0px 0px var(--neutral-700);
    --input-shadow-focus: 0px -1px 0px 0px var(--primary-600);
    --slider-color: var(--primary-500);
    --stat-background-fill: var(--primary-500);
    --table-border-color: var(--neutral-700);
    --table-even-background-fill: var(--neutral-950);
    --table-odd-background-fill: var(--neutral-900);
    --table-row-focus: var(--color-accent-soft);
    --button-cancel-background-fill: #b91c1c;
    --button-cancel-background-fill-hover: #991b1b;
    --button-cancel-border-color: var(--button-secondary-border-color);
    --button-cancel-border-color-hover: var(--button-secondary-border-color-hover);
    --button-cancel-text-color: white;
    --button-cancel-text-color-hover: white;
    --button-cancel-shadow: 0px 3px 0px 0px rgb(220 38 38);
    --button-cancel-shadow-hover: 0px 5px 0px 0px rgb(220 38 38);
    --button-cancel-shadow-active: 0px 2px 0px 0px rgb(220 38 38);
    --button-primary-background-fill: var(--primary-600);
    --button-primary-background-fill-hover: var(--button-primary-background-fill);
    --button-primary-border-color: var(--primary-600);
    --button-primary-border-color-hover: var(--primary-500);
    --button-primary-text-color: var(--button-secondary-text-color);
    --button-primary-text-color-hover: var(--button-primary-text-color);
    --button-primary-shadow: 0px 3px 0px 0px var(--primary-700);
    --button-primary-shadow-hover: 0px 5px 0px 0px var(--primary-700);
    --button-primary-shadow-active: 0px 2px 0px 0px var(--primary-700);
    --button-secondary-background-fill: var(--primary-500);
    --button-secondary-background-fill-hover: var(--button-secondary-background-fill);
    --button-secondary-border-color: var(--neutral-600);
    --button-secondary-border-color-hover: var(--neutral-500);
    --button-secondary-text-color: var(--neutral-900);
    --button-secondary-text-color-hover: var(--button-secondary-text-color);
    --button-secondary-shadow: 0px 3px 0px 0px var(--primary-600);
    --button-secondary-shadow-hover: 0px 5px 0px 0px var(--primary-600);
    --button-secondary-shadow-active: 0px 2px 0px 0px var(--primary-600);
}
#col-container { max-width: 1100px; margin: 0 auto; }
.dark .progress-text { color: white !important; }
#camera-3d-control { min-height: 400px; }
#examples { max-width: 1100px; margin: 0 auto; }
.fillable { max-width: 1250px !important; }
"""


def reset_all() -> list:
    """Reset all camera control knobs and flags to their default
    values.
    """
    return [0, 5.0, 0, True]  # rotate_deg, zoom, vertical_tilt, is_reset


def end_reset() -> bool:
    """Mark the end of a reset cycle (clears the hidden ``is_reset`` flag)."""
    return False


def update_dimensions_on_upload(image: Optional[Image.Image]) -> Tuple[int, int]:
    """Compute recommended (width, height) for the output resolution.

    Preserves the input's aspect ratio, caps the longer side at 1024, and
    rounds both sides down to a multiple of 8 (the width/height sliders in
    the UI also step by 8 — presumably a latent-stride requirement of the
    pipeline; confirm against the model).

    Args:
        image: Uploaded PIL image, or None when nothing is uploaded.

    Returns:
        (width, height) tuple; defaults to (1024, 1024) when image is None.
    """
    if image is None:
        return 1024, 1024
    original_width, original_height = image.size
    if original_width > original_height:
        # Landscape: pin width to 1024, scale height proportionally.
        new_width = 1024
        aspect_ratio = original_height / original_width
        new_height = int(new_width * aspect_ratio)
    else:
        # Portrait or square: pin height to 1024, scale width proportionally.
        new_height = 1024
        aspect_ratio = original_width / original_height
        new_width = int(new_height * aspect_ratio)
    # Snap both dimensions down to the nearest multiple of 8.
    new_width = (new_width // 8) * 8
    new_height = (new_height // 8) * 8
    return new_width, new_height


# --- Gradio UI ---
# Layout: left column = input image + 3D camera widget + buttons;
# right column = output image + slider controls + advanced settings.
with gr.Blocks() as demo:
    gr.Markdown("""
    ## 🎬 Fibo Edit — Camera Angle Control
    Fibo Edit with Multi-Angle LoRA for precise camera control ✨

    Control rotation, tilt, and zoom to generate images from any angle 🎥
    ### Fine-tuning data was created by [Lovis](https://huggingface.co/fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA) and UI by [Apolinario](https://huggingface.co/spaces/multimodalart/qwen-image-multiple-angles-3d-camera)
    """)
    with gr.Row():
        with gr.Column(scale=1):
            image = gr.Image(label="Input Image", type="pil", height=280)
            # Previous generation, kept hidden so it can be fed back as an input.
            prev_output = gr.Image(value=None, visible=False)
            # Hidden flag toggled during the reset cycle (set by reset_all,
            # cleared by end_reset); not read by any visible handler here.
            is_reset = gr.Checkbox(value=False, visible=False)
            # Hidden state to pass processed image between steps
            processed_image = gr.State(None)

            gr.Markdown("### 🎮 3D Camera Control")
            # Custom interactive 3D widget (defined elsewhere in this project);
            # its value mirrors the three sliders below.
            camera_3d = create_camera_3d_component(
                value={"rotate_deg": 0, "zoom": 5.0, "vertical_tilt": 0},
                elem_id="camera-3d-control",
            )
            with gr.Row():
                reset_btn = gr.Button("🔄 Reset", size="sm")
                run_btn = gr.Button("🚀 Generate", variant="primary", size="lg")

        with gr.Column(scale=1):
            result = gr.Image(label="Output Image", interactive=False, height=350)

            gr.Markdown("### 🎚️ Slider Controls")
            rotate_deg = gr.Slider(
                label="Horizontal Rotation (°)",
                minimum=-180,
                maximum=180,
                step=45,
                value=0,
                info="-180/180: back, -90: left, 0: front, 90: right",
            )
            zoom = gr.Slider(
                label="Zoom Level",
                minimum=0,
                maximum=10,
                step=1,
                value=5.0,
                info="0-3.33: wide, 3.33-6.66: medium, 6.66-10: close-up",
            )
            vertical_tilt = gr.Slider(
                label="Vertical Tilt",
                minimum=-1,
                maximum=1,
                step=0.5,
                value=0,
                info="-1: low-angle, 0: eye-level, 1: high-angle",
            )
            # Read-only preview of the camera prompt built from the sliders.
            prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)

            with gr.Accordion("📋 Structured Caption (BRIA API)", open=False):
                structured_json = gr.JSON(label="JSON Response", container=False)

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=DEFAULT_SEED,
                )
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=DEFAULT_GUIDANCE_SCALE,
                )
                num_inference_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=DEFAULT_NUM_INFERENCE_STEPS,
                )
                height = gr.Slider(
                    label="Height", minimum=256, maximum=2048, step=8, value=1024
                )
                width = gr.Slider(
                    label="Width", minimum=256, maximum=2048, step=8, value=1024
                )

    # --- Helper Functions ---
    def update_prompt_from_sliders(rotate, zoom_val, tilt):
        """Rebuild the camera prompt preview from the three slider values."""
        prompt, _ = build_camera_prompt(rotate, zoom_val, tilt)
        return prompt

    def sync_3d_to_sliders(camera_value):
        """Push the 3D widget's state onto the sliders and prompt preview.

        Returns no-op updates when the widget value is missing or malformed.
        """
        if camera_value and isinstance(camera_value, dict):
            rot = camera_value.get("rotate_deg", 0)
            zoom_val = camera_value.get("zoom", 5.0)
            tilt = camera_value.get("vertical_tilt", 0)
            prompt, _ = build_camera_prompt(rot, zoom_val, tilt)
            return rot, zoom_val, tilt, prompt
        return gr.update(), gr.update(), gr.update(), gr.update()

    def sync_sliders_to_3d(rotate, zoom_val, tilt):
        """Pack the slider values into the dict the 3D widget expects."""
        return {"rotate_deg": rotate, "zoom": zoom_val, "vertical_tilt": tilt}

    def update_3d_image(img):
        """Embed the uploaded image into the 3D widget as a base64 data URL."""
        if img is None:
            return gr.update(imageUrl=None)
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        data_url = f"data:image/png;base64,{img_str}"
        return gr.update(imageUrl=data_url)

    # --- Event Handlers ---
    # Slider -> Prompt preview
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.change(
            fn=update_prompt_from_sliders,
            inputs=[rotate_deg, zoom, vertical_tilt],
            outputs=[prompt_preview],
        )

    # 3D control -> Sliders + Prompt (no auto-inference)
    camera_3d.change(
        fn=sync_3d_to_sliders,
        inputs=[camera_3d],
        outputs=[rotate_deg, zoom, vertical_tilt, prompt_preview],
    )

    # Sliders -> 3D control (no auto-inference); wired on .release so the
    # widget is only updated when the user lets go of the slider handle.
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.release(
            fn=sync_sliders_to_3d,
            inputs=[rotate_deg, zoom, vertical_tilt],
            outputs=[camera_3d],
        )

    # Reset: restore slider defaults, pulse is_reset, then re-sync the 3D widget.
    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, zoom, vertical_tilt, is_reset],
        queue=False,
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False).then(
        fn=sync_sliders_to_3d,
        inputs=[rotate_deg, zoom, vertical_tilt],
        outputs=[camera_3d],
    )

    # Generate button - Two-stage process
    # Stage 1: Fetch structured caption from BRIA API and display it immediately
    run_event = run_btn.click(
        fn=fetch_structured_caption,
        inputs=[
            image,
            rotate_deg,
            zoom,
            vertical_tilt,
            seed,
            randomize_seed,
            prev_output,
        ],
        outputs=[seed, prompt_preview, structured_json, processed_image],
    ).then(
        # Stage 2: Generate image with Fibo Edit pipeline
        fn=generate_image_from_caption,
        inputs=[
            processed_image,
            structured_json,
            seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result],
    )

    # Image upload: recompute output size, reset camera state, then load the
    # new image into the 3D widget.
    image.upload(
        fn=update_dimensions_on_upload, inputs=[image], outputs=[width, height]
    ).then(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, zoom, vertical_tilt, is_reset],
        queue=False,
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False).then(
        fn=update_3d_image, inputs=[image], outputs=[camera_3d]
    )

    # Clearing the input also clears the texture shown in the 3D widget.
    image.clear(fn=lambda: gr.update(imageUrl=None), outputs=[camera_3d])

    # After generation, stash the result as prev_output so the next run can
    # reference it.
    run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])

    # Examples - Commenting out for now since we need actual example images
    # Note: With the two-stage inference process, examples would need custom handling
    # to properly chain fetch_structured_caption -> generate_image_from_caption

    # Sync 3D component when sliders change (covers example loading)
    def sync_3d_on_slider_change(img, rot, zoom_val, tilt):
        """Mirror slider state into the 3D widget, re-attaching the image if set."""
        camera_value = {"rotate_deg": rot, "zoom": zoom_val, "vertical_tilt": tilt}
        if img is not None:
            buffered = BytesIO()
            img.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()
            data_url = f"data:image/png;base64,{img_str}"
            return gr.update(value=camera_value, imageUrl=data_url)
        return gr.update(value=camera_value)

    # When any slider value changes (including from examples), sync the 3D component
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.change(
            fn=sync_3d_on_slider_change,
            inputs=[image, rotate_deg, zoom, vertical_tilt],
            outputs=[camera_3d],
        )

    # API endpoints for the two-stage inference process
    gr.api(fetch_structured_caption, api_name="fetch_caption")
    gr.api(generate_image_from_caption, api_name="generate_image")


if __name__ == "__main__":
    head = ''
    if RUN_LOCAL:
        # Local development configuration
        demo.launch(
            mcp_server=True,
            head=head,
            footer_links=["api", "gradio", "settings"],
            server_name="0.0.0.0",
            server_port=8081,
            css=css,
        )
    else:
        # HuggingFace Spaces standard configuration
        # demo.launch(head=head, debug=True, show_error=True, css=css)
        demo.launch(head=head, css=css)