# app.py — FIBO-Edit multi-angle camera demo (HuggingFace Space, commit b87f5d5)
import base64
import json
import os
import random
import time
from io import BytesIO
from typing import Optional, Tuple
import gradio as gr
import numpy as np
import requests
import spaces
import torch
from PIL import Image
from fibo_edit_pipeline import BriaFiboEditPipeline
from utils import AngleInstruction
# --- Configuration ---
# Prefer GPU when available; used for pipeline placement and the torch RNG.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Run locally or on HuggingFace Spaces
RUN_LOCAL = False
# Model paths
BASE_CHECKPOINT = "briaai/FIBO-Edit"  # HuggingFace model ID for the base pipeline
LORA_CHECKPOINT = "briaai/fibo_edit_multi_angle_full_0121_full_1k"  # HuggingFace LoRA model ID
# BRIA API configuration (used to turn an image + prompt into a structured caption)
BRIA_API_URL = "https://engine.prod.bria-api.com/v2/structured_prompt/generate/pro"
BRIA_API_TOKEN = os.environ.get("BRIA_API_TOKEN")
# Fail fast at import time if the API secret is missing — the app cannot
# generate structured captions without it.
if not BRIA_API_TOKEN:
    raise ValueError(
        "BRIA_API_TOKEN environment variable is not set. "
        "Please add it as a HuggingFace Space secret."
    )
# Generation defaults
DEFAULT_NUM_INFERENCE_STEPS = 50
DEFAULT_GUIDANCE_SCALE = 3.5
DEFAULT_SEED = 100050
# Largest random seed handed out when "randomize seed" is enabled (fits int32).
MAX_SEED = np.iinfo(np.int32).max
print("🚀 Starting Fibo Edit Multi-Angle LoRA Gradio App")
print(f"Device: {device}")
print(f"Base checkpoint: {BASE_CHECKPOINT}")
print(f"LoRA checkpoint: {LORA_CHECKPOINT}")
# --- Helper Functions ---
def load_pipeline_fiboedit(
    checkpoint: str,
    lora_checkpoint: Optional[str] = None,
    lora_scale: Optional[float] = None,
    fuse_lora: bool = True,
):
    """
    Load the Fibo Edit pipeline using BriaFiboEditPipeline with optional LoRA weights.

    Args:
        checkpoint: HuggingFace model ID for base model
        lora_checkpoint: Optional HuggingFace model ID for LoRA weights
        lora_scale: Scale for LoRA weights when fusing (default None = 1.0).
            NOTE(review): currently accepted but never applied — the PEFT
            adapter is merged at its trained scale. Confirm whether callers
            expect this parameter to take effect.
        fuse_lora: Whether to fuse LoRA into base weights (default True)

    Returns:
        Loaded BriaFiboEditPipeline
    """
    print(f"Loading BriaFiboEditPipeline from {checkpoint}")
    if lora_checkpoint:
        print(f" with LoRA from {lora_checkpoint}")
    # Load pipeline from HuggingFace
    print("Loading pipeline...")
    pipe = BriaFiboEditPipeline.from_pretrained(
        checkpoint,
        torch_dtype=torch.bfloat16,
    )
    # Fix: honor the module-level device selection instead of hard-coding
    # "cuda", so the app can still start on a CPU-only machine.
    pipe.to(device)
    print(f" Pipeline loaded from {checkpoint}")
    # Load LoRA weights if provided (PEFT format)
    if lora_checkpoint:
        print(f"Loading PEFT LoRA from {lora_checkpoint}...")
        from peft import PeftModel
        print(" Loading PEFT adapter onto transformer...")
        pipe.transformer = PeftModel.from_pretrained(
            pipe.transformer,
            lora_checkpoint,
        )
        print(" PEFT adapter loaded successfully")
        if fuse_lora:
            # Merging bakes the adapter into the base weights and drops the
            # PEFT wrapper, avoiding per-forward adapter overhead.
            print(" Merging LoRA into base weights...")
            if hasattr(pipe.transformer, "merge_and_unload"):
                pipe.transformer = pipe.transformer.merge_and_unload()
                print(" LoRA merged and unloaded")
            else:
                print(" [WARN] transformer.merge_and_unload() not available")
    print("✅ Pipeline loaded successfully!")
    return pipe
def generate_structured_caption(
    image: Image.Image, prompt: str, seed: int = 1
) -> Optional[dict]:
    """Generate structured caption using BRIA API.

    Encodes the image as base64 PNG and POSTs it with the prompt to the
    BRIA structured-prompt endpoint, retrying up to 3 times (3 s apart).

    Args:
        image: Input image sent to the API.
        prompt: Natural-language edit instruction.
        seed: Seed forwarded to the API for reproducibility.

    Returns:
        Parsed structured-prompt dict, or None if all attempts fail.
    """
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    image_bytes = base64.b64encode(buffered.getvalue()).decode("utf-8")
    payload = {
        "seed": seed,
        "sync": True,
        "images": [image_bytes],
        "prompt": prompt,
    }
    headers = {
        "Content-Type": "application/json",
        "api_token": BRIA_API_TOKEN,
    }
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = requests.post(
                BRIA_API_URL, json=payload, headers=headers, timeout=60
            )
            response.raise_for_status()
            data = response.json()
            structured_prompt_str = data["result"]["structured_prompt"]
            return json.loads(structured_prompt_str)
        # Fix: catch only the failure modes this call can actually produce —
        # network/HTTP errors, a missing response key, or malformed JSON
        # (json.JSONDecodeError subclasses ValueError) — rather than a blanket
        # `except Exception` that would also mask programming errors.
        except (requests.RequestException, KeyError, TypeError, ValueError) as e:
            if attempt == max_retries - 1:
                print(f"Failed to generate structured caption: {e}")
                return None
            time.sleep(3)
    return None
# --- Model Loading ---
# Load the pipeline once at import time so every Gradio request reuses it.
print("Loading Fibo Edit pipeline...")
try:
    pipe = load_pipeline_fiboedit(
        checkpoint=BASE_CHECKPOINT,
        lora_checkpoint=LORA_CHECKPOINT,
        lora_scale=None,
        fuse_lora=True,
    )
    if torch.cuda.is_available():
        # Report GPU memory usage of device 0 in GiB for startup diagnostics.
        mem_allocated = torch.cuda.memory_allocated(0) / 1024**3
        print(f" GPU memory allocated: {mem_allocated:.2f} GB")
except Exception as e:
    # Log the full traceback, then re-raise so the Space fails visibly
    # instead of serving a UI with no model behind it.
    print(f"❌ Error loading pipeline: {e}")
    import traceback
    traceback.print_exc()
    raise
def build_camera_prompt(
    rotate_deg: float = 0.0, zoom: float = 0.0, vertical_tilt: float = 0.0
) -> Tuple[str, "AngleInstruction"]:
    """Build a natural language camera instruction from parameters.

    Args:
        rotate_deg: Horizontal rotation in degrees.
        zoom: Zoom amount (0-10 scale used by the UI).
        vertical_tilt: Vertical tilt in [-1, 1].

    Returns:
        Tuple of (natural-language prompt string, AngleInstruction).
    """
    # Fix: the return annotation previously said `-> str` but the function
    # returns a (prompt, AngleInstruction) tuple — corrected to match callers.
    angle_instruction = AngleInstruction.from_camera_params(
        rotation=rotate_deg, tilt=vertical_tilt, zoom=zoom
    )
    # Map each discrete view/shot/zoom bucket to a natural-language fragment.
    view_map = {
        "back view": "view from the opposite side",
        "back-left quarter view": "rotate 135 degrees left",
        "back-right quarter view": "rotate 135 degrees right",
        "front view": "keep the front view",
        "front-left quarter view": "rotate 45 degrees left",
        "front-right quarter view": "rotate 45 degrees right",
        "left side view": "rotate 90 degrees left",
        "right side view": "rotate 90 degrees right",
    }
    shot_map = {
        "elevated shot": "with an elevated viewing angle",
        "eye-level shot": "with an eye-level viewing angle",
        "high-angle shot": "with a high-angle viewing angle",
        "low-angle shot": "with a low-angle viewing angle",
    }
    zoom_map = {
        "close-up": "and make it a close-up shot",
        "medium shot": "",  # Omit medium shot
        "wide shot": "and make it a wide shot",
    }
    view_text = view_map[angle_instruction.view.value]
    shot_text = shot_map[angle_instruction.shot.value]
    zoom_text = zoom_map[angle_instruction.zoom.value]
    # Construct the natural language prompt starting with "Change the viewing angle"
    parts = [view_text, shot_text]
    if zoom_text:  # Only add zoom if not empty (medium shot is omitted)
        parts.append(zoom_text)
    natural_prompt = "Change the viewing angle: " + ", ".join(parts)
    return natural_prompt, angle_instruction
def fetch_structured_caption(
    image: Optional[Image.Image] = None,
    rotate_deg: float = 0.0,
    zoom: float = 0.0,
    vertical_tilt: float = 0.0,
    seed: int = 0,
    randomize_seed: bool = True,
    prev_output: Optional[Image.Image] = None,
) -> Tuple[int, str, dict, Image.Image]:
    """Fetch structured caption from BRIA API.

    Builds the camera prompt from the 3D-control parameters, resolves the
    input image (upload first, falling back to the previous output), then
    requests a structured caption and stamps it with the angle instruction.

    Returns:
        Tuple of (seed used, natural-language prompt, structured caption
        dict, resolved RGB input image).

    Raises:
        gr.Error: If no image is available or the BRIA API call fails.
    """
    # Build natural language prompt and angle instruction
    natural_prompt, angle_instruction = build_camera_prompt(
        rotate_deg, zoom, vertical_tilt
    )
    print(f"Natural Language Prompt: {natural_prompt}")
    print(f"Angle Instruction: {str(angle_instruction)}")
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Resolve the input image: uploaded image takes priority over the
    # previous generation output.
    if image is not None:
        if isinstance(image, Image.Image):
            input_image = image.convert("RGB")
        elif hasattr(image, "name"):
            # Gradio may hand over a temp-file wrapper instead of a PIL image.
            input_image = Image.open(image.name).convert("RGB")
        else:
            input_image = image
    # Fix: test identity explicitly rather than relying on the truthiness
    # of a PIL Image object.
    elif prev_output is not None:
        input_image = prev_output.convert("RGB")
    else:
        raise gr.Error("Please upload an image first.")
    # Generate structured caption using BRIA API
    print("Generating structured caption from BRIA API...")
    structured_caption = generate_structured_caption(
        input_image, natural_prompt, seed=seed
    )
    if structured_caption is None:
        raise gr.Error("Failed to generate structured caption from BRIA API")
    # Replace edit_instruction with angle instruction string
    structured_caption["edit_instruction"] = str(angle_instruction)
    print(
        f"Structured caption received: {json.dumps(structured_caption, ensure_ascii=False)}"
    )
    return seed, natural_prompt, structured_caption, input_image
@spaces.GPU(duration=240)
def generate_image_from_caption(
    input_image: Image.Image,
    structured_caption: dict,
    seed: int,
    guidance_scale: float = 3.5,
    num_inference_steps: int = 50,
) -> Image.Image:
    """Run the Fibo Edit pipeline on *input_image* guided by a structured caption.

    The caption dict is serialized to JSON and passed as the pipeline prompt;
    the seed makes the diffusion sampling reproducible.
    """
    structured_prompt = json.dumps(structured_caption, ensure_ascii=False)
    print("Generating image with structured prompt...")
    # Seed a device-local generator so repeated calls reproduce the same output.
    rng = torch.Generator(device=device).manual_seed(seed)
    outputs = pipe(
        image=input_image,
        prompt=structured_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=rng,
        num_images_per_prompt=1,
    )
    # A single image is requested, so return the first (only) result.
    return outputs.images[0]
# --- 3D Camera Control Component ---
# Using gr.HTML directly with templates (Gradio 6 style)
# Static markup for the 3D camera widget: a Three.js canvas container, a
# bottom overlay that echoes the current prompt, and a color legend for the
# three drag handles (rotation / tilt / zoom). The string is injected as the
# component's HTML template and must stay byte-identical to the markup the
# companion JS (CAMERA_3D_JS) queries by element id.
CAMERA_3D_HTML_TEMPLATE = """
<div id="camera-control-wrapper" style="width: 100%; height: 400px; position: relative; background: #1a1a1a; border-radius: 12px; overflow: hidden;">
<div id="prompt-overlay" style="position: absolute; bottom: 10px; left: 50%; transform: translateX(-50%); background: rgba(0,0,0,0.8); padding: 8px 16px; border-radius: 8px; font-family: monospace; font-size: 11px; color: #00ff88; white-space: nowrap; z-index: 10; max-width: 90%; overflow: hidden; text-overflow: ellipsis;"></div>
<div id="control-legend" style="position: absolute; top: 10px; left: 10px; background: rgba(0,0,0,0.7); padding: 8px 12px; border-radius: 8px; font-family: system-ui; font-size: 11px; color: #fff; z-index: 10;">
<div style="margin-bottom: 4px;"><span style="color: #00ff88;">●</span> Rotation (↔)</div>
<div style="margin-bottom: 4px;"><span style="color: #ff69b4;">●</span> Vertical Tilt (↕)</div>
<div><span style="color: #ffa500;">●</span> Distance/Zoom</div>
</div>
</div>
"""
# Client-side controller for the 3D camera widget, executed in the browser
# via the component's js_on_load hook. It builds a Three.js scene (camera
# model, rotation ring, tilt arc, distance line), lets the user drag three
# handles to set rotation / tilt / zoom with snap-to-step animation, mirrors
# the current state into `props.value`, and polls for external prop changes.
# The string is runtime code for the browser — it must stay byte-identical;
# do not edit it when only touching the Python side.
CAMERA_3D_JS = """
(() => {
const wrapper = element.querySelector('#camera-control-wrapper');
const promptOverlay = element.querySelector('#prompt-overlay');
const initScene = () => {
if (typeof THREE === 'undefined') {
setTimeout(initScene, 100);
return;
}
const scene = new THREE.Scene();
scene.background = new THREE.Color(0x1a1a1a);
const camera = new THREE.PerspectiveCamera(50, wrapper.clientWidth / wrapper.clientHeight, 0.1, 1000);
camera.position.set(4, 3, 4);
camera.lookAt(0, 0.75, 0);
const renderer = new THREE.WebGLRenderer({ antialias: true });
renderer.setSize(wrapper.clientWidth, wrapper.clientHeight);
renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
wrapper.insertBefore(renderer.domElement, wrapper.firstChild);
scene.add(new THREE.AmbientLight(0xffffff, 0.6));
const dirLight = new THREE.DirectionalLight(0xffffff, 0.6);
dirLight.position.set(5, 10, 5);
scene.add(dirLight);
scene.add(new THREE.GridHelper(6, 12, 0x333333, 0x222222));
const CENTER = new THREE.Vector3(0, 0.75, 0);
const BASE_DISTANCE = 2.0;
const ROTATION_RADIUS = 2.2;
const TILT_RADIUS = 1.6;
let rotateDeg = props.value?.rotate_deg || 0;
let zoom = props.value?.zoom || 5.0;
let verticalTilt = props.value?.vertical_tilt || 0;
const rotateSteps = [-180, -135, -90, -45, 0, 45, 90, 135, 180];
const zoomSteps = [0, 5, 10];
const tiltSteps = [-1, -0.5, 0, 0.5, 1];
function snapToNearest(value, steps) {
return steps.reduce((prev, curr) => Math.abs(curr - value) < Math.abs(prev - value) ? curr : prev);
}
function createPlaceholderTexture() {
const canvas = document.createElement('canvas');
canvas.width = 256;
canvas.height = 256;
const ctx = canvas.getContext('2d');
ctx.fillStyle = '#3a3a4a';
ctx.fillRect(0, 0, 256, 256);
ctx.fillStyle = '#ffcc99';
ctx.beginPath();
ctx.arc(128, 128, 80, 0, Math.PI * 2);
ctx.fill();
ctx.fillStyle = '#333';
ctx.beginPath();
ctx.arc(100, 110, 10, 0, Math.PI * 2);
ctx.arc(156, 110, 10, 0, Math.PI * 2);
ctx.fill();
ctx.strokeStyle = '#333';
ctx.lineWidth = 3;
ctx.beginPath();
ctx.arc(128, 130, 35, 0.2, Math.PI - 0.2);
ctx.stroke();
return new THREE.CanvasTexture(canvas);
}
let currentTexture = createPlaceholderTexture();
const planeMaterial = new THREE.MeshBasicMaterial({ map: currentTexture, side: THREE.DoubleSide });
let targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(1.2, 1.2), planeMaterial);
targetPlane.position.copy(CENTER);
scene.add(targetPlane);
function updateTextureFromUrl(url) {
if (!url) {
planeMaterial.map = createPlaceholderTexture();
planeMaterial.needsUpdate = true;
scene.remove(targetPlane);
targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(1.2, 1.2), planeMaterial);
targetPlane.position.copy(CENTER);
scene.add(targetPlane);
return;
}
const loader = new THREE.TextureLoader();
loader.crossOrigin = 'anonymous';
loader.load(url, (texture) => {
texture.minFilter = THREE.LinearFilter;
texture.magFilter = THREE.LinearFilter;
planeMaterial.map = texture;
planeMaterial.needsUpdate = true;
const img = texture.image;
if (img && img.width && img.height) {
const aspect = img.width / img.height;
const maxSize = 1.4;
let planeWidth, planeHeight;
if (aspect > 1) {
planeWidth = maxSize;
planeHeight = maxSize / aspect;
} else {
planeHeight = maxSize;
planeWidth = maxSize * aspect;
}
scene.remove(targetPlane);
targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(planeWidth, planeHeight), planeMaterial);
targetPlane.position.copy(CENTER);
scene.add(targetPlane);
}
});
}
if (props.imageUrl) {
updateTextureFromUrl(props.imageUrl);
}
const cameraGroup = new THREE.Group();
const bodyMat = new THREE.MeshStandardMaterial({ color: 0x6699cc, metalness: 0.5, roughness: 0.3 });
const body = new THREE.Mesh(new THREE.BoxGeometry(0.28, 0.2, 0.35), bodyMat);
cameraGroup.add(body);
const lens = new THREE.Mesh(
new THREE.CylinderGeometry(0.08, 0.1, 0.16, 16),
new THREE.MeshStandardMaterial({ color: 0x6699cc, metalness: 0.5, roughness: 0.3 })
);
lens.rotation.x = Math.PI / 2;
lens.position.z = 0.24;
cameraGroup.add(lens);
scene.add(cameraGroup);
const rotationArcPoints = [];
for (let i = 0; i <= 64; i++) {
const angle = THREE.MathUtils.degToRad((360 * i / 64));
rotationArcPoints.push(new THREE.Vector3(ROTATION_RADIUS * Math.sin(angle), 0.05, ROTATION_RADIUS * Math.cos(angle)));
}
const rotationCurve = new THREE.CatmullRomCurve3(rotationArcPoints);
const rotationArc = new THREE.Mesh(
new THREE.TubeGeometry(rotationCurve, 64, 0.035, 8, true),
new THREE.MeshStandardMaterial({ color: 0x00ff88, emissive: 0x00ff88, emissiveIntensity: 0.3 })
);
scene.add(rotationArc);
const rotationHandle = new THREE.Mesh(
new THREE.SphereGeometry(0.16, 16, 16),
new THREE.MeshStandardMaterial({ color: 0x00ff88, emissive: 0x00ff88, emissiveIntensity: 0.5 })
);
rotationHandle.userData.type = 'rotation';
scene.add(rotationHandle);
const tiltArcPoints = [];
for (let i = 0; i <= 32; i++) {
const angle = THREE.MathUtils.degToRad(-45 + (90 * i / 32));
tiltArcPoints.push(new THREE.Vector3(-0.7, TILT_RADIUS * Math.sin(angle) + CENTER.y, TILT_RADIUS * Math.cos(angle)));
}
const tiltCurve = new THREE.CatmullRomCurve3(tiltArcPoints);
const tiltArc = new THREE.Mesh(
new THREE.TubeGeometry(tiltCurve, 32, 0.035, 8, false),
new THREE.MeshStandardMaterial({ color: 0xff69b4, emissive: 0xff69b4, emissiveIntensity: 0.3 })
);
scene.add(tiltArc);
const tiltHandle = new THREE.Mesh(
new THREE.SphereGeometry(0.16, 16, 16),
new THREE.MeshStandardMaterial({ color: 0xff69b4, emissive: 0xff69b4, emissiveIntensity: 0.5 })
);
tiltHandle.userData.type = 'tilt';
scene.add(tiltHandle);
const distanceLineGeo = new THREE.BufferGeometry();
const distanceLine = new THREE.Line(distanceLineGeo, new THREE.LineBasicMaterial({ color: 0xffa500 }));
scene.add(distanceLine);
const distanceHandle = new THREE.Mesh(
new THREE.SphereGeometry(0.16, 16, 16),
new THREE.MeshStandardMaterial({ color: 0xffa500, emissive: 0xffa500, emissiveIntensity: 0.5 })
);
distanceHandle.userData.type = 'distance';
scene.add(distanceHandle);
function buildPromptText(rot, zoomVal, tilt) {
const parts = [];
if (rot !== 0) {
const dir = rot > 0 ? 'right' : 'left';
parts.push('Rotate ' + Math.abs(rot) + '° ' + dir);
}
if (zoomVal >= 6.66) parts.push('Close-up');
else if (zoomVal >= 3.33) parts.push('Medium shot');
else parts.push('Wide angle');
if (tilt >= 0.66) parts.push("High angle");
else if (tilt >= 0.33) parts.push("Elevated");
else if (tilt <= -0.33) parts.push("Low angle");
else parts.push("Eye level");
return parts.length > 0 ? parts.join(' • ') : 'No camera movement';
}
function updatePositions() {
const rotRad = THREE.MathUtils.degToRad(rotateDeg);
// Map zoom 0-10 to distance: zoom 0 = far (3.0), zoom 10 = close (1.0)
const distance = 3.0 - (zoom / 10) * 2.0;
const tiltAngle = verticalTilt * 35;
const tiltRad = THREE.MathUtils.degToRad(tiltAngle);
const camX = distance * Math.sin(rotRad) * Math.cos(tiltRad);
const camY = distance * Math.sin(tiltRad) + CENTER.y;
const camZ = distance * Math.cos(rotRad) * Math.cos(tiltRad);
cameraGroup.position.set(camX, camY, camZ);
cameraGroup.lookAt(CENTER);
rotationHandle.position.set(ROTATION_RADIUS * Math.sin(rotRad), 0.05, ROTATION_RADIUS * Math.cos(rotRad));
const tiltHandleAngle = THREE.MathUtils.degToRad(tiltAngle);
tiltHandle.position.set(-0.7, TILT_RADIUS * Math.sin(tiltHandleAngle) + CENTER.y, TILT_RADIUS * Math.cos(tiltHandleAngle));
const handleDist = distance - 0.4;
distanceHandle.position.set(
handleDist * Math.sin(rotRad) * Math.cos(tiltRad),
handleDist * Math.sin(tiltRad) + CENTER.y,
handleDist * Math.cos(rotRad) * Math.cos(tiltRad)
);
distanceLineGeo.setFromPoints([cameraGroup.position.clone(), CENTER.clone()]);
promptOverlay.textContent = buildPromptText(rotateDeg, zoom, verticalTilt);
}
function updatePropsAndTrigger() {
const rotSnap = snapToNearest(rotateDeg, rotateSteps);
const zoomSnap = snapToNearest(zoom, zoomSteps);
const tiltSnap = snapToNearest(verticalTilt, tiltSteps);
props.value = { rotate_deg: rotSnap, zoom: zoomSnap, vertical_tilt: tiltSnap };
trigger('change', props.value);
}
const raycaster = new THREE.Raycaster();
const mouse = new THREE.Vector2();
let isDragging = false;
let dragTarget = null;
let dragStartMouse = new THREE.Vector2();
let dragStartZoom = 0;
const intersection = new THREE.Vector3();
const canvas = renderer.domElement;
canvas.addEventListener('mousedown', (e) => {
const rect = canvas.getBoundingClientRect();
mouse.x = ((e.clientX - rect.left) / rect.width) * 2 - 1;
mouse.y = -((e.clientY - rect.top) / rect.height) * 2 + 1;
raycaster.setFromCamera(mouse, camera);
const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
if (intersects.length > 0) {
isDragging = true;
dragTarget = intersects[0].object;
dragTarget.material.emissiveIntensity = 1.0;
dragTarget.scale.setScalar(1.3);
dragStartMouse.copy(mouse);
dragStartZoom = zoom;
canvas.style.cursor = 'grabbing';
}
});
canvas.addEventListener('mousemove', (e) => {
const rect = canvas.getBoundingClientRect();
mouse.x = ((e.clientX - rect.left) / rect.width) * 2 - 1;
mouse.y = -((e.clientY - rect.top) / rect.height) * 2 + 1;
if (isDragging && dragTarget) {
raycaster.setFromCamera(mouse, camera);
if (dragTarget.userData.type === 'rotation') {
const plane = new THREE.Plane(new THREE.Vector3(0, 1, 0), -0.05);
if (raycaster.ray.intersectPlane(plane, intersection)) {
let angle = THREE.MathUtils.radToDeg(Math.atan2(intersection.x, intersection.z));
rotateDeg = THREE.MathUtils.clamp(angle, -180, 180);
}
} else if (dragTarget.userData.type === 'tilt') {
const plane = new THREE.Plane(new THREE.Vector3(1, 0, 0), 0.7);
if (raycaster.ray.intersectPlane(plane, intersection)) {
const relY = intersection.y - CENTER.y;
const relZ = intersection.z;
const angle = THREE.MathUtils.radToDeg(Math.atan2(relY, relZ));
verticalTilt = THREE.MathUtils.clamp(angle / 35, -1, 1);
}
} else if (dragTarget.userData.type === 'distance') {
const deltaY = mouse.y - dragStartMouse.y;
zoom = THREE.MathUtils.clamp(dragStartZoom + deltaY * 20, 0, 10);
}
updatePositions();
} else {
raycaster.setFromCamera(mouse, camera);
const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
[rotationHandle, tiltHandle, distanceHandle].forEach(h => {
h.material.emissiveIntensity = 0.5;
h.scale.setScalar(1);
});
if (intersects.length > 0) {
intersects[0].object.material.emissiveIntensity = 0.8;
intersects[0].object.scale.setScalar(1.1);
canvas.style.cursor = 'grab';
} else {
canvas.style.cursor = 'default';
}
}
});
const onMouseUp = () => {
if (dragTarget) {
dragTarget.material.emissiveIntensity = 0.5;
dragTarget.scale.setScalar(1);
const targetRot = snapToNearest(rotateDeg, rotateSteps);
const targetZoom = snapToNearest(zoom, zoomSteps);
const targetTilt = snapToNearest(verticalTilt, tiltSteps);
const startRot = rotateDeg, startZoom = zoom, startTilt = verticalTilt;
const startTime = Date.now();
function animateSnap() {
const t = Math.min((Date.now() - startTime) / 200, 1);
const ease = 1 - Math.pow(1 - t, 3);
rotateDeg = startRot + (targetRot - startRot) * ease;
zoom = startZoom + (targetZoom - startZoom) * ease;
verticalTilt = startTilt + (targetTilt - startTilt) * ease;
updatePositions();
if (t < 1) requestAnimationFrame(animateSnap);
else updatePropsAndTrigger();
}
animateSnap();
}
isDragging = false;
dragTarget = null;
canvas.style.cursor = 'default';
};
canvas.addEventListener('mouseup', onMouseUp);
canvas.addEventListener('mouseleave', onMouseUp);
canvas.addEventListener('touchstart', (e) => {
e.preventDefault();
const touch = e.touches[0];
const rect = canvas.getBoundingClientRect();
mouse.x = ((touch.clientX - rect.left) / rect.width) * 2 - 1;
mouse.y = -((touch.clientY - rect.top) / rect.height) * 2 + 1;
raycaster.setFromCamera(mouse, camera);
const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
if (intersects.length > 0) {
isDragging = true;
dragTarget = intersects[0].object;
dragTarget.material.emissiveIntensity = 1.0;
dragTarget.scale.setScalar(1.3);
dragStartMouse.copy(mouse);
dragStartZoom = zoom;
}
}, { passive: false });
canvas.addEventListener('touchmove', (e) => {
e.preventDefault();
const touch = e.touches[0];
const rect = canvas.getBoundingClientRect();
mouse.x = ((touch.clientX - rect.left) / rect.width) * 2 - 1;
mouse.y = -((touch.clientY - rect.top) / rect.height) * 2 + 1;
if (isDragging && dragTarget) {
raycaster.setFromCamera(mouse, camera);
if (dragTarget.userData.type === 'rotation') {
const plane = new THREE.Plane(new THREE.Vector3(0, 1, 0), -0.05);
if (raycaster.ray.intersectPlane(plane, intersection)) {
let angle = THREE.MathUtils.radToDeg(Math.atan2(intersection.x, intersection.z));
rotateDeg = THREE.MathUtils.clamp(angle, -180, 180);
}
} else if (dragTarget.userData.type === 'tilt') {
const plane = new THREE.Plane(new THREE.Vector3(1, 0, 0), 0.7);
if (raycaster.ray.intersectPlane(plane, intersection)) {
const relY = intersection.y - CENTER.y;
const relZ = intersection.z;
const angle = THREE.MathUtils.radToDeg(Math.atan2(relY, relZ));
verticalTilt = THREE.MathUtils.clamp(angle / 35, -1, 1);
}
} else if (dragTarget.userData.type === 'distance') {
const deltaY = mouse.y - dragStartMouse.y;
zoom = THREE.MathUtils.clamp(dragStartZoom + deltaY * 20, 0, 10);
}
updatePositions();
}
}, { passive: false });
canvas.addEventListener('touchend', (e) => { e.preventDefault(); onMouseUp(); }, { passive: false });
canvas.addEventListener('touchcancel', (e) => { e.preventDefault(); onMouseUp(); }, { passive: false });
updatePositions();
function render() {
requestAnimationFrame(render);
renderer.render(scene, camera);
}
render();
new ResizeObserver(() => {
camera.aspect = wrapper.clientWidth / wrapper.clientHeight;
camera.updateProjectionMatrix();
renderer.setSize(wrapper.clientWidth, wrapper.clientHeight);
}).observe(wrapper);
wrapper._updateTexture = updateTextureFromUrl;
let lastImageUrl = props.imageUrl;
let lastValue = JSON.stringify(props.value);
setInterval(() => {
if (props.imageUrl !== lastImageUrl) {
lastImageUrl = props.imageUrl;
updateTextureFromUrl(props.imageUrl);
}
const currentValue = JSON.stringify(props.value);
if (currentValue !== lastValue) {
lastValue = currentValue;
if (props.value && typeof props.value === 'object') {
rotateDeg = props.value.rotate_deg ?? rotateDeg;
zoom = props.value.zoom ?? zoom;
verticalTilt = props.value.vertical_tilt ?? verticalTilt;
updatePositions();
}
}
}, 100);
};
initScene();
})();
"""
def create_camera_3d_component(value=None, imageUrl=None, **kwargs):
    """Build the interactive 3D camera control as a gr.HTML component.

    When no initial state is supplied, start at the neutral camera pose
    (front view, mid zoom, eye level).
    """
    # Fall back to the neutral pose only when the caller passed nothing.
    default_state = {"rotate_deg": 0, "zoom": 5.0, "vertical_tilt": 0}
    state = default_state if value is None else value
    return gr.HTML(
        value=state,
        html_template=CAMERA_3D_HTML_TEMPLATE,
        js_on_load=CAMERA_3D_JS,
        imageUrl=imageUrl,
        **kwargs,
    )
# --- UI ---
css = """
:root {
--name: citrus;
--primary-50: #fffbeb;
--primary-100: #fef3c7;
--primary-200: #fde68a;
--primary-300: #fcd34d;
--primary-400: #fbbf24;
--primary-500: #f59e0b;
--primary-600: #d97706;
--primary-700: #b45309;
--primary-800: #92400e;
--primary-900: #78350f;
--primary-950: #6c370f;
--secondary-50: #fffbeb;
--secondary-100: #fef3c7;
--secondary-200: #fde68a;
--secondary-300: #fcd34d;
--secondary-400: #fbbf24;
--secondary-500: #f59e0b;
--secondary-600: #d97706;
--secondary-700: #b45309;
--secondary-800: #92400e;
--secondary-900: #78350f;
--secondary-950: #6c370f;
--neutral-50: #fafaf9;
--neutral-100: #f5f5f4;
--neutral-200: #e7e5e4;
--neutral-300: #d6d3d1;
--neutral-400: #a8a29e;
--neutral-500: #78716c;
--neutral-600: #57534e;
--neutral-700: #44403c;
--neutral-800: #292524;
--neutral-900: #1c1917;
--neutral-950: #0f0e0d;
--spacing-xxs: 2px;
--spacing-xs: 4px;
--spacing-sm: 6px;
--spacing-md: 8px;
--spacing-lg: 10px;
--spacing-xl: 14px;
--spacing-xxl: 28px;
--radius-xxs: 1px;
--radius-xs: 2px;
--radius-sm: 4px;
--radius-md: 6px;
--radius-lg: 8px;
--radius-xl: 12px;
--radius-xxl: 22px;
--text-xxs: 9px;
--text-xs: 10px;
--text-sm: 12px;
--text-md: 14px;
--text-lg: 16px;
--text-xl: 22px;
--text-xxl: 26px;
--font: 'Ubuntu', ui-sans-serif, system-ui, sans-serif;
--font-mono: 'Roboto Mono', ui-monospace, Consolas, monospace;
--body-background-fill: var(--background-fill-primary);
--body-text-color: var(--neutral-800);
--body-text-size: var(--text-md);
--body-text-weight: 400;
--embed-radius: var(--radius-sm);
--color-accent: var(--primary-500);
--color-accent-soft: var(--primary-50);
--background-fill-primary: var(--neutral-50);
--background-fill-secondary: var(--neutral-50);
--border-color-accent: var(--primary-300);
--border-color-primary: var(--neutral-200);
--link-text-color: var(--secondary-600);
--link-text-color-active: var(--secondary-600);
--link-text-color-hover: var(--secondary-700);
--link-text-color-visited: var(--secondary-500);
--body-text-color-subdued: var(--neutral-400);
--accordion-text-color: var(--body-text-color);
--table-text-color: var(--body-text-color);
--shadow-drop: rgba(0,0,0,0.05) 0px 1px 2px 0px;
--shadow-drop-lg: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
--shadow-inset: rgba(0,0,0,0.05) 0px 2px 4px 0px inset;
--shadow-spread: 3px;
--block-background-fill: var(--neutral-100);
--block-border-color: var(--neutral-300);
--block-border-width: 1px;
--block-info-text-color: var(--body-text-color-subdued);
--block-info-text-size: var(--text-sm);
--block-info-text-weight: 400;
--block-label-background-fill: var(--background-fill-primary);
--block-label-border-color: var(--border-color-primary);
--block-label-border-width: 1px;
--block-label-shadow: none;
--block-label-text-color: var(--neutral-500);
--block-label-margin: 0;
--block-label-padding: var(--spacing-sm) var(--spacing-lg);
--block-label-radius: calc(var(--radius-sm) - 1px) 0 calc(var(--radius-sm) - 1px) 0;
--block-label-right-radius: 0 calc(var(--radius-sm) - 1px) 0 calc(var(--radius-sm) - 1px);
--block-label-text-size: var(--text-sm);
--block-label-text-weight: 400;
--block-padding: var(--spacing-xl) calc(var(--spacing-xl) + 2px);
--block-radius: var(--radius-sm);
--block-shadow: 0px 3px 0px 0px var(--neutral-300);
--block-title-background-fill: none;
--block-title-border-color: none;
--block-title-border-width: 0px;
--block-title-text-color: var(--neutral-500);
--block-title-padding: 0;
--block-title-radius: none;
--block-title-text-size: var(--text-md);
--block-title-text-weight: 400;
--container-radius: var(--radius-sm);
--form-gap-width: 0px;
--layout-gap: var(--spacing-xxl);
--panel-background-fill: var(--background-fill-secondary);
--panel-border-color: var(--border-color-primary);
--panel-border-width: 1px;
--section-header-text-size: var(--text-md);
--section-header-text-weight: 400;
--border-color-accent-subdued: var(--border-color-accent);
--code-background-fill: var(--neutral-100);
--chatbot-text-size: var(--text-lg);
--checkbox-background-color: var(--background-fill-primary);
--checkbox-background-color-focus: var(--checkbox-background-color);
--checkbox-background-color-hover: var(--checkbox-background-color);
--checkbox-background-color-selected: var(--color-accent);
--checkbox-border-color: var(--neutral-300);
--checkbox-border-color-focus: var(--color-accent);
--checkbox-border-color-hover: var(--neutral-300);
--checkbox-border-color-selected: var(--color-accent);
--checkbox-border-radius: var(--radius-sm);
--checkbox-border-width: var(--input-border-width);
--checkbox-label-background-fill: var(--neutral-200);
--checkbox-label-background-fill-hover: var(--checkbox-label-background-fill);
--checkbox-label-background-fill-selected: var(--primary-400);
--checkbox-label-border-color: var(--border-color-primary);
--checkbox-label-border-color-hover: var(--checkbox-label-border-color);
--checkbox-label-border-color-selected: var(--primary-300);
--checkbox-label-border-width: 2px;
--checkbox-label-gap: var(--spacing-lg);
--checkbox-label-padding: var(--spacing-md) calc(2 * var(--spacing-md));
--checkbox-label-shadow: none;
--checkbox-label-text-size: var(--text-md);
--checkbox-label-text-weight: 400;
--checkbox-check: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e");
--radio-circle: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
--checkbox-shadow: none;
--checkbox-label-text-color: var(--body-text-color);
--checkbox-label-text-color-selected: var(--checkbox-label-text-color);
--error-background-fill: #fef2f2;
--error-border-color: #b91c1c;
--error-border-width: 1px;
--error-text-color: #b91c1c;
--error-icon-color: #b91c1c;
--input-background-fill: var(--neutral-50);
--input-background-fill-focus: var(--primary-50);
--input-background-fill-hover: var(--input-background-fill);
--input-border-color: var(--border-color-primary);
--input-border-color-focus: var(--secondary-300);
--input-border-color-hover: var(--input-border-color);
--input-border-width: 1px;
--input-padding: var(--spacing-xl);
--input-placeholder-color: var(--neutral-400);
--input-radius: var(--radius-sm);
--input-shadow: 0px -1px 0px 0px var(--neutral-300);
--input-shadow-focus: 0px -1px 0px 0px var(--primary-300);
--input-text-size: var(--text-md);
--input-text-weight: 400;
--loader-color: var(--color-accent);
--prose-text-size: var(--text-md);
--prose-text-weight: 400;
--prose-header-text-weight: 600;
--slider-color: var(--primary-400);
--stat-background-fill: var(--primary-300);
--table-border-color: var(--neutral-300);
--table-even-background-fill: white;
--table-odd-background-fill: var(--neutral-50);
--table-radius: var(--radius-sm);
--table-row-focus: var(--color-accent-soft);
--button-border-width: 0px;
--button-cancel-background-fill: #ef4444;
--button-cancel-background-fill-hover: #dc2626;
--button-cancel-border-color: var(--button-secondary-border-color);
--button-cancel-border-color-hover: var(--button-secondary-border-color-hover);
--button-cancel-text-color: white;
--button-cancel-text-color-hover: white;
--button-cancel-shadow: 0px 3px 0px 0px rgb(248 113 113);
--button-cancel-shadow-hover: 0px 5px 0px 0px rgb(248 113 113);
--button-cancel-shadow-active: 0px 2px 0px 0px rgb(248 113 113);
--button-transform-hover: translateY(-2px);
--button-transform-active: translateY(1px);
--button-transition: all 0.1s;
--button-large-padding: var(--spacing-lg) calc(2 * var(--spacing-lg));
--button-large-radius: var(--radius-md);
--button-large-text-size: var(--text-lg);
--button-large-text-weight: 600;
--button-primary-background-fill: var(--primary-500);
--button-primary-background-fill-hover: var(--button-primary-background-fill);
--button-primary-border-color: var(--primary-500);
--button-primary-border-color-hover: var(--primary-500);
--button-primary-text-color: var(--button-secondary-text-color);
--button-primary-text-color-hover: var(--button-primary-text-color);
--button-primary-shadow: 0px 3px 0px 0px var(--primary-400);
--button-primary-shadow-hover: 0px 5px 0px 0px var(--primary-400);
--button-primary-shadow-active: 0px 2px 0px 0px var(--primary-400);
--button-secondary-background-fill: var(--primary-400);
--button-secondary-background-fill-hover: var(--button-secondary-background-fill);
--button-secondary-border-color: var(--neutral-200);
--button-secondary-border-color-hover: var(--neutral-200);
--button-secondary-text-color: black;
--button-secondary-text-color-hover: var(--button-secondary-text-color);
--button-secondary-shadow: 0px 3px 0px 0px var(--primary-300);
--button-secondary-shadow-hover: 0px 5px 0px 0px var(--primary-300);
--button-secondary-shadow-active: 0px 2px 0px 0px var(--primary-300);
--button-small-padding: var(--spacing-sm) calc(1.5 * var(--spacing-sm));
--button-small-radius: var(--radius-md);
--button-small-text-size: var(--text-sm);
--button-small-text-weight: 400;
--button-medium-padding: var(--spacing-md) calc(2 * var(--spacing-md));
--button-medium-radius: var(--radius-md);
--button-medium-text-size: var(--text-md);
--button-medium-text-weight: 600;
}
:root.dark, :root .dark {
--body-background-fill: var(--background-fill-primary);
--body-text-color: var(--neutral-100);
--color-accent-soft: var(--neutral-700);
--background-fill-primary: var(--neutral-950);
--background-fill-secondary: var(--neutral-900);
--border-color-accent: var(--neutral-600);
--border-color-primary: var(--neutral-700);
--link-text-color-active: var(--secondary-500);
--link-text-color: var(--secondary-500);
--link-text-color-hover: var(--secondary-400);
--link-text-color-visited: var(--secondary-600);
--body-text-color-subdued: var(--neutral-400);
--accordion-text-color: var(--body-text-color);
--table-text-color: var(--body-text-color);
--shadow-spread: 1px;
--block-background-fill: var(--neutral-800);
--block-border-color: var(--border-color-primary);
--block-info-text-color: var(--body-text-color-subdued);
--block-label-background-fill: var(--background-fill-secondary);
--block-label-border-color: var(--border-color-primary);
--block-label-text-color: var(--neutral-200);
--block-shadow: 0px 3px 0px 0px var(--neutral-700);
--block-title-text-color: var(--neutral-200);
--panel-background-fill: var(--background-fill-secondary);
--panel-border-color: var(--border-color-primary);
--border-color-accent-subdued: var(--border-color-accent);
--code-background-fill: var(--neutral-800);
--checkbox-background-color: var(--neutral-400);
--checkbox-background-color-focus: var(--checkbox-background-color);
--checkbox-background-color-hover: var(--checkbox-background-color);
--checkbox-background-color-selected: var(--primary-600);
--checkbox-border-color: var(--neutral-700);
--checkbox-border-color-focus: var(--color-accent);
--checkbox-border-color-hover: var(--neutral-600);
--checkbox-border-color-selected: var(--color-accent);
--checkbox-border-width: var(--input-border-width);
--checkbox-label-background-fill: var(--neutral-700);
--checkbox-label-background-fill-hover: var(--checkbox-label-background-fill);
--checkbox-label-background-fill-selected: var(--primary-500);
--checkbox-label-border-color: var(--border-color-primary);
--checkbox-label-border-color-hover: var(--checkbox-label-border-color);
--checkbox-label-border-color-selected: var(--primary-600);
--checkbox-label-border-width: 2px;
--checkbox-label-text-color: var(--body-text-color);
--checkbox-label-text-color-selected: var(--button-primary-text-color);
--error-background-fill: var(--background-fill-primary);
--error-border-color: #ef4444;
--error-text-color: #fef2f2;
--error-icon-color: #ef4444;
--input-background-fill: var(--neutral-900);
--input-background-fill-focus: none;
--input-background-fill-hover: var(--input-background-fill);
--input-border-color: var(--border-color-primary);
--input-border-color-focus: var(--neutral-700);
--input-border-color-hover: var(--input-border-color);
--input-placeholder-color: var(--neutral-500);
--input-shadow: 0px -1px 0px 0px var(--neutral-700);
--input-shadow-focus: 0px -1px 0px 0px var(--primary-600);
--slider-color: var(--primary-500);
--stat-background-fill: var(--primary-500);
--table-border-color: var(--neutral-700);
--table-even-background-fill: var(--neutral-950);
--table-odd-background-fill: var(--neutral-900);
--table-row-focus: var(--color-accent-soft);
--button-cancel-background-fill: #b91c1c;
--button-cancel-background-fill-hover: #991b1b;
--button-cancel-border-color: var(--button-secondary-border-color);
--button-cancel-border-color-hover: var(--button-secondary-border-color-hover);
--button-cancel-text-color: white;
--button-cancel-text-color-hover: white;
--button-cancel-shadow: 0px 3px 0px 0px rgb(220 38 38);
--button-cancel-shadow-hover: 0px 5px 0px 0px rgb(220 38 38);
--button-cancel-shadow-active: 0px 2px 0px 0px rgb(220 38 38);
--button-primary-background-fill: var(--primary-600);
--button-primary-background-fill-hover: var(--button-primary-background-fill);
--button-primary-border-color: var(--primary-600);
--button-primary-border-color-hover: var(--primary-500);
--button-primary-text-color: var(--button-secondary-text-color);
--button-primary-text-color-hover: var(--button-primary-text-color);
--button-primary-shadow: 0px 3px 0px 0px var(--primary-700);
--button-primary-shadow-hover: 0px 5px 0px 0px var(--primary-700);
--button-primary-shadow-active: 0px 2px 0px 0px var(--primary-700);
--button-secondary-background-fill: var(--primary-500);
--button-secondary-background-fill-hover: var(--button-secondary-background-fill);
--button-secondary-border-color: var(--neutral-600);
--button-secondary-border-color-hover: var(--neutral-500);
--button-secondary-text-color: var(--neutral-900);
--button-secondary-text-color-hover: var(--button-secondary-text-color);
--button-secondary-shadow: 0px 3px 0px 0px var(--primary-600);
--button-secondary-shadow-hover: 0px 5px 0px 0px var(--primary-600);
--button-secondary-shadow-active: 0px 2px 0px 0px var(--primary-600);
}
#col-container { max-width: 1100px; margin: 0 auto; }
.dark .progress-text { color: white !important; }
#camera-3d-control { min-height: 400px; }
#examples { max-width: 1100px; margin: 0 auto; }
.fillable { max-width: 1250px !important; }
"""
def reset_all() -> list:
    """Return the default camera-knob values plus a raised reset flag.

    Output order matches the wired components:
    [rotate_deg, zoom, vertical_tilt, is_reset].
    """
    defaults = [0, 5.0, 0]
    return defaults + [True]
def end_reset() -> bool:
    """Lower the reset flag once the reset chain has finished."""
    finished = False
    return finished
def update_dimensions_on_upload(
    image: Optional["Image.Image"],
    max_dim: int = 1024,
    multiple: int = 8,
) -> Tuple[int, int]:
    """Compute recommended (width, height) for the output resolution.

    Scales the image so its longer side equals ``max_dim`` while preserving
    aspect ratio, then snaps both dimensions *down* to the nearest
    ``multiple`` (the diffusion pipeline requires dimensions divisible by 8).

    Args:
        image: Uploaded PIL image (any object exposing ``.size``), or None.
        max_dim: Target length for the longer side. Defaults to 1024.
        multiple: Both dimensions are floored to this multiple. Defaults to 8.

    Returns:
        Tuple of (width, height). Falls back to (max_dim, max_dim) when no
        image is provided.
    """
    if image is None:
        return max_dim, max_dim
    original_width, original_height = image.size
    if original_width >= original_height:
        new_width = max_dim
        new_height = int(max_dim * original_height / original_width)
    else:
        new_height = max_dim
        new_width = int(max_dim * original_width / original_height)
    # Floor to the required multiple, but never below `multiple` itself:
    # extreme aspect ratios could otherwise round the minor dimension to 0.
    new_width = max(multiple, (new_width // multiple) * multiple)
    new_height = max(multiple, (new_height // multiple) * multiple)
    return new_width, new_height
# --- Gradio UI definition: layout, helper callbacks, and event wiring ---
with gr.Blocks() as demo:
    gr.Markdown("""
    ## 🎬 Fibo Edit — Camera Angle Control
    Fibo Edit with Multi-Angle LoRA for precise camera control ✨
    Control rotation, tilt, and zoom to generate images from any angle 🎥
    ### Fine-tuning data was created by [Lovis](https://huggingface.co/fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA) and UI by [Apolinario](https://huggingface.co/spaces/multimodalart/qwen-image-multiple-angles-3d-camera)
    """)

    with gr.Row():
        # Left column: input image, 3D camera widget, and action buttons.
        with gr.Column(scale=1):
            image = gr.Image(label="Input Image", type="pil", height=280)
            # Hidden component holding the previous generation so it can be
            # fed back into the next caption request (see run_event wiring).
            prev_output = gr.Image(value=None, visible=False)
            # Hidden flag toggled during the reset chain (reset_all/end_reset).
            is_reset = gr.Checkbox(value=False, visible=False)
            # Hidden state to pass processed image between steps
            processed_image = gr.State(None)
            gr.Markdown("### 🎮 3D Camera Control")
            # Custom three.js camera widget; factory is defined earlier in
            # this file, outside this view.
            camera_3d = create_camera_3d_component(
                value={"rotate_deg": 0, "zoom": 5.0, "vertical_tilt": 0},
                elem_id="camera-3d-control",
            )
            with gr.Row():
                reset_btn = gr.Button("🔄 Reset", size="sm")
                run_btn = gr.Button("🚀 Generate", variant="primary", size="lg")

        # Right column: result, slider controls, and advanced settings.
        with gr.Column(scale=1):
            result = gr.Image(label="Output Image", interactive=False, height=350)
            gr.Markdown("### 🎚️ Slider Controls")
            rotate_deg = gr.Slider(
                label="Horizontal Rotation (°)",
                minimum=-180,
                maximum=180,
                step=45,
                value=0,
                info="-180/180: back, -90: left, 0: front, 90: right",
            )
            zoom = gr.Slider(
                label="Zoom Level",
                minimum=0,
                maximum=10,
                step=1,
                value=5.0,
                info="0-3.33: wide, 3.33-6.66: medium, 6.66-10: close-up",
            )
            vertical_tilt = gr.Slider(
                label="Vertical Tilt",
                minimum=-1,
                maximum=1,
                step=0.5,
                value=0,
                info="-1: low-angle, 0: eye-level, 1: high-angle",
            )
            prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)
            with gr.Accordion("📋 Structured Caption (BRIA API)", open=False):
                structured_json = gr.JSON(label="JSON Response", container=False)
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=DEFAULT_SEED,
                )
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=DEFAULT_GUIDANCE_SCALE,
                )
                num_inference_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=DEFAULT_NUM_INFERENCE_STEPS,
                )
                height = gr.Slider(
                    label="Height", minimum=256, maximum=2048, step=8, value=1024
                )
                width = gr.Slider(
                    label="Width", minimum=256, maximum=2048, step=8, value=1024
                )

    # --- Helper Functions ---
    def update_prompt_from_sliders(rotate, zoom_val, tilt):
        """Rebuild the preview prompt from the current slider values."""
        prompt, _ = build_camera_prompt(rotate, zoom_val, tilt)
        return prompt

    def sync_3d_to_sliders(camera_value):
        """Push the 3D widget's state into the sliders and prompt preview."""
        if camera_value and isinstance(camera_value, dict):
            rot = camera_value.get("rotate_deg", 0)
            zoom_val = camera_value.get("zoom", 5.0)
            tilt = camera_value.get("vertical_tilt", 0)
            prompt, _ = build_camera_prompt(rot, zoom_val, tilt)
            return rot, zoom_val, tilt, prompt
        # Empty/malformed widget value: leave all four outputs unchanged.
        return gr.update(), gr.update(), gr.update(), gr.update()

    def sync_sliders_to_3d(rotate, zoom_val, tilt):
        """Mirror the slider values back into the 3D widget's value dict."""
        return {"rotate_deg": rotate, "zoom": zoom_val, "vertical_tilt": tilt}

    def update_3d_image(img):
        """Embed the uploaded image into the 3D widget as a base64 data URL."""
        if img is None:
            return gr.update(imageUrl=None)
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        data_url = f"data:image/png;base64,{img_str}"
        return gr.update(imageUrl=data_url)

    # --- Event Handlers ---
    # Slider -> Prompt preview
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.change(
            fn=update_prompt_from_sliders,
            inputs=[rotate_deg, zoom, vertical_tilt],
            outputs=[prompt_preview],
        )

    # 3D control -> Sliders + Prompt (no auto-inference)
    camera_3d.change(
        fn=sync_3d_to_sliders,
        inputs=[camera_3d],
        outputs=[rotate_deg, zoom, vertical_tilt, prompt_preview],
    )

    # Sliders -> 3D control (no auto-inference)
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.release(
            fn=sync_sliders_to_3d,
            inputs=[rotate_deg, zoom, vertical_tilt],
            outputs=[camera_3d],
        )

    # Reset: restore defaults, clear the reset flag, then re-sync the widget.
    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, zoom, vertical_tilt, is_reset],
        queue=False,
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False).then(
        fn=sync_sliders_to_3d,
        inputs=[rotate_deg, zoom, vertical_tilt],
        outputs=[camera_3d],
    )

    # Generate button - Two-stage process
    # Stage 1: Fetch structured caption from BRIA API and display it immediately
    run_event = run_btn.click(
        fn=fetch_structured_caption,
        inputs=[
            image,
            rotate_deg,
            zoom,
            vertical_tilt,
            seed,
            randomize_seed,
            prev_output,
        ],
        outputs=[seed, prompt_preview, structured_json, processed_image],
    ).then(
        # Stage 2: Generate image with Fibo Edit pipeline
        fn=generate_image_from_caption,
        inputs=[
            processed_image,
            structured_json,
            seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result],
    )

    # Image upload: recompute output resolution, reset the camera, then show
    # the image inside the 3D widget.
    image.upload(
        fn=update_dimensions_on_upload, inputs=[image], outputs=[width, height]
    ).then(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, zoom, vertical_tilt, is_reset],
        queue=False,
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False).then(
        fn=update_3d_image, inputs=[image], outputs=[camera_3d]
    )
    image.clear(fn=lambda: gr.update(imageUrl=None), outputs=[camera_3d])

    # After a generation completes, remember the result so the next request
    # can pass it along as prev_output.
    run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])

    # Examples - Commenting out for now since we need actual example images
    # Note: With the two-stage inference process, examples would need custom handling
    # to properly chain fetch_structured_caption -> generate_image_from_caption

    # Sync 3D component when sliders change (covers example loading)
    def sync_3d_on_slider_change(img, rot, zoom_val, tilt):
        """Sync slider values (and the current image, if any) into the 3D widget."""
        camera_value = {"rotate_deg": rot, "zoom": zoom_val, "vertical_tilt": tilt}
        if img is not None:
            buffered = BytesIO()
            img.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()
            data_url = f"data:image/png;base64,{img_str}"
            return gr.update(value=camera_value, imageUrl=data_url)
        return gr.update(value=camera_value)

    # When any slider value changes (including from examples), sync the 3D component
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.change(
            fn=sync_3d_on_slider_change,
            inputs=[image, rotate_deg, zoom, vertical_tilt],
            outputs=[camera_3d],
        )

    # API endpoints for the two-stage inference process
    gr.api(fetch_structured_caption, api_name="fetch_caption")
    gr.api(generate_image_from_caption, api_name="generate_image")
if __name__ == "__main__":
    # Inject three.js so the custom 3D camera widget can render.
    head = '<script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>'
    launch_kwargs = {"head": head, "css": css}
    if RUN_LOCAL:
        # Local development: enable the MCP server and bind to a fixed port.
        launch_kwargs.update(
            mcp_server=True,
            footer_links=["api", "gradio", "settings"],
            server_name="0.0.0.0",
            server_port=8081,
        )
    # On HuggingFace Spaces the defaults are used (head + css only).
    demo.launch(**launch_kwargs)