# app.py — FIBO-Edit multi-angle camera demo (HuggingFace Space, commit b87f5d5)
import base64
import json
import os
import random
import time
from io import BytesIO
from typing import Optional, Tuple
import gradio as gr
import numpy as np
import requests
import spaces
import torch
from PIL import Image
from fibo_edit_pipeline import BriaFiboEditPipeline
from utils import AngleInstruction
# --- Configuration ---
# Prefer GPU when available; used for pipeline placement and the torch RNG.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Run locally or on HuggingFace Spaces
RUN_LOCAL = False
# Model paths
BASE_CHECKPOINT = "briaai/FIBO-Edit"  # HuggingFace model ID for the base pipeline
LORA_CHECKPOINT = "briaai/fibo_edit_multi_angle_full_0121_full_1k"  # HuggingFace LoRA model ID
# BRIA API configuration (used to turn an image + prompt into a structured caption)
BRIA_API_URL = "https://engine.prod.bria-api.com/v2/structured_prompt/generate/pro"
BRIA_API_TOKEN = os.environ.get("BRIA_API_TOKEN")
# Fail fast at import time if the API secret is missing — the app cannot
# generate structured captions without it.
if not BRIA_API_TOKEN:
    raise ValueError(
        "BRIA_API_TOKEN environment variable is not set. "
        "Please add it as a HuggingFace Space secret."
    )
# Generation defaults
DEFAULT_NUM_INFERENCE_STEPS = 50
DEFAULT_GUIDANCE_SCALE = 3.5
DEFAULT_SEED = 100050
# Largest random seed handed out when "randomize seed" is enabled (fits int32).
MAX_SEED = np.iinfo(np.int32).max
print("🚀 Starting Fibo Edit Multi-Angle LoRA Gradio App")
print(f"Device: {device}")
print(f"Base checkpoint: {BASE_CHECKPOINT}")
print(f"LoRA checkpoint: {LORA_CHECKPOINT}")
# --- Helper Functions ---
def load_pipeline_fiboedit(
    checkpoint: str,
    lora_checkpoint: Optional[str] = None,
    lora_scale: Optional[float] = None,
    fuse_lora: bool = True,
):
    """
    Load the Fibo Edit pipeline using BriaFiboEditPipeline with optional LoRA weights.

    Args:
        checkpoint: HuggingFace model ID for base model
        lora_checkpoint: Optional HuggingFace model ID for LoRA weights
        lora_scale: Scale for LoRA weights when fusing (default None = 1.0).
            NOTE(review): currently accepted but never applied — the PEFT
            adapter is merged at its trained scale. Confirm whether callers
            expect this parameter to take effect.
        fuse_lora: Whether to fuse LoRA into base weights (default True)

    Returns:
        Loaded BriaFiboEditPipeline
    """
    print(f"Loading BriaFiboEditPipeline from {checkpoint}")
    if lora_checkpoint:
        print(f" with LoRA from {lora_checkpoint}")
    # Load pipeline from HuggingFace
    print("Loading pipeline...")
    pipe = BriaFiboEditPipeline.from_pretrained(
        checkpoint,
        torch_dtype=torch.bfloat16,
    )
    # Fix: honor the module-level device selection instead of hard-coding
    # "cuda", so the app can still start on a CPU-only machine.
    pipe.to(device)
    print(f" Pipeline loaded from {checkpoint}")
    # Load LoRA weights if provided (PEFT format)
    if lora_checkpoint:
        print(f"Loading PEFT LoRA from {lora_checkpoint}...")
        from peft import PeftModel
        print(" Loading PEFT adapter onto transformer...")
        pipe.transformer = PeftModel.from_pretrained(
            pipe.transformer,
            lora_checkpoint,
        )
        print(" PEFT adapter loaded successfully")
        if fuse_lora:
            # Merging bakes the adapter into the base weights and drops the
            # PEFT wrapper, avoiding per-forward adapter overhead.
            print(" Merging LoRA into base weights...")
            if hasattr(pipe.transformer, "merge_and_unload"):
                pipe.transformer = pipe.transformer.merge_and_unload()
                print(" LoRA merged and unloaded")
            else:
                print(" [WARN] transformer.merge_and_unload() not available")
    print("✅ Pipeline loaded successfully!")
    return pipe
def generate_structured_caption(
    image: Image.Image, prompt: str, seed: int = 1
) -> Optional[dict]:
    """Generate structured caption using BRIA API.

    Encodes the image as base64 PNG and POSTs it with the prompt to the
    BRIA structured-prompt endpoint, retrying up to 3 times (3 s apart).

    Args:
        image: Input image sent to the API.
        prompt: Natural-language edit instruction.
        seed: Seed forwarded to the API for reproducibility.

    Returns:
        Parsed structured-prompt dict, or None if all attempts fail.
    """
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    image_bytes = base64.b64encode(buffered.getvalue()).decode("utf-8")
    payload = {
        "seed": seed,
        "sync": True,
        "images": [image_bytes],
        "prompt": prompt,
    }
    headers = {
        "Content-Type": "application/json",
        "api_token": BRIA_API_TOKEN,
    }
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = requests.post(
                BRIA_API_URL, json=payload, headers=headers, timeout=60
            )
            response.raise_for_status()
            data = response.json()
            structured_prompt_str = data["result"]["structured_prompt"]
            return json.loads(structured_prompt_str)
        # Fix: catch only the failure modes this call can actually produce —
        # network/HTTP errors, a missing response key, or malformed JSON
        # (json.JSONDecodeError subclasses ValueError) — rather than a blanket
        # `except Exception` that would also mask programming errors.
        except (requests.RequestException, KeyError, TypeError, ValueError) as e:
            if attempt == max_retries - 1:
                print(f"Failed to generate structured caption: {e}")
                return None
            time.sleep(3)
    return None
# --- Model Loading ---
# Load the pipeline once at import time so every Gradio request reuses it.
print("Loading Fibo Edit pipeline...")
try:
    pipe = load_pipeline_fiboedit(
        checkpoint=BASE_CHECKPOINT,
        lora_checkpoint=LORA_CHECKPOINT,
        lora_scale=None,
        fuse_lora=True,
    )
    if torch.cuda.is_available():
        # Report GPU memory usage of device 0 in GiB for startup diagnostics.
        mem_allocated = torch.cuda.memory_allocated(0) / 1024**3
        print(f" GPU memory allocated: {mem_allocated:.2f} GB")
except Exception as e:
    # Log the full traceback, then re-raise so the Space fails visibly
    # instead of serving a UI with no model behind it.
    print(f"❌ Error loading pipeline: {e}")
    import traceback
    traceback.print_exc()
    raise
def build_camera_prompt(
    rotate_deg: float = 0.0, zoom: float = 0.0, vertical_tilt: float = 0.0
) -> Tuple[str, "AngleInstruction"]:
    """Build a natural language camera instruction from parameters.

    Args:
        rotate_deg: Horizontal rotation in degrees.
        zoom: Zoom amount (0-10 scale used by the UI).
        vertical_tilt: Vertical tilt in [-1, 1].

    Returns:
        Tuple of (natural-language prompt string, AngleInstruction).
    """
    # Fix: the return annotation previously said `-> str` but the function
    # returns a (prompt, AngleInstruction) tuple — corrected to match callers.
    angle_instruction = AngleInstruction.from_camera_params(
        rotation=rotate_deg, tilt=vertical_tilt, zoom=zoom
    )
    # Map each discrete view/shot/zoom bucket to a natural-language fragment.
    view_map = {
        "back view": "view from the opposite side",
        "back-left quarter view": "rotate 135 degrees left",
        "back-right quarter view": "rotate 135 degrees right",
        "front view": "keep the front view",
        "front-left quarter view": "rotate 45 degrees left",
        "front-right quarter view": "rotate 45 degrees right",
        "left side view": "rotate 90 degrees left",
        "right side view": "rotate 90 degrees right",
    }
    shot_map = {
        "elevated shot": "with an elevated viewing angle",
        "eye-level shot": "with an eye-level viewing angle",
        "high-angle shot": "with a high-angle viewing angle",
        "low-angle shot": "with a low-angle viewing angle",
    }
    zoom_map = {
        "close-up": "and make it a close-up shot",
        "medium shot": "",  # Omit medium shot
        "wide shot": "and make it a wide shot",
    }
    view_text = view_map[angle_instruction.view.value]
    shot_text = shot_map[angle_instruction.shot.value]
    zoom_text = zoom_map[angle_instruction.zoom.value]
    # Construct the natural language prompt starting with "Change the viewing angle"
    parts = [view_text, shot_text]
    if zoom_text:  # Only add zoom if not empty (medium shot is omitted)
        parts.append(zoom_text)
    natural_prompt = "Change the viewing angle: " + ", ".join(parts)
    return natural_prompt, angle_instruction
def fetch_structured_caption(
    image: Optional[Image.Image] = None,
    rotate_deg: float = 0.0,
    zoom: float = 0.0,
    vertical_tilt: float = 0.0,
    seed: int = 0,
    randomize_seed: bool = True,
    prev_output: Optional[Image.Image] = None,
) -> Tuple[int, str, dict, Image.Image]:
    """Fetch structured caption from BRIA API.

    Builds the camera prompt from the 3D-control parameters, resolves the
    input image (upload first, falling back to the previous output), then
    requests a structured caption and stamps it with the angle instruction.

    Returns:
        Tuple of (seed used, natural-language prompt, structured caption
        dict, resolved RGB input image).

    Raises:
        gr.Error: If no image is available or the BRIA API call fails.
    """
    # Build natural language prompt and angle instruction
    natural_prompt, angle_instruction = build_camera_prompt(
        rotate_deg, zoom, vertical_tilt
    )
    print(f"Natural Language Prompt: {natural_prompt}")
    print(f"Angle Instruction: {str(angle_instruction)}")
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Resolve the input image: uploaded image takes priority over the
    # previous generation output.
    if image is not None:
        if isinstance(image, Image.Image):
            input_image = image.convert("RGB")
        elif hasattr(image, "name"):
            # Gradio may hand over a temp-file wrapper instead of a PIL image.
            input_image = Image.open(image.name).convert("RGB")
        else:
            input_image = image
    # Fix: test identity explicitly rather than relying on the truthiness
    # of a PIL Image object.
    elif prev_output is not None:
        input_image = prev_output.convert("RGB")
    else:
        raise gr.Error("Please upload an image first.")
    # Generate structured caption using BRIA API
    print("Generating structured caption from BRIA API...")
    structured_caption = generate_structured_caption(
        input_image, natural_prompt, seed=seed
    )
    if structured_caption is None:
        raise gr.Error("Failed to generate structured caption from BRIA API")
    # Replace edit_instruction with angle instruction string
    structured_caption["edit_instruction"] = str(angle_instruction)
    print(
        f"Structured caption received: {json.dumps(structured_caption, ensure_ascii=False)}"
    )
    return seed, natural_prompt, structured_caption, input_image
@spaces.GPU(duration=240)
def generate_image_from_caption(
    input_image: Image.Image,
    structured_caption: dict,
    seed: int,
    guidance_scale: float = 3.5,
    num_inference_steps: int = 50,
) -> Image.Image:
    """Run the Fibo Edit pipeline on *input_image* guided by a structured caption.

    The caption dict is serialized to JSON and passed as the pipeline prompt;
    the seed makes the diffusion sampling reproducible.
    """
    structured_prompt = json.dumps(structured_caption, ensure_ascii=False)
    print("Generating image with structured prompt...")
    # Seed a device-local generator so repeated calls reproduce the same output.
    rng = torch.Generator(device=device).manual_seed(seed)
    outputs = pipe(
        image=input_image,
        prompt=structured_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=rng,
        num_images_per_prompt=1,
    )
    # A single image is requested, so return the first (only) result.
    return outputs.images[0]
# --- 3D Camera Control Component ---
# Using gr.HTML directly with templates (Gradio 6 style)
# Static markup for the 3D camera widget: a Three.js canvas container, a
# bottom overlay that echoes the current prompt, and a color legend for the
# three drag handles (rotation / tilt / zoom). The string is injected as the
# component's HTML template and must stay byte-identical to the markup the
# companion JS (CAMERA_3D_JS) queries by element id.
CAMERA_3D_HTML_TEMPLATE = """
<div id="camera-control-wrapper" style="width: 100%; height: 400px; position: relative; background: #1a1a1a; border-radius: 12px; overflow: hidden;">
<div id="prompt-overlay" style="position: absolute; bottom: 10px; left: 50%; transform: translateX(-50%); background: rgba(0,0,0,0.8); padding: 8px 16px; border-radius: 8px; font-family: monospace; font-size: 11px; color: #00ff88; white-space: nowrap; z-index: 10; max-width: 90%; overflow: hidden; text-overflow: ellipsis;"></div>
<div id="control-legend" style="position: absolute; top: 10px; left: 10px; background: rgba(0,0,0,0.7); padding: 8px 12px; border-radius: 8px; font-family: system-ui; font-size: 11px; color: #fff; z-index: 10;">
<div style="margin-bottom: 4px;"><span style="color: #00ff88;">●</span> Rotation (↔)</div>
<div style="margin-bottom: 4px;"><span style="color: #ff69b4;">●</span> Vertical Tilt (↕)</div>
<div><span style="color: #ffa500;">●</span> Distance/Zoom</div>
</div>
</div>
"""
# Client-side controller for the 3D camera widget, executed in the browser
# via the component's js_on_load hook. It builds a Three.js scene (camera
# model, rotation ring, tilt arc, distance line), lets the user drag three
# handles to set rotation / tilt / zoom with snap-to-step animation, mirrors
# the current state into `props.value`, and polls for external prop changes.
# The string is runtime code for the browser — it must stay byte-identical;
# do not edit it when only touching the Python side.
CAMERA_3D_JS = """
(() => {
const wrapper = element.querySelector('#camera-control-wrapper');
const promptOverlay = element.querySelector('#prompt-overlay');
const initScene = () => {
if (typeof THREE === 'undefined') {
setTimeout(initScene, 100);
return;
}
const scene = new THREE.Scene();
scene.background = new THREE.Color(0x1a1a1a);
const camera = new THREE.PerspectiveCamera(50, wrapper.clientWidth / wrapper.clientHeight, 0.1, 1000);
camera.position.set(4, 3, 4);
camera.lookAt(0, 0.75, 0);
const renderer = new THREE.WebGLRenderer({ antialias: true });
renderer.setSize(wrapper.clientWidth, wrapper.clientHeight);
renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
wrapper.insertBefore(renderer.domElement, wrapper.firstChild);
scene.add(new THREE.AmbientLight(0xffffff, 0.6));
const dirLight = new THREE.DirectionalLight(0xffffff, 0.6);
dirLight.position.set(5, 10, 5);
scene.add(dirLight);
scene.add(new THREE.GridHelper(6, 12, 0x333333, 0x222222));
const CENTER = new THREE.Vector3(0, 0.75, 0);
const BASE_DISTANCE = 2.0;
const ROTATION_RADIUS = 2.2;
const TILT_RADIUS = 1.6;
let rotateDeg = props.value?.rotate_deg || 0;
let zoom = props.value?.zoom || 5.0;
let verticalTilt = props.value?.vertical_tilt || 0;
const rotateSteps = [-180, -135, -90, -45, 0, 45, 90, 135, 180];
const zoomSteps = [0, 5, 10];
const tiltSteps = [-1, -0.5, 0, 0.5, 1];
function snapToNearest(value, steps) {
return steps.reduce((prev, curr) => Math.abs(curr - value) < Math.abs(prev - value) ? curr : prev);
}
function createPlaceholderTexture() {
const canvas = document.createElement('canvas');
canvas.width = 256;
canvas.height = 256;
const ctx = canvas.getContext('2d');
ctx.fillStyle = '#3a3a4a';
ctx.fillRect(0, 0, 256, 256);
ctx.fillStyle = '#ffcc99';
ctx.beginPath();
ctx.arc(128, 128, 80, 0, Math.PI * 2);
ctx.fill();
ctx.fillStyle = '#333';
ctx.beginPath();
ctx.arc(100, 110, 10, 0, Math.PI * 2);
ctx.arc(156, 110, 10, 0, Math.PI * 2);
ctx.fill();
ctx.strokeStyle = '#333';
ctx.lineWidth = 3;
ctx.beginPath();
ctx.arc(128, 130, 35, 0.2, Math.PI - 0.2);
ctx.stroke();
return new THREE.CanvasTexture(canvas);
}
let currentTexture = createPlaceholderTexture();
const planeMaterial = new THREE.MeshBasicMaterial({ map: currentTexture, side: THREE.DoubleSide });
let targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(1.2, 1.2), planeMaterial);
targetPlane.position.copy(CENTER);
scene.add(targetPlane);
function updateTextureFromUrl(url) {
if (!url) {
planeMaterial.map = createPlaceholderTexture();
planeMaterial.needsUpdate = true;
scene.remove(targetPlane);
targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(1.2, 1.2), planeMaterial);
targetPlane.position.copy(CENTER);
scene.add(targetPlane);
return;
}
const loader = new THREE.TextureLoader();
loader.crossOrigin = 'anonymous';
loader.load(url, (texture) => {
texture.minFilter = THREE.LinearFilter;
texture.magFilter = THREE.LinearFilter;
planeMaterial.map = texture;
planeMaterial.needsUpdate = true;
const img = texture.image;
if (img && img.width && img.height) {
const aspect = img.width / img.height;
const maxSize = 1.4;
let planeWidth, planeHeight;
if (aspect > 1) {
planeWidth = maxSize;
planeHeight = maxSize / aspect;
} else {
planeHeight = maxSize;
planeWidth = maxSize * aspect;
}
scene.remove(targetPlane);
targetPlane = new THREE.Mesh(new THREE.PlaneGeometry(planeWidth, planeHeight), planeMaterial);
targetPlane.position.copy(CENTER);
scene.add(targetPlane);
}
});
}
if (props.imageUrl) {
updateTextureFromUrl(props.imageUrl);
}
const cameraGroup = new THREE.Group();
const bodyMat = new THREE.MeshStandardMaterial({ color: 0x6699cc, metalness: 0.5, roughness: 0.3 });
const body = new THREE.Mesh(new THREE.BoxGeometry(0.28, 0.2, 0.35), bodyMat);
cameraGroup.add(body);
const lens = new THREE.Mesh(
new THREE.CylinderGeometry(0.08, 0.1, 0.16, 16),
new THREE.MeshStandardMaterial({ color: 0x6699cc, metalness: 0.5, roughness: 0.3 })
);
lens.rotation.x = Math.PI / 2;
lens.position.z = 0.24;
cameraGroup.add(lens);
scene.add(cameraGroup);
const rotationArcPoints = [];
for (let i = 0; i <= 64; i++) {
const angle = THREE.MathUtils.degToRad((360 * i / 64));
rotationArcPoints.push(new THREE.Vector3(ROTATION_RADIUS * Math.sin(angle), 0.05, ROTATION_RADIUS * Math.cos(angle)));
}
const rotationCurve = new THREE.CatmullRomCurve3(rotationArcPoints);
const rotationArc = new THREE.Mesh(
new THREE.TubeGeometry(rotationCurve, 64, 0.035, 8, true),
new THREE.MeshStandardMaterial({ color: 0x00ff88, emissive: 0x00ff88, emissiveIntensity: 0.3 })
);
scene.add(rotationArc);
const rotationHandle = new THREE.Mesh(
new THREE.SphereGeometry(0.16, 16, 16),
new THREE.MeshStandardMaterial({ color: 0x00ff88, emissive: 0x00ff88, emissiveIntensity: 0.5 })
);
rotationHandle.userData.type = 'rotation';
scene.add(rotationHandle);
const tiltArcPoints = [];
for (let i = 0; i <= 32; i++) {
const angle = THREE.MathUtils.degToRad(-45 + (90 * i / 32));
tiltArcPoints.push(new THREE.Vector3(-0.7, TILT_RADIUS * Math.sin(angle) + CENTER.y, TILT_RADIUS * Math.cos(angle)));
}
const tiltCurve = new THREE.CatmullRomCurve3(tiltArcPoints);
const tiltArc = new THREE.Mesh(
new THREE.TubeGeometry(tiltCurve, 32, 0.035, 8, false),
new THREE.MeshStandardMaterial({ color: 0xff69b4, emissive: 0xff69b4, emissiveIntensity: 0.3 })
);
scene.add(tiltArc);
const tiltHandle = new THREE.Mesh(
new THREE.SphereGeometry(0.16, 16, 16),
new THREE.MeshStandardMaterial({ color: 0xff69b4, emissive: 0xff69b4, emissiveIntensity: 0.5 })
);
tiltHandle.userData.type = 'tilt';
scene.add(tiltHandle);
const distanceLineGeo = new THREE.BufferGeometry();
const distanceLine = new THREE.Line(distanceLineGeo, new THREE.LineBasicMaterial({ color: 0xffa500 }));
scene.add(distanceLine);
const distanceHandle = new THREE.Mesh(
new THREE.SphereGeometry(0.16, 16, 16),
new THREE.MeshStandardMaterial({ color: 0xffa500, emissive: 0xffa500, emissiveIntensity: 0.5 })
);
distanceHandle.userData.type = 'distance';
scene.add(distanceHandle);
function buildPromptText(rot, zoomVal, tilt) {
const parts = [];
if (rot !== 0) {
const dir = rot > 0 ? 'right' : 'left';
parts.push('Rotate ' + Math.abs(rot) + '° ' + dir);
}
if (zoomVal >= 6.66) parts.push('Close-up');
else if (zoomVal >= 3.33) parts.push('Medium shot');
else parts.push('Wide angle');
if (tilt >= 0.66) parts.push("High angle");
else if (tilt >= 0.33) parts.push("Elevated");
else if (tilt <= -0.33) parts.push("Low angle");
else parts.push("Eye level");
return parts.length > 0 ? parts.join(' • ') : 'No camera movement';
}
function updatePositions() {
const rotRad = THREE.MathUtils.degToRad(rotateDeg);
// Map zoom 0-10 to distance: zoom 0 = far (3.0), zoom 10 = close (1.0)
const distance = 3.0 - (zoom / 10) * 2.0;
const tiltAngle = verticalTilt * 35;
const tiltRad = THREE.MathUtils.degToRad(tiltAngle);
const camX = distance * Math.sin(rotRad) * Math.cos(tiltRad);
const camY = distance * Math.sin(tiltRad) + CENTER.y;
const camZ = distance * Math.cos(rotRad) * Math.cos(tiltRad);
cameraGroup.position.set(camX, camY, camZ);
cameraGroup.lookAt(CENTER);
rotationHandle.position.set(ROTATION_RADIUS * Math.sin(rotRad), 0.05, ROTATION_RADIUS * Math.cos(rotRad));
const tiltHandleAngle = THREE.MathUtils.degToRad(tiltAngle);
tiltHandle.position.set(-0.7, TILT_RADIUS * Math.sin(tiltHandleAngle) + CENTER.y, TILT_RADIUS * Math.cos(tiltHandleAngle));
const handleDist = distance - 0.4;
distanceHandle.position.set(
handleDist * Math.sin(rotRad) * Math.cos(tiltRad),
handleDist * Math.sin(tiltRad) + CENTER.y,
handleDist * Math.cos(rotRad) * Math.cos(tiltRad)
);
distanceLineGeo.setFromPoints([cameraGroup.position.clone(), CENTER.clone()]);
promptOverlay.textContent = buildPromptText(rotateDeg, zoom, verticalTilt);
}
function updatePropsAndTrigger() {
const rotSnap = snapToNearest(rotateDeg, rotateSteps);
const zoomSnap = snapToNearest(zoom, zoomSteps);
const tiltSnap = snapToNearest(verticalTilt, tiltSteps);
props.value = { rotate_deg: rotSnap, zoom: zoomSnap, vertical_tilt: tiltSnap };
trigger('change', props.value);
}
const raycaster = new THREE.Raycaster();
const mouse = new THREE.Vector2();
let isDragging = false;
let dragTarget = null;
let dragStartMouse = new THREE.Vector2();
let dragStartZoom = 0;
const intersection = new THREE.Vector3();
const canvas = renderer.domElement;
canvas.addEventListener('mousedown', (e) => {
const rect = canvas.getBoundingClientRect();
mouse.x = ((e.clientX - rect.left) / rect.width) * 2 - 1;
mouse.y = -((e.clientY - rect.top) / rect.height) * 2 + 1;
raycaster.setFromCamera(mouse, camera);
const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
if (intersects.length > 0) {
isDragging = true;
dragTarget = intersects[0].object;
dragTarget.material.emissiveIntensity = 1.0;
dragTarget.scale.setScalar(1.3);
dragStartMouse.copy(mouse);
dragStartZoom = zoom;
canvas.style.cursor = 'grabbing';
}
});
canvas.addEventListener('mousemove', (e) => {
const rect = canvas.getBoundingClientRect();
mouse.x = ((e.clientX - rect.left) / rect.width) * 2 - 1;
mouse.y = -((e.clientY - rect.top) / rect.height) * 2 + 1;
if (isDragging && dragTarget) {
raycaster.setFromCamera(mouse, camera);
if (dragTarget.userData.type === 'rotation') {
const plane = new THREE.Plane(new THREE.Vector3(0, 1, 0), -0.05);
if (raycaster.ray.intersectPlane(plane, intersection)) {
let angle = THREE.MathUtils.radToDeg(Math.atan2(intersection.x, intersection.z));
rotateDeg = THREE.MathUtils.clamp(angle, -180, 180);
}
} else if (dragTarget.userData.type === 'tilt') {
const plane = new THREE.Plane(new THREE.Vector3(1, 0, 0), 0.7);
if (raycaster.ray.intersectPlane(plane, intersection)) {
const relY = intersection.y - CENTER.y;
const relZ = intersection.z;
const angle = THREE.MathUtils.radToDeg(Math.atan2(relY, relZ));
verticalTilt = THREE.MathUtils.clamp(angle / 35, -1, 1);
}
} else if (dragTarget.userData.type === 'distance') {
const deltaY = mouse.y - dragStartMouse.y;
zoom = THREE.MathUtils.clamp(dragStartZoom + deltaY * 20, 0, 10);
}
updatePositions();
} else {
raycaster.setFromCamera(mouse, camera);
const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
[rotationHandle, tiltHandle, distanceHandle].forEach(h => {
h.material.emissiveIntensity = 0.5;
h.scale.setScalar(1);
});
if (intersects.length > 0) {
intersects[0].object.material.emissiveIntensity = 0.8;
intersects[0].object.scale.setScalar(1.1);
canvas.style.cursor = 'grab';
} else {
canvas.style.cursor = 'default';
}
}
});
const onMouseUp = () => {
if (dragTarget) {
dragTarget.material.emissiveIntensity = 0.5;
dragTarget.scale.setScalar(1);
const targetRot = snapToNearest(rotateDeg, rotateSteps);
const targetZoom = snapToNearest(zoom, zoomSteps);
const targetTilt = snapToNearest(verticalTilt, tiltSteps);
const startRot = rotateDeg, startZoom = zoom, startTilt = verticalTilt;
const startTime = Date.now();
function animateSnap() {
const t = Math.min((Date.now() - startTime) / 200, 1);
const ease = 1 - Math.pow(1 - t, 3);
rotateDeg = startRot + (targetRot - startRot) * ease;
zoom = startZoom + (targetZoom - startZoom) * ease;
verticalTilt = startTilt + (targetTilt - startTilt) * ease;
updatePositions();
if (t < 1) requestAnimationFrame(animateSnap);
else updatePropsAndTrigger();
}
animateSnap();
}
isDragging = false;
dragTarget = null;
canvas.style.cursor = 'default';
};
canvas.addEventListener('mouseup', onMouseUp);
canvas.addEventListener('mouseleave', onMouseUp);
canvas.addEventListener('touchstart', (e) => {
e.preventDefault();
const touch = e.touches[0];
const rect = canvas.getBoundingClientRect();
mouse.x = ((touch.clientX - rect.left) / rect.width) * 2 - 1;
mouse.y = -((touch.clientY - rect.top) / rect.height) * 2 + 1;
raycaster.setFromCamera(mouse, camera);
const intersects = raycaster.intersectObjects([rotationHandle, tiltHandle, distanceHandle]);
if (intersects.length > 0) {
isDragging = true;
dragTarget = intersects[0].object;
dragTarget.material.emissiveIntensity = 1.0;
dragTarget.scale.setScalar(1.3);
dragStartMouse.copy(mouse);
dragStartZoom = zoom;
}
}, { passive: false });
canvas.addEventListener('touchmove', (e) => {
e.preventDefault();
const touch = e.touches[0];
const rect = canvas.getBoundingClientRect();
mouse.x = ((touch.clientX - rect.left) / rect.width) * 2 - 1;
mouse.y = -((touch.clientY - rect.top) / rect.height) * 2 + 1;
if (isDragging && dragTarget) {
raycaster.setFromCamera(mouse, camera);
if (dragTarget.userData.type === 'rotation') {
const plane = new THREE.Plane(new THREE.Vector3(0, 1, 0), -0.05);
if (raycaster.ray.intersectPlane(plane, intersection)) {
let angle = THREE.MathUtils.radToDeg(Math.atan2(intersection.x, intersection.z));
rotateDeg = THREE.MathUtils.clamp(angle, -180, 180);
}
} else if (dragTarget.userData.type === 'tilt') {
const plane = new THREE.Plane(new THREE.Vector3(1, 0, 0), 0.7);
if (raycaster.ray.intersectPlane(plane, intersection)) {
const relY = intersection.y - CENTER.y;
const relZ = intersection.z;
const angle = THREE.MathUtils.radToDeg(Math.atan2(relY, relZ));
verticalTilt = THREE.MathUtils.clamp(angle / 35, -1, 1);
}
} else if (dragTarget.userData.type === 'distance') {
const deltaY = mouse.y - dragStartMouse.y;
zoom = THREE.MathUtils.clamp(dragStartZoom + deltaY * 20, 0, 10);
}
updatePositions();
}
}, { passive: false });
canvas.addEventListener('touchend', (e) => { e.preventDefault(); onMouseUp(); }, { passive: false });
canvas.addEventListener('touchcancel', (e) => { e.preventDefault(); onMouseUp(); }, { passive: false });
updatePositions();
function render() {
requestAnimationFrame(render);
renderer.render(scene, camera);
}
render();
new ResizeObserver(() => {
camera.aspect = wrapper.clientWidth / wrapper.clientHeight;
camera.updateProjectionMatrix();
renderer.setSize(wrapper.clientWidth, wrapper.clientHeight);
}).observe(wrapper);
wrapper._updateTexture = updateTextureFromUrl;
let lastImageUrl = props.imageUrl;
let lastValue = JSON.stringify(props.value);
setInterval(() => {
if (props.imageUrl !== lastImageUrl) {
lastImageUrl = props.imageUrl;
updateTextureFromUrl(props.imageUrl);
}
const currentValue = JSON.stringify(props.value);
if (currentValue !== lastValue) {
lastValue = currentValue;
if (props.value && typeof props.value === 'object') {
rotateDeg = props.value.rotate_deg ?? rotateDeg;
zoom = props.value.zoom ?? zoom;
verticalTilt = props.value.vertical_tilt ?? verticalTilt;
updatePositions();
}
}
}, 100);
};
initScene();
})();
"""
def create_camera_3d_component(value=None, imageUrl=None, **kwargs):
    """Build the interactive 3D camera control as a gr.HTML component.

    When no initial state is supplied, start at the neutral camera pose
    (front view, mid zoom, eye level).
    """
    # Fall back to the neutral pose only when the caller passed nothing.
    default_state = {"rotate_deg": 0, "zoom": 5.0, "vertical_tilt": 0}
    state = default_state if value is None else value
    return gr.HTML(
        value=state,
        html_template=CAMERA_3D_HTML_TEMPLATE,
        js_on_load=CAMERA_3D_JS,
        imageUrl=imageUrl,
        **kwargs,
    )
# --- UI ---
css = """
:root {
--name: citrus;
--primary-50: #fffbeb;
--primary-100: #fef3c7;
--primary-200: #fde68a;
--primary-300: #fcd34d;
--primary-400: #fbbf24;
--primary-500: #f59e0b;
--primary-600: #d97706;
--primary-700: #b45309;
--primary-800: #92400e;
--primary-900: #78350f;
--primary-950: #6c370f;
--secondary-50: #fffbeb;
--secondary-100: #fef3c7;
--secondary-200: #fde68a;
--secondary-300: #fcd34d;
--secondary-400: #fbbf24;
--secondary-500: #f59e0b;
--secondary-600: #d97706;
--secondary-700: #b45309;
--secondary-800: #92400e;
--secondary-900: #78350f;
--secondary-950: #6c370f;
--neutral-50: #fafaf9;
--neutral-100: #f5f5f4;
--neutral-200: #e7e5e4;
--neutral-300: #d6d3d1;
--neutral-400: #a8a29e;
--neutral-500: #78716c;
--neutral-600: #57534e;
--neutral-700: #44403c;
--neutral-800: #292524;
--neutral-900: #1c1917;
--neutral-950: #0f0e0d;
--spacing-xxs: 2px;
--spacing-xs: 4px;
--spacing-sm: 6px;
--spacing-md: 8px;
--spacing-lg: 10px;
--spacing-xl: 14px;
--spacing-xxl: 28px;
--radius-xxs: 1px;
--radius-xs: 2px;
--radius-sm: 4px;
--radius-md: 6px;
--radius-lg: 8px;
--radius-xl: 12px;
--radius-xxl: 22px;
--text-xxs: 9px;
--text-xs: 10px;
--text-sm: 12px;
--text-md: 14px;
--text-lg: 16px;
--text-xl: 22px;
--text-xxl: 26px;
--font: 'Ubuntu', ui-sans-serif, system-ui, sans-serif;
--font-mono: 'Roboto Mono', ui-monospace, Consolas, monospace;
--body-background-fill: var(--background-fill-primary);
--body-text-color: var(--neutral-800);
--body-text-size: var(--text-md);
--body-text-weight: 400;
--embed-radius: var(--radius-sm);
--color-accent: var(--primary-500);
--color-accent-soft: var(--primary-50);
--background-fill-primary: var(--neutral-50);
--background-fill-secondary: var(--neutral-50);
--border-color-accent: var(--primary-300);
--border-color-primary: var(--neutral-200);
--link-text-color: var(--secondary-600);
--link-text-color-active: var(--secondary-600);
--link-text-color-hover: var(--secondary-700);
--link-text-color-visited: var(--secondary-500);
--body-text-color-subdued: var(--neutral-400);
--accordion-text-color: var(--body-text-color);
--table-text-color: var(--body-text-color);
--shadow-drop: rgba(0,0,0,0.05) 0px 1px 2px 0px;
--shadow-drop-lg: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
--shadow-inset: rgba(0,0,0,0.05) 0px 2px 4px 0px inset;
--shadow-spread: 3px;
--block-background-fill: var(--neutral-100);
--block-border-color: var(--neutral-300);
--block-border-width: 1px;
--block-info-text-color: var(--body-text-color-subdued);
--block-info-text-size: var(--text-sm);
--block-info-text-weight: 400;
--block-label-background-fill: var(--background-fill-primary);
--block-label-border-color: var(--border-color-primary);
--block-label-border-width: 1px;
--block-label-shadow: none;
--block-label-text-color: var(--neutral-500);
--block-label-margin: 0;
--block-label-padding: var(--spacing-sm) var(--spacing-lg);
--block-label-radius: calc(var(--radius-sm) - 1px) 0 calc(var(--radius-sm) - 1px) 0;
--block-label-right-radius: 0 calc(var(--radius-sm) - 1px) 0 calc(var(--radius-sm) - 1px);
--block-label-text-size: var(--text-sm);
--block-label-text-weight: 400;
--block-padding: var(--spacing-xl) calc(var(--spacing-xl) + 2px);
--block-radius: var(--radius-sm);
--block-shadow: 0px 3px 0px 0px var(--neutral-300);
--block-title-background-fill: none;
--block-title-border-color: none;
--block-title-border-width: 0px;
--block-title-text-color: var(--neutral-500);
--block-title-padding: 0;
--block-title-radius: none;
--block-title-text-size: var(--text-md);
--block-title-text-weight: 400;
--container-radius: var(--radius-sm);
--form-gap-width: 0px;
--layout-gap: var(--spacing-xxl);
--panel-background-fill: var(--background-fill-secondary);
--panel-border-color: var(--border-color-primary);
--panel-border-width: 1px;
--section-header-text-size: var(--text-md);
--section-header-text-weight: 400;
--border-color-accent-subdued: var(--border-color-accent);
--code-background-fill: var(--neutral-100);
--chatbot-text-size: var(--text-lg);
--checkbox-background-color: var(--background-fill-primary);
--checkbox-background-color-focus: var(--checkbox-background-color);
--checkbox-background-color-hover: var(--checkbox-background-color);
--checkbox-background-color-selected: var(--color-accent);
--checkbox-border-color: var(--neutral-300);
--checkbox-border-color-focus: var(--color-accent);
--checkbox-border-color-hover: var(--neutral-300);
--checkbox-border-color-selected: var(--color-accent);
--checkbox-border-radius: var(--radius-sm);
--checkbox-border-width: var(--input-border-width);
--checkbox-label-background-fill: var(--neutral-200);
--checkbox-label-background-fill-hover: var(--checkbox-label-background-fill);
--checkbox-label-background-fill-selected: var(--primary-400);
--checkbox-label-border-color: var(--border-color-primary);
--checkbox-label-border-color-hover: var(--checkbox-label-border-color);
--checkbox-label-border-color-selected: var(--primary-300);
--checkbox-label-border-width: 2px;
--checkbox-label-gap: var(--spacing-lg);
--checkbox-label-padding: var(--spacing-md) calc(2 * var(--spacing-md));
--checkbox-label-shadow: none;
--checkbox-label-text-size: var(--text-md);
--checkbox-label-text-weight: 400;
--checkbox-check: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3cpath d='M12.207 4.793a1 1 0 010 1.414l-5 5a1 1 0 01-1.414 0l-2-2a1 1 0 011.414-1.414L6.5 9.086l4.293-4.293a1 1 0 011.414 0z'/%3e%3c/svg%3e");
--radio-circle: url("data:image/svg+xml,%3csvg viewBox='0 0 16 16' fill='white' xmlns='http://www.w3.org/2000/svg'%3e%3ccircle cx='8' cy='8' r='3'/%3e%3c/svg%3e");
--checkbox-shadow: none;
--checkbox-label-text-color: var(--body-text-color);
--checkbox-label-text-color-selected: var(--checkbox-label-text-color);
--error-background-fill: #fef2f2;
--error-border-color: #b91c1c;
--error-border-width: 1px;
--error-text-color: #b91c1c;
--error-icon-color: #b91c1c;
--input-background-fill: var(--neutral-50);
--input-background-fill-focus: var(--primary-50);
--input-background-fill-hover: var(--input-background-fill);
--input-border-color: var(--border-color-primary);
--input-border-color-focus: var(--secondary-300);
--input-border-color-hover: var(--input-border-color);
--input-border-width: 1px;
--input-padding: var(--spacing-xl);
--input-placeholder-color: var(--neutral-400);
--input-radius: var(--radius-sm);
--input-shadow: 0px -1px 0px 0px var(--neutral-300);
--input-shadow-focus: 0px -1px 0px 0px var(--primary-300);
--input-text-size: var(--text-md);
--input-text-weight: 400;
--loader-color: var(--color-accent);
--prose-text-size: var(--text-md);
--prose-text-weight: 400;
--prose-header-text-weight: 600;
--slider-color: var(--primary-400);
--stat-background-fill: var(--primary-300);
--table-border-color: var(--neutral-300);
--table-even-background-fill: white;
--table-odd-background-fill: var(--neutral-50);
--table-radius: var(--radius-sm);
--table-row-focus: var(--color-accent-soft);
--button-border-width: 0px;
--button-cancel-background-fill: #ef4444;
--button-cancel-background-fill-hover: #dc2626;
--button-cancel-border-color: var(--button-secondary-border-color);
--button-cancel-border-color-hover: var(--button-secondary-border-color-hover);
--button-cancel-text-color: white;
--button-cancel-text-color-hover: white;
--button-cancel-shadow: 0px 3px 0px 0px rgb(248 113 113);
--button-cancel-shadow-hover: 0px 5px 0px 0px rgb(248 113 113);
--button-cancel-shadow-active: 0px 2px 0px 0px rgb(248 113 113);
--button-transform-hover: translateY(-2px);
--button-transform-active: translateY(1px);
--button-transition: all 0.1s;
--button-large-padding: var(--spacing-lg) calc(2 * var(--spacing-lg));
--button-large-radius: var(--radius-md);
--button-large-text-size: var(--text-lg);
--button-large-text-weight: 600;
--button-primary-background-fill: var(--primary-500);
--button-primary-background-fill-hover: var(--button-primary-background-fill);
--button-primary-border-color: var(--primary-500);
--button-primary-border-color-hover: var(--primary-500);
--button-primary-text-color: var(--button-secondary-text-color);
--button-primary-text-color-hover: var(--button-primary-text-color);
--button-primary-shadow: 0px 3px 0px 0px var(--primary-400);
--button-primary-shadow-hover: 0px 5px 0px 0px var(--primary-400);
--button-primary-shadow-active: 0px 2px 0px 0px var(--primary-400);
--button-secondary-background-fill: var(--primary-400);
--button-secondary-background-fill-hover: var(--button-secondary-background-fill);
--button-secondary-border-color: var(--neutral-200);
--button-secondary-border-color-hover: var(--neutral-200);
--button-secondary-text-color: black;
--button-secondary-text-color-hover: var(--button-secondary-text-color);
--button-secondary-shadow: 0px 3px 0px 0px var(--primary-300);
--button-secondary-shadow-hover: 0px 5px 0px 0px var(--primary-300);
--button-secondary-shadow-active: 0px 2px 0px 0px var(--primary-300);
--button-small-padding: var(--spacing-sm) calc(1.5 * var(--spacing-sm));
--button-small-radius: var(--radius-md);
--button-small-text-size: var(--text-sm);
--button-small-text-weight: 400;
--button-medium-padding: var(--spacing-md) calc(2 * var(--spacing-md));
--button-medium-radius: var(--radius-md);
--button-medium-text-size: var(--text-md);
--button-medium-text-weight: 600;
}
:root.dark, :root .dark {
--body-background-fill: var(--background-fill-primary);
--body-text-color: var(--neutral-100);
--color-accent-soft: var(--neutral-700);
--background-fill-primary: var(--neutral-950);
--background-fill-secondary: var(--neutral-900);
--border-color-accent: var(--neutral-600);
--border-color-primary: var(--neutral-700);
--link-text-color-active: var(--secondary-500);
--link-text-color: var(--secondary-500);
--link-text-color-hover: var(--secondary-400);
--link-text-color-visited: var(--secondary-600);
--body-text-color-subdued: var(--neutral-400);
--accordion-text-color: var(--body-text-color);
--table-text-color: var(--body-text-color);
--shadow-spread: 1px;
--block-background-fill: var(--neutral-800);
--block-border-color: var(--border-color-primary);
--block-info-text-color: var(--body-text-color-subdued);
--block-label-background-fill: var(--background-fill-secondary);
--block-label-border-color: var(--border-color-primary);
--block-label-text-color: var(--neutral-200);
--block-shadow: 0px 3px 0px 0px var(--neutral-700);
--block-title-text-color: var(--neutral-200);
--panel-background-fill: var(--background-fill-secondary);
--panel-border-color: var(--border-color-primary);
--border-color-accent-subdued: var(--border-color-accent);
--code-background-fill: var(--neutral-800);
--checkbox-background-color: var(--neutral-400);
--checkbox-background-color-focus: var(--checkbox-background-color);
--checkbox-background-color-hover: var(--checkbox-background-color);
--checkbox-background-color-selected: var(--primary-600);
--checkbox-border-color: var(--neutral-700);
--checkbox-border-color-focus: var(--color-accent);
--checkbox-border-color-hover: var(--neutral-600);
--checkbox-border-color-selected: var(--color-accent);
--checkbox-border-width: var(--input-border-width);
--checkbox-label-background-fill: var(--neutral-700);
--checkbox-label-background-fill-hover: var(--checkbox-label-background-fill);
--checkbox-label-background-fill-selected: var(--primary-500);
--checkbox-label-border-color: var(--border-color-primary);
--checkbox-label-border-color-hover: var(--checkbox-label-border-color);
--checkbox-label-border-color-selected: var(--primary-600);
--checkbox-label-border-width: 2px;
--checkbox-label-text-color: var(--body-text-color);
--checkbox-label-text-color-selected: var(--button-primary-text-color);
--error-background-fill: var(--background-fill-primary);
--error-border-color: #ef4444;
--error-text-color: #fef2f2;
--error-icon-color: #ef4444;
--input-background-fill: var(--neutral-900);
--input-background-fill-focus: none;
--input-background-fill-hover: var(--input-background-fill);
--input-border-color: var(--border-color-primary);
--input-border-color-focus: var(--neutral-700);
--input-border-color-hover: var(--input-border-color);
--input-placeholder-color: var(--neutral-500);
--input-shadow: 0px -1px 0px 0px var(--neutral-700);
--input-shadow-focus: 0px -1px 0px 0px var(--primary-600);
--slider-color: var(--primary-500);
--stat-background-fill: var(--primary-500);
--table-border-color: var(--neutral-700);
--table-even-background-fill: var(--neutral-950);
--table-odd-background-fill: var(--neutral-900);
--table-row-focus: var(--color-accent-soft);
--button-cancel-background-fill: #b91c1c;
--button-cancel-background-fill-hover: #991b1b;
--button-cancel-border-color: var(--button-secondary-border-color);
--button-cancel-border-color-hover: var(--button-secondary-border-color-hover);
--button-cancel-text-color: white;
--button-cancel-text-color-hover: white;
--button-cancel-shadow: 0px 3px 0px 0px rgb(220 38 38);
--button-cancel-shadow-hover: 0px 5px 0px 0px rgb(220 38 38);
--button-cancel-shadow-active: 0px 2px 0px 0px rgb(220 38 38);
--button-primary-background-fill: var(--primary-600);
--button-primary-background-fill-hover: var(--button-primary-background-fill);
--button-primary-border-color: var(--primary-600);
--button-primary-border-color-hover: var(--primary-500);
--button-primary-text-color: var(--button-secondary-text-color);
--button-primary-text-color-hover: var(--button-primary-text-color);
--button-primary-shadow: 0px 3px 0px 0px var(--primary-700);
--button-primary-shadow-hover: 0px 5px 0px 0px var(--primary-700);
--button-primary-shadow-active: 0px 2px 0px 0px var(--primary-700);
--button-secondary-background-fill: var(--primary-500);
--button-secondary-background-fill-hover: var(--button-secondary-background-fill);
--button-secondary-border-color: var(--neutral-600);
--button-secondary-border-color-hover: var(--neutral-500);
--button-secondary-text-color: var(--neutral-900);
--button-secondary-text-color-hover: var(--button-secondary-text-color);
--button-secondary-shadow: 0px 3px 0px 0px var(--primary-600);
--button-secondary-shadow-hover: 0px 5px 0px 0px var(--primary-600);
--button-secondary-shadow-active: 0px 2px 0px 0px var(--primary-600);
}
#col-container { max-width: 1100px; margin: 0 auto; }
.dark .progress-text { color: white !important; }
#camera-3d-control { min-height: 400px; }
#examples { max-width: 1100px; margin: 0 auto; }
.fillable { max-width: 1250px !important; }
"""
def reset_all() -> list:
    """Return the default camera-knob values plus a raised reset flag.

    Output order matches the wired components:
    [rotate_deg, zoom, vertical_tilt, is_reset].
    """
    defaults = [0, 5.0, 0]
    return defaults + [True]
def end_reset() -> bool:
    """Lower the reset flag once the reset chain has finished."""
    finished = False
    return finished
def update_dimensions_on_upload(
    image: Optional["Image.Image"],
    max_dim: int = 1024,
    multiple: int = 8,
) -> Tuple[int, int]:
    """Compute recommended (width, height) for the output resolution.

    Scales the image so its longer side equals ``max_dim`` while preserving
    aspect ratio, then snaps both dimensions *down* to the nearest
    ``multiple`` (the diffusion pipeline requires dimensions divisible by 8).

    Args:
        image: Uploaded PIL image (any object exposing ``.size``), or None.
        max_dim: Target length for the longer side. Defaults to 1024.
        multiple: Both dimensions are floored to this multiple. Defaults to 8.

    Returns:
        Tuple of (width, height). Falls back to (max_dim, max_dim) when no
        image is provided.
    """
    if image is None:
        return max_dim, max_dim
    original_width, original_height = image.size
    if original_width >= original_height:
        new_width = max_dim
        new_height = int(max_dim * original_height / original_width)
    else:
        new_height = max_dim
        new_width = int(max_dim * original_width / original_height)
    # Floor to the required multiple, but never below `multiple` itself:
    # extreme aspect ratios could otherwise round the minor dimension to 0.
    new_width = max(multiple, (new_width // multiple) * multiple)
    new_height = max(multiple, (new_height // multiple) * multiple)
    return new_width, new_height
# --- Gradio UI definition: layout, helper callbacks, and event wiring ---
with gr.Blocks() as demo:
    gr.Markdown("""
    ## 🎬 Fibo Edit — Camera Angle Control
    Fibo Edit with Multi-Angle LoRA for precise camera control ✨
    Control rotation, tilt, and zoom to generate images from any angle 🎥
    ### Fine-tuning data was created by [Lovis](https://huggingface.co/fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA) and UI by [Apolinario](https://huggingface.co/spaces/multimodalart/qwen-image-multiple-angles-3d-camera)
    """)

    with gr.Row():
        # Left column: input image, 3D camera widget, and action buttons.
        with gr.Column(scale=1):
            image = gr.Image(label="Input Image", type="pil", height=280)
            # Hidden component holding the previous generation so it can be
            # fed back into the next caption request (see run_event wiring).
            prev_output = gr.Image(value=None, visible=False)
            # Hidden flag toggled during the reset chain (reset_all/end_reset).
            is_reset = gr.Checkbox(value=False, visible=False)
            # Hidden state to pass processed image between steps
            processed_image = gr.State(None)
            gr.Markdown("### 🎮 3D Camera Control")
            # Custom three.js camera widget; factory is defined earlier in
            # this file, outside this view.
            camera_3d = create_camera_3d_component(
                value={"rotate_deg": 0, "zoom": 5.0, "vertical_tilt": 0},
                elem_id="camera-3d-control",
            )
            with gr.Row():
                reset_btn = gr.Button("🔄 Reset", size="sm")
                run_btn = gr.Button("🚀 Generate", variant="primary", size="lg")

        # Right column: result, slider controls, and advanced settings.
        with gr.Column(scale=1):
            result = gr.Image(label="Output Image", interactive=False, height=350)
            gr.Markdown("### 🎚️ Slider Controls")
            rotate_deg = gr.Slider(
                label="Horizontal Rotation (°)",
                minimum=-180,
                maximum=180,
                step=45,
                value=0,
                info="-180/180: back, -90: left, 0: front, 90: right",
            )
            zoom = gr.Slider(
                label="Zoom Level",
                minimum=0,
                maximum=10,
                step=1,
                value=5.0,
                info="0-3.33: wide, 3.33-6.66: medium, 6.66-10: close-up",
            )
            vertical_tilt = gr.Slider(
                label="Vertical Tilt",
                minimum=-1,
                maximum=1,
                step=0.5,
                value=0,
                info="-1: low-angle, 0: eye-level, 1: high-angle",
            )
            prompt_preview = gr.Textbox(label="Generated Prompt", interactive=False)
            with gr.Accordion("📋 Structured Caption (BRIA API)", open=False):
                structured_json = gr.JSON(label="JSON Response", container=False)
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=DEFAULT_SEED,
                )
                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.1,
                    value=DEFAULT_GUIDANCE_SCALE,
                )
                num_inference_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=DEFAULT_NUM_INFERENCE_STEPS,
                )
                height = gr.Slider(
                    label="Height", minimum=256, maximum=2048, step=8, value=1024
                )
                width = gr.Slider(
                    label="Width", minimum=256, maximum=2048, step=8, value=1024
                )

    # --- Helper Functions ---
    def update_prompt_from_sliders(rotate, zoom_val, tilt):
        """Rebuild the preview prompt from the current slider values."""
        prompt, _ = build_camera_prompt(rotate, zoom_val, tilt)
        return prompt

    def sync_3d_to_sliders(camera_value):
        """Push the 3D widget's state into the sliders and prompt preview."""
        if camera_value and isinstance(camera_value, dict):
            rot = camera_value.get("rotate_deg", 0)
            zoom_val = camera_value.get("zoom", 5.0)
            tilt = camera_value.get("vertical_tilt", 0)
            prompt, _ = build_camera_prompt(rot, zoom_val, tilt)
            return rot, zoom_val, tilt, prompt
        # Empty/malformed widget value: leave all four outputs unchanged.
        return gr.update(), gr.update(), gr.update(), gr.update()

    def sync_sliders_to_3d(rotate, zoom_val, tilt):
        """Mirror the slider values back into the 3D widget's value dict."""
        return {"rotate_deg": rotate, "zoom": zoom_val, "vertical_tilt": tilt}

    def update_3d_image(img):
        """Embed the uploaded image into the 3D widget as a base64 data URL."""
        if img is None:
            return gr.update(imageUrl=None)
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        data_url = f"data:image/png;base64,{img_str}"
        return gr.update(imageUrl=data_url)

    # --- Event Handlers ---
    # Slider -> Prompt preview
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.change(
            fn=update_prompt_from_sliders,
            inputs=[rotate_deg, zoom, vertical_tilt],
            outputs=[prompt_preview],
        )

    # 3D control -> Sliders + Prompt (no auto-inference)
    camera_3d.change(
        fn=sync_3d_to_sliders,
        inputs=[camera_3d],
        outputs=[rotate_deg, zoom, vertical_tilt, prompt_preview],
    )

    # Sliders -> 3D control (no auto-inference)
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.release(
            fn=sync_sliders_to_3d,
            inputs=[rotate_deg, zoom, vertical_tilt],
            outputs=[camera_3d],
        )

    # Reset: restore defaults, clear the reset flag, then re-sync the widget.
    reset_btn.click(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, zoom, vertical_tilt, is_reset],
        queue=False,
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False).then(
        fn=sync_sliders_to_3d,
        inputs=[rotate_deg, zoom, vertical_tilt],
        outputs=[camera_3d],
    )

    # Generate button - Two-stage process
    # Stage 1: Fetch structured caption from BRIA API and display it immediately
    run_event = run_btn.click(
        fn=fetch_structured_caption,
        inputs=[
            image,
            rotate_deg,
            zoom,
            vertical_tilt,
            seed,
            randomize_seed,
            prev_output,
        ],
        outputs=[seed, prompt_preview, structured_json, processed_image],
    ).then(
        # Stage 2: Generate image with Fibo Edit pipeline
        fn=generate_image_from_caption,
        inputs=[
            processed_image,
            structured_json,
            seed,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result],
    )

    # Image upload: recompute output resolution, reset the camera, then show
    # the image inside the 3D widget.
    image.upload(
        fn=update_dimensions_on_upload, inputs=[image], outputs=[width, height]
    ).then(
        fn=reset_all,
        inputs=None,
        outputs=[rotate_deg, zoom, vertical_tilt, is_reset],
        queue=False,
    ).then(fn=end_reset, inputs=None, outputs=[is_reset], queue=False).then(
        fn=update_3d_image, inputs=[image], outputs=[camera_3d]
    )
    image.clear(fn=lambda: gr.update(imageUrl=None), outputs=[camera_3d])

    # After a generation completes, remember the result so the next request
    # can pass it along as prev_output.
    run_event.then(lambda img, *_: img, inputs=[result], outputs=[prev_output])

    # Examples - Commenting out for now since we need actual example images
    # Note: With the two-stage inference process, examples would need custom handling
    # to properly chain fetch_structured_caption -> generate_image_from_caption

    # Sync 3D component when sliders change (covers example loading)
    def sync_3d_on_slider_change(img, rot, zoom_val, tilt):
        """Sync slider values (and the current image, if any) into the 3D widget."""
        camera_value = {"rotate_deg": rot, "zoom": zoom_val, "vertical_tilt": tilt}
        if img is not None:
            buffered = BytesIO()
            img.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()
            data_url = f"data:image/png;base64,{img_str}"
            return gr.update(value=camera_value, imageUrl=data_url)
        return gr.update(value=camera_value)

    # When any slider value changes (including from examples), sync the 3D component
    for slider in [rotate_deg, zoom, vertical_tilt]:
        slider.change(
            fn=sync_3d_on_slider_change,
            inputs=[image, rotate_deg, zoom, vertical_tilt],
            outputs=[camera_3d],
        )

    # API endpoints for the two-stage inference process
    gr.api(fetch_structured_caption, api_name="fetch_caption")
    gr.api(generate_image_from_caption, api_name="generate_image")
if __name__ == "__main__":
    # Inject three.js so the custom 3D camera widget can render.
    head = '<script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>'
    launch_kwargs = {"head": head, "css": css}
    if RUN_LOCAL:
        # Local development: enable the MCP server and bind to a fixed port.
        launch_kwargs.update(
            mcp_server=True,
            footer_links=["api", "gradio", "settings"],
            server_name="0.0.0.0",
            server_port=8081,
        )
    # On HuggingFace Spaces the defaults are used (head + css only).
    demo.launch(**launch_kwargs)