# NOTE: the following four lines are Hugging Face Hub page furniture captured
# by a scrape (avatar caption, author, commit message, commit hash). They are
# kept as comments so the module parses; they are not part of the application.
# ysharma's picture
# ysharma HF Staff
# Update app.py
# f8f0519 verified
"""
Agentic Coding : 3D Camera View Generator
- Qwen Image Edit + Lightning LoRA + Multi-Angle LoRA
- gr.HTML custom component (Gradio 6)
- ZeroGPU (HuggingFace Spaces)
"""
import gradio as gr
import numpy as np
import random
import torch
import base64
import spaces
from io import BytesIO
from PIL import Image
from diffusers import QwenImageEditPlusPipeline
# Largest user-facing seed value (fits a signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max
# bfloat16 weights to keep GPU memory usage down; CPU fallback for local runs.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
# ── Model Loading on ZEROGPU
# Loaded once at import time so every @spaces.GPU call reuses the same pipeline.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2511",
    torch_dtype=dtype,
).to(device)
# Lightning LoRA: distilled weights enabling the 4-step default in the UI.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Edit-2511-Lightning",
    weight_name="Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
    adapter_name="lightning",
)
# Multi-angle LoRA: provides the "<sks> ..." camera-direction conditioning
# used by build_camera_prompt below.
pipe.load_lora_weights(
    "fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA",
    weight_name="qwen-image-edit-2511-multiple-angles-lora.safetensors",
    adapter_name="angles",
)
# Both adapters active at full strength for every generation.
pipe.set_adapters(["lightning", "angles"], adapter_weights=[1.0, 1.0])
# ── Camera parameter tables ────────────────────────────────────────────────────
# Discrete camera poses and their prompt phrases. Arbitrary UI values are
# snapped to the nearest key (see snap_to_nearest) before a prompt is built.
# Horizontal orbit angle in degrees, 45Β° steps around the subject.
AZIMUTH_MAP = {
    0: "front view",
    45: "front-right quarter view",
    90: "right side view",
    135: "back-right quarter view",
    180: "back view",
    225: "back-left quarter view",
    270: "left side view",
    315: "front-left quarter view",
}
# Vertical camera angle in degrees relative to eye level.
ELEVATION_MAP = {
    -30: "low-angle shot",
    0: "eye-level shot",
    30: "elevated shot",
    60: "high-angle shot",
}
# Relative subject distance: below 1.0 moves in (close-up), above moves out.
DISTANCE_MAP = {
    0.6: "close-up",
    1.0: "medium shot",
    1.8: "wide shot",
}
# Default viewer state β€” plain dict, no custom class needed.
# Keys mirror what HTML_TEMPLATE/JS_ON_LOAD read: img (data URL), az, el, dist.
DEFAULT_CAM_VALUE = {"img": "", "az": 0.0, "el": 0.0, "dist": 1.0}
def snap_to_nearest(value, steps):
return min(steps, key=lambda x: abs(x - value))
def build_camera_prompt(azimuth, elevation, distance):
    """Snap the raw camera values to known poses and compose the LoRA prompt.

    Returns a string of the form "<sks> <azimuth> <elevation> <distance>"
    using the phrase tables above.
    """
    nearest_az = snap_to_nearest(azimuth, list(AZIMUTH_MAP))
    nearest_el = snap_to_nearest(elevation, list(ELEVATION_MAP))
    nearest_dist = snap_to_nearest(distance, list(DISTANCE_MAP))
    phrases = (
        AZIMUTH_MAP[nearest_az],
        ELEVATION_MAP[nearest_el],
        DISTANCE_MAP[nearest_dist],
    )
    return "<sks> " + " ".join(phrases)
def pil_to_data_url(img: Image.Image) -> str:
    """Serialize *img* into a base64 data URL.

    WEBP sources are re-encoded as WEBP; everything else becomes PNG so the
    browser can always render the result.
    """
    buffer = BytesIO()
    source_format = getattr(img, "format", None) or ""
    if source_format.upper() == "WEBP":
        mime, save_format = "image/webp", "WEBP"
    else:
        mime, save_format = "image/png", "PNG"
    img.save(buffer, format=save_format)
    encoded = base64.b64encode(buffer.getvalue()).decode()
    return f"data:{mime};base64,{encoded}"
# ── Inference ──────────────────────────────────────────────────────────────────
@spaces.GPU(duration=120)
def infer_camera_edit(
    image, azimuth, elevation, distance,
    seed, randomize_seed, guidance_scale,
    num_inference_steps, height, width,
):
    """Run one camera-view edit on the GPU worker.

    Returns:
        (edited PIL image, seed actually used, prompt actually used).
    """
    # Draw a fresh seed when requested so repeated clicks produce variations.
    actual_seed = random.randint(0, MAX_SEED) if randomize_seed else seed
    prompt = build_camera_prompt(azimuth, elevation, distance)
    rng = torch.Generator(device=device).manual_seed(actual_seed)
    output = pipe(
        image=image,
        prompt=prompt,
        height=height,
        width=width,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=rng,
    )
    return output.images[0], actual_seed, prompt
# ── gr.HTML templates ──────────────────────────────────────────────────────────
# Using plain gr.HTML (no subclass) with a dict value.
#
# Gradio 6 passes the dict as `value` to the template; all keys (img, az, el,
# dist) are accessible as value.img, value.az, etc. in both ${} and {{}} syntax.
#
# Layout: the image (or an empty-state card) fills the wrapper; a hover-only
# HUD shows the az/el/dist readout plus a d-pad and zoom column. Each button's
# data-action attribute is dispatched by the delegated listener in JS_ON_LOAD.
# NOTE(review): the {{#if}}/{{else}}/${} template syntax is assumed to match
# the installed Gradio version's gr.HTML templating β€” confirm before upgrade.
HTML_TEMPLATE = """
<div class="cv-wrap">
{{#if value.img}}
<img class="cv-img" src="{{value.img}}">
{{else}}
<div class="cv-empty">
<svg class="cv-empty-icon" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="1.25">
<path stroke-linecap="round" stroke-linejoin="round" d="M6.827 6.175A2.31 2.31 0 015.186 7.23c-.38.054-.757.112-1.134.175C2.999 7.58 2.25 8.507 2.25 9.574V18a2.25 2.25 0 002.25 2.25h15A2.25 2.25 0 0021.75 18V9.574c0-1.067-.75-1.994-1.802-2.169a47.865 47.865 0 00-1.134-.175 2.31 2.31 0 01-1.64-1.055l-.822-1.316a2.192 2.192 0 00-1.736-1.039 48.774 48.774 0 00-5.232 0 2.192 2.192 0 00-1.736 1.039l-.821 1.316z" />
<path stroke-linecap="round" stroke-linejoin="round" d="M16.5 12.75a4.5 4.5 0 11-9 0 4.5 4.5 0 019 0zM18.75 10.5h.008v.008h-.008V10.5z" />
</svg>
<p class="cv-empty-title">No image loaded</p>
<p class="cv-empty-sub">Upload an image on the left, then hover here to see camera controls</p>
</div>
{{/if}}
<div class="cv-hud">
<div class="cv-readout">
<span class="cv-lbl">Az</span><span class="cv-val">${value.az}&deg;</span>
<span class="cv-sep">/</span>
<span class="cv-lbl">El</span><span class="cv-val">${value.el}&deg;</span>
<span class="cv-sep">/</span>
<span class="cv-lbl">Dist</span><span class="cv-val">${value.dist}&times;</span>
</div>
<div class="cv-controls">
<div class="cv-dpad">
<button class="cv-btn cv-up" data-action="el-plus" title="Elevate">&#9650;</button>
<button class="cv-btn cv-left" data-action="az-minus" title="Rotate Left">&#9664;</button>
<div class="cv-dot"></div>
<button class="cv-btn cv-right" data-action="az-plus" title="Rotate Right">&#9654;</button>
<button class="cv-btn cv-down" data-action="el-minus" title="Lower">&#9660;</button>
</div>
<div class="cv-zoom">
<button class="cv-zbtn" data-action="dist-minus" title="Zoom In">+</button>
<button class="cv-zbtn" data-action="dist-plus" title="Zoom Out">&minus;</button>
</div>
</div>
</div>
</div>
"""
# Stylesheet injected alongside HTML_TEMPLATE. Plain CSS, no templating:
# dark image well, hover-revealed HUD, white readout/controls cards, and the
# orange accent noted below to match the default Gradio theme.
CSS_TEMPLATE = """
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
/* ── Image well ── dark neutral so images pop, same treatment as any
professional image editor / camera app preview area. Not a stylistic
choice but a functional one: images render best against dark. */
.cv-wrap {
position: relative;
width: 100%; height: 500px;
background: #1c1c1e;
border-radius: 12px;
overflow: hidden;
display: flex; align-items: center; justify-content: center;
}
.cv-img {
max-width: 100%; max-height: 100%;
object-fit: contain; display: block;
}
/* empty state */
.cv-empty {
text-align: center; user-select: none;
display: flex; flex-direction: column; align-items: center; gap: 14px;
}
.cv-empty-icon {
width: 52px; height: 52px;
color: rgba(255,255,255,0.2);
}
.cv-empty-title {
font-size: 15px; font-weight: 500; letter-spacing: -0.01em;
color: rgba(255,255,255,0.45);
}
.cv-empty-sub {
font-size: 13px; max-width: 230px; line-height: 1.65;
color: rgba(255,255,255,0.25);
}
/* HUD β€” fades in on hover via CSS, no JS needed */
.cv-hud {
position: absolute; bottom: 16px; right: 16px;
display: flex; flex-direction: column; align-items: flex-end; gap: 8px;
opacity: 0; transition: opacity 0.16s ease; pointer-events: auto;
}
.cv-wrap:hover .cv-hud { opacity: 1; }
/* coordinate readout β€” white card floating over image */
.cv-readout {
display: flex; align-items: center; gap: 8px;
background: rgba(255,255,255,0.96);
border-radius: 7px; padding: 5px 13px;
font-size: 12px; white-space: nowrap;
box-shadow: 0 2px 12px rgba(0,0,0,0.25);
}
.cv-lbl { color: #9ca3af; font-size: 10px; text-transform: uppercase; letter-spacing: 0.04em; }
.cv-val { color: #111827; font-weight: 600; font-variant-numeric: tabular-nums; }
.cv-sep { color: #d1d5db; margin: 0 2px; }
/* controls panel β€” white card, same treatment as readout */
.cv-controls {
display: flex; align-items: center; gap: 8px;
background: rgba(255,255,255,0.96);
border-radius: 10px; padding: 8px 10px;
box-shadow: 0 2px 12px rgba(0,0,0,0.25);
}
/* d-pad */
.cv-dpad {
display: grid;
grid-template-columns: repeat(3, 32px);
grid-template-rows: repeat(3, 32px);
gap: 3px;
}
.cv-btn {
width: 32px; height: 32px;
border: 1px solid #e5e7eb; border-radius: 6px;
background: #ffffff; color: #6b7280;
font-size: 10px; cursor: pointer;
display: flex; align-items: center; justify-content: center;
transition: background 0.1s, border-color 0.1s, color 0.1s, transform 0.08s;
padding: 0; line-height: 1;
}
/* orange matches Gradio Default theme primary */
.cv-btn:hover {
background: #fff7ed; border-color: #f97316; color: #f97316;
transform: scale(1.1);
}
.cv-btn:active { transform: scale(0.92); background: #ffedd5; }
.cv-up { grid-column:2; grid-row:1; }
.cv-left { grid-column:1; grid-row:2; }
.cv-dot {
grid-column:2; grid-row:2;
width:32px; height:32px; border-radius:50%;
background: #f9fafb; border: 1px solid #e5e7eb;
}
.cv-right { grid-column:3; grid-row:2; }
.cv-down { grid-column:2; grid-row:3; }
/* zoom column */
.cv-zoom { display: flex; flex-direction: column; gap: 3px; }
.cv-zbtn {
width: 32px; height: 38px;
border: 1px solid #e5e7eb; border-radius: 6px;
background: #ffffff; color: #6b7280;
font-size: 16px; font-weight: 400; cursor: pointer;
display: flex; align-items: center; justify-content: center;
transition: background 0.1s, border-color 0.1s, color 0.1s, transform 0.08s;
padding: 0; line-height: 1;
}
.cv-zbtn:hover {
background: #fff7ed; border-color: #f97316; color: #f97316;
transform: scale(1.1);
}
.cv-zbtn:active { transform: scale(0.92); background: #ffedd5; }
"""
# Client-side controller for the camera HUD. Clicks on any [data-action]
# button update props.value (az in 45Β° steps with wrap-around, el clamped to
# [-30, 60], dist stepped along DIST_STEPS) and fire the component's 'submit'
# event, which is wired to on_camera_submit in create_app.
# NOTE(review): `element`, `props` and `trigger` are assumed to be provided by
# the gr.HTML js_on_load execution context β€” confirm against the installed
# Gradio version. DIST_STEPS must stay in sync with DISTANCE_MAP above.
JS_ON_LOAD = """
const DIST_STEPS = [0.6, 1.0, 1.8];
function snapDist(d) {
return DIST_STEPS.reduce((p, c) => Math.abs(c - d) < Math.abs(p - d) ? c : p);
}
function shiftDist(d, dir) {
const idx = DIST_STEPS.indexOf(snapDist(Number(d)));
return DIST_STEPS[Math.max(0, Math.min(DIST_STEPS.length - 1, idx + dir))];
}
// Delegated click listener β€” attached once, survives template re-renders.
element.addEventListener('click', function(e) {
const btn = e.target.closest('[data-action]');
if (!btn) return;
const v = Object.assign({}, props.value);
let az = Number(v.az) || 0;
let el = Number(v.el) || 0;
let dist = Number(v.dist) || 1.0;
switch (btn.dataset.action) {
case 'az-minus': az = (az - 45 + 360) % 360; break;
case 'az-plus': az = (az + 45) % 360; break;
case 'el-plus': el = Math.min(60, el + 30); break;
case 'el-minus': el = Math.max(-30, el - 30); break;
case 'dist-minus': dist = shiftDist(dist, -1); break;
case 'dist-plus': dist = shiftDist(dist, +1); break;
}
props.value = { ...v, az, el, dist };
trigger('submit');
});
"""
# ── Global Gradio CSS ──────────────────────────────────────────────────────────
# Page-level styling passed to demo.launch(css=...): header chips, no-wrap
# columns, and monospace treatment for the status/prompt textboxes targeted
# via the elem_classes set in create_app.
GLOBAL_CSS = """
/* ── Row: never let the two columns wrap ── */
/* Gradio 6 renders rows as flex containers with class "flex" */
.gradio-container .flex.flex-row,
.gradio-container .row {
flex-wrap: nowrap !important;
}
/* ── Header ── */
.app-heading { padding: 28px 0 20px; }
.app-heading h1 {
font-size: clamp(24px, 3.5vw, 36px);
font-weight: 700;
letter-spacing: -0.02em;
line-height: 1.1;
color: #111827;
margin: 0 0 10px;
}
.app-heading .chips {
display: flex; flex-wrap: wrap; gap: 6px;
}
.app-heading .chip {
display: inline-flex; align-items: center; gap: 5px;
padding: 3px 10px;
background: #fff7ed;
border: 1px solid #fed7aa;
border-radius: 999px;
font-size: 12px; font-weight: 500;
color: #c2410c;
line-height: 1.5;
}
.app-heading .chip svg {
width: 12px; height: 12px; opacity: 0.7;
}
/* ── Controls column β€” subtle card to separate it from viewer ── */
.controls-col > .block,
.controls-col > .form {
background: #fafafa !important;
}
/* ── Camera viewer column label ── */
.viewer-label {
font-size: 13px; font-weight: 600;
color: #374151;
margin-bottom: 8px;
display: flex; align-items: center; gap: 8px;
}
.viewer-label .hint {
font-weight: 400; color: #9ca3af; font-size: 12px;
}
/* ── Status display ── replaces the plain textbox look */
.status-row {
display: flex; align-items: center; gap: 8px;
padding: 8px 12px;
background: #f9fafb;
border: 1px solid #e5e7eb;
border-radius: 8px;
margin-top: 6px;
font-size: 12px;
font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace;
color: #6b7280;
min-height: 38px;
}
/* status textbox β€” reduce visual weight */
.status-box textarea {
font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace !important;
font-size: 12px !important;
color: #374151 !important;
background: #f9fafb !important;
border-color: #e5e7eb !important;
resize: none !important;
}
/* ── Prompt box ── */
.prompt-box textarea {
font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace !important;
font-size: 12px !important;
color: #6b7280 !important;
}
"""
# Default theme object; applied at launch() together with GLOBAL_CSS.
GRADIO_THEME = gr.themes.Default()
# ── App ────────────────────────────────────────────────────────────────────────
def create_app():
    """Assemble the Gradio Blocks UI and wire all event handlers.

    Returns:
        gr.Blocks: the fully-wired (but not yet launched) application.
        Theme and CSS are intentionally applied in launch(), not here.
    """
    # FIX: theme and css are now passed to launch(), not gr.Blocks()
    with gr.Blocks(title="3D Camera View Generator") as demo:
        # Static page header: title plus chips naming the model and LoRAs.
        gr.HTML("""
<div class="app-heading">
<h1>3D Camera View Generator</h1>
<div class="chips">
<span class="chip">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor"><path d="M9.653 16.915l-.005-.003-.019-.01a20.759 20.759 0 01-1.162-.682 22.045 22.045 0 01-2.582-2.085c-1.034-1.036-2.035-2.329-2.535-3.765-.583-1.683-.322-3.498.985-4.82C5.576 4.29 7.319 3.75 9 3.75c.921 0 1.85.205 2.704.596L13 3.25l1.304 1.304L13 5.858a6.001 6.001 0 010 8.284l-.707.707-2.64-2.64z"/></svg>
Qwen Image Edit 2511
</span>
<span class="chip">⚑ Lightning LoRA</span>
<span class="chip">πŸ“ Multi-Angle LoRA</span>
</div>
</div>
""")
        with gr.Row():
            # ── Left column: source image + generation settings ──────────────
            with gr.Column(scale=4, min_width=200, elem_classes=["controls-col"]):
                image_input = gr.Image(
                    label="Source Image",
                    type="pil",
                    height=320,
                )
                # Read-only echo of the prompt built from the current pose;
                # default value matches DEFAULT_CAM_VALUE (az=0, el=0, dist=1).
                prompt_box = gr.Textbox(
                    label="Active Camera Prompt",
                    value="<sks> front view eye-level shot medium shot",
                    interactive=False,
                    lines=1,
                    elem_classes=["prompt-box"],
                )
                with gr.Accordion("βš™ Generation Settings", open=False):
                    seed_slider = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
                    rand_seed_cb = gr.Checkbox(True, label="Randomise seed each generation")
                    guidance_sl = gr.Slider(1.0, 20.0, value=1.0, step=0.1, label="Guidance Scale (keep ≀1 for Lightning LoRA)")
                    steps_sl = gr.Slider(1, 50, value=4, step=1, label="Inference Steps")
                    # Width/height are auto-filled from the upload via
                    # _auto_dimensions; step=32 matches its rounding.
                    width_sl = gr.Slider(256, 1024, value=1024, step=32, label="Width (px)")
                    height_sl = gr.Slider(256, 1024, value=1024, step=32, label="Height (px)")
            # ── Right column: interactive camera viewer + results ────────────
            with gr.Column(scale=6, min_width=280):
                gr.HTML("""
<div class="viewer-label">
Camera View
<span class="hint">β€” hover to reveal orbit controls</span>
</div>
""")
                # FIX: plain gr.HTML with dict value β€” no subclass, no inspect error
                # NOTE(review): html_template/css_template/js_on_load/
                # apply_default_css are assumed to be gr.HTML kwargs in the
                # pinned Gradio version β€” confirm before upgrading Gradio.
                cam_view = gr.HTML(
                    value=DEFAULT_CAM_VALUE,
                    html_template=HTML_TEMPLATE,
                    css_template=CSS_TEMPLATE,
                    js_on_load=JS_ON_LOAD,
                    apply_default_css=False,
                )
                status_box = gr.Textbox(
                    label="Status",
                    value="Ready β€” upload an image to begin",
                    interactive=False,
                    lines=1,
                    elem_classes=["status-box"],
                )
                # Server-side accumulator of every generated image this session.
                gallery_state = gr.State([])
                with gr.Accordion("πŸ–Ό Generated Views", open=False):
                    gallery = gr.Gallery(
                        label="",
                        show_label=False,
                        columns=4,
                        height="auto",
                        object_fit="cover",
                        allow_preview=True,
                    )
        # ── Helpers ──────────────────────────────────────────────────────────
        def _coerce_view(v):
            """Extract (az, el, dist) safely from a dict or default."""
            if isinstance(v, dict):
                return float(v.get("az", 0)), float(v.get("el", 0)), float(v.get("dist", 1.0))
            return 0.0, 0.0, 1.0
        def _auto_dimensions(img):
            """Derive (width, height) for generation from the source image.

            Fits the longer side to 1024, rounds the other to a multiple of
            32, and clamps both into the sliders' [256, 1024] range.
            """
            if img is None:
                return 1024, 1024
            w, h = img.size
            ar = w / h
            if ar > 1:
                nw = 1024
                nh = round(1024 / ar / 32) * 32
            else:
                nh = 1024
                nw = round(1024 * ar / 32) * 32
            return max(256, min(1024, nw)), max(256, min(1024, nh))
        # ── Event handlers ────────────────────────────────────────────────────
        def on_image_upload(img, current_view):
            """Load the upload into the viewer, preserving the camera pose.

            Returns (viewer value, width, height, status message) β€” the
            dimension sliders are auto-set from the image's aspect ratio.
            """
            nw, nh = _auto_dimensions(img)
            if img is None:
                return DEFAULT_CAM_VALUE.copy(), nw, nh, "No image"
            az, el, dist = _coerce_view(current_view)
            return (
                {"img": pil_to_data_url(img), "az": az, "el": el, "dist": dist},
                nw,
                nh,
                "Image loaded β€” hover the viewer and click an arrow to generate",
            )
        def on_camera_submit(
            current_view, src_img,
            seed_val, rand_seed, guidance, steps, h, w,
            gallery_imgs,
        ):
            """Generate one new view for the pose carried in *current_view*.

            Always edits the ORIGINAL upload (src_img), not the previous
            result, so repeated orbiting does not accumulate artifacts.
            Returns (viewer value, prompt, status, gallery state, gallery).
            """
            try:
                az, el, dist = _coerce_view(current_view)
                prompt = build_camera_prompt(az, el, dist)
                if src_img is None:
                    return current_view, prompt, "⚠ Upload an image first", gallery_imgs, gallery_imgs
                gen_img, final_seed, final_prompt = infer_camera_edit(
                    image=src_img,
                    azimuth=az, elevation=el, distance=dist,
                    seed=seed_val, randomize_seed=rand_seed,
                    guidance_scale=guidance,
                    num_inference_steps=int(steps),
                    height=int(h), width=int(w),
                )
                # Swap the generated frame into the viewer at the same pose.
                new_view = {"img": pil_to_data_url(gen_img), "az": az, "el": el, "dist": dist}
                gallery_imgs = list(gallery_imgs) + [gen_img]
                status = f"βœ“ {final_prompt} | seed {final_seed}"
                return new_view, final_prompt, status, gallery_imgs, gallery_imgs
            except Exception as exc:
                # Deliberately broad: surface any failure in the status box
                # instead of crashing the event queue.
                return current_view, "", f"βœ— {str(exc)}", gallery_imgs, gallery_imgs
        image_input.upload(
            fn=on_image_upload,
            inputs=[image_input, cam_view],
            outputs=[cam_view, width_sl, height_sl, status_box],
        )
        # 'submit' is fired from JS_ON_LOAD after every HUD button click.
        cam_view.submit(
            fn=on_camera_submit,
            inputs=[
                cam_view, image_input,
                seed_slider, rand_seed_cb, guidance_sl, steps_sl,
                height_sl, width_sl,
                gallery_state,
            ],
            outputs=[cam_view, prompt_box, status_box, gallery_state, gallery],
        )
    return demo
if __name__ == "__main__":
    demo = create_app()
    # FIX: theme and css passed to launch() as required by Gradio 6.0
    # NOTE(review): launch(theme=..., css=...) assumes the Gradio 6 API β€”
    # confirm against the version pinned in the Space's requirements.
    demo.launch(
        debug=True,
        theme=GRADIO_THEME,
        css=GLOBAL_CSS,
    )