""" Agentic Coding : 3D Camera View Generator - Qwen Image Edit + Lightning LoRA + Multi-Angle LoRA - gr.HTML custom component (Gradio 6) - ZeroGPU (HuggingFace Spaces) """ import gradio as gr import numpy as np import random import torch import base64 import spaces from io import BytesIO from PIL import Image from diffusers import QwenImageEditPlusPipeline MAX_SEED = np.iinfo(np.int32).max dtype = torch.bfloat16 device = "cuda" if torch.cuda.is_available() else "cpu" # ── Model Loading on ZEROGPU pipe = QwenImageEditPlusPipeline.from_pretrained( "Qwen/Qwen-Image-Edit-2511", torch_dtype=dtype, ).to(device) pipe.load_lora_weights( "lightx2v/Qwen-Image-Edit-2511-Lightning", weight_name="Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors", adapter_name="lightning", ) pipe.load_lora_weights( "fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA", weight_name="qwen-image-edit-2511-multiple-angles-lora.safetensors", adapter_name="angles", ) pipe.set_adapters(["lightning", "angles"], adapter_weights=[1.0, 1.0]) # ── Camera parameter tables ──────────────────────────────────────────────────── AZIMUTH_MAP = { 0: "front view", 45: "front-right quarter view", 90: "right side view", 135: "back-right quarter view", 180: "back view", 225: "back-left quarter view", 270: "left side view", 315: "front-left quarter view", } ELEVATION_MAP = { -30: "low-angle shot", 0: "eye-level shot", 30: "elevated shot", 60: "high-angle shot", } DISTANCE_MAP = { 0.6: "close-up", 1.0: "medium shot", 1.8: "wide shot", } # Default viewer state — plain dict, no custom class needed DEFAULT_CAM_VALUE = {"img": "", "az": 0.0, "el": 0.0, "dist": 1.0} def snap_to_nearest(value, steps): return min(steps, key=lambda x: abs(x - value)) def build_camera_prompt(azimuth, elevation, distance): az = snap_to_nearest(azimuth, list(AZIMUTH_MAP.keys())) el = snap_to_nearest(elevation, list(ELEVATION_MAP.keys())) dist = snap_to_nearest(distance, list(DISTANCE_MAP.keys())) return f" {AZIMUTH_MAP[az]} {ELEVATION_MAP[el]} {DISTANCE_MAP[dist]}" def pil_to_data_url(img: Image.Image) -> str: buf = BytesIO() fmt = getattr(img, "format", None) if fmt and fmt.upper() == "WEBP": img.save(buf, format="WEBP") mime = "image/webp" else: img.save(buf, format="PNG") mime = "image/png" b64 = base64.b64encode(buf.getvalue()).decode() return f"data:{mime};base64,{b64}" # ── Inference ────────────────────────────────────────────────────────────────── @spaces.GPU(duration=120) def infer_camera_edit( image, azimuth, elevation, distance, seed, randomize_seed, guidance_scale, num_inference_steps, height, width, ): if randomize_seed: seed = random.randint(0, MAX_SEED) generator = torch.Generator(device=device).manual_seed(seed) prompt = build_camera_prompt(azimuth, elevation, distance) result = pipe( image=image, prompt=prompt, height=height, width=width, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, generator=generator, ).images[0] return result, seed, prompt # ── gr.HTML templates ────────────────────────────────────────────────────────── # Using plain gr.HTML (no subclass) with a dict value. # # Gradio 6 passes the dict as `value` to the template; all keys (img, az, el, # dist) are accessible as value.img, value.az, etc. in both ${} and {{}} syntax. HTML_TEMPLATE = """

No image loaded

Upload an image on the left, then hover here to see camera controls

Az${value.az}° / El${value.el}° / Dist${value.dist}×

""" CSS_TEMPLATE = """ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } /* ── Image well ── dark neutral so images pop, same treatment as any professional image editor / camera app preview area. Not a stylistic choice but a functional one: images render best against dark. */ .cv-wrap { position: relative; width: 100%; height: 500px; background: #1c1c1e; border-radius: 12px; overflow: hidden; display: flex; align-items: center; justify-content: center; } .cv-img { max-width: 100%; max-height: 100%; object-fit: contain; display: block; } /* empty state */ .cv-empty { text-align: center; user-select: none; display: flex; flex-direction: column; align-items: center; gap: 14px; } .cv-empty-icon { width: 52px; height: 52px; color: rgba(255,255,255,0.2); } .cv-empty-title { font-size: 15px; font-weight: 500; letter-spacing: -0.01em; color: rgba(255,255,255,0.45); } .cv-empty-sub { font-size: 13px; max-width: 230px; line-height: 1.65; color: rgba(255,255,255,0.25); } /* HUD — fades in on hover via CSS, no JS needed */ .cv-hud { position: absolute; bottom: 16px; right: 16px; display: flex; flex-direction: column; align-items: flex-end; gap: 8px; opacity: 0; transition: opacity 0.16s ease; pointer-events: auto; } .cv-wrap:hover .cv-hud { opacity: 1; } /* coordinate readout — white card floating over image */ .cv-readout { display: flex; align-items: center; gap: 8px; background: rgba(255,255,255,0.96); border-radius: 7px; padding: 5px 13px; font-size: 12px; white-space: nowrap; box-shadow: 0 2px 12px rgba(0,0,0,0.25); } .cv-lbl { color: #9ca3af; font-size: 10px; text-transform: uppercase; letter-spacing: 0.04em; } .cv-val { color: #111827; font-weight: 600; font-variant-numeric: tabular-nums; } .cv-sep { color: #d1d5db; margin: 0 2px; } /* controls panel — white card, same treatment as readout */ .cv-controls { display: flex; align-items: center; gap: 8px; background: rgba(255,255,255,0.96); border-radius: 10px; padding: 8px 10px; box-shadow: 0 2px 12px rgba(0,0,0,0.25); } /* d-pad */ .cv-dpad { display: grid; grid-template-columns: repeat(3, 32px); grid-template-rows: repeat(3, 32px); gap: 3px; } .cv-btn { width: 32px; height: 32px; border: 1px solid #e5e7eb; border-radius: 6px; background: #ffffff; color: #6b7280; font-size: 10px; cursor: pointer; display: flex; align-items: center; justify-content: center; transition: background 0.1s, border-color 0.1s, color 0.1s, transform 0.08s; padding: 0; line-height: 1; } /* orange matches Gradio Default theme primary */ .cv-btn:hover { background: #fff7ed; border-color: #f97316; color: #f97316; transform: scale(1.1); } .cv-btn:active { transform: scale(0.92); background: #ffedd5; } .cv-up { grid-column:2; grid-row:1; } .cv-left { grid-column:1; grid-row:2; } .cv-dot { grid-column:2; grid-row:2; width:32px; height:32px; border-radius:50%; background: #f9fafb; border: 1px solid #e5e7eb; } .cv-right { grid-column:3; grid-row:2; } .cv-down { grid-column:2; grid-row:3; } /* zoom column */ .cv-zoom { display: flex; flex-direction: column; gap: 3px; } .cv-zbtn { width: 32px; height: 38px; border: 1px solid #e5e7eb; border-radius: 6px; background: #ffffff; color: #6b7280; font-size: 16px; font-weight: 400; cursor: pointer; display: flex; align-items: center; justify-content: center; transition: background 0.1s, border-color 0.1s, color 0.1s, transform 0.08s; padding: 0; line-height: 1; } .cv-zbtn:hover { background: #fff7ed; border-color: #f97316; color: #f97316; transform: scale(1.1); } .cv-zbtn:active { transform: scale(0.92); background: #ffedd5; } """ JS_ON_LOAD = """ const DIST_STEPS = [0.6, 1.0, 1.8]; function snapDist(d) { return DIST_STEPS.reduce((p, c) => Math.abs(c - d) < Math.abs(p - d) ? c : p); } function shiftDist(d, dir) { const idx = DIST_STEPS.indexOf(snapDist(Number(d))); return DIST_STEPS[Math.max(0, Math.min(DIST_STEPS.length - 1, idx + dir))]; } // Delegated click listener — attached once, survives template re-renders. element.addEventListener('click', function(e) { const btn = e.target.closest('[data-action]'); if (!btn) return; const v = Object.assign({}, props.value); let az = Number(v.az) || 0; let el = Number(v.el) || 0; let dist = Number(v.dist) || 1.0; switch (btn.dataset.action) { case 'az-minus': az = (az - 45 + 360) % 360; break; case 'az-plus': az = (az + 45) % 360; break; case 'el-plus': el = Math.min(60, el + 30); break; case 'el-minus': el = Math.max(-30, el - 30); break; case 'dist-minus': dist = shiftDist(dist, -1); break; case 'dist-plus': dist = shiftDist(dist, +1); break; } props.value = { ...v, az, el, dist }; trigger('submit'); }); """ # ── Global Gradio CSS ────────────────────────────────────────────────────────── GLOBAL_CSS = """ /* ── Row: never let the two columns wrap ── */ /* Gradio 6 renders rows as flex containers with class "flex" */ .gradio-container .flex.flex-row, .gradio-container .row { flex-wrap: nowrap !important; } /* ── Header ── */ .app-heading { padding: 28px 0 20px; } .app-heading h1 { font-size: clamp(24px, 3.5vw, 36px); font-weight: 700; letter-spacing: -0.02em; line-height: 1.1; color: #111827; margin: 0 0 10px; } .app-heading .chips { display: flex; flex-wrap: wrap; gap: 6px; } .app-heading .chip { display: inline-flex; align-items: center; gap: 5px; padding: 3px 10px; background: #fff7ed; border: 1px solid #fed7aa; border-radius: 999px; font-size: 12px; font-weight: 500; color: #c2410c; line-height: 1.5; } .app-heading .chip svg { width: 12px; height: 12px; opacity: 0.7; } /* ── Controls column — subtle card to separate it from viewer ── */ .controls-col > .block, .controls-col > .form { background: #fafafa !important; } /* ── Camera viewer column label ── */ .viewer-label { font-size: 13px; font-weight: 600; color: #374151; margin-bottom: 8px; display: flex; align-items: center; gap: 8px; } .viewer-label .hint { font-weight: 400; color: #9ca3af; font-size: 12px; } /* ── Status display ── replaces the plain textbox look */ .status-row { display: flex; align-items: center; gap: 8px; padding: 8px 12px; background: #f9fafb; border: 1px solid #e5e7eb; border-radius: 8px; margin-top: 6px; font-size: 12px; font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace; color: #6b7280; min-height: 38px; } /* status textbox — reduce visual weight */ .status-box textarea { font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace !important; font-size: 12px !important; color: #374151 !important; background: #f9fafb !important; border-color: #e5e7eb !important; resize: none !important; } /* ── Prompt box ── */ .prompt-box textarea { font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace !important; font-size: 12px !important; color: #6b7280 !important; } """ GRADIO_THEME = gr.themes.Default() # ── App ──────────────────────────────────────────────────────────────────────── def create_app(): # FIX: theme and css are now passed to launch(), not gr.Blocks() with gr.Blocks(title="3D Camera View Generator") as demo: gr.HTML("""

3D Camera View Generator

Qwen Image Edit 2511 ⚡ Lightning LoRA 📐 Multi-Angle LoRA

""") with gr.Row(): # ── Left column ────────────────────────────────────────────────── with gr.Column(scale=4, min_width=200, elem_classes=["controls-col"]): image_input = gr.Image( label="Source Image", type="pil", height=320, ) prompt_box = gr.Textbox( label="Active Camera Prompt", value=" front view eye-level shot medium shot", interactive=False, lines=1, elem_classes=["prompt-box"], ) with gr.Accordion("⚙ Generation Settings", open=False): seed_slider = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed") rand_seed_cb = gr.Checkbox(True, label="Randomise seed each generation") guidance_sl = gr.Slider(1.0, 20.0, value=1.0, step=0.1, label="Guidance Scale (keep ≤1 for Lightning LoRA)") steps_sl = gr.Slider(1, 50, value=4, step=1, label="Inference Steps") width_sl = gr.Slider(256, 1024, value=1024, step=32, label="Width (px)") height_sl = gr.Slider(256, 1024, value=1024, step=32, label="Height (px)") # ── Right column ───────────────────────────────────────────────── with gr.Column(scale=6, min_width=280): gr.HTML("""

Camera View — hover to reveal orbit controls

""") # FIX: plain gr.HTML with dict value — no subclass, no inspect error cam_view = gr.HTML( value=DEFAULT_CAM_VALUE, html_template=HTML_TEMPLATE, css_template=CSS_TEMPLATE, js_on_load=JS_ON_LOAD, apply_default_css=False, ) status_box = gr.Textbox( label="Status", value="Ready — upload an image to begin", interactive=False, lines=1, elem_classes=["status-box"], ) gallery_state = gr.State([]) with gr.Accordion("🖼 Generated Views", open=False): gallery = gr.Gallery( label="", show_label=False, columns=4, height="auto", object_fit="cover", allow_preview=True, ) # ── Helpers ────────────────────────────────────────────────────────── def _coerce_view(v): """Extract (az, el, dist) safely from a dict or default.""" if isinstance(v, dict): return float(v.get("az", 0)), float(v.get("el", 0)), float(v.get("dist", 1.0)) return 0.0, 0.0, 1.0 def _auto_dimensions(img): if img is None: return 1024, 1024 w, h = img.size ar = w / h if ar > 1: nw = 1024 nh = round(1024 / ar / 32) * 32 else: nh = 1024 nw = round(1024 * ar / 32) * 32 return max(256, min(1024, nw)), max(256, min(1024, nh)) # ── Event handlers ──────────────────────────────────────────────────── def on_image_upload(img, current_view): nw, nh = _auto_dimensions(img) if img is None: return DEFAULT_CAM_VALUE.copy(), nw, nh, "No image" az, el, dist = _coerce_view(current_view) return ( {"img": pil_to_data_url(img), "az": az, "el": el, "dist": dist}, nw, nh, "Image loaded — hover the viewer and click an arrow to generate", ) def on_camera_submit( current_view, src_img, seed_val, rand_seed, guidance, steps, h, w, gallery_imgs, ): try: az, el, dist = _coerce_view(current_view) prompt = build_camera_prompt(az, el, dist) if src_img is None: return current_view, prompt, "⚠ Upload an image first", gallery_imgs, gallery_imgs gen_img, final_seed, final_prompt = infer_camera_edit( image=src_img, azimuth=az, elevation=el, distance=dist, seed=seed_val, randomize_seed=rand_seed, guidance_scale=guidance, num_inference_steps=int(steps), height=int(h), width=int(w), ) new_view = {"img": pil_to_data_url(gen_img), "az": az, "el": el, "dist": dist} gallery_imgs = list(gallery_imgs) + [gen_img] status = f"✓ {final_prompt} | seed {final_seed}" return new_view, final_prompt, status, gallery_imgs, gallery_imgs except Exception as exc: return current_view, "", f"✗ {str(exc)}", gallery_imgs, gallery_imgs image_input.upload( fn=on_image_upload, inputs=[image_input, cam_view], outputs=[cam_view, width_sl, height_sl, status_box], ) cam_view.submit( fn=on_camera_submit, inputs=[ cam_view, image_input, seed_slider, rand_seed_cb, guidance_sl, steps_sl, height_sl, width_sl, gallery_state, ], outputs=[cam_view, prompt_box, status_box, gallery_state, gallery], ) return demo if __name__ == "__main__": demo = create_app() # FIX: theme and css passed to launch() as required by Gradio 6.0 demo.launch( debug=True, theme=GRADIO_THEME, css=GLOBAL_CSS, )