Spaces:
Running on Zero
Running on Zero
| """ | |
| Agentic Coding : 3D Camera View Generator | |
| - Qwen Image Edit + Lightning LoRA + Multi-Angle LoRA | |
| - gr.HTML custom component (Gradio 6) | |
| - ZeroGPU (HuggingFace Spaces) | |
| """ | |
| import gradio as gr | |
| import numpy as np | |
| import random | |
| import torch | |
| import base64 | |
| import spaces | |
| from io import BytesIO | |
| from PIL import Image | |
| from diffusers import QwenImageEditPlusPipeline | |
| MAX_SEED = np.iinfo(np.int32).max | |
| dtype = torch.bfloat16 | |
| device = "cuda" if torch.cuda.is_available() else "cpu" | |
# ── Model Loading ───────────────────────────────────────────────────────────
# Base editing pipeline. Weights are pulled from the HuggingFace Hub on first
# run and cached; this executes at import time, so importing the module is
# expensive by design (standard pattern for Spaces).
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2511",
    torch_dtype=dtype,
).to(device)
# Lightning LoRA: distilled adapter enabling ~4-step inference (see the
# 4-step default of the steps slider in create_app()).
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Edit-2511-Lightning",
    weight_name="Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
    adapter_name="lightning",
)
# Multi-angle LoRA: provides the "<sks> ..." camera-direction vocabulary
# emitted by build_camera_prompt().
pipe.load_lora_weights(
    "fal/Qwen-Image-Edit-2511-Multiple-Angles-LoRA",
    weight_name="qwen-image-edit-2511-multiple-angles-lora.safetensors",
    adapter_name="angles",
)
# Both adapters active simultaneously, each at full strength.
pipe.set_adapters(["lightning", "angles"], adapter_weights=[1.0, 1.0])
# ── Camera parameter tables ─────────────────────────────────────────────────
# Discrete camera positions the multi-angle LoRA was trained on. Arbitrary
# slider/JS values are snapped onto these keys before prompt construction.
AZIMUTH_MAP = {
    0: "front view",
    45: "front-right quarter view",
    90: "right side view",
    135: "back-right quarter view",
    180: "back view",
    225: "back-left quarter view",
    270: "left side view",
    315: "front-left quarter view",
}
ELEVATION_MAP = {
    -30: "low-angle shot",
    0: "eye-level shot",
    30: "elevated shot",
    60: "high-angle shot",
}
DISTANCE_MAP = {
    0.6: "close-up",
    1.0: "medium shot",
    1.8: "wide shot",
}
# Initial viewer state: no image, front view, eye level, medium distance.
# Kept as a plain dict so it can be passed straight to gr.HTML as its value.
DEFAULT_CAM_VALUE = {"img": "", "az": 0.0, "el": 0.0, "dist": 1.0}


def snap_to_nearest(value, steps):
    """Return the entry of *steps* closest to *value* (first wins on ties)."""
    def offset(step):
        return abs(step - value)
    return min(steps, key=offset)


def build_camera_prompt(azimuth, elevation, distance):
    """Compose the "<sks> ..." trigger prompt for the multi-angle LoRA.

    Each continuous camera parameter is first snapped to the nearest
    discrete step the LoRA understands, then mapped to its phrase.
    """
    parts = [
        AZIMUTH_MAP[snap_to_nearest(azimuth, list(AZIMUTH_MAP.keys()))],
        ELEVATION_MAP[snap_to_nearest(elevation, list(ELEVATION_MAP.keys()))],
        DISTANCE_MAP[snap_to_nearest(distance, list(DISTANCE_MAP.keys()))],
    ]
    return "<sks> " + " ".join(parts)
def pil_to_data_url(img: Image.Image) -> str:
    """Serialise a PIL image into a base64 ``data:`` URL.

    WEBP sources are re-encoded as WEBP to keep their size advantage;
    everything else (including images with no ``format``, e.g. freshly
    generated ones) is encoded losslessly as PNG.
    """
    source_format = getattr(img, "format", None)
    if source_format and source_format.upper() == "WEBP":
        target, mime = "WEBP", "image/webp"
    else:
        target, mime = "PNG", "image/png"
    buffer = BytesIO()
    img.save(buffer, format=target)
    payload = base64.b64encode(buffer.getvalue()).decode()
    return f"data:{mime};base64,{payload}"
# ── Inference ───────────────────────────────────────────────────────────────
def _zerogpu(fn):
    """Attach ZeroGPU allocation to *fn* when running on HF Spaces.

    BUG FIX: `spaces` was imported at module level but never used — on a
    ZeroGPU Space, any function that touches CUDA must be decorated with
    ``spaces.GPU`` or no GPU is attached at call time. The import is guarded
    here so the function stays importable in local dev environments where
    the `spaces` package is absent.
    """
    try:
        import spaces
        return spaces.GPU(fn)
    except ImportError:  # local run without ZeroGPU
        return fn


@_zerogpu
def infer_camera_edit(
    image, azimuth, elevation, distance,
    seed, randomize_seed, guidance_scale,
    num_inference_steps, height, width,
):
    """Run one Qwen-Image-Edit pass for the requested camera position.

    Args:
        image: source PIL image to re-render.
        azimuth / elevation / distance: continuous camera parameters,
            snapped to the LoRA's discrete steps by build_camera_prompt().
        seed: base RNG seed; replaced by a fresh random one when
            *randomize_seed* is true.
        guidance_scale / num_inference_steps / height / width: forwarded
            to the pipeline unchanged.

    Returns:
        (edited PIL image, seed actually used, prompt actually used).
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Gradio sliders may deliver floats; torch requires an integer seed.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    prompt = build_camera_prompt(azimuth, elevation, distance)
    result = pipe(
        image=image,
        prompt=prompt,
        height=height,
        width=width,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        generator=generator,
    ).images[0]
    return result, seed, prompt
# ── gr.HTML templates ───────────────────────────────────────────────────────
# Using plain gr.HTML (no subclass) with a dict value.
#
# Gradio 6 passes the dict as `value` to the template; all keys (img, az, el,
# dist) are accessible as value.img, value.az, etc. in both ${} and {{}} syntax.
#
# NOTE(review): the template mixes {{#if}}/{{else}} block syntax with ${}
# interpolation — assumes the Gradio 6 HTML component accepts both forms;
# confirm against the gr.HTML template docs.
#
# Structure: an image well (.cv-wrap) containing either the current image or
# an empty-state placeholder, plus a hover HUD (.cv-hud) with the az/el/dist
# readout, a 3x3 d-pad for orbiting, and a zoom column. The data-action
# attributes on the buttons are consumed by the delegated click listener in
# JS_ON_LOAD below.
HTML_TEMPLATE = """
<div class="cv-wrap">
{{#if value.img}}
<img class="cv-img" src="{{value.img}}">
{{else}}
<div class="cv-empty">
<svg class="cv-empty-icon" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="1.25">
<path stroke-linecap="round" stroke-linejoin="round" d="M6.827 6.175A2.31 2.31 0 015.186 7.23c-.38.054-.757.112-1.134.175C2.999 7.58 2.25 8.507 2.25 9.574V18a2.25 2.25 0 002.25 2.25h15A2.25 2.25 0 0021.75 18V9.574c0-1.067-.75-1.994-1.802-2.169a47.865 47.865 0 00-1.134-.175 2.31 2.31 0 01-1.64-1.055l-.822-1.316a2.192 2.192 0 00-1.736-1.039 48.774 48.774 0 00-5.232 0 2.192 2.192 0 00-1.736 1.039l-.821 1.316z" />
<path stroke-linecap="round" stroke-linejoin="round" d="M16.5 12.75a4.5 4.5 0 11-9 0 4.5 4.5 0 019 0zM18.75 10.5h.008v.008h-.008V10.5z" />
</svg>
<p class="cv-empty-title">No image loaded</p>
<p class="cv-empty-sub">Upload an image on the left, then hover here to see camera controls</p>
</div>
{{/if}}
<div class="cv-hud">
<div class="cv-readout">
<span class="cv-lbl">Az</span><span class="cv-val">${value.az}°</span>
<span class="cv-sep">/</span>
<span class="cv-lbl">El</span><span class="cv-val">${value.el}°</span>
<span class="cv-sep">/</span>
<span class="cv-lbl">Dist</span><span class="cv-val">${value.dist}×</span>
</div>
<div class="cv-controls">
<div class="cv-dpad">
<button class="cv-btn cv-up" data-action="el-plus" title="Elevate">▲</button>
<button class="cv-btn cv-left" data-action="az-minus" title="Rotate Left">◀</button>
<div class="cv-dot"></div>
<button class="cv-btn cv-right" data-action="az-plus" title="Rotate Right">▶</button>
<button class="cv-btn cv-down" data-action="el-minus" title="Lower">▼</button>
</div>
<div class="cv-zoom">
<button class="cv-zbtn" data-action="dist-minus" title="Zoom In">+</button>
<button class="cv-zbtn" data-action="dist-plus" title="Zoom Out">−</button>
</div>
</div>
</div>
</div>
"""
# Component-scoped CSS for the camera viewer (passed as css_template to
# gr.HTML). Hover-reveal of the HUD is pure CSS (.cv-wrap:hover .cv-hud),
# so no JS is needed for show/hide.
CSS_TEMPLATE = """
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
/* ββ Image well ββ dark neutral so images pop, same treatment as any
professional image editor / camera app preview area. Not a stylistic
choice but a functional one: images render best against dark. */
.cv-wrap {
position: relative;
width: 100%; height: 500px;
background: #1c1c1e;
border-radius: 12px;
overflow: hidden;
display: flex; align-items: center; justify-content: center;
}
.cv-img {
max-width: 100%; max-height: 100%;
object-fit: contain; display: block;
}
/* empty state */
.cv-empty {
text-align: center; user-select: none;
display: flex; flex-direction: column; align-items: center; gap: 14px;
}
.cv-empty-icon {
width: 52px; height: 52px;
color: rgba(255,255,255,0.2);
}
.cv-empty-title {
font-size: 15px; font-weight: 500; letter-spacing: -0.01em;
color: rgba(255,255,255,0.45);
}
.cv-empty-sub {
font-size: 13px; max-width: 230px; line-height: 1.65;
color: rgba(255,255,255,0.25);
}
/* HUD β fades in on hover via CSS, no JS needed */
.cv-hud {
position: absolute; bottom: 16px; right: 16px;
display: flex; flex-direction: column; align-items: flex-end; gap: 8px;
opacity: 0; transition: opacity 0.16s ease; pointer-events: auto;
}
.cv-wrap:hover .cv-hud { opacity: 1; }
/* coordinate readout β white card floating over image */
.cv-readout {
display: flex; align-items: center; gap: 8px;
background: rgba(255,255,255,0.96);
border-radius: 7px; padding: 5px 13px;
font-size: 12px; white-space: nowrap;
box-shadow: 0 2px 12px rgba(0,0,0,0.25);
}
.cv-lbl { color: #9ca3af; font-size: 10px; text-transform: uppercase; letter-spacing: 0.04em; }
.cv-val { color: #111827; font-weight: 600; font-variant-numeric: tabular-nums; }
.cv-sep { color: #d1d5db; margin: 0 2px; }
/* controls panel β white card, same treatment as readout */
.cv-controls {
display: flex; align-items: center; gap: 8px;
background: rgba(255,255,255,0.96);
border-radius: 10px; padding: 8px 10px;
box-shadow: 0 2px 12px rgba(0,0,0,0.25);
}
/* d-pad */
.cv-dpad {
display: grid;
grid-template-columns: repeat(3, 32px);
grid-template-rows: repeat(3, 32px);
gap: 3px;
}
.cv-btn {
width: 32px; height: 32px;
border: 1px solid #e5e7eb; border-radius: 6px;
background: #ffffff; color: #6b7280;
font-size: 10px; cursor: pointer;
display: flex; align-items: center; justify-content: center;
transition: background 0.1s, border-color 0.1s, color 0.1s, transform 0.08s;
padding: 0; line-height: 1;
}
/* orange matches Gradio Default theme primary */
.cv-btn:hover {
background: #fff7ed; border-color: #f97316; color: #f97316;
transform: scale(1.1);
}
.cv-btn:active { transform: scale(0.92); background: #ffedd5; }
.cv-up { grid-column:2; grid-row:1; }
.cv-left { grid-column:1; grid-row:2; }
.cv-dot {
grid-column:2; grid-row:2;
width:32px; height:32px; border-radius:50%;
background: #f9fafb; border: 1px solid #e5e7eb;
}
.cv-right { grid-column:3; grid-row:2; }
.cv-down { grid-column:2; grid-row:3; }
/* zoom column */
.cv-zoom { display: flex; flex-direction: column; gap: 3px; }
.cv-zbtn {
width: 32px; height: 38px;
border: 1px solid #e5e7eb; border-radius: 6px;
background: #ffffff; color: #6b7280;
font-size: 16px; font-weight: 400; cursor: pointer;
display: flex; align-items: center; justify-content: center;
transition: background 0.1s, border-color 0.1s, color 0.1s, transform 0.08s;
padding: 0; line-height: 1;
}
.cv-zbtn:hover {
background: #fff7ed; border-color: #f97316; color: #f97316;
transform: scale(1.1);
}
.cv-zbtn:active { transform: scale(0.92); background: #ffedd5; }
"""
# Client-side behavior for the viewer (passed as js_on_load to gr.HTML).
# NOTE(review): `element`, `props`, and `trigger` are assumed to be names
# injected into scope by the Gradio 6 HTML component runtime — confirm
# against the gr.HTML docs. Each HUD button click mutates the camera state
# in props.value and fires the component's 'submit' event, which the Python
# side binds to on_camera_submit() in create_app().
# DIST_STEPS must stay in sync with DISTANCE_MAP in the Python code above.
JS_ON_LOAD = """
const DIST_STEPS = [0.6, 1.0, 1.8];
function snapDist(d) {
return DIST_STEPS.reduce((p, c) => Math.abs(c - d) < Math.abs(p - d) ? c : p);
}
function shiftDist(d, dir) {
const idx = DIST_STEPS.indexOf(snapDist(Number(d)));
return DIST_STEPS[Math.max(0, Math.min(DIST_STEPS.length - 1, idx + dir))];
}
// Delegated click listener β attached once, survives template re-renders.
element.addEventListener('click', function(e) {
const btn = e.target.closest('[data-action]');
if (!btn) return;
const v = Object.assign({}, props.value);
let az = Number(v.az) || 0;
let el = Number(v.el) || 0;
let dist = Number(v.dist) || 1.0;
switch (btn.dataset.action) {
case 'az-minus': az = (az - 45 + 360) % 360; break;
case 'az-plus': az = (az + 45) % 360; break;
case 'el-plus': el = Math.min(60, el + 30); break;
case 'el-minus': el = Math.max(-30, el - 30); break;
case 'dist-minus': dist = shiftDist(dist, -1); break;
case 'dist-plus': dist = shiftDist(dist, +1); break;
}
props.value = { ...v, az, el, dist };
trigger('submit');
});
"""
# ── Global Gradio CSS ───────────────────────────────────────────────────────
# Page-level styling injected via launch(css=...). Class hooks (.controls-col,
# .status-box, .prompt-box, .app-heading, .viewer-label) match the
# elem_classes / inline HTML used in create_app() below.
GLOBAL_CSS = """
/* ββ Row: never let the two columns wrap ββ */
/* Gradio 6 renders rows as flex containers with class "flex" */
.gradio-container .flex.flex-row,
.gradio-container .row {
flex-wrap: nowrap !important;
}
/* ββ Header ββ */
.app-heading { padding: 28px 0 20px; }
.app-heading h1 {
font-size: clamp(24px, 3.5vw, 36px);
font-weight: 700;
letter-spacing: -0.02em;
line-height: 1.1;
color: #111827;
margin: 0 0 10px;
}
.app-heading .chips {
display: flex; flex-wrap: wrap; gap: 6px;
}
.app-heading .chip {
display: inline-flex; align-items: center; gap: 5px;
padding: 3px 10px;
background: #fff7ed;
border: 1px solid #fed7aa;
border-radius: 999px;
font-size: 12px; font-weight: 500;
color: #c2410c;
line-height: 1.5;
}
.app-heading .chip svg {
width: 12px; height: 12px; opacity: 0.7;
}
/* ββ Controls column β subtle card to separate it from viewer ββ */
.controls-col > .block,
.controls-col > .form {
background: #fafafa !important;
}
/* ββ Camera viewer column label ββ */
.viewer-label {
font-size: 13px; font-weight: 600;
color: #374151;
margin-bottom: 8px;
display: flex; align-items: center; gap: 8px;
}
.viewer-label .hint {
font-weight: 400; color: #9ca3af; font-size: 12px;
}
/* ββ Status display ββ replaces the plain textbox look */
.status-row {
display: flex; align-items: center; gap: 8px;
padding: 8px 12px;
background: #f9fafb;
border: 1px solid #e5e7eb;
border-radius: 8px;
margin-top: 6px;
font-size: 12px;
font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace;
color: #6b7280;
min-height: 38px;
}
/* status textbox β reduce visual weight */
.status-box textarea {
font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace !important;
font-size: 12px !important;
color: #374151 !important;
background: #f9fafb !important;
border-color: #e5e7eb !important;
resize: none !important;
}
/* ββ Prompt box ββ */
.prompt-box textarea {
font-family: ui-monospace, "Cascadia Code", "Source Code Pro", monospace !important;
font-size: 12px !important;
color: #6b7280 !important;
}
"""
# Stock Default theme; passed to launch() alongside GLOBAL_CSS.
GRADIO_THEME = gr.themes.Default()
# ── App ─────────────────────────────────────────────────────────────────────
def create_app():
    """Build the Gradio Blocks UI and wire its events; returns the app.

    Launching — and therefore theme/css injection — is left to the caller
    (see the __main__ block at the bottom of the file).
    """
    # FIX: theme and css are now passed to launch(), not gr.Blocks()
    with gr.Blocks(title="3D Camera View Generator") as demo:
        # Static header banner; styled by the .app-heading rules in GLOBAL_CSS.
        gr.HTML("""
<div class="app-heading">
<h1>3D Camera View Generator</h1>
<div class="chips">
<span class="chip">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="currentColor"><path d="M9.653 16.915l-.005-.003-.019-.01a20.759 20.759 0 01-1.162-.682 22.045 22.045 0 01-2.582-2.085c-1.034-1.036-2.035-2.329-2.535-3.765-.583-1.683-.322-3.498.985-4.82C5.576 4.29 7.319 3.75 9 3.75c.921 0 1.85.205 2.704.596L13 3.25l1.304 1.304L13 5.858a6.001 6.001 0 010 8.284l-.707.707-2.64-2.64z"/></svg>
Qwen Image Edit 2511
</span>
<span class="chip">β‘ Lightning LoRA</span>
<span class="chip">π Multi-Angle LoRA</span>
</div>
</div>
""")
        with gr.Row():
            # ── Left column: source image + generation settings ──────────
            with gr.Column(scale=4, min_width=200, elem_classes=["controls-col"]):
                image_input = gr.Image(
                    label="Source Image",
                    type="pil",  # handlers receive a PIL.Image, matching pipe()
                    height=320,
                )
                # Read-only echo of the prompt derived from the camera state.
                prompt_box = gr.Textbox(
                    label="Active Camera Prompt",
                    value="<sks> front view eye-level shot medium shot",
                    interactive=False,
                    lines=1,
                    elem_classes=["prompt-box"],
                )
                with gr.Accordion("β Generation Settings", open=False):
                    seed_slider = gr.Slider(0, MAX_SEED, value=42, step=1, label="Seed")
                    rand_seed_cb = gr.Checkbox(True, label="Randomise seed each generation")
                    guidance_sl = gr.Slider(1.0, 20.0, value=1.0, step=0.1, label="Guidance Scale (keep β€1 for Lightning LoRA)")
                    # 4 steps matches the Lightning LoRA's distillation target.
                    steps_sl = gr.Slider(1, 50, value=4, step=1, label="Inference Steps")
                    width_sl = gr.Slider(256, 1024, value=1024, step=32, label="Width (px)")
                    height_sl = gr.Slider(256, 1024, value=1024, step=32, label="Height (px)")
            # ── Right column: interactive camera viewer ──────────────────
            with gr.Column(scale=6, min_width=280):
                gr.HTML("""
<div class="viewer-label">
Camera View
<span class="hint">β hover to reveal orbit controls</span>
</div>
""")
                # FIX: plain gr.HTML with dict value β no subclass, no inspect error
                cam_view = gr.HTML(
                    value=DEFAULT_CAM_VALUE,
                    html_template=HTML_TEMPLATE,
                    css_template=CSS_TEMPLATE,
                    js_on_load=JS_ON_LOAD,
                    apply_default_css=False,
                )
                status_box = gr.Textbox(
                    label="Status",
                    value="Ready β upload an image to begin",
                    interactive=False,
                    lines=1,
                    elem_classes=["status-box"],
                )
        # Backing list of generated PIL images; mirrored into the gallery.
        gallery_state = gr.State([])
        with gr.Accordion("πΌ Generated Views", open=False):
            gallery = gr.Gallery(
                label="",
                show_label=False,
                columns=4,
                height="auto",
                object_fit="cover",
                allow_preview=True,
            )
        # ── Helpers ──────────────────────────────────────────────────────
        def _coerce_view(v):
            """Extract (az, el, dist) safely from a dict or default."""
            if isinstance(v, dict):
                return float(v.get("az", 0)), float(v.get("el", 0)), float(v.get("dist", 1.0))
            return 0.0, 0.0, 1.0

        def _auto_dimensions(img):
            # Fit the source aspect ratio into a 1024px box, rounding the
            # minor side to a multiple of 32 (slider step; diffusion-friendly),
            # then clamp both sides to the 256..1024 slider range.
            if img is None:
                return 1024, 1024
            w, h = img.size
            ar = w / h
            if ar > 1:
                nw = 1024
                nh = round(1024 / ar / 32) * 32
            else:
                nh = 1024
                nw = round(1024 * ar / 32) * 32
            return max(256, min(1024, nw)), max(256, min(1024, nh))

        # ── Event handlers ───────────────────────────────────────────────
        def on_image_upload(img, current_view):
            # Push the uploaded image into the viewer while preserving the
            # current camera angles; also auto-fill the size sliders.
            nw, nh = _auto_dimensions(img)
            if img is None:
                return DEFAULT_CAM_VALUE.copy(), nw, nh, "No image"
            az, el, dist = _coerce_view(current_view)
            return (
                {"img": pil_to_data_url(img), "az": az, "el": el, "dist": dist},
                nw,
                nh,
                "Image loaded β hover the viewer and click an arrow to generate",
            )

        def on_camera_submit(
            current_view, src_img,
            seed_val, rand_seed, guidance, steps, h, w,
            gallery_imgs,
        ):
            # Fired by JS_ON_LOAD's trigger('submit') after each HUD click:
            # regenerate the view for the new camera state.
            try:
                az, el, dist = _coerce_view(current_view)
                prompt = build_camera_prompt(az, el, dist)
                if src_img is None:
                    return current_view, prompt, "β Upload an image first", gallery_imgs, gallery_imgs
                gen_img, final_seed, final_prompt = infer_camera_edit(
                    image=src_img,
                    azimuth=az, elevation=el, distance=dist,
                    seed=seed_val, randomize_seed=rand_seed,
                    guidance_scale=guidance,
                    num_inference_steps=int(steps),
                    height=int(h), width=int(w),
                )
                # Show the result in the viewer so the user keeps orbiting
                # from the newly generated image.
                new_view = {"img": pil_to_data_url(gen_img), "az": az, "el": el, "dist": dist}
                # Copy before append so gr.State never aliases a mutated list.
                gallery_imgs = list(gallery_imgs) + [gen_img]
                status = f"β {final_prompt} | seed {final_seed}"
                return new_view, final_prompt, status, gallery_imgs, gallery_imgs
            except Exception as exc:
                # Surface the error in the status box instead of crashing the UI.
                return current_view, "", f"β {str(exc)}", gallery_imgs, gallery_imgs

        image_input.upload(
            fn=on_image_upload,
            inputs=[image_input, cam_view],
            outputs=[cam_view, width_sl, height_sl, status_box],
        )
        cam_view.submit(
            fn=on_camera_submit,
            inputs=[
                cam_view, image_input,
                seed_slider, rand_seed_cb, guidance_sl, steps_sl,
                height_sl, width_sl,
                gallery_state,
            ],
            outputs=[cam_view, prompt_box, status_box, gallery_state, gallery],
        )
    return demo
| if __name__ == "__main__": | |
| demo = create_app() | |
| # FIX: theme and css passed to launch() as required by Gradio 6.0 | |
| demo.launch( | |
| debug=True, | |
| theme=GRADIO_THEME, | |
| css=GLOBAL_CSS, | |
| ) |