# Source: Hugging Face Space by multimodalart (HF Staff)
# Commit c65f051 (verified): "lock proportion for corners"
import gradio as gr
import numpy as np
import random
import torch
import spaces
import base64
import json
import io
import os
from io import BytesIO
from PIL import Image, ImageDraw
from diffusers import Flux2KleinPipeline
from huggingface_hub import InferenceClient
# Upper bound for user-supplied and randomly drawn seeds (max signed 32-bit int).
MAX_SEED = np.iinfo(np.int32).max
# The pipeline weights are loaded in bfloat16.
dtype = torch.bfloat16
# Single source of truth for device placement; infer() builds its RNG on it too.
device = "cuda" if torch.cuda.is_available() else "cpu"

# ── Load main pipeline ────────────────────────────────────────────────────────
# REPO_ID = "black-forest-labs/FLUX.2-klein-base-9B"
REPO_ID = "black-forest-labs/FLUX.2-klein-9B"
print(f"Loading {REPO_ID}...")
pipe = Flux2KleinPipeline.from_pretrained(REPO_ID, torch_dtype=dtype)
# Place the pipeline on the detected device instead of a hard-coded "cuda" so
# the model and the seed generator can never end up on different devices.
pipe.to(device)
# pipe.load_lora_weights("linoyts/flux2-klein-lora-v3", weight_name="pytorch_lora_weights_v4.safetensors")
pipe.load_lora_weights(
    "linoyts/flux2-klein-bbox-drag-drop-lora",
    weight_name="pytorch_lora_weights.safetensors",
)
# Fuse the LoRA into the base weights.
pipe.fuse_lora(lora_scale=1.25)  # for the few step inference might work with higher scale
print("Pipeline ready.")
# ── VLM client for object detection + labeling ───────────────────────────────
# Model identifier sent with every detection request.
VLM_MODEL = "Qwen/Qwen3-VL-8B-Instruct"
# Inference client authenticated with the HF_TOKEN environment variable
# (``None`` is passed when the variable is unset).
hf_client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
# Prompt template for moving a named object; ``{object_name}`` appears three times.
OBJECT_PROMPT_TEMPLATE = (
    "Move the {object_name} inside the red bounding box to the position and size "
    "indicated by the green bounding box. Remove the {object_name} from its original "
    "location in the red box, filling in the background naturally. Remove the "
    "bounding boxes and seamlessly blend the repositioned {object_name} into the scene, "
    "preserving all other objects and the background exactly as they are."
)
# Generic prompt used when no object name is known. Derived from the template so
# the two texts cannot drift apart; the resulting string is unchanged.
DEFAULT_PROMPT = OBJECT_PROMPT_TEMPLATE.format(object_name="object")
# ── Helpers ───────────────────────────────────────────────────────────────────
def b64_to_pil(b64_str):
    """Decode an image ``data:`` URI into an RGB PIL image.

    Returns ``None`` when the input is empty, does not start with
    ``data:image``, or cannot be decoded (the error is printed).
    """
    looks_like_image_uri = bool(b64_str) and b64_str.startswith("data:image")
    if not looks_like_image_uri:
        return None
    try:
        # Everything after the first comma is the base64 payload.
        _header, payload = b64_str.split(',', 1)
        image_bytes = base64.b64decode(payload)
        decoded = Image.open(BytesIO(image_bytes))
        return decoded.convert("RGB")
    except Exception as e:
        print(f"Error decoding image: {e}")
        return None
def pil_to_b64(pil_img, max_dim=128):
    """Encode a PIL image as a small JPEG data URI (for thumbnails).

    Args:
        pil_img: Source image; it is copied, never modified.
        max_dim: Maximum width and height of the thumbnail in pixels.

    Returns:
        A ``data:image/jpeg;base64,...`` string.
    """
    thumb = pil_img.copy()
    # The JPEG writer cannot store alpha or palette modes (RGBA, LA, P, ...);
    # convert those to RGB so the save below does not raise.
    if thumb.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        thumb = thumb.convert("RGB")
    # thumbnail() resizes in place and keeps the aspect ratio.
    thumb.thumbnail((max_dim, max_dim), Image.LANCZOS)
    buf = BytesIO()
    thumb.save(buf, format="JPEG", quality=80)
    return "data:image/jpeg;base64," + base64.b64encode(buf.getvalue()).decode()
def image_to_data_uri(img):
    """Serialize *img* as PNG and return it as a base64 ``data:`` URI."""
    with BytesIO() as png_buffer:
        img.save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue())
    return f"data:image/png;base64,{encoded.decode()}"
def _box_to_pixels(box, w, h):
    """Scale a normalized ``{x1, y1, x2, y2}`` box to ordered integer pixel corners."""
    xa, xb = int(box["x1"] * w), int(box["x2"] * w)
    ya, yb = int(box["y1"] * h), int(box["y2"] * h)
    # ImageDraw.rectangle rejects a second corner that precedes the first, so
    # always hand it (left, top, right, bottom).
    return [min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)]


def burn_boxes_onto_image(pil_image, boxes_json_str):
    """Draw the source (red) and destination (green) boxes onto a copy of the image.

    Args:
        pil_image: Image to annotate; returned unchanged when falsy.
        boxes_json_str: JSON object with ``src`` and ``dst`` entries, each holding
            ``x1``/``y1``/``x2``/``y2`` as fractions of the image width/height.

    Returns:
        A new RGB image with both rectangles drawn, or ``pil_image`` itself when
        the JSON is missing, malformed, not an object, or lacks either box.
    """
    if not pil_image:
        return pil_image
    try:
        boxes = json.loads(boxes_json_str) if boxes_json_str and boxes_json_str.strip() else {}
    except (ValueError, TypeError, AttributeError):
        boxes = {}
    # Valid JSON that is not an object (e.g. "[]" or "null") carries no boxes.
    if not isinstance(boxes, dict):
        boxes = {}
    src = boxes.get("src")
    dst = boxes.get("dst")
    if not src or not dst:
        return pil_image
    img = pil_image.copy().convert("RGB")
    w, h = img.size
    draw = ImageDraw.Draw(img)
    # Outline width grows with the image width, never thinner than 4 px.
    bw = max(4, w // 150)
    # Green = destination (drawn first, thicker so it peeks out under red)
    draw.rectangle(_box_to_pixels(dst, w, h), outline=(0, 255, 0), width=bw + 3)
    # Red = source (drawn on top)
    draw.rectangle(_box_to_pixels(src, w, h), outline=(255, 0, 0), width=bw)
    return img
# ── Object detection ──────────────────────────────────────────────────────────
def _strip_code_fence(raw):
    """Return the payload of the first Markdown code fence in *raw*, or *raw* itself."""
    payload = raw.split("```")[1] if "```" in raw else raw
    payload = payload.strip()
    # Drop the language tag of a ```json fence.
    if payload.startswith("json"):
        payload = payload[4:].strip()
    return payload


def detect_objects(b64_str):
    """Detect objects via Qwen3-VL-8B API — single call for both bboxes and labels.

    Args:
        b64_str: Image as a ``data:image/...;base64,`` URI.

    Returns:
        A JSON string encoding a list of ``{"label", "bbox", "thumb"}`` entries.
        ``bbox`` holds ``x1``/``y1``/``x2``/``y2`` as fractions of the image size
        and ``thumb`` is a JPEG data URI of the cropped region. Returns ``"[]"``
        when the image cannot be decoded, nothing usable is detected, or the
        request or parsing fails (the error and traceback are printed).
    """
    pil_image = b64_to_pil(b64_str)
    if pil_image is None:
        return "[]"
    try:
        w, h = pil_image.size
        # Encode as JPEG for the API call (much smaller than PNG)
        buf = BytesIO()
        pil_image.save(buf, format="JPEG", quality=85)
        img_uri = "data:image/jpeg;base64," + base64.b64encode(buf.getvalue()).decode()
        # NOTE(review): the prompt asks for absolute pixel coordinates while the
        # parsing below treats the answer as 0-1000 normalized — confirm against
        # the model's actual output.
        content = [
            {"type": "image_url", "image_url": {"url": img_uri}},
            {"type": "text", "text": (
                "Detect the main objects in this image that a user might want to move or reposition. "
                f"The image is {w}x{h} pixels. "
                "Focus on distinct, interesting subjects — people, animals, characters, vehicles, "
                "prominent items. Skip background elements like sky, ground, walls, wires, shadows.\n\n"
                "Return at most 5-7 objects. For each, provide a specific label (2-5 words) "
                "and its bounding box in absolute pixel coordinates.\n\n"
                "Return ONLY a JSON array, no other text:\n"
                '[{"label": "descriptive name", "bbox_2d": [x1, y1, x2, y2]}, ...]'
            )},
        ]
        resp = hf_client.chat.completions.create(
            model=VLM_MODEL,
            messages=[{"role": "user", "content": content}],
            extra_body={"chat_template_kwargs": {"enable_thinking": False}},
            max_tokens=1024,
        )
        # Extract JSON from response (handle markdown code blocks)
        raw = _strip_code_fence(resp.choices[0].message.content.strip())
        detections = json.loads(raw)
        if not isinstance(detections, list) or not detections:
            print(f"VLM returned no detections. Raw: {raw[:200]}")
            return "[]"
        # Build result with normalized coords and crop thumbnails
        objects = []
        for det in detections:
            # Skip a malformed entry instead of discarding every detection.
            if not isinstance(det, dict):
                continue
            bbox = det.get("bbox_2d")
            if not isinstance(bbox, (list, tuple)) or len(bbox) != 4:
                continue
            try:
                x1, y1, x2, y2 = (int(v) for v in bbox)
            except (TypeError, ValueError):
                continue
            # Qwen3-VL returns 0-1000 normalized coords — convert to pixels
            x1 = int(x1 * w / 1000)
            y1 = int(y1 * h / 1000)
            x2 = int(x2 * w / 1000)
            y2 = int(y2 * h / 1000)
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(w, x2), min(h, y2)
            # Drop degenerate or inverted boxes (under 5 px on either side).
            if x2 - x1 < 5 or y2 - y1 < 5:
                continue
            crop = pil_image.crop([x1, y1, x2, y2])
            objects.append({
                # The frontend calls string methods on the label, so always emit a string.
                "label": str(det.get("label") or "object"),
                "bbox": {"x1": x1 / w, "y1": y1 / h, "x2": x2 / w, "y2": y2 / h},
                "thumb": pil_to_b64(crop),
            })
        print(f"Detected {len(objects)} objects via Qwen3-VL")
        return json.dumps(objects)
    except Exception as e:
        # Best-effort boundary: any API or parsing failure degrades to "no objects".
        print(f"Object detection failed: {e}")
        import traceback
        traceback.print_exc()
        return "[]"
def update_dimensions_on_upload(b64_str):
    """Compute output dimensions for an uploaded image.

    The longer side is set to 1024, the other side keeps the aspect ratio, and
    both are floored to a multiple of 8.

    Args:
        b64_str: Image as a ``data:image/...;base64,`` URI.

    Returns:
        ``(width, height)``; ``(1024, 1024)`` when the image cannot be decoded.
    """
    image = b64_to_pil(b64_str)
    if image is None:
        return 1024, 1024
    ow, oh = image.size
    if ow > oh:
        nw = 1024
        nh = int(nw * oh / ow)
    else:
        nh = 1024
        nw = int(nh * ow / oh)
    # Extreme aspect ratios would floor the short side to 0; keep at least 8 px.
    return max(8, (nw // 8) * 8), max(8, (nh // 8) * 8)
# ── Inference ─────────────────────────────────────────────────────────────────
@spaces.GPU
def infer(b64_str, boxes_json, prompt, seed, randomize_seed,
          guidance_scale, num_inference_steps, height, width):
    """Run the edit pipeline on the uploaded image with the boxes burned in.

    Args:
        b64_str: Source image as a ``data:image/...;base64,`` URI.
        boxes_json: JSON object with normalized ``src`` and ``dst`` boxes.
        prompt: Edit instruction; must be non-empty.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline as an int.
        height: Accepted but not forwarded to the pipeline.
        width: Accepted but not forwarded to the pipeline.

    Returns:
        ``(result_image, seed, marked_image)``, where ``marked_image`` is the
        input with both boxes drawn and ``seed`` is the seed actually used.

    Raises:
        gr.Error: If the prompt is empty, the image cannot be decoded, or either
            box is missing.
    """
    # NOTE(review): Gradio documents gr.Progress as a default argument; created
    # inside the function, tqdm tracking may not be attached — confirm.
    progress = gr.Progress(track_tqdm=True)
    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter a prompt.")
    source_image = b64_to_pil(b64_str)
    if source_image is None:
        raise gr.Error("Please upload an image first.")
    try:
        boxes = json.loads(boxes_json) if boxes_json and boxes_json.strip() else {}
    except Exception:
        boxes = {}
    # Non-object JSON ("[]", "null") must produce the user-facing error below,
    # not an AttributeError on .get().
    if not isinstance(boxes, dict) or not boxes.get("src") or not boxes.get("dst"):
        raise gr.Error("Draw a box around the subject, then move it to the target location.")
    progress(0.3, desc="Burning bounding boxes...")
    marked = burn_boxes_onto_image(source_image, boxes_json)
    # UI number widgets may deliver floats; manual_seed and the step count need ints.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    num_inference_steps = int(num_inference_steps)
    generator = torch.Generator(device=device).manual_seed(seed)
    progress(0.4, desc=f"Running inference ({num_inference_steps} steps)...")
    result = pipe(
        image=[marked],
        prompt=prompt,
        num_inference_steps=num_inference_steps,
        generator=generator,
        guidance_scale=guidance_scale,
    ).images[0]
    progress(1.0, desc="Done!")
    return result, seed, marked
# ── CSS ───────────────────────────────────────────────────────────────────────
# Stylesheet for the app's custom HTML interface, kept as one raw string.
# It covers the dark theme, the app shell/header/toolbar, the two-column main
# row (canvas on the left, object list / output on the right), object cards,
# the bounding-box canvas wrapper, loaders, settings sliders and the status bar.
# `.hidden-input` hides elements entirely, and `#gradio-run-btn` is positioned
# off-screen with pointer events disabled.
# NOTE(review): presumably handed to the Gradio app as its `css`; the call site
# is not visible in this part of the file.
css = r"""
@import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap');
*{box-sizing:border-box;margin:0;padding:0}
body,.gradio-container{background:#0f0f13!important;font-family:'DM Sans',system-ui,sans-serif!important;font-size:14px!important;color:#e4e4e7!important;min-height:100vh}
footer{display:none!important}
.hidden-input{display:none!important;height:0!important;overflow:hidden!important;margin:0!important;padding:0!important}
.app-shell{background:#18181b;border:1px solid #27272a;border-radius:16px;margin:12px auto;max-width:1400px;overflow:hidden;box-shadow:0 25px 50px -12px rgba(0,0,0,.6)}
.app-header{background:linear-gradient(135deg,#18181b 0%,#1a1a2e 50%,#18181b 100%);border-bottom:1px solid #27272a;padding:16px 24px;display:flex;align-items:center;gap:14px}
.app-logo{width:38px;height:38px;flex-shrink:0}
.app-logo svg{width:38px;height:38px}
.app-header-text{display:flex;flex-direction:column;gap:2px;flex:1;min-width:0}
.app-header-top{display:flex;align-items:center;gap:10px;flex-wrap:wrap}
.app-title{font-size:17px;font-weight:700;color:#f4f4f5;letter-spacing:-0.01em;white-space:nowrap}
.app-title .klein{color:#a5b4fc}
.app-badge{font-size:10px;font-weight:600;padding:2px 8px;border-radius:4px;background:rgba(129,140,248,.12);color:#818cf8;border:1px solid rgba(129,140,248,.2);letter-spacing:0.5px;text-transform:uppercase}
.app-subtitle{font-size:12.5px;color:#71717a;line-height:1.4}
.app-model-link{display:inline-flex;align-items:center;gap:5px;font-size:11.5px;font-weight:500;padding:5px 12px;border-radius:6px;background:rgba(255,255,255,.04);color:#71717a;border:1px solid #27272a;text-decoration:none;white-space:nowrap;transition:all .15s;flex-shrink:0}
.app-model-link:hover{color:#a5b4fc;border-color:rgba(129,140,248,.3);background:rgba(129,140,248,.08)}
.app-toolbar{background:#18181b;border-bottom:1px solid #27272a;padding:8px 16px;display:flex;gap:4px;align-items:center;flex-wrap:wrap}
.tb-sep{width:1px;height:28px;background:#27272a;margin:0 8px}
.modern-tb-btn{display:inline-flex;align-items:center;gap:6px;min-width:32px;height:34px;background:transparent;border:1px solid transparent;border-radius:8px;cursor:pointer;font-size:13px;font-weight:600;padding:0 12px;font-family:'DM Sans',sans-serif;color:#e4e4e7!important;transition:all .15s}
.modern-tb-btn:hover{background:rgba(129,140,248,.15);border-color:rgba(129,140,248,.3)}
.modern-tb-btn.active{background:rgba(129,140,248,.25);border-color:rgba(129,140,248,.45)}
.app-main-row{display:flex;gap:0;overflow:hidden}
.app-main-left{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #27272a}
.app-main-right{flex:1;display:flex;flex-direction:column;min-width:0;background:#18181b;position:relative}
/* Right panel views */
#right-objects-view{display:flex;flex-direction:column;height:100%}
#right-output-view{display:none;flex-direction:column;height:100%}
#right-output-view.active{display:flex}
#right-objects-view.hidden{display:none}
/* Object cards */
.obj-panel-title{padding:12px 20px;font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid #27272a;display:flex;align-items:center;justify-content:space-between}
.obj-list{flex:1;overflow-y:auto;padding:8px 12px;display:flex;flex-direction:column;gap:6px}
.obj-card{display:flex;align-items:center;gap:12px;padding:10px 12px;border:2px solid #27272a;border-radius:10px;cursor:pointer;transition:all .15s;background:#09090b;user-select:none}
.obj-card:hover{border-color:rgba(129,140,248,.4);background:#1e1e24}
.obj-card.selected{border-color:#818cf8;background:rgba(129,140,248,.1);box-shadow:0 0 0 1px rgba(129,140,248,.2)}
.obj-card .obj-thumb{width:52px;height:52px;border-radius:8px;object-fit:cover;flex-shrink:0;background:#27272a}
.obj-card .obj-label{font-size:14px;font-weight:600;color:#e4e4e7;line-height:1.3}
.obj-card .obj-hint{font-size:11px;color:#52525b;margin-top:2px}
.obj-detecting{display:flex;align-items:center;justify-content:center;flex:1;gap:12px;color:#71717a;font-size:13px}
.obj-detecting .loader-spinner{width:20px;height:20px;border:2px solid #27272a;border-top-color:#818cf8;border-radius:50%;animation:spin .8s linear infinite}
.obj-empty{display:flex;align-items:center;justify-content:center;flex:1;color:#3f3f46;font-size:13px;text-align:center;padding:20px}
.back-link{display:inline-flex;align-items:center;gap:4px;padding:8px 16px;font-size:12px;font-weight:600;color:#818cf8;cursor:pointer;border:none;background:none;font-family:'DM Sans',sans-serif}
.back-link:hover{color:#a5b4fc}
/* Output area */
.app-main-right .out-body{background:#09090b;display:flex;align-items:flex-start;justify-content:center;overflow:hidden;position:relative;flex:1}
.app-main-right .out-body img,.app-main-right .out-body .oimg{max-width:100%;max-height:520px;display:block}
.app-main-right .out-placeholder{color:#3f3f46;font-size:13px;text-align:center;padding:20px}
/* Accordion */
.acc-toggle{display:flex;align-items:center;gap:8px;padding:10px 0;cursor:pointer;user-select:none;border:none;background:none;width:100%;font-family:'DM Sans',sans-serif;font-size:13px;font-weight:600;color:#a1a1aa;text-transform:uppercase;letter-spacing:.6px}
.acc-toggle:hover{color:#e4e4e7}
.acc-toggle .acc-arrow{transition:transform .2s;font-size:11px}
.acc-toggle.open .acc-arrow{transform:rotate(90deg)}
.acc-body{display:none;padding-bottom:12px}
.acc-body.open{display:block}
#bbox-draw-wrap{position:relative;background:#09090b;min-height:440px;overflow:hidden;cursor:crosshair}
#bbox-draw-canvas{display:block;margin:0 auto}
#bbox-status{position:absolute;top:12px;left:12px;background:rgba(129,140,248,.9);color:#fff;padding:4px 12px;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;border-radius:6px;z-index:10;display:none;pointer-events:none}
#bbox-count{position:absolute;top:12px;right:12px;background:rgba(24,24,27,.9);color:#a5b4fc;padding:4px 12px;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:600;border-radius:6px;border:1px solid rgba(129,140,248,.3);z-index:10;display:none}
.upload-prompt-modern{position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);z-index:20}
.upload-click-area{display:flex;flex-direction:column;align-items:center;justify-content:center;cursor:pointer;padding:36px 44px;border:2px dashed #3f3f46;border-radius:16px;background:rgba(129,140,248,.03);transition:all .2s;gap:8px}
.upload-click-area:hover{background:rgba(129,140,248,.08);border-color:#818cf8;transform:scale(1.03)}
.upload-click-area svg{width:80px;height:80px}
.upload-main-text{color:#71717a;font-size:14px;font-weight:500}
.upload-sub-text{color:#52525b;font-size:12px}
.hint-bar{background:rgba(129,140,248,.06);border-top:1px solid #27272a;border-bottom:1px solid #27272a;padding:10px 20px;font-size:13px;color:#a1a1aa;line-height:1.7}
.hint-bar b{color:#c7d2fe;font-weight:600}
.modern-textarea{width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;padding:10px 14px;font-family:'DM Sans',sans-serif;font-size:14px;color:#e4e4e7;resize:vertical;outline:none;min-height:42px;transition:border-color .2s}
.modern-textarea:focus{border-color:#818cf8;box-shadow:0 0 0 3px rgba(129,140,248,.15)}
.modern-textarea::placeholder{color:#3f3f46}
.modern-label{font-size:13px;font-weight:500;color:#a1a1aa;margin-bottom:4px;display:block}
.subject-box-wrap{display:flex;flex-direction:column;gap:4px}
.subject-input{width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;padding:10px 14px;font-family:'DM Sans',sans-serif;font-size:14px;color:#e4e4e7;outline:none;transition:border-color .2s}
.subject-input:focus{border-color:#818cf8;box-shadow:0 0 0 3px rgba(129,140,248,.15)}
.subject-input::placeholder{color:#3f3f46}
.btn-run{display:flex;align-items:center;justify-content:center;gap:8px;width:100%;background:linear-gradient(135deg,#818cf8,#6366f1);border:none;border-radius:10px;padding:12px 24px;cursor:pointer;font-size:15px;font-weight:600;font-family:'DM Sans',sans-serif;color:#fff;transition:all .2s;box-shadow:0 4px 16px rgba(129,140,248,.3),inset 0 1px 0 rgba(255,255,255,.1)}
.btn-run:hover{background:linear-gradient(135deg,#a5b4fc,#818cf8);box-shadow:0 6px 24px rgba(129,140,248,.45);transform:translateY(-1px)}
.output-frame{border-bottom:1px solid #27272a;display:flex;flex-direction:column}
.output-frame .out-title{padding:10px 20px;font-size:13px;font-weight:700;color:#e4e4e7;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6)}
.output-frame .out-body{flex:1;background:#09090b;display:flex;align-items:center;justify-content:center;overflow:hidden;min-height:180px;position:relative}
.output-frame .out-body img,.output-frame .out-body .oimg{max-width:100%;max-height:520px;display:block}
.output-frame .out-placeholder,.app-main-right .out-placeholder{color:#3f3f46;font-size:13px;text-align:center;padding:20px}
.modern-loader{display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,11,.92);z-index:15;flex-direction:column;align-items:center;justify-content:center;gap:12px}
.modern-loader.active{display:flex}
.modern-loader .loader-spinner{width:36px;height:36px;border:3px solid #27272a;border-top-color:#818cf8;border-radius:50%;animation:spin .8s linear infinite}
@keyframes spin{to{transform:rotate(360deg)}}
.modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500}
.modern-loader .loader-steps{font-family:'JetBrains Mono',monospace;font-size:12px;color:#818cf8;font-weight:600}
.loader-bar-track{width:220px;height:6px;background:#27272a;border-radius:3px;overflow:hidden}
.loader-bar-fill{height:100%;width:0%;background:linear-gradient(90deg,#6366f1,#818cf8,#a5b4fc);border-radius:3px;transition:width 0.3s ease}
.settings-group{border:1px solid #27272a;border-radius:10px;margin:12px 16px;overflow:hidden}
.settings-group-title{font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;padding:10px 16px;border-bottom:1px solid #27272a;background:rgba(24,24,27,.5)}
.settings-group-body{padding:14px 16px;display:flex;flex-direction:column;gap:12px}
.slider-row{display:flex;align-items:center;gap:10px;min-height:28px}
.slider-row label{font-size:13px;font-weight:500;color:#a1a1aa;min-width:72px;flex-shrink:0}
.slider-row input[type="range"]{flex:1;-webkit-appearance:none;height:6px;background:#27272a;border-radius:3px;outline:none}
.slider-row input[type="range"]::-webkit-slider-thumb{-webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#818cf8,#6366f1);border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(129,140,248,.4)}
.slider-row .slider-val{min-width:52px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;border-radius:6px;color:#a1a1aa}
.slider-row input[type="number"].slider-num{width:72px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;border-radius:6px;color:#a1a1aa;outline:none;-moz-appearance:textfield;flex-shrink:0}
.slider-row input[type="number"].slider-num::-webkit-inner-spin-button,.slider-row input[type="number"].slider-num::-webkit-outer-spin-button{-webkit-appearance:none;margin:0}
.slider-row input[type="number"].slider-num:focus{border-color:#818cf8;box-shadow:0 0 0 2px rgba(129,140,248,.15)}
.checkbox-row{display:flex;align-items:center;gap:8px;font-size:13px;color:#a1a1aa}
.checkbox-row input[type="checkbox"]{accent-color:#818cf8;width:16px;height:16px;cursor:pointer}
.app-statusbar{background:#18181b;border-top:1px solid #27272a;padding:6px 20px;display:flex;gap:12px;height:34px;align-items:center}
.app-statusbar .sb-section{flex:1;font-family:'JetBrains Mono',monospace;font-size:12px;color:#52525b}
.app-statusbar .sb-fixed{flex:0 0 auto;padding:3px 12px;background:rgba(129,140,248,.08);border-radius:6px;color:#a5b4fc;font-weight:500;font-family:'JetBrains Mono',monospace;font-size:12px}
#gradio-run-btn{position:absolute;left:-9999px;top:-9999px;width:1px;height:1px;opacity:0.01;pointer-events:none}
@media(max-width:840px){.app-main-row{flex-direction:column}.app-main-right{width:100%}.app-main-left{border-right:none;border-bottom:1px solid #27272a}}
"""
# ── JS ────────────────────────────────────────────────────────────────────────
canvas_js = r"""
() => {
function init() {
if (window.__bboxInitDone) return;
const canvas = document.getElementById('bbox-draw-canvas');
const wrap = document.getElementById('bbox-draw-wrap');
const status = document.getElementById('bbox-status');
const badge = document.getElementById('bbox-count');
const debugCount = document.getElementById('bbox-debug-count');
const uploadPrompt = document.getElementById('upload-prompt');
const uploadClickArea = document.getElementById('upload-click-area');
const fileInput = document.getElementById('custom-file-input');
const promptInput = document.getElementById('custom-prompt-input');
const subjectInput = document.getElementById('subject-input');
const btnDraw = document.getElementById('tb-draw');
const btnMove = document.getElementById('tb-move');
const btnClear = document.getElementById('tb-clear');
const btnChange = document.getElementById('tb-change-img');
if (!canvas || !wrap || !fileInput || !btnDraw) {
setTimeout(init, 250);
return;
}
window.__bboxInitDone = true;
const ctx = canvas.getContext('2d');
let baseImg = null;
let dispW = 512, dispH = 400;
let mode = 'draw';
let srcBox = null;
let dstBox = null;
let selectedObjectLabel = null;
let dragging = false;
let dragType = null;
let dragStart = {x:0, y:0};
let dragOrig = null;
const HANDLE = 6;
function n2px(b) { return {x1:b.x1*dispW, y1:b.y1*dispH, x2:b.x2*dispW, y2:b.y2*dispH}; }
function px2n(x1,y1,x2,y2) {
return {x1:Math.min(x1,x2)/dispW, y1:Math.min(y1,y2)/dispH,
x2:Math.max(x1,x2)/dispW, y2:Math.max(y1,y2)/dispH};
}
function clamp01(v){return Math.max(0,Math.min(1,v));}
function fitSize(nw,nh) {
const mw = wrap.clientWidth || 512, mh = 500;
const r = Math.min(mw/nw, mh/nh, 1);
dispW = Math.round(nw*r); dispH = Math.round(nh*r);
canvas.width=dispW; canvas.height=dispH;
canvas.style.width=dispW+'px'; canvas.style.height=dispH+'px';
}
function canvasXY(e) {
const r = canvas.getBoundingClientRect();
const cx = e.touches ? e.touches[0].clientX : e.clientX;
const cy = e.touches ? e.touches[0].clientY : e.clientY;
return {x:Math.max(0,Math.min(dispW,cx-r.left)), y:Math.max(0,Math.min(dispH,cy-r.top))};
}
function setGradioValue(containerId, value) {
const container = document.getElementById(containerId);
if (!container) return;
container.querySelectorAll('input, textarea').forEach(el => {
if (el.type === 'file' || el.type === 'range' || el.type === 'checkbox') return;
const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
const ns = Object.getOwnPropertyDescriptor(proto, 'value');
if (ns && ns.set) {
ns.set.call(el, value);
el.dispatchEvent(new Event('input', {bubbles:true, composed:true}));
el.dispatchEvent(new Event('change', {bubbles:true, composed:true}));
}
});
}
function syncToGradio() {
const d = {};
if (srcBox) d.src = srcBox;
if (dstBox) d.dst = dstBox;
setGradioValue('boxes-json-input', JSON.stringify(d));
if (debugCount) {
if (srcBox && dstBox) debugCount.textContent = 'Source + Target defined';
else if (srcBox) debugCount.textContent = 'Source defined — move target';
else debugCount.textContent = 'No boxes drawn';
}
}
function syncImageToGradio(dataUrl) { setGradioValue('hidden-image-b64', dataUrl); }
function syncPromptToGradio() { if (promptInput) setGradioValue('prompt-gradio-input', promptInput.value); }
/* ── Update prompt with object name ── */
function updatePromptForObject(label) {
if (!promptInput) return;
const base = "Move the {name} inside the red bounding box to the position and size indicated by the green bounding box. Remove the {name} from its original location in the red box, filling in the background naturally. Remove the bounding boxes and seamlessly blend the repositioned {name} into the scene, preserving all other objects and the background exactly as they are.";
promptInput.value = base.replace(/\{name\}/g, label);
if (subjectInput) subjectInput.value = label;
syncPromptToGradio();
}
/* ── Subject input: typing updates prompt ── */
if (subjectInput) {
subjectInput.addEventListener('input', function() {
const label = subjectInput.value.trim();
if (label) {
const base = "Move the {name} inside the red bounding box to the position and size indicated by the green bounding box. Remove the {name} from its original location in the red box, filling in the background naturally. Remove the bounding boxes and seamlessly blend the repositioned {name} into the scene, preserving all other objects and the background exactly as they are.";
promptInput.value = base.replace(/\{name\}/g, label);
} else {
promptInput.value = "Move the object inside the red bounding box to the position and size indicated by the green bounding box. Remove the object from its original location in the red box, filling in the background naturally. Remove the bounding boxes and seamlessly blend the repositioned object into the scene, preserving all other objects and the background exactly as they are.";
}
syncPromptToGradio();
});
}
function showStatus(t) { status.textContent=t; status.style.display='block'; }
function hideStatus() { status.style.display='none'; }
function handlePoints(b) {
const p = n2px(b);
const mx=(p.x1+p.x2)/2, my=(p.y1+p.y2)/2;
return {tl:{x:p.x1,y:p.y1},tc:{x:mx,y:p.y1},tr:{x:p.x2,y:p.y1},
ml:{x:p.x1,y:my},mr:{x:p.x2,y:my},
bl:{x:p.x1,y:p.y2},bc:{x:mx,y:p.y2},br:{x:p.x2,y:p.y2}};
}
function hitHandle(px,py,box) {
if (!box) return null;
const pts = handlePoints(box);
for (const k in pts) if (Math.abs(px-pts[k].x)<=HANDLE+2 && Math.abs(py-pts[k].y)<=HANDLE+2) return k;
return null;
}
function hitBox(px,py,box) {
if (!box) return false;
const p = n2px(box);
return px>=p.x1 && px<=p.x2 && py>=p.y1 && py<=p.y2;
}
function redraw(tempRect) {
ctx.clearRect(0,0,dispW,dispH);
if (!baseImg) { ctx.fillStyle='#09090b'; ctx.fillRect(0,0,dispW,dispH); updateBadge(); return; }
ctx.drawImage(baseImg, 0, 0, dispW, dispH);
const lw = Math.max(2, dispW/250);
if (srcBox) {
const p = n2px(srcBox);
ctx.save(); ctx.strokeStyle='rgba(239,68,68,0.95)'; ctx.lineWidth=lw+1;
ctx.setLineDash([8,5]); ctx.strokeRect(p.x1,p.y1,p.x2-p.x1,p.y2-p.y1); ctx.setLineDash([]); ctx.restore();
ctx.save(); ctx.font='bold 11px "DM Sans",sans-serif';
const tw=ctx.measureText('Source').width;
const ly=p.y1>20?p.y1-20:p.y1+4;
ctx.fillStyle='#ef4444'; ctx.fillRect(p.x1,ly,tw+10,18);
ctx.fillStyle='#fff'; ctx.fillText('Source',p.x1+5,ly+13); ctx.restore();
}
if (dstBox) {
const p = n2px(dstBox);
const w=p.x2-p.x1, h=p.y2-p.y1;
ctx.save(); ctx.strokeStyle='rgba(129,140,248,0.95)'; ctx.lineWidth=lw+1;
ctx.strokeRect(p.x1,p.y1,w,h);
ctx.fillStyle='rgba(129,140,248,0.06)'; ctx.fillRect(p.x1,p.y1,w,h); ctx.restore();
const pts = handlePoints(dstBox); ctx.save();
for (const k in pts) {
ctx.fillStyle='#818cf8'; ctx.beginPath(); ctx.arc(pts[k].x,pts[k].y,HANDLE,0,Math.PI*2); ctx.fill();
ctx.strokeStyle='#fff'; ctx.lineWidth=1.5; ctx.beginPath(); ctx.arc(pts[k].x,pts[k].y,HANDLE,0,Math.PI*2); ctx.stroke();
} ctx.restore();
ctx.save(); ctx.font='bold 11px "DM Sans",sans-serif';
const tw2=ctx.measureText('Target').width;
const ly2=p.y1>20?p.y1-20:p.y1+4;
ctx.fillStyle='#818cf8'; ctx.fillRect(p.x1,ly2,tw2+10,18);
ctx.fillStyle='#fff'; ctx.fillText('Target',p.x1+5,ly2+13); ctx.restore();
}
if (tempRect) {
ctx.save(); ctx.strokeStyle='rgba(239,68,68,0.95)'; ctx.lineWidth=lw;
ctx.setLineDash([4,3]); ctx.strokeRect(
Math.min(tempRect.x1,tempRect.x2), Math.min(tempRect.y1,tempRect.y2),
Math.abs(tempRect.x2-tempRect.x1), Math.abs(tempRect.y2-tempRect.y1));
ctx.setLineDash([]); ctx.restore();
}
updateBadge();
}
function updateBadge() {
if (srcBox || dstBox) {
badge.style.display='block';
badge.textContent = (srcBox && dstBox) ? 'SRC + DST' : 'SRC only';
} else { badge.style.display='none'; }
}
function setMode(m) {
mode = m;
btnDraw.classList.toggle('active', m==='draw');
btnMove.classList.toggle('active', m==='move');
canvas.style.cursor = m==='draw' ? 'crosshair' : 'default';
redraw();
}
/* ── File upload ── */
uploadClickArea.addEventListener('click', () => fileInput.click());
btnChange.addEventListener('click', () => fileInput.click());
fileInput.addEventListener('change', e => {
processFile(e.target.files[0]);
e.target.value = '';
});
wrap.addEventListener('dragover', e => { e.preventDefault(); wrap.style.outline='2px solid #818cf8'; });
wrap.addEventListener('dragleave', e => { e.preventDefault(); wrap.style.outline=''; });
wrap.addEventListener('drop', e => { e.preventDefault(); wrap.style.outline=''; processFile(e.dataTransfer.files[0]); });
function processFile(file) {
if (!file || !file.type.startsWith('image/')) return;
const reader = new FileReader();
reader.onload = ev => {
const dataUrl = ev.target.result;
const img = new window.Image();
img.crossOrigin = 'anonymous';
img.onload = () => {
baseImg = img; srcBox = null; dstBox = null; selectedObjectLabel = null;
fitSize(img.naturalWidth, img.naturalHeight);
syncToGradio(); redraw(); hideStatus();
uploadPrompt.style.display = 'none';
// Reset detection state so polling picks up new results
lastObjJson = '';
const detEl = document.querySelector('#detected-objects-json textarea, #detected-objects-json input');
if (detEl) {
const proto = detEl.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype;
const ns = Object.getOwnPropertyDescriptor(proto, 'value');
if (ns && ns.set) { ns.set.call(detEl, ''); detEl.dispatchEvent(new Event('input', {bubbles:true})); }
}
syncImageToGradio(dataUrl);
// Show detecting state
showObjectsView();
showDetecting();
};
img.src = dataUrl;
};
reader.readAsDataURL(file);
}
/* ── Right panel management ── */
function showObjectsView() {
const ov = document.getElementById('right-objects-view');
const outv = document.getElementById('right-output-view');
if (ov) { ov.classList.remove('hidden'); }
if (outv) { outv.classList.remove('active'); }
}
function showOutputView() {
const ov = document.getElementById('right-objects-view');
const outv = document.getElementById('right-output-view');
if (ov) { ov.classList.add('hidden'); }
if (outv) { outv.classList.add('active'); }
}
window.__showOutputView = showOutputView;
window.__showObjectsView = showObjectsView;
function showDetecting() {
const list = document.getElementById('obj-list');
if (list) list.innerHTML = '<div class="obj-detecting"><div class="loader-spinner"></div>Detecting objects…</div>';
}
/* ── Object selection from card click ── */
window.__selectObject = function(bbox, label, cardEl) {
srcBox = {x1: bbox.x1, y1: bbox.y1, x2: bbox.x2, y2: bbox.y2};
dstBox = {x1: bbox.x1, y1: bbox.y1, x2: bbox.x2, y2: bbox.y2};
selectedObjectLabel = label;
updatePromptForObject(label);
syncToGradio();
setMode('move');
showStatus(label + ' selected');
// Highlight the selected card
document.querySelectorAll('.obj-card').forEach(c => c.classList.remove('selected'));
if (cardEl) cardEl.classList.add('selected');
};
/* ── Poll for detected objects from Gradio ── */
let lastObjJson = '';
setInterval(function() {
const el = document.querySelector('#detected-objects-json textarea, #detected-objects-json input');
if (!el) return;
const val = el.value;
if (!val || val === lastObjJson) return;
lastObjJson = val;
if (val === '[]') {
// Detection finished but found nothing
const list = document.getElementById('obj-list');
if (list) list.innerHTML = '<div class="obj-empty">No objects detected.<br>You can still draw boxes manually.</div>';
} else {
renderObjectCards(val);
}
}, 300);
/*
 * Render one clickable card per detected object into #obj-list.
 *
 *   jsonStr - JSON text; expected to be an array of records with
 *             `label`, `thumb` (image URL for the thumbnail) and `bbox`.
 *
 * Invalid JSON leaves the list untouched. Anything that is not a non-empty
 * array shows the "No objects detected" message. Cards are built as DOM nodes
 * (textContent / properties) rather than concatenated HTML, so neither the
 * label nor the thumbnail URL can inject markup, and a missing or non-string
 * label no longer throws inside the polling timer.
 */
function renderObjectCards(jsonStr) {
  const list = document.getElementById('obj-list');
  if (!list) return;
  let objects;
  try { objects = JSON.parse(jsonStr); } catch (e) { return; }
  if (!Array.isArray(objects) || objects.length === 0) {
    list.innerHTML = '<div class="obj-empty">No objects detected.<br>You can still draw boxes manually.</div>';
    return;
  }
  // Keep the parsed detections reachable by index (data-idx on each card).
  window.__detections = objects;
  const frag = document.createDocumentFragment();
  objects.forEach((obj, i) => {
    // A record without a bounding box cannot be selected, so it gets no card.
    if (!obj || typeof obj !== 'object' || !obj.bbox) return;
    const label = obj.label == null ? '' : String(obj.label);

    const card = document.createElement('div');
    card.className = 'obj-card';
    card.dataset.idx = String(i);

    const thumb = document.createElement('img');
    thumb.className = 'obj-thumb';
    if (typeof obj.thumb === 'string') thumb.src = obj.thumb;
    thumb.alt = label;

    const text = document.createElement('div');
    const name = document.createElement('div');
    name.className = 'obj-label';
    name.textContent = label;
    const hint = document.createElement('div');
    hint.className = 'obj-hint';
    hint.textContent = 'Click to select';
    text.appendChild(name);
    text.appendChild(hint);

    card.appendChild(thumb);
    card.appendChild(text);
    // One listener per card; the cards are rebuilt on every render, so
    // listeners never accumulate.
    card.addEventListener('click', () => {
      window.__selectObject(obj.bbox, label, card);
    });
    frag.appendChild(card);
  });
  list.innerHTML = '';
  list.appendChild(frag);
}
/* ── Toolbar ── */
// Draw: forget any selected object label and go back to drawing a new box.
btnDraw.addEventListener('click', function() {
  selectedObjectLabel = null;
  setMode('draw');
});
// Move: only meaningful once a source box exists.
btnMove.addEventListener('click', function() {
  if (!srcBox) return;
  setMode('move');
});
// Clear: drop both boxes, the label, the subject text and the card highlight.
btnClear.addEventListener('click', function() {
  srcBox = null;
  dstBox = null;
  selectedObjectLabel = null;
  if (subjectInput) subjectInput.value = '';
  for (const card of document.querySelectorAll('.obj-card')) {
    card.classList.remove('selected');
  }
  syncToGradio();
  setMode('draw');
  redraw();
});
/* ── Canvas events ── */
// Mouse input uses default listener options.
for (const [type, handler] of [['mousedown', onDown], ['mousemove', onMove], ['mouseup', onUp]]) {
  canvas.addEventListener(type, handler);
}
// Leaving the canvas in the middle of a drag finishes that drag.
canvas.addEventListener('mouseleave', function(evt) {
  if (dragging) onUp(evt);
});
// Touch input is registered as non-passive because the handlers call preventDefault().
for (const [type, handler] of [['touchstart', onDown], ['touchmove', onMove], ['touchend', onUp]]) {
  canvas.addEventListener(type, handler, {passive: false});
}
/*
 * Pointer-down handler. In draw mode it starts a brand-new box and resets the
 * current selection; otherwise it starts resizing (handle hit) or moving
 * (body hit) the target box.
 */
function onDown(e) {
  if (!baseImg) return;
  e.preventDefault();
  const pt = canvasXY(e);
  const x = pt.x, y = pt.y;

  if (mode === 'draw') {
    dragging = true;
    dragType = 'new';
    dragStart = {x, y};
    srcBox = null;
    dstBox = null;
    selectedObjectLabel = null;
    if (subjectInput) subjectInput.value = '';
    for (const card of document.querySelectorAll('.obj-card')) {
      card.classList.remove('selected');
    }
    return;
  }

  if (!dstBox) return;
  // A handle hit wins over a body hit; the body is only tested when no handle matched.
  const handle = hitHandle(x, y, dstBox);
  const grabbed = handle || (hitBox(x, y, dstBox) ? 'moveBox' : null);
  if (!grabbed) return;
  dragging = true;
  dragType = grabbed;
  dragStart = {x, y};
  dragOrig = {...dstBox};
  showStatus(handle ? 'Resizing' : 'Moving');
}
// Pointer-move handler. When idle it only updates the hover cursor; while
// dragging it previews a new box, moves the target box, or resizes it.
// Only dstBox is modified here.
function onMove(e) {
if (!baseImg) return;
e.preventDefault();
const {x,y} = canvasXY(e);
if (!dragging) {
// Hover feedback is shown only in move mode and only over the target box.
if (mode==='move' && dstBox) {
if (hitHandle(x,y,dstBox)) canvas.style.cursor='nwse-resize';
else if (hitBox(x,y,dstBox)) canvas.style.cursor='grab';
else canvas.style.cursor='default';
}
return;
}
// Drawing a new box: just preview the rubber band from the drag start to the pointer.
if (dragType === 'new') { redraw({x1:dragStart.x,y1:dragStart.y,x2:x,y2:y}); return; }
// Drag delta divided by the displayed size, i.e. in the same 0..1 units as the boxes.
const dx = (x - dragStart.x) / dispW, dy = (y - dragStart.y) / dispH;
const o = dragOrig;
if (dragType === 'moveBox') {
// Translate the box, keeping its size and keeping it fully inside [0,1].
const bw=o.x2-o.x1, bh=o.y2-o.y1;
let nx=clamp01(o.x1+dx), ny=clamp01(o.y1+dy);
if (nx+bw>1) nx=1-bw; if (ny+bh>1) ny=1-bh;
dstBox = {x1:nx, y1:ny, x2:nx+bw, y2:ny+bh};
} else {
let {x1,y1,x2,y2} = {...o};
const t = dragType;
const isCorner = (t==='tl'||t==='tr'||t==='bl'||t==='br');
// Holding Shift turns off the aspect lock for corner handles.
const freeResize = e && e.shiftKey;
if (isCorner && !freeResize) {
// Aspect-locked resize; opposite corner is the anchor
const origW = o.x2 - o.x1, origH = o.y2 - o.y1;
// A degenerate original box has no usable aspect ratio.
if (origW < 1e-6 || origH < 1e-6) return;
const aspect = origW / origH;
const ax = t.includes('l') ? o.x2 : o.x1;
const ay = t.includes('t') ? o.y2 : o.y1;
const mx = x / dispW, my = y / dispH;
let newW = Math.abs(mx - ax);
let newH = Math.abs(my - ay);
// Follow whichever axis the pointer pulled further and derive the other from the ratio.
if (newW / aspect > newH) newH = newW / aspect;
else newW = newH * aspect;
// sx/sy: direction in which the box extends from the anchor (-1 = left/up, +1 = right/down).
const sx = t.includes('l') ? -1 : 1;
const sy = t.includes('t') ? -1 : 1;
// Room left between the anchor and the 0/1 edge in the growth direction.
const maxW = sx > 0 ? (1 - ax) : ax;
const maxH = sy > 0 ? (1 - ay) : ay;
// Uniform shrink factor so the box stays inside [0,1] without breaking the ratio.
const k = Math.min(1, maxW / newW, maxH / newH);
newW *= k; newH *= k;
x1 = sx > 0 ? ax : ax - newW;
x2 = sx > 0 ? ax + newW : ax;
y1 = sy > 0 ? ay : ay - newH;
y2 = sy > 0 ? ay + newH : ay;
} else {
// Free resize: move only the edges named in the handle id, each clamped to [0,1].
if (t.includes('l')) x1 = clamp01(o.x1+dx);
if (t.includes('r')) x2 = clamp01(o.x2+dx);
if (t.includes('t')) y1 = clamp01(o.y1+dy);
if (t.includes('b')) y2 = clamp01(o.y2+dy);
}
// Normalise so x1<=x2 and y1<=y2 when an edge was dragged past its opposite.
if (x1>x2) {const tmp=x1;x1=x2;x2=tmp;}
if (y1>y2) {const tmp=y1;y1=y2;y2=tmp;}
// Ignore updates that would make the box thinner than 1% of the image on either axis.
if (Math.abs(x2-x1)<0.01||Math.abs(y2-y1)<0.01) return;
dstBox = {x1,y1,x2,y2};
}
redraw();
}
/*
 * Pointer-up handler: finishes the current drag. A freshly drawn box becomes
 * both source and target when it spans more than 8 px on each axis; any other
 * drag just confirms the target update. State is then reset and synced.
 */
function onUp(e) {
  if (!dragging) return;
  if (e) e.preventDefault();

  if (dragType !== 'new') {
    showStatus('Target updated');
  } else {
    const end = e ? canvasXY(e) : dragStart;
    const wideEnough = Math.abs(end.x - dragStart.x) > 8;
    const tallEnough = Math.abs(end.y - dragStart.y) > 8;
    if (wideEnough && tallEnough) {
      srcBox = px2n(dragStart.x, dragStart.y, end.x, end.y);
      dstBox = {...srcBox};
      showStatus('Box drawn — now move/resize the target');
      setMode('move');
    } else {
      hideStatus();
    }
  }

  dragging = false;
  dragType = null;
  dragOrig = null;
  syncToGradio();
  redraw();
}
/* ── Prompt sync ── */
// Run syncPromptToGradio on every edit of the custom prompt box.
if (promptInput) {
  promptInput.addEventListener('input', syncPromptToGradio);
}
/* ── Slider sync ── */
/*
 * Keep a custom range slider, its optional value readout and the matching
 * hidden Gradio component in step.
 *
 *   customId - id of the custom <input type="range">; the readout element is
 *              looked up as customId + '-val' and may be an <input> or any
 *              text element.
 *   gradioId - id of the container holding the Gradio range/number inputs.
 *
 * Values typed into a numeric readout are routed through the range input
 * before being pushed: the range input snaps the value to its own
 * min/max/step, so an out-of-range or empty entry never reaches Gradio.
 */
function syncSlider(customId, gradioId) {
  const slider = document.getElementById(customId);
  const valEl = document.getElementById(customId + '-val');
  if (!slider) return;

  // Native `value` setter, looked up once instead of once per target element.
  // It is called directly and followed by synthetic input/change events —
  // presumably so the Gradio front end notices the programmatic change.
  const nativeValue = Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value');

  // Show `v` in the readout, whatever kind of element it is.
  function updateVal(v) {
    if (!valEl) return;
    if (valEl.tagName === 'INPUT') valEl.value = v;
    else valEl.textContent = v;
  }

  // Write `v` into every range/number input of the Gradio component.
  function pushToGradio(v) {
    const container = document.getElementById(gradioId);
    if (!container || !nativeValue || !nativeValue.set) return;
    container.querySelectorAll('input[type="range"],input[type="number"]').forEach(el => {
      nativeValue.set.call(el, v);
      el.dispatchEvent(new Event('input', {bubbles: true}));
      el.dispatchEvent(new Event('change', {bubbles: true}));
    });
  }

  slider.addEventListener('input', () => {
    updateVal(slider.value);
    pushToGradio(slider.value);
  });

  if (valEl && valEl.tagName === 'INPUT') {
    valEl.addEventListener('change', () => {
      // Let the range input sanitise the typed value, then show and push
      // the sanitised result instead of the raw text.
      slider.value = valEl.value;
      valEl.value = slider.value;
      pushToGradio(slider.value);
    });
    // While typing, only move the slider thumb; nothing is pushed until `change`.
    valEl.addEventListener('input', () => { slider.value = valEl.value; });
  }
}
// Each custom control `custom-<name>` is paired with the Gradio component `gradio-<name>`.
for (const name of ['seed', 'guidance', 'steps', 'width', 'height']) {
  syncSlider('custom-' + name, 'gradio-' + name);
}
const randCheck = document.getElementById('custom-randomize');
if (randCheck) {
  // Click the Gradio checkbox only when its state differs from the custom one.
  randCheck.addEventListener('change', function() {
    const gradioBox = document.querySelector('#gradio-randomize input[type="checkbox"]');
    if (!gradioBox || gradioBox.checked === randCheck.checked) return;
    gradioBox.click();
  });
}
/* ── Run button ── */
/*
 * Activate all loaders, switch to the output view and start a timer-driven
 * progress display that advances one step every 1.5 s up to the step count
 * read from the custom steps slider (8 when the slider is missing).
 */
function showLoaders() {
  for (const loader of document.querySelectorAll('.modern-loader')) {
    loader.classList.add('active');
  }
  showOutputView();

  const stepsSlider = document.getElementById('custom-steps');
  const totalSteps = stepsSlider ? parseInt(stepsSlider.value) : 8;
  const stepText = document.getElementById('loader-step-text');
  const bar = document.getElementById('loader-bar');

  if (bar) bar.style.width = '0%';
  if (stepText) stepText.textContent = 'Step 0 / ' + totalSteps;

  // Only one progress timer may run at a time.
  if (window.__stepInterval) clearInterval(window.__stepInterval);

  let currentStep = 0;
  const tick = () => {
    currentStep += 1;
    if (currentStep > totalSteps) {
      clearInterval(window.__stepInterval);
      return;
    }
    const pct = Math.round((currentStep / totalSteps) * 100);
    if (bar) bar.style.width = pct + '%';
    if (stepText) stepText.textContent = 'Step ' + currentStep + ' / ' + totalSteps;
  };
  window.__stepInterval = setInterval(tick, 1500);
}
/*
 * Stop the progress timer, show a completed bar with "Done", then deactivate
 * the loaders and reset the bar after 400 ms.
 */
function hideLoaders() {
  if (window.__stepInterval) clearInterval(window.__stepInterval);

  const stepText = document.getElementById('loader-step-text');
  const bar = document.getElementById('loader-bar');
  if (bar) bar.style.width = '100%';
  if (stepText) stepText.textContent = 'Done';

  setTimeout(function() {
    for (const loader of document.querySelectorAll('.modern-loader')) {
      loader.classList.remove('active');
    }
    if (bar) bar.style.width = '0%';
  }, 400);
}
// Exposed on window so it can be called from outside this closure.
window.__hideLoaders = hideLoaders;
const customRunBtn = document.getElementById('custom-run-btn');
if (customRunBtn) {
  customRunBtn.addEventListener('click', function() {
    // Sync prompt and boxes first, then show progress.
    syncPromptToGradio();
    syncToGradio();
    showLoaders();
    // Click the real Gradio button 200 ms after the syncs.
    setTimeout(function() {
      const wrapper = document.getElementById('gradio-run-btn');
      if (!wrapper) return;
      // The id may sit on a wrapper around the <button> or on the button itself.
      const target = wrapper.querySelector('button') || wrapper;
      target.click();
    }, 200);
  });
}
/* ── Watch outputs ── */
/*
 * Mirror the images of the hidden Gradio result/preview components into the
 * custom output containers. Retries every 500 ms until all four containers
 * exist, then reacts to DOM mutations and also polls every 800 ms.
 */
function watchOutputs() {
  const resultSrc = document.getElementById('gradio-result');
  const previewSrc = document.getElementById('gradio-preview');
  const resultDst = document.getElementById('output-image-container');
  const previewDst = document.getElementById('preview-image-container');
  if (!(resultSrc && previewSrc && resultDst && previewDst)) {
    setTimeout(watchOutputs, 500);
    return;
  }

  // Copy the <img> src found in `from` into an img.oimg inside `to`, creating
  // it on first use. `onNewImage` runs only when the src actually changed.
  function mirror(from, to, onNewImage) {
    const latest = from.querySelector('img');
    if (!latest || !latest.src) return;
    let shown = to.querySelector('img.oimg');
    if (!shown) {
      shown = document.createElement('img');
      shown.className = 'oimg';
      to.appendChild(shown);
    }
    if (shown.src === latest.src) return;
    shown.src = latest.src;
    const placeholder = to.querySelector('.out-placeholder');
    if (placeholder) placeholder.style.display = 'none';
    if (onNewImage) onNewImage();
  }

  function sync() {
    // A new result image also means the run is over, so the loaders go away.
    mirror(resultSrc, resultDst, hideLoaders);
    mirror(previewSrc, previewDst, null);
  }

  const watchOptions = {childList: true, subtree: true, attributes: true, attributeFilter: ['src']};
  new MutationObserver(sync).observe(resultSrc, watchOptions);
  new MutationObserver(sync).observe(previewSrc, watchOptions);
  setInterval(sync, 800);
}
watchOutputs();
/* ── Dimension & seed sync ── */
/*
 * Copy the width/height held by the hidden Gradio sliders into the custom
 * sliders and their text readouts. Retries every 500 ms until all elements
 * exist, then reacts to DOM mutations and also polls once a second.
 */
function watchDims() {
  const gradioW = document.getElementById('gradio-width');
  const gradioH = document.getElementById('gradio-height');
  const customW = document.getElementById('custom-width');
  const customH = document.getElementById('custom-height');
  if (!(gradioW && gradioH && customW && customH)) {
    setTimeout(watchDims, 500);
    return;
  }

  // Copy the first range/number input of `container` into `slider` and the readout.
  function copyValue(container, slider, readoutId) {
    const input = container.querySelector('input[type="range"],input[type="number"]');
    if (!input) return;
    slider.value = input.value;
    const readout = document.getElementById(readoutId);
    if (readout) readout.textContent = input.value;
  }

  function pull() {
    copyValue(gradioW, customW, 'custom-width-val');
    copyValue(gradioH, customH, 'custom-height-val');
  }

  const watchOptions = {childList: true, subtree: true, attributes: true};
  new MutationObserver(pull).observe(gradioW, watchOptions);
  new MutationObserver(pull).observe(gradioH, watchOptions);
  setInterval(pull, 1000);
}
watchDims();
/*
 * Copy the seed held by the hidden Gradio slider into the custom seed slider
 * and number box whenever it differs. Retries every 500 ms until both sliders
 * exist, then reacts to DOM mutations and also polls once a second.
 */
function watchSeed() {
  const gradioSeed = document.getElementById('gradio-seed');
  const customSeed = document.getElementById('custom-seed');
  const seedBox = document.getElementById('custom-seed-val');
  if (!(gradioSeed && customSeed)) {
    setTimeout(watchSeed, 500);
    return;
  }

  function pull() {
    const input = gradioSeed.querySelector('input[type="range"],input[type="number"]');
    // Nothing to do for a missing input, an empty value or an unchanged value.
    if (!input || !input.value || input.value === customSeed.value) return;
    customSeed.value = input.value;
    if (seedBox) seedBox.value = input.value;
  }

  new MutationObserver(pull).observe(gradioSeed, {childList: true, subtree: true, attributes: true});
  setInterval(pull, 1000);
}
watchSeed();
/* ── Back to objects button ── */
const backBtn = document.getElementById('back-to-objects');
if (backBtn) {
  backBtn.addEventListener('click', showObjectsView);
}
// Refit and repaint the canvas whenever its wrapper is resized (only once an image is loaded).
new ResizeObserver(function() {
  if (!baseImg) return;
  fitSize(baseImg.naturalWidth, baseImg.naturalHeight);
  redraw();
}).observe(wrap);
// Initial state: draw mode, a 512x400 canvas, first paint and first sync.
setMode('draw');
fitSize(512, 400);
redraw();
syncToGradio();
}
init();
}
"""
# ── Gradio Blocks ─────────────────────────────────────────────────────────────
with gr.Blocks(css=css) as demo:
    # Hidden Gradio components. The custom HTML/JS front end (canvas_js) finds
    # them by elem_id and uses them to exchange data with the Python callbacks.
    hidden_image_b64 = gr.Textbox(
        elem_id="hidden-image-b64", elem_classes="hidden-input", container=False
    )
    boxes_json = gr.Textbox(
        value="{}", elem_id="boxes-json-input", elem_classes="hidden-input", container=False
    )
    prompt = gr.Textbox(
        value=DEFAULT_PROMPT, elem_id="prompt-gradio-input", elem_classes="hidden-input", container=False
    )
    seed = gr.Slider(
        minimum=0, maximum=MAX_SEED, step=1, value=0,
        elem_id="gradio-seed", elem_classes="hidden-input", container=False,
    )
    randomize_seed = gr.Checkbox(
        value=True, elem_id="gradio-randomize", elem_classes="hidden-input", container=False
    )
    guidance_scale = gr.Slider(
        minimum=1.0, maximum=10.0, step=0.1, value=4.0,
        elem_id="gradio-guidance", elem_classes="hidden-input", container=False,
    )
    num_inference_steps = gr.Slider(
        minimum=1, maximum=30, step=1, value=8,
        elem_id="gradio-steps", elem_classes="hidden-input", container=False,
    )
    height_slider = gr.Slider(
        minimum=256, maximum=2048, step=8, value=1024,
        elem_id="gradio-height", elem_classes="hidden-input", container=False,
    )
    width_slider = gr.Slider(
        minimum=256, maximum=2048, step=8, value=1024,
        elem_id="gradio-width", elem_classes="hidden-input", container=False,
    )
    result = gr.Image(
        elem_id="gradio-result", elem_classes="hidden-input", container=False, format="png"
    )
    preview = gr.Image(elem_id="gradio-preview", elem_classes="hidden-input", container=False)
    detected_objects = gr.Textbox(
        value="", elem_id="detected-objects-json", elem_classes="hidden-input", container=False
    )

    # Custom UI markup. The min/max/step/value attributes of the custom inputs
    # must mirror the hidden sliders above: the steps number box is capped at
    # 30 like its range slider and the Gradio slider, and the seed maximum is
    # taken from MAX_SEED so the two cannot drift apart.
    gr.HTML(f"""
<div class="app-shell">
<div class="app-header">
<div class="app-logo">
<svg viewBox="0 0 38 38" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect x="1" y="1" width="36" height="36" rx="9" fill="url(#logo-bg)" stroke="url(#logo-border)" stroke-width="0.5"/>
<rect x="8" y="12" width="11" height="11" rx="2.5" fill="none" stroke="#fff" stroke-width="1.8" opacity="0.9"/>
<rect x="19" y="16" width="11" height="11" rx="2.5" fill="none" stroke="#a5b4fc" stroke-width="1.8" stroke-dasharray="2.5 2"/>
<path d="M14.5 17.5L20 20" stroke="#c7d2fe" stroke-width="1.2" stroke-linecap="round" opacity="0.6"/>
<circle cx="14.5" cy="17.5" r="1.5" fill="#fff" opacity="0.8"/>
<polygon points="21.5,19 19,21 21.5,20.2" fill="#c7d2fe" opacity="0.5"/>
<defs>
<linearGradient id="logo-bg" x1="0" y1="0" x2="38" y2="38"><stop offset="0%" stop-color="#4f46e5"/><stop offset="100%" stop-color="#7c3aed"/></linearGradient>
<linearGradient id="logo-border" x1="0" y1="0" x2="38" y2="38"><stop offset="0%" stop-color="#818cf8"/><stop offset="100%" stop-color="#a78bfa"/></linearGradient>
</defs>
</svg>
</div>
<div class="app-header-text">
<div class="app-header-top">
<span class="app-title">FLUX.2 <span class="klein">[klein]</span> Move</span>
<span class="app-badge">9B + LoRA</span>
</div>
<span class="app-subtitle">Select an object or draw a box to move and resize it</span>
</div>
<a href="https://huggingface.co/linoyts/flux2-klein-bbox-drag-drop-lora"
target="_blank"
class="app-model-link">Model card</a>
</div>
<div class="app-toolbar">
<button id="tb-draw" class="modern-tb-btn active"><span>▬</span> Draw</button>
<button id="tb-move" class="modern-tb-btn"><span>⇉</span> Move</button>
<div class="tb-sep"></div>
<button id="tb-clear" class="modern-tb-btn"><span>✖</span> Clear</button>
<button id="tb-change-img" class="modern-tb-btn">Upload…</button>
</div>
<div class="app-main-row">
<div class="app-main-left">
<div id="bbox-draw-wrap">
<div id="upload-prompt" class="upload-prompt-modern">
<div id="upload-click-area" class="upload-click-area">
<svg viewBox="0 0 80 80" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect x="8" y="14" width="64" height="52" rx="6" fill="none" stroke="#818cf8" stroke-width="2" stroke-dasharray="4 3"/>
<polygon points="12,62 30,40 42,50 54,34 68,62" fill="rgba(129,140,248,0.15)" stroke="#818cf8" stroke-width="1.5"/>
<circle cx="28" cy="30" r="6" fill="rgba(129,140,248,0.2)" stroke="#818cf8" stroke-width="1.5"/>
</svg>
<span class="upload-main-text">Click to upload an image</span>
<span class="upload-sub-text">or drag &amp; drop</span>
</div>
</div>
<input id="custom-file-input" type="file" accept="image/*" style="display:none;" />
<canvas id="bbox-draw-canvas" width="512" height="400"></canvas>
<div id="bbox-status"></div>
<div id="bbox-count"></div>
</div>
<div class="hint-bar">
<b>Select</b> an object on the right or <b>draw</b> a box manually &nbsp;·&nbsp;
<b>Move:</b> Drag the target box to reposition &nbsp;·&nbsp;
Resize via corner handles
</div>
<div style="padding:12px 16px;display:flex;flex-direction:column;gap:12px">
<button id="custom-run-btn" class="btn-run">▶ Run</button>
<div class="subject-box-wrap">
<label class="modern-label" for="subject-input">Subject to move/resize</label>
<input type="text" id="subject-input" class="subject-input" placeholder="Select an object or type a subject…" />
</div>
<div>
<button class="acc-toggle" onclick="this.classList.toggle('open');this.nextElementSibling.classList.toggle('open')">
<span class="acc-arrow">▸</span> Prompt &amp; Input Preview
</button>
<div class="acc-body">
<label class="modern-label">Prompt</label>
<textarea id="custom-prompt-input" class="modern-textarea" rows="3">{DEFAULT_PROMPT}</textarea>
<div class="output-frame" style="margin-top:12px">
<div class="out-title"><span>Input Preview (with boxes)</span></div>
<div class="out-body" id="preview-image-container" style="min-height:140px">
<div class="modern-loader" id="preview-loader"><div class="loader-spinner"></div><div class="loader-text">Preparing…</div><div class="loader-bar-track"><div class="loader-bar-fill"></div></div></div>
<div class="out-placeholder">Preview will appear here</div>
</div>
</div>
</div>
</div>
<div class="settings-group" style="margin:0">
<div class="settings-group-title">Settings</div>
<div class="settings-group-body">
<div class="slider-row"><label>Seed</label><input type="range" id="custom-seed" min="0" max="{MAX_SEED}" step="1" value="0"><input type="number" class="slider-num" id="custom-seed-val" min="0" max="{MAX_SEED}" step="1" value="0"></div>
<div class="checkbox-row"><input type="checkbox" id="custom-randomize" checked><label for="custom-randomize">Randomize seed</label></div>
<div class="slider-row"><label>Guidance</label><input type="range" id="custom-guidance" min="1" max="10" step="0.1" value="4.0"><span class="slider-val" id="custom-guidance-val">4.0</span></div>
<div class="slider-row"><label>Steps</label><input type="range" id="custom-steps" min="1" max="30" step="1" value="8"><input type="number" class="slider-num" id="custom-steps-val" min="1" max="30" step="1" value="8"></div>
<div class="slider-row"><label>Width</label><input type="range" id="custom-width" min="256" max="2048" step="8" value="1024"><span class="slider-val" id="custom-width-val">1024</span></div>
<div class="slider-row"><label>Height</label><input type="range" id="custom-height" min="256" max="2048" step="8" value="1024"><span class="slider-val" id="custom-height-val">1024</span></div>
</div>
</div>
</div>
</div>
<!-- Right panel: Objects view + Output view (toggled) -->
<div class="app-main-right">
<!-- Objects view (shown after upload) -->
<div id="right-objects-view">
<div class="obj-panel-title">Objects</div>
<div class="obj-list" id="obj-list">
<div class="obj-empty">Upload an image to detect objects</div>
</div>
</div>
<!-- Output view (shown after Run) -->
<div id="right-output-view">
<button class="back-link" id="back-to-objects">← Back to objects</button>
<div class="out-body" id="output-image-container" style="flex:1;min-height:400px">
<div class="modern-loader" id="output-loader"><div class="loader-spinner"></div><div class="loader-text">Processing…</div><div class="loader-steps" id="loader-step-text"></div><div class="loader-bar-track"><div class="loader-bar-fill" id="loader-bar"></div></div></div>
<div class="out-placeholder">Result will appear here</div>
</div>
</div>
</div>
</div>
<div class="app-statusbar">
<div class="sb-section" id="bbox-debug-count">No boxes drawn</div>
<div class="sb-section sb-fixed">Ready</div>
</div>
</div>
""")

    # The real Gradio button; the custom "Run" button clicks it from JS.
    run_btn = gr.Button("Run", elem_id="gradio-run-btn")

    # Install the custom front-end script when the page loads.
    demo.load(fn=None, js=canvas_js)

    run_btn.click(
        fn=infer,
        inputs=[
            hidden_image_b64, boxes_json, prompt, seed, randomize_seed,
            guidance_scale, num_inference_steps, height_slider, width_slider,
        ],
        outputs=[result, seed, preview],
    )

    # A change of the uploaded image triggers two independent callbacks:
    # one updates the width/height sliders, the other runs object detection.
    hidden_image_b64.change(
        fn=update_dimensions_on_upload,
        inputs=[hidden_image_b64],
        outputs=[width_slider, height_slider],
    )
    hidden_image_b64.change(
        fn=detect_objects,
        inputs=[hidden_image_b64],
        outputs=[detected_objects],
    )
if __name__ == "__main__":
    # Launch only when executed as a script, with SSR disabled and
    # errors surfaced in the UI.
    demo.launch(
        ssr_mode=False,
        show_error=True,
    )