import gradio as gr import numpy as np import random import torch import spaces import base64 import json import io import os from io import BytesIO from PIL import Image, ImageDraw from diffusers import Flux2KleinPipeline from huggingface_hub import InferenceClient MAX_SEED = np.iinfo(np.int32).max dtype = torch.bfloat16 device = "cuda" if torch.cuda.is_available() else "cpu" # ── Load main pipeline ──────────────────────────────────────────────────────── # REPO_ID = "black-forest-labs/FLUX.2-klein-base-9B" REPO_ID = "black-forest-labs/FLUX.2-klein-9B" print("Loading 9B Base model...") pipe = Flux2KleinPipeline.from_pretrained(REPO_ID, torch_dtype=dtype) pipe.to("cuda") # pipe.load_lora_weights("linoyts/flux2-klein-lora-v3", weight_name="pytorch_lora_weights_v4.safetensors") pipe.load_lora_weights("linoyts/flux2-klein-bbox-drag-drop-lora", weight_name="pytorch_lora_weights.safetensors") pipe.fuse_lora(lora_scale=1.25) # for the few step inference might work with higher scale print("Pipeline ready.") # ── VLM client for object detection + labeling ─────────────────────────────── hf_client = InferenceClient(api_key=os.environ.get("HF_TOKEN")) VLM_MODEL = "Qwen/Qwen3-VL-8B-Instruct" DEFAULT_PROMPT = ( "Move the object inside the red bounding box to the position and size " "indicated by the green bounding box. Remove the object from its original " "location in the red box, filling in the background naturally. Remove the " "bounding boxes and seamlessly blend the repositioned object into the scene, " "preserving all other objects and the background exactly as they are." ) OBJECT_PROMPT_TEMPLATE = ( "Move the {object_name} inside the red bounding box to the position and size " "indicated by the green bounding box. Remove the {object_name} from its original " "location in the red box, filling in the background naturally. Remove the " "bounding boxes and seamlessly blend the repositioned {object_name} into the scene, " "preserving all other objects and the background exactly as they are." ) # ── Helpers ─────────────────────────────────────────────────────────────────── def b64_to_pil(b64_str): if not b64_str or not b64_str.startswith("data:image"): return None try: _, data = b64_str.split(',', 1) return Image.open(BytesIO(base64.b64decode(data))).convert("RGB") except Exception as e: print(f"Error decoding image: {e}") return None def pil_to_b64(pil_img, max_dim=128): """Encode a PIL image as a small JPEG data URI (for thumbnails).""" thumb = pil_img.copy() thumb.thumbnail((max_dim, max_dim), Image.LANCZOS) buf = BytesIO() thumb.save(buf, format="JPEG", quality=80) return "data:image/jpeg;base64," + base64.b64encode(buf.getvalue()).decode() def image_to_data_uri(img): buf = BytesIO() img.save(buf, format="PNG") return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode() def burn_boxes_onto_image(pil_image, boxes_json_str): if not pil_image: return pil_image try: boxes = json.loads(boxes_json_str) if boxes_json_str and boxes_json_str.strip() else {} except Exception: boxes = {} src = boxes.get("src") dst = boxes.get("dst") if not src or not dst: return pil_image img = pil_image.copy().convert("RGB") w, h = img.size draw = ImageDraw.Draw(img) bw = max(4, w // 150) # Green = destination (drawn first, thicker so it peeks out under red) draw.rectangle([int(dst["x1"]*w), int(dst["y1"]*h), int(dst["x2"]*w), int(dst["y2"]*h)], outline=(0, 255, 0), width=bw + 3) # Red = source (drawn on top) draw.rectangle([int(src["x1"]*w), int(src["y1"]*h), int(src["x2"]*w), int(src["y2"]*h)], outline=(255, 0, 0), width=bw) return img # ── Object detection ────────────────────────────────────────────────────────── def detect_objects(b64_str): """Detect objects via Qwen3-VL-8B API — single call for both bboxes and labels.""" pil_image = b64_to_pil(b64_str) if pil_image is None: return "[]" try: w, h = pil_image.size # Encode as JPEG for the API call (much smaller than PNG) buf = BytesIO() pil_image.save(buf, format="JPEG", quality=85) img_uri = "data:image/jpeg;base64," + base64.b64encode(buf.getvalue()).decode() content = [ {"type": "image_url", "image_url": {"url": img_uri}}, {"type": "text", "text": ( f"Detect the main objects in this image that a user might want to move or reposition. " f"The image is {w}x{h} pixels. " "Focus on distinct, interesting subjects — people, animals, characters, vehicles, " "prominent items. Skip background elements like sky, ground, walls, wires, shadows.\n\n" "Return at most 5-7 objects. For each, provide a specific label (2-5 words) " "and its bounding box in absolute pixel coordinates.\n\n" "Return ONLY a JSON array, no other text:\n" '[{"label": "descriptive name", "bbox_2d": [x1, y1, x2, y2]}, ...]' )}, ] resp = hf_client.chat.completions.create( model=VLM_MODEL, messages=[{"role": "user", "content": content}], extra_body={"chat_template_kwargs": {"enable_thinking": False}}, max_tokens=1024, ) raw = resp.choices[0].message.content.strip() # Extract JSON from response (handle markdown code blocks) if "```" in raw: raw = raw.split("```")[1] if raw.startswith("json"): raw = raw[4:] raw = raw.strip() detections = json.loads(raw) if not isinstance(detections, list) or not detections: print(f"VLM returned no detections. Raw: {raw[:200]}") return "[]" # Build result with normalized coords and crop thumbnails objects = [] for det in detections: label = det.get("label", "object") bbox = det.get("bbox_2d") if not bbox or len(bbox) != 4: continue x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3]) # Qwen3-VL returns 0-1000 normalized coords — convert to pixels x1 = int(x1 * w / 1000) y1 = int(y1 * h / 1000) x2 = int(x2 * w / 1000) y2 = int(y2 * h / 1000) x1, y1 = max(0, x1), max(0, y1) x2, y2 = min(w, x2), min(h, y2) if x2 - x1 < 5 or y2 - y1 < 5: continue crop = pil_image.crop([x1, y1, x2, y2]) objects.append({ "label": label, "bbox": {"x1": x1 / w, "y1": y1 / h, "x2": x2 / w, "y2": y2 / h}, "thumb": pil_to_b64(crop), }) print(f"Detected {len(objects)} objects via Qwen3-VL") return json.dumps(objects) except Exception as e: print(f"Object detection failed: {e}") import traceback traceback.print_exc() return "[]" def update_dimensions_on_upload(b64_str): image = b64_to_pil(b64_str) if image is None: return 1024, 1024 ow, oh = image.size if ow > oh: nw = 1024 nh = int(nw * oh / ow) else: nh = 1024 nw = int(nh * ow / oh) return (nw // 8) * 8, (nh // 8) * 8 # ── Inference ───────────────────────────────────────────────────────────────── @spaces.GPU def infer(b64_str, boxes_json, prompt, seed, randomize_seed, guidance_scale, num_inference_steps, height, width): progress = gr.Progress(track_tqdm=True) if not prompt or prompt.strip() == "": raise gr.Error("Please enter a prompt.") source_image = b64_to_pil(b64_str) if source_image is None: raise gr.Error("Please upload an image first.") try: boxes = json.loads(boxes_json) if boxes_json and boxes_json.strip() else {} except Exception: boxes = {} if not boxes.get("src") or not boxes.get("dst"): raise gr.Error("Draw a box around the subject, then move it to the target location.") progress(0.3, desc="Burning bounding boxes...") marked = burn_boxes_onto_image(source_image, boxes_json) if randomize_seed: seed = random.randint(0, MAX_SEED) generator = torch.Generator(device=device).manual_seed(seed) progress(0.4, desc=f"Running inference ({num_inference_steps} steps)...") result = pipe( image=[marked], prompt=prompt, num_inference_steps=num_inference_steps, generator=generator, guidance_scale=guidance_scale, ).images[0] progress(1.0, desc="Done!") return result, seed, marked # ── CSS ─────────────────────────────────────────────────────────────────────── css = r""" @import url('https://fonts.googleapis.com/css2?family=DM+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap'); *{box-sizing:border-box;margin:0;padding:0} body,.gradio-container{background:#0f0f13!important;font-family:'DM Sans',system-ui,sans-serif!important;font-size:14px!important;color:#e4e4e7!important;min-height:100vh} footer{display:none!important} .hidden-input{display:none!important;height:0!important;overflow:hidden!important;margin:0!important;padding:0!important} .app-shell{background:#18181b;border:1px solid #27272a;border-radius:16px;margin:12px auto;max-width:1400px;overflow:hidden;box-shadow:0 25px 50px -12px rgba(0,0,0,.6)} .app-header{background:linear-gradient(135deg,#18181b 0%,#1a1a2e 50%,#18181b 100%);border-bottom:1px solid #27272a;padding:16px 24px;display:flex;align-items:center;gap:14px} .app-logo{width:38px;height:38px;flex-shrink:0} .app-logo svg{width:38px;height:38px} .app-header-text{display:flex;flex-direction:column;gap:2px;flex:1;min-width:0} .app-header-top{display:flex;align-items:center;gap:10px;flex-wrap:wrap} .app-title{font-size:17px;font-weight:700;color:#f4f4f5;letter-spacing:-0.01em;white-space:nowrap} .app-title .klein{color:#a5b4fc} .app-badge{font-size:10px;font-weight:600;padding:2px 8px;border-radius:4px;background:rgba(129,140,248,.12);color:#818cf8;border:1px solid rgba(129,140,248,.2);letter-spacing:0.5px;text-transform:uppercase} .app-subtitle{font-size:12.5px;color:#71717a;line-height:1.4} .app-model-link{display:inline-flex;align-items:center;gap:5px;font-size:11.5px;font-weight:500;padding:5px 12px;border-radius:6px;background:rgba(255,255,255,.04);color:#71717a;border:1px solid #27272a;text-decoration:none;white-space:nowrap;transition:all .15s;flex-shrink:0} .app-model-link:hover{color:#a5b4fc;border-color:rgba(129,140,248,.3);background:rgba(129,140,248,.08)} .app-toolbar{background:#18181b;border-bottom:1px solid #27272a;padding:8px 16px;display:flex;gap:4px;align-items:center;flex-wrap:wrap} .tb-sep{width:1px;height:28px;background:#27272a;margin:0 8px} .modern-tb-btn{display:inline-flex;align-items:center;gap:6px;min-width:32px;height:34px;background:transparent;border:1px solid transparent;border-radius:8px;cursor:pointer;font-size:13px;font-weight:600;padding:0 12px;font-family:'DM Sans',sans-serif;color:#e4e4e7!important;transition:all .15s} .modern-tb-btn:hover{background:rgba(129,140,248,.15);border-color:rgba(129,140,248,.3)} .modern-tb-btn.active{background:rgba(129,140,248,.25);border-color:rgba(129,140,248,.45)} .app-main-row{display:flex;gap:0;overflow:hidden} .app-main-left{flex:1;display:flex;flex-direction:column;min-width:0;border-right:1px solid #27272a} .app-main-right{flex:1;display:flex;flex-direction:column;min-width:0;background:#18181b;position:relative} /* Right panel views */ #right-objects-view{display:flex;flex-direction:column;height:100%} #right-output-view{display:none;flex-direction:column;height:100%} #right-output-view.active{display:flex} #right-objects-view.hidden{display:none} /* Object cards */ .obj-panel-title{padding:12px 20px;font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid #27272a;display:flex;align-items:center;justify-content:space-between} .obj-list{flex:1;overflow-y:auto;padding:8px 12px;display:flex;flex-direction:column;gap:6px} .obj-card{display:flex;align-items:center;gap:12px;padding:10px 12px;border:2px solid #27272a;border-radius:10px;cursor:pointer;transition:all .15s;background:#09090b;user-select:none} .obj-card:hover{border-color:rgba(129,140,248,.4);background:#1e1e24} .obj-card.selected{border-color:#818cf8;background:rgba(129,140,248,.1);box-shadow:0 0 0 1px rgba(129,140,248,.2)} .obj-card .obj-thumb{width:52px;height:52px;border-radius:8px;object-fit:cover;flex-shrink:0;background:#27272a} .obj-card .obj-label{font-size:14px;font-weight:600;color:#e4e4e7;line-height:1.3} .obj-card .obj-hint{font-size:11px;color:#52525b;margin-top:2px} .obj-detecting{display:flex;align-items:center;justify-content:center;flex:1;gap:12px;color:#71717a;font-size:13px} .obj-detecting .loader-spinner{width:20px;height:20px;border:2px solid #27272a;border-top-color:#818cf8;border-radius:50%;animation:spin .8s linear infinite} .obj-empty{display:flex;align-items:center;justify-content:center;flex:1;color:#3f3f46;font-size:13px;text-align:center;padding:20px} .back-link{display:inline-flex;align-items:center;gap:4px;padding:8px 16px;font-size:12px;font-weight:600;color:#818cf8;cursor:pointer;border:none;background:none;font-family:'DM Sans',sans-serif} .back-link:hover{color:#a5b4fc} /* Output area */ .app-main-right .out-body{background:#09090b;display:flex;align-items:flex-start;justify-content:center;overflow:hidden;position:relative;flex:1} .app-main-right .out-body img,.app-main-right .out-body .oimg{max-width:100%;max-height:520px;display:block} .app-main-right .out-placeholder{color:#3f3f46;font-size:13px;text-align:center;padding:20px} /* Accordion */ .acc-toggle{display:flex;align-items:center;gap:8px;padding:10px 0;cursor:pointer;user-select:none;border:none;background:none;width:100%;font-family:'DM Sans',sans-serif;font-size:13px;font-weight:600;color:#a1a1aa;text-transform:uppercase;letter-spacing:.6px} .acc-toggle:hover{color:#e4e4e7} .acc-toggle .acc-arrow{transition:transform .2s;font-size:11px} .acc-toggle.open .acc-arrow{transform:rotate(90deg)} .acc-body{display:none;padding-bottom:12px} .acc-body.open{display:block} #bbox-draw-wrap{position:relative;background:#09090b;min-height:440px;overflow:hidden;cursor:crosshair} #bbox-draw-canvas{display:block;margin:0 auto} #bbox-status{position:absolute;top:12px;left:12px;background:rgba(129,140,248,.9);color:#fff;padding:4px 12px;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;border-radius:6px;z-index:10;display:none;pointer-events:none} #bbox-count{position:absolute;top:12px;right:12px;background:rgba(24,24,27,.9);color:#a5b4fc;padding:4px 12px;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:600;border-radius:6px;border:1px solid rgba(129,140,248,.3);z-index:10;display:none} .upload-prompt-modern{position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);z-index:20} .upload-click-area{display:flex;flex-direction:column;align-items:center;justify-content:center;cursor:pointer;padding:36px 44px;border:2px dashed #3f3f46;border-radius:16px;background:rgba(129,140,248,.03);transition:all .2s;gap:8px} .upload-click-area:hover{background:rgba(129,140,248,.08);border-color:#818cf8;transform:scale(1.03)} .upload-click-area svg{width:80px;height:80px} .upload-main-text{color:#71717a;font-size:14px;font-weight:500} .upload-sub-text{color:#52525b;font-size:12px} .hint-bar{background:rgba(129,140,248,.06);border-top:1px solid #27272a;border-bottom:1px solid #27272a;padding:10px 20px;font-size:13px;color:#a1a1aa;line-height:1.7} .hint-bar b{color:#c7d2fe;font-weight:600} .modern-textarea{width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;padding:10px 14px;font-family:'DM Sans',sans-serif;font-size:14px;color:#e4e4e7;resize:vertical;outline:none;min-height:42px;transition:border-color .2s} .modern-textarea:focus{border-color:#818cf8;box-shadow:0 0 0 3px rgba(129,140,248,.15)} .modern-textarea::placeholder{color:#3f3f46} .modern-label{font-size:13px;font-weight:500;color:#a1a1aa;margin-bottom:4px;display:block} .subject-box-wrap{display:flex;flex-direction:column;gap:4px} .subject-input{width:100%;background:#09090b;border:1px solid #27272a;border-radius:8px;padding:10px 14px;font-family:'DM Sans',sans-serif;font-size:14px;color:#e4e4e7;outline:none;transition:border-color .2s} .subject-input:focus{border-color:#818cf8;box-shadow:0 0 0 3px rgba(129,140,248,.15)} .subject-input::placeholder{color:#3f3f46} .btn-run{display:flex;align-items:center;justify-content:center;gap:8px;width:100%;background:linear-gradient(135deg,#818cf8,#6366f1);border:none;border-radius:10px;padding:12px 24px;cursor:pointer;font-size:15px;font-weight:600;font-family:'DM Sans',sans-serif;color:#fff;transition:all .2s;box-shadow:0 4px 16px rgba(129,140,248,.3),inset 0 1px 0 rgba(255,255,255,.1)} .btn-run:hover{background:linear-gradient(135deg,#a5b4fc,#818cf8);box-shadow:0 6px 24px rgba(129,140,248,.45);transform:translateY(-1px)} .output-frame{border-bottom:1px solid #27272a;display:flex;flex-direction:column} .output-frame .out-title{padding:10px 20px;font-size:13px;font-weight:700;color:#e4e4e7;text-transform:uppercase;letter-spacing:.8px;border-bottom:1px solid rgba(39,39,42,.6)} .output-frame .out-body{flex:1;background:#09090b;display:flex;align-items:center;justify-content:center;overflow:hidden;min-height:180px;position:relative} .output-frame .out-body img,.output-frame .out-body .oimg{max-width:100%;max-height:520px;display:block} .output-frame .out-placeholder,.app-main-right .out-placeholder{color:#3f3f46;font-size:13px;text-align:center;padding:20px} .modern-loader{display:none;position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(9,9,11,.92);z-index:15;flex-direction:column;align-items:center;justify-content:center;gap:12px} .modern-loader.active{display:flex} .modern-loader .loader-spinner{width:36px;height:36px;border:3px solid #27272a;border-top-color:#818cf8;border-radius:50%;animation:spin .8s linear infinite} @keyframes spin{to{transform:rotate(360deg)}} .modern-loader .loader-text{font-size:13px;color:#a1a1aa;font-weight:500} .modern-loader .loader-steps{font-family:'JetBrains Mono',monospace;font-size:12px;color:#818cf8;font-weight:600} .loader-bar-track{width:220px;height:6px;background:#27272a;border-radius:3px;overflow:hidden} .loader-bar-fill{height:100%;width:0%;background:linear-gradient(90deg,#6366f1,#818cf8,#a5b4fc);border-radius:3px;transition:width 0.3s ease} .settings-group{border:1px solid #27272a;border-radius:10px;margin:12px 16px;overflow:hidden} .settings-group-title{font-size:12px;font-weight:600;color:#71717a;text-transform:uppercase;letter-spacing:.8px;padding:10px 16px;border-bottom:1px solid #27272a;background:rgba(24,24,27,.5)} .settings-group-body{padding:14px 16px;display:flex;flex-direction:column;gap:12px} .slider-row{display:flex;align-items:center;gap:10px;min-height:28px} .slider-row label{font-size:13px;font-weight:500;color:#a1a1aa;min-width:72px;flex-shrink:0} .slider-row input[type="range"]{flex:1;-webkit-appearance:none;height:6px;background:#27272a;border-radius:3px;outline:none} .slider-row input[type="range"]::-webkit-slider-thumb{-webkit-appearance:none;width:16px;height:16px;background:linear-gradient(135deg,#818cf8,#6366f1);border-radius:50%;cursor:pointer;box-shadow:0 2px 6px rgba(129,140,248,.4)} .slider-row .slider-val{min-width:52px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;border-radius:6px;color:#a1a1aa} .slider-row input[type="number"].slider-num{width:72px;text-align:right;font-family:'JetBrains Mono',monospace;font-size:12px;font-weight:500;padding:3px 8px;background:#09090b;border:1px solid #27272a;border-radius:6px;color:#a1a1aa;outline:none;-moz-appearance:textfield;flex-shrink:0} .slider-row input[type="number"].slider-num::-webkit-inner-spin-button,.slider-row input[type="number"].slider-num::-webkit-outer-spin-button{-webkit-appearance:none;margin:0} .slider-row input[type="number"].slider-num:focus{border-color:#818cf8;box-shadow:0 0 0 2px rgba(129,140,248,.15)} .checkbox-row{display:flex;align-items:center;gap:8px;font-size:13px;color:#a1a1aa} .checkbox-row input[type="checkbox"]{accent-color:#818cf8;width:16px;height:16px;cursor:pointer} .app-statusbar{background:#18181b;border-top:1px solid #27272a;padding:6px 20px;display:flex;gap:12px;height:34px;align-items:center} .app-statusbar .sb-section{flex:1;font-family:'JetBrains Mono',monospace;font-size:12px;color:#52525b} .app-statusbar .sb-fixed{flex:0 0 auto;padding:3px 12px;background:rgba(129,140,248,.08);border-radius:6px;color:#a5b4fc;font-weight:500;font-family:'JetBrains Mono',monospace;font-size:12px} #gradio-run-btn{position:absolute;left:-9999px;top:-9999px;width:1px;height:1px;opacity:0.01;pointer-events:none} @media(max-width:840px){.app-main-row{flex-direction:column}.app-main-right{width:100%}.app-main-left{border-right:none;border-bottom:1px solid #27272a}} """ # ── JS ──────────────────────────────────────────────────────────────────────── canvas_js = r""" () => { function init() { if (window.__bboxInitDone) return; const canvas = document.getElementById('bbox-draw-canvas'); const wrap = document.getElementById('bbox-draw-wrap'); const status = document.getElementById('bbox-status'); const badge = document.getElementById('bbox-count'); const debugCount = document.getElementById('bbox-debug-count'); const uploadPrompt = document.getElementById('upload-prompt'); const uploadClickArea = document.getElementById('upload-click-area'); const fileInput = document.getElementById('custom-file-input'); const promptInput = document.getElementById('custom-prompt-input'); const subjectInput = document.getElementById('subject-input'); const btnDraw = document.getElementById('tb-draw'); const btnMove = document.getElementById('tb-move'); const btnClear = document.getElementById('tb-clear'); const btnChange = document.getElementById('tb-change-img'); if (!canvas || !wrap || !fileInput || !btnDraw) { setTimeout(init, 250); return; } window.__bboxInitDone = true; const ctx = canvas.getContext('2d'); let baseImg = null; let dispW = 512, dispH = 400; let mode = 'draw'; let srcBox = null; let dstBox = null; let selectedObjectLabel = null; let dragging = false; let dragType = null; let dragStart = {x:0, y:0}; let dragOrig = null; const HANDLE = 6; function n2px(b) { return {x1:b.x1*dispW, y1:b.y1*dispH, x2:b.x2*dispW, y2:b.y2*dispH}; } function px2n(x1,y1,x2,y2) { return {x1:Math.min(x1,x2)/dispW, y1:Math.min(y1,y2)/dispH, x2:Math.max(x1,x2)/dispW, y2:Math.max(y1,y2)/dispH}; } function clamp01(v){return Math.max(0,Math.min(1,v));} function fitSize(nw,nh) { const mw = wrap.clientWidth || 512, mh = 500; const r = Math.min(mw/nw, mh/nh, 1); dispW = Math.round(nw*r); dispH = Math.round(nh*r); canvas.width=dispW; canvas.height=dispH; canvas.style.width=dispW+'px'; canvas.style.height=dispH+'px'; } function canvasXY(e) { const r = canvas.getBoundingClientRect(); const cx = e.touches ? e.touches[0].clientX : e.clientX; const cy = e.touches ? e.touches[0].clientY : e.clientY; return {x:Math.max(0,Math.min(dispW,cx-r.left)), y:Math.max(0,Math.min(dispH,cy-r.top))}; } function setGradioValue(containerId, value) { const container = document.getElementById(containerId); if (!container) return; container.querySelectorAll('input, textarea').forEach(el => { if (el.type === 'file' || el.type === 'range' || el.type === 'checkbox') return; const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype; const ns = Object.getOwnPropertyDescriptor(proto, 'value'); if (ns && ns.set) { ns.set.call(el, value); el.dispatchEvent(new Event('input', {bubbles:true, composed:true})); el.dispatchEvent(new Event('change', {bubbles:true, composed:true})); } }); } function syncToGradio() { const d = {}; if (srcBox) d.src = srcBox; if (dstBox) d.dst = dstBox; setGradioValue('boxes-json-input', JSON.stringify(d)); if (debugCount) { if (srcBox && dstBox) debugCount.textContent = 'Source + Target defined'; else if (srcBox) debugCount.textContent = 'Source defined — move target'; else debugCount.textContent = 'No boxes drawn'; } } function syncImageToGradio(dataUrl) { setGradioValue('hidden-image-b64', dataUrl); } function syncPromptToGradio() { if (promptInput) setGradioValue('prompt-gradio-input', promptInput.value); } /* ── Update prompt with object name ── */ function updatePromptForObject(label) { if (!promptInput) return; const base = "Move the {name} inside the red bounding box to the position and size indicated by the green bounding box. Remove the {name} from its original location in the red box, filling in the background naturally. Remove the bounding boxes and seamlessly blend the repositioned {name} into the scene, preserving all other objects and the background exactly as they are."; promptInput.value = base.replace(/\{name\}/g, label); if (subjectInput) subjectInput.value = label; syncPromptToGradio(); } /* ── Subject input: typing updates prompt ── */ if (subjectInput) { subjectInput.addEventListener('input', function() { const label = subjectInput.value.trim(); if (label) { const base = "Move the {name} inside the red bounding box to the position and size indicated by the green bounding box. Remove the {name} from its original location in the red box, filling in the background naturally. Remove the bounding boxes and seamlessly blend the repositioned {name} into the scene, preserving all other objects and the background exactly as they are."; promptInput.value = base.replace(/\{name\}/g, label); } else { promptInput.value = "Move the object inside the red bounding box to the position and size indicated by the green bounding box. Remove the object from its original location in the red box, filling in the background naturally. Remove the bounding boxes and seamlessly blend the repositioned object into the scene, preserving all other objects and the background exactly as they are."; } syncPromptToGradio(); }); } function showStatus(t) { status.textContent=t; status.style.display='block'; } function hideStatus() { status.style.display='none'; } function handlePoints(b) { const p = n2px(b); const mx=(p.x1+p.x2)/2, my=(p.y1+p.y2)/2; return {tl:{x:p.x1,y:p.y1},tc:{x:mx,y:p.y1},tr:{x:p.x2,y:p.y1}, ml:{x:p.x1,y:my},mr:{x:p.x2,y:my}, bl:{x:p.x1,y:p.y2},bc:{x:mx,y:p.y2},br:{x:p.x2,y:p.y2}}; } function hitHandle(px,py,box) { if (!box) return null; const pts = handlePoints(box); for (const k in pts) if (Math.abs(px-pts[k].x)<=HANDLE+2 && Math.abs(py-pts[k].y)<=HANDLE+2) return k; return null; } function hitBox(px,py,box) { if (!box) return false; const p = n2px(box); return px>=p.x1 && px<=p.x2 && py>=p.y1 && py<=p.y2; } function redraw(tempRect) { ctx.clearRect(0,0,dispW,dispH); if (!baseImg) { ctx.fillStyle='#09090b'; ctx.fillRect(0,0,dispW,dispH); updateBadge(); return; } ctx.drawImage(baseImg, 0, 0, dispW, dispH); const lw = Math.max(2, dispW/250); if (srcBox) { const p = n2px(srcBox); ctx.save(); ctx.strokeStyle='rgba(239,68,68,0.95)'; ctx.lineWidth=lw+1; ctx.setLineDash([8,5]); ctx.strokeRect(p.x1,p.y1,p.x2-p.x1,p.y2-p.y1); ctx.setLineDash([]); ctx.restore(); ctx.save(); ctx.font='bold 11px "DM Sans",sans-serif'; const tw=ctx.measureText('Source').width; const ly=p.y1>20?p.y1-20:p.y1+4; ctx.fillStyle='#ef4444'; ctx.fillRect(p.x1,ly,tw+10,18); ctx.fillStyle='#fff'; ctx.fillText('Source',p.x1+5,ly+13); ctx.restore(); } if (dstBox) { const p = n2px(dstBox); const w=p.x2-p.x1, h=p.y2-p.y1; ctx.save(); ctx.strokeStyle='rgba(129,140,248,0.95)'; ctx.lineWidth=lw+1; ctx.strokeRect(p.x1,p.y1,w,h); ctx.fillStyle='rgba(129,140,248,0.06)'; ctx.fillRect(p.x1,p.y1,w,h); ctx.restore(); const pts = handlePoints(dstBox); ctx.save(); for (const k in pts) { ctx.fillStyle='#818cf8'; ctx.beginPath(); ctx.arc(pts[k].x,pts[k].y,HANDLE,0,Math.PI*2); ctx.fill(); ctx.strokeStyle='#fff'; ctx.lineWidth=1.5; ctx.beginPath(); ctx.arc(pts[k].x,pts[k].y,HANDLE,0,Math.PI*2); ctx.stroke(); } ctx.restore(); ctx.save(); ctx.font='bold 11px "DM Sans",sans-serif'; const tw2=ctx.measureText('Target').width; const ly2=p.y1>20?p.y1-20:p.y1+4; ctx.fillStyle='#818cf8'; ctx.fillRect(p.x1,ly2,tw2+10,18); ctx.fillStyle='#fff'; ctx.fillText('Target',p.x1+5,ly2+13); ctx.restore(); } if (tempRect) { ctx.save(); ctx.strokeStyle='rgba(239,68,68,0.95)'; ctx.lineWidth=lw; ctx.setLineDash([4,3]); ctx.strokeRect( Math.min(tempRect.x1,tempRect.x2), Math.min(tempRect.y1,tempRect.y2), Math.abs(tempRect.x2-tempRect.x1), Math.abs(tempRect.y2-tempRect.y1)); ctx.setLineDash([]); ctx.restore(); } updateBadge(); } function updateBadge() { if (srcBox || dstBox) { badge.style.display='block'; badge.textContent = (srcBox && dstBox) ? 'SRC + DST' : 'SRC only'; } else { badge.style.display='none'; } } function setMode(m) { mode = m; btnDraw.classList.toggle('active', m==='draw'); btnMove.classList.toggle('active', m==='move'); canvas.style.cursor = m==='draw' ? 'crosshair' : 'default'; redraw(); } /* ── File upload ── */ uploadClickArea.addEventListener('click', () => fileInput.click()); btnChange.addEventListener('click', () => fileInput.click()); fileInput.addEventListener('change', e => { processFile(e.target.files[0]); e.target.value = ''; }); wrap.addEventListener('dragover', e => { e.preventDefault(); wrap.style.outline='2px solid #818cf8'; }); wrap.addEventListener('dragleave', e => { e.preventDefault(); wrap.style.outline=''; }); wrap.addEventListener('drop', e => { e.preventDefault(); wrap.style.outline=''; processFile(e.dataTransfer.files[0]); }); function processFile(file) { if (!file || !file.type.startsWith('image/')) return; const reader = new FileReader(); reader.onload = ev => { const dataUrl = ev.target.result; const img = new window.Image(); img.crossOrigin = 'anonymous'; img.onload = () => { baseImg = img; srcBox = null; dstBox = null; selectedObjectLabel = null; fitSize(img.naturalWidth, img.naturalHeight); syncToGradio(); redraw(); hideStatus(); uploadPrompt.style.display = 'none'; // Reset detection state so polling picks up new results lastObjJson = ''; const detEl = document.querySelector('#detected-objects-json textarea, #detected-objects-json input'); if (detEl) { const proto = detEl.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype; const ns = Object.getOwnPropertyDescriptor(proto, 'value'); if (ns && ns.set) { ns.set.call(detEl, ''); detEl.dispatchEvent(new Event('input', {bubbles:true})); } } syncImageToGradio(dataUrl); // Show detecting state showObjectsView(); showDetecting(); }; img.src = dataUrl; }; reader.readAsDataURL(file); } /* ── Right panel management ── */ function showObjectsView() { const ov = document.getElementById('right-objects-view'); const outv = document.getElementById('right-output-view'); if (ov) { ov.classList.remove('hidden'); } if (outv) { outv.classList.remove('active'); } } function showOutputView() { const ov = document.getElementById('right-objects-view'); const outv = document.getElementById('right-output-view'); if (ov) { ov.classList.add('hidden'); } if (outv) { outv.classList.add('active'); } } window.__showOutputView = showOutputView; window.__showObjectsView = showObjectsView; function showDetecting() { const list = document.getElementById('obj-list'); if (list) list.innerHTML = '