# frame_editor.py
import numpy as np
from PIL import Image
import torch
import cv2

def load_qwen_image_edit(use_lightning=True, device="cuda"):
    from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler

    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
        "Qwen/Qwen-Image-Edit-2511", subfolder="scheduler"
    )
    pipe = QwenImageEditPlusPipeline.from_pretrained(
        "Qwen/Qwen-Image-Edit-2511",
        scheduler=scheduler,
        torch_dtype=torch.bfloat16,
    ).to(device)
    if use_lightning:
        # Lightning LoRA distills the model for few-step inference
        pipe.load_lora_weights(
            "lightx2v/Qwen-Image-Edit-2511-Lightning",
            weight_name="Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
        )
        pipe.fuse_lora()
    return pipe
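
# Usage note: with the Lightning LoRA fused, the pipeline is meant to run at
# num_inference_steps=4 with true_cfg_scale=1.0 (the defaults used by
# insert_object_qwen_edit below); without it, the base model typically needs
# far more steps and a higher guidance scale.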

def insert_object_qwen_edit(
    first_frame,            # np.ndarray [H, W, 3] uint8 RGB
    box,                    # [x1, y1, x2, y2]
    object_description,     # e.g. "a red sports car"
    pipe,
    context_pad=60,         # pixels of context around the box → helps Qwen understand the scene
    num_inference_steps=4,
    guidance_scale=1.0,
    seed=42,
):
    """
    Inserts the object into ONLY the bounding box region.
    Background outside the box stays pixel-identical to the original.

    Strategy:
        1. Crop (box + padding) from the original → gives Qwen scene context
        2. Edit the crop with Qwen-Image-Edit
        3. Extract only the box pixels from the edited crop
        4. Paste them back onto the original frame
    """
    H, W = first_frame.shape[:2]
    x1, y1, x2, y2 = [int(v) for v in box]
    # Clamp the box to the frame; out-of-range coordinates would otherwise
    # break the crop-relative indexing below.
    x1, x2 = max(0, x1), min(W, x2)
    y1, y2 = max(0, y1), min(H, y2)

    # --- Step 1: Crop with context padding ---
    cx1 = max(0, x1 - context_pad)
    cy1 = max(0, y1 - context_pad)
    cx2 = min(W, x2 + context_pad)
    cy2 = min(H, y2 + context_pad)
    crop = first_frame[cy1:cy2, cx1:cx2].copy()  # [cH, cW, 3]
    cH, cW = crop.shape[:2]

    # Box coordinates relative to the crop
    lx1 = x1 - cx1
    ly1 = y1 - cy1
    lx2 = x2 - cx1
    ly2 = y2 - cy1

    # --- Step 2: Build a focused edit instruction ---
    prompt = (
        f"Insert {object_description} in the region ({lx1},{ly1}) to ({lx2},{ly2}). "
        f"Keep everything outside that region exactly the same. "
        f"Match the scene lighting, shadows, and perspective."
    )

    generator = torch.Generator().manual_seed(seed)
    edited = pipe(
        image=[Image.fromarray(crop)],
        prompt=prompt,
        num_inference_steps=num_inference_steps,
        true_cfg_scale=guidance_scale,
        negative_prompt=" ",
        generator=generator,
    ).images[0]
    edited_np = np.array(edited)  # [cH', cW', 3]

    # Resize back if the pipeline changed the resolution
    if edited_np.shape[:2] != (cH, cW):
        edited_np = cv2.resize(edited_np, (cW, cH), interpolation=cv2.INTER_LINEAR)

    # --- Step 3: Hard composite → only paste the box region back ---
    result = first_frame.copy()
    result[y1:y2, x1:x2] = edited_np[ly1:ly2, lx1:lx2]
    return result  # [H, W, 3] uint8 RGB → background unchanged
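
# Design note: the hard composite above guarantees the pixel-identical
# background, but can leave a visible seam at the box edge if the edit shifts
# the crop's colors slightly. A common mitigation (a sketch, not part of the
# pipeline above) is to alpha-blend a feathered border instead:
#
#   mask = np.zeros((H, W), np.float32)
#   mask[y1:y2, x1:x2] = 1.0
#   mask = cv2.GaussianBlur(mask, (0, 0), sigmaX=3)[..., None]
#   result = (mask * edited_full + (1 - mask) * first_frame).astype(np.uint8)
#
# where edited_full is the original frame with the edited crop pasted in.
# This trades strict background preservation near the seam for smoothness.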

def segment_existing_object(
    first_frame: np.ndarray,
    box: list,
    sam2_predictor,
) -> np.ndarray:
    """
    Use SAM2 to get a precise mask of an existing object.

    Returns: [H, W] binary float32 mask
    """
    sam2_predictor.set_image(first_frame)
    input_box = np.array([box])
    masks, scores, _ = sam2_predictor.predict(
        box=input_box,
        multimask_output=False,
    )
    # With multimask_output=False a single mask is returned; argmax keeps
    # this robust if the predictor ever returns several candidates.
    return masks[np.argmax(scores)].astype(np.float32)
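
# A minimal end-to-end sketch. The predictor construction assumes the `sam2`
# package's SAM2ImagePredictor and the "facebook/sam2-hiera-large" checkpoint;
# the file paths and box are hypothetical placeholders.
if __name__ == "__main__":
    from sam2.sam2_image_predictor import SAM2ImagePredictor

    frame = np.array(Image.open("first_frame.png").convert("RGB"))  # hypothetical input
    box = [120, 80, 420, 360]  # example box, [x1, y1, x2, y2]

    pipe = load_qwen_image_edit(use_lightning=True, device="cuda")
    edited = insert_object_qwen_edit(frame, box, "a red sports car", pipe)
    Image.fromarray(edited).save("edited_frame.png")

    predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
    mask = segment_existing_object(edited, box, predictor)  # mask of the inserted object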