# frame_editor.py
# (Page header from the file viewer: Spaces — Runtime error; file size 3,484 bytes; revision f3d0a26)
import numpy as np
from PIL import Image
import torch
import cv2
def load_qwen_image_edit(use_lightning=True, device="cuda"):
    """
    Build the Qwen image-edit pipeline and move it to `device`.

    The scheduler and the pipeline weights are both loaded from the
    "Qwen/Qwen-Image-Edit-2511" repository in bfloat16. When `use_lightning`
    is true, the 4-step Lightning LoRA is loaded and fused into the pipeline
    before it is returned.
    """
    # Imported lazily so the module can be imported without diffusers present.
    from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler

    base_repo = "Qwen/Qwen-Image-Edit-2511"

    flow_scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
        base_repo, subfolder="scheduler"
    )
    pipeline = QwenImageEditPlusPipeline.from_pretrained(
        base_repo,
        scheduler=flow_scheduler,
        torch_dtype=torch.bfloat16,
    )
    pipeline = pipeline.to(device)

    if not use_lightning:
        return pipeline

    # Load the Lightning LoRA, then fuse it into the pipeline weights.
    pipeline.load_lora_weights(
        "lightx2v/Qwen-Image-Edit-2511-Lightning",
        weight_name="Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors"
    )
    pipeline.fuse_lora()
    return pipeline
def insert_object_qwen_edit(
    first_frame,  # np.ndarray [H, W, 3] uint8 RGB
    box,  # [x1, y1, x2, y2]
    object_description,  # e.g. "a red sports car"
    pipe,
    context_pad=60,  # pixels of context around box — helps Qwen understand scene
    num_inference_steps=4,
    guidance_scale=1.0,
    seed=42,
):
    """
    Inserts object into ONLY the bounding box region.
    Background outside the box is pixel-identical to original.

    Strategy:
    1. Clamp the box to the frame, then crop (box + padding) from the
       original → gives Qwen scene context
    2. Edit the crop with Qwen-Image-Edit
    3. Extract only the box pixels from the edited crop
    4. Paste back onto original frame

    Args:
        first_frame: Frame to edit; it is never modified in place.
        box: [x1, y1, x2, y2] in frame pixel coordinates. Values are
            truncated to int and clamped to the frame, so a box that is
            partly off-screen edits only its visible part.
        object_description: Text spliced into the edit prompt.
        pipe: Callable pipeline; called with `image`, `prompt`,
            `num_inference_steps`, `true_cfg_scale`, `negative_prompt` and
            `generator`, and expected to return an object with `.images`.
        context_pad: Extra pixels kept around the box when cropping.
        num_inference_steps: Forwarded to the pipeline.
        guidance_scale: Forwarded to the pipeline as `true_cfg_scale`.
        seed: Seed for the torch generator passed to the pipeline.

    Returns:
        A new array with the same shape as `first_frame`. If the box has no
        visible area inside the frame (off-screen, zero-sized or inverted),
        an unchanged copy is returned and the pipeline is not invoked.
    """
    H, W = first_frame.shape[:2]
    x1, y1, x2, y2 = [int(v) for v in box]

    # Clamp to the frame. Without this, negative coordinates wrap around in
    # NumPy slicing, so the source and destination slices can select
    # different (or differently shaped) regions.
    x1, x2 = max(0, min(x1, W)), max(0, min(x2, W))
    y1, y2 = max(0, min(y1, H)), max(0, min(y2, H))

    # Nothing visible to edit: skip the expensive pipeline call entirely.
    if x2 <= x1 or y2 <= y1:
        return first_frame.copy()

    # --- Step 1: Crop with context padding ---
    cx1 = max(0, x1 - context_pad)
    cy1 = max(0, y1 - context_pad)
    cx2 = min(W, x2 + context_pad)
    cy2 = min(H, y2 + context_pad)
    crop = first_frame[cy1:cy2, cx1:cx2].copy()  # [cH, cW, 3]
    cH, cW = crop.shape[:2]

    # Box coordinates relative to crop
    lx1 = x1 - cx1
    ly1 = y1 - cy1
    lx2 = x2 - cx1
    ly2 = y2 - cy1

    # --- Step 2: Build focused edit instruction ---
    # NOTE(review): these coordinates are in crop-pixel space; if the pipeline
    # rescales its input internally they may not match what the model sees —
    # confirm against the pipeline's preprocessing.
    prompt = (
        f"Insert {object_description} in the region ({lx1},{ly1}) to ({lx2},{ly2}). "
        f"Keep everything outside that region exactly the same. "
        f"Match the scene lighting, shadows, and perspective."
    )
    generator = torch.Generator().manual_seed(seed)
    edited = pipe(
        image=[Image.fromarray(crop)],
        prompt=prompt,
        num_inference_steps=num_inference_steps,
        true_cfg_scale=guidance_scale,
        negative_prompt=" ",
        generator=generator,
    ).images[0]
    edited_np = np.array(edited)  # [cH', cW', 3]

    # Resize back if pipeline changed resolution
    if edited_np.shape[:2] != (cH, cW):
        edited_np = cv2.resize(edited_np, (cW, cH), interpolation=cv2.INTER_LINEAR)

    # --- Step 3: Hard composite — only paste the box region back ---
    result = first_frame.copy()
    result[y1:y2, x1:x2] = edited_np[ly1:ly2, lx1:lx2]
    return result  # [H, W, 3] uint8 RGB — background unchanged
def segment_existing_object(
    first_frame: np.ndarray,
    box: list,
    sam2_predictor
) -> np.ndarray:
    """
    Segment the object inside `box` with a SAM2-style predictor.

    The predictor is given the frame via `set_image`, then prompted with the
    box as a one-row array and `multimask_output=False`. Of the masks it
    returns, the one with the highest score is selected.

    Returns: the selected mask cast to float32 (expected to be [H, W]).
    """
    sam2_predictor.set_image(first_frame)

    # Wrap the box in an outer list so the prompt array has a leading axis.
    box_prompt = np.array([box])
    candidate_masks, quality, _unused = sam2_predictor.predict(
        box=box_prompt,
        multimask_output=False,
    )

    best_index = np.argmax(quality)
    best_mask = candidate_masks[best_index]
    return best_mask.astype(np.float32)
|