# frame_editor.py
import numpy as np
from PIL import Image
import torch
import cv2


def load_qwen_image_edit(use_lightning=True, device="cuda"):
    """Load the Qwen-Image-Edit pipeline, optionally fusing in the Lightning LoRA."""
    from diffusers import QwenImageEditPlusPipeline, FlowMatchEulerDiscreteScheduler

    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
        "Qwen/Qwen-Image-Edit-2511", subfolder="scheduler"
    )
    pipe = QwenImageEditPlusPipeline.from_pretrained(
        "Qwen/Qwen-Image-Edit-2511",
        scheduler=scheduler,
        torch_dtype=torch.bfloat16,
    ).to(device)
    if use_lightning:
        pipe.load_lora_weights(
            "lightx2v/Qwen-Image-Edit-2511-Lightning",
            weight_name="Qwen-Image-Edit-2511-Lightning-4steps-V1.0-bf16.safetensors",
        )
        pipe.fuse_lora()
    return pipe
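
# Usage sketch, assuming a CUDA GPU with enough memory for the bf16 weights.
# The Lightning LoRA is a 4-step distilled variant (per its filename), so it
# pairs with the num_inference_steps=4 / guidance_scale=1.0 defaults used below:
#
#   pipe = load_qwen_image_edit(use_lightning=True, device="cuda")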


def insert_object_qwen_edit(
    first_frame,            # np.ndarray [H, W, 3] uint8 RGB
    box,                    # [x1, y1, x2, y2]
    object_description,     # e.g. "a red sports car"
    pipe,
    context_pad=60,         # pixels of context around the box; helps Qwen understand the scene
    num_inference_steps=4,
    guidance_scale=1.0,
    seed=42,
):
    """
    Insert an object into ONLY the bounding-box region.
    Background outside the box stays pixel-identical to the original.

    Strategy:
      1. Crop (box + padding) from the original -> gives Qwen scene context
      2. Edit the crop with Qwen-Image-Edit
      3. Extract only the box pixels from the edited crop
      4. Paste them back onto the original frame
    """
    H, W = first_frame.shape[:2]
    x1, y1, x2, y2 = [int(v) for v in box]
    # Clamp the box to the frame so the paste in step 3 cannot shape-mismatch.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(W, x2), min(H, y2)

    # --- Step 1: Crop with context padding ---
    cx1 = max(0, x1 - context_pad)
    cy1 = max(0, y1 - context_pad)
    cx2 = min(W, x2 + context_pad)
    cy2 = min(H, y2 + context_pad)
    crop = first_frame[cy1:cy2, cx1:cx2].copy()  # [cH, cW, 3]
    cH, cW = crop.shape[:2]

    # Box coordinates relative to the crop
    lx1 = x1 - cx1
    ly1 = y1 - cy1
    lx2 = x2 - cx1
    ly2 = y2 - cy1

    # --- Step 2: Build a focused edit instruction ---
    prompt = (
        f"Insert {object_description} in the region ({lx1},{ly1}) to ({lx2},{ly2}). "
        f"Keep everything outside that region exactly the same. "
        f"Match the scene lighting, shadows, and perspective."
    )
    generator = torch.Generator().manual_seed(seed)
    edited = pipe(
        image=[Image.fromarray(crop)],
        prompt=prompt,
        num_inference_steps=num_inference_steps,
        true_cfg_scale=guidance_scale,
        negative_prompt=" ",  # placeholder; CFG is effectively off at true_cfg_scale=1.0
        generator=generator,
    ).images[0]
    edited_np = np.array(edited)  # [cH', cW', 3]

    # Resize back if the pipeline changed the resolution
    if edited_np.shape[:2] != (cH, cW):
        edited_np = cv2.resize(edited_np, (cW, cH), interpolation=cv2.INTER_LINEAR)

    # --- Step 3: Hard composite: paste only the box region back ---
    result = first_frame.copy()
    result[y1:y2, x1:x2] = edited_np[ly1:ly2, lx1:lx2]
    return result  # [H, W, 3] uint8 RGB; background unchanged
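
# Usage sketch; the file path and box below are placeholder values:
#
#   pipe = load_qwen_image_edit()
#   frame = np.array(Image.open("first_frame.png").convert("RGB"))
#   out = insert_object_qwen_edit(
#       frame,
#       box=[320, 180, 520, 380],
#       object_description="a red sports car",
#       pipe=pipe,
#   )
#   Image.fromarray(out).save("first_frame_edited.png")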


def segment_existing_object(
    first_frame: np.ndarray,
    box: list,
    sam2_predictor,
) -> np.ndarray:
    """
    Use SAM2 to get a precise mask of an existing object.

    Returns: [H, W] binary float32 mask
    """
    sam2_predictor.set_image(first_frame)
    input_box = np.array([box])
    masks, scores, _ = sam2_predictor.predict(
        box=input_box,
        multimask_output=False,
    )
    return masks[np.argmax(scores)].astype(np.float32)
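

if __name__ == "__main__":
    # Demo sketch assuming the `sam2` package is installed and exposes
    # SAM2ImagePredictor.from_pretrained; the frame path, box, and model id
    # are placeholder values.
    from sam2.sam2_image_predictor import SAM2ImagePredictor

    frame = np.array(Image.open("first_frame.png").convert("RGB"))
    box = [320, 180, 520, 380]

    pipe = load_qwen_image_edit(use_lightning=True, device="cuda")
    edited = insert_object_qwen_edit(frame, box, "a red sports car", pipe)
    Image.fromarray(edited).save("first_frame_edited.png")

    # Mask the freshly inserted object with SAM2 using the same box prompt.
    predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2-hiera-large")
    mask = segment_existing_object(edited, box, predictor)  # [H, W] float32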