# ObjectInsertion / utils / box_utils.py
# Author: Leema Krishna Murali
# Commit: "Initial commit" (f3d0a26)
# utils/box_utils.py
import numpy as np
from scipy.interpolate import interp1d
def interpolate_boxes(
    keyboxes: dict,  # {frame_idx: [x1, y1, x2, y2]}
    num_frames: int,
    method: str = "linear",  # "linear" or "cubic"
) -> np.ndarray:
    """
    Interpolate sparse keyboxes to dense per-frame boxes.

    Args:
        keyboxes: Mapping from frame index to a box ``[x1, y1, x2, y2]``.
        num_frames: Number of output frames; boxes are evaluated at frame
            indices ``0 .. num_frames - 1``.
        method: Interpolation kind passed to ``scipy.interpolate.interp1d``
            ("linear" or "cubic"). When there are too few keyboxes for the
            requested spline order, linear interpolation is used instead.

    Returns: [T, 4] float32, with negative coordinates clipped to 0.

    Raises:
        ValueError: If ``keyboxes`` is empty or the boxes do not form a
            [K, 4] array.
    """
    if not keyboxes:
        raise ValueError("keyboxes must contain at least one box")

    frame_ids = sorted(keyboxes)
    boxes = np.array([keyboxes[i] for i in frame_ids], dtype=np.float32)
    if boxes.ndim != 2 or boxes.shape[1] != 4:
        raise ValueError(
            f"each keybox must be [x1, y1, x2, y2]; got array of shape {boxes.shape}"
        )

    if len(frame_ids) == 1:
        # interp1d cannot be built from a single sample; hold the only
        # known box constant over the whole sequence instead.
        result = np.repeat(boxes, num_frames, axis=0)
    else:
        # Spline kinds need (order + 1) samples; degrade gracefully to
        # linear rather than failing on sparse annotations.
        min_points = {"quadratic": 3, "cubic": 4}
        kind = method
        if len(frame_ids) < min_points.get(method, 2):
            kind = "linear"

        # axis=0 interpolates all four coordinates in a single call.
        f = interp1d(
            frame_ids,
            boxes,
            axis=0,
            kind=kind,
            fill_value="extrapolate",
        )
        result = f(np.arange(num_frames)).astype(np.float32)

    return result.clip(0, None)  # boxes can't be negative
def box_to_mask(
    box: np.ndarray,  # [x1, y1, x2, y2]
    H: int,
    W: int,
) -> np.ndarray:
    """
    Convert bounding box to binary mask [H, W]

    Args:
        box: ``[x1, y1, x2, y2]`` as an array or any 4-element sequence.
            Coordinates are truncated to integers.
        H: Mask height (number of rows).
        W: Mask width (number of columns).

    Returns: [H, W] float32 mask that is 1.0 on rows ``y1:y2`` and columns
        ``x1:x2`` (end-exclusive) and 0.0 elsewhere. Coordinates outside the
        image are clipped to its bounds; a box with ``x2 <= x1`` or
        ``y2 <= y1`` yields an all-zero mask.
    """
    mask = np.zeros((H, W), dtype=np.float32)
    # asarray lets plain lists/tuples through, not only ndarrays.
    x1, y1, x2, y2 = np.asarray(box).astype(int)
    x1, x2 = np.clip([x1, x2], 0, W)
    y1, y2 = np.clip([y1, y2], 0, H)
    mask[y1:y2, x1:x2] = 1.0
    return mask
def boxes_to_mask_sequence(
    boxes: np.ndarray,  # [T, 4]
    H: int,
    W: int,
) -> np.ndarray:
    """
    Rasterize one box per frame into a stack of masks.

    Each entry of ``boxes`` is passed to ``box_to_mask`` together with
    ``H`` and ``W``.

    Returns: [T, H, W] float32 masks, where T is ``len(boxes)``
    """
    frame_count = len(boxes)
    sequence = np.zeros((frame_count, H, W), dtype=np.float32)
    # Iterating the stack yields per-frame views, so writing through
    # ``[...]`` fills ``sequence`` in place.
    for frame_mask, frame_box in zip(sequence, boxes):
        frame_mask[...] = box_to_mask(frame_box, H, W)
    return sequence
def expand_box(box: np.ndarray, padding: int = 10) -> np.ndarray:
    """
    Grow a box outward by ``padding`` pixels on every side.

    The top-left corner moves by ``-padding`` and the bottom-right corner by
    ``+padding``. The result is not clipped, so coordinates may become
    negative.

    Returns: new array [x1, y1, x2, y2]
    """
    left, top, right, bottom = box
    top_left = (left - padding, top - padding)
    bottom_right = (right + padding, bottom + padding)
    return np.array(top_left + bottom_right)