# utils/box_utils.py
"""Helpers for interpolating bounding boxes and rasterising them to masks."""

import numpy as np
from scipy.interpolate import interp1d


def interpolate_boxes(
    keyboxes: dict,  # {frame_idx: [x1, y1, x2, y2]}
    num_frames: int,
    method: str = "linear",  # "linear" or "cubic"
) -> np.ndarray:
    """
    Interpolate sparse keyboxes to dense per-frame boxes.

    Args:
        keyboxes: Mapping from frame index to a box ``[x1, y1, x2, y2]``.
            Must contain at least one entry.
        num_frames: Number of output frames; boxes are produced for frame
            indices ``0 .. num_frames - 1``.
        method: Interpolation kind forwarded to ``scipy.interpolate.interp1d``
            ("linear" or "cubic"). "cubic" falls back to "linear" when fewer
            than 4 key frames are available, because a cubic spline cannot
            be fitted through fewer points.

    Returns:
        [T, 4] float32 array, with every coordinate clipped to be >= 0.
        Frames outside the key-frame range are extrapolated; with a single
        key frame that box is repeated for every frame.

    Raises:
        ValueError: If ``keyboxes`` is empty.
    """
    if not keyboxes:
        raise ValueError("keyboxes must contain at least one key frame")

    frame_ids = sorted(keyboxes)
    boxes = np.array([keyboxes[i] for i in frame_ids], dtype=np.float32)

    if len(frame_ids) == 1:
        # A single key frame gives interp1d nothing to interpolate between
        # (the slope would be 0/0), so hold the box constant instead.
        result = np.repeat(boxes, num_frames, axis=0)
    else:
        kind = method
        if method == "cubic" and len(frame_ids) < 4:
            # interp1d requires order + 1 points for a spline of that order.
            kind = "linear"
        # axis=0 interpolates all four coordinates in one call.
        f = interp1d(
            frame_ids,
            boxes,
            axis=0,
            kind=kind,
            fill_value="extrapolate",
        )
        result = f(np.arange(num_frames)).astype(np.float32)

    return result.clip(0, None)  # boxes can't be negative


def box_to_mask(
    box: np.ndarray,  # [x1, y1, x2, y2]
    H: int,
    W: int,
) -> np.ndarray:
    """
    Convert bounding box to binary mask [H, W].

    Coordinates are truncated to integers and clamped to the image bounds;
    ``x2`` / ``y2`` are exclusive. A box that is empty or inverted after
    clamping yields an all-zero mask.

    Args:
        box: ``[x1, y1, x2, y2]`` as an array or any array-like sequence.
        H: Mask height in pixels.
        W: Mask width in pixels.

    Returns:
        [H, W] float32 mask with 1.0 inside the box and 0.0 elsewhere.
    """
    mask = np.zeros((H, W), dtype=np.float32)
    # asarray lets callers pass plain lists/tuples as well as ndarrays.
    x1, y1, x2, y2 = np.asarray(box).astype(int)
    x1, x2 = np.clip([x1, x2], 0, W)
    y1, y2 = np.clip([y1, y2], 0, H)
    mask[y1:y2, x1:x2] = 1.0
    return mask


def boxes_to_mask_sequence(
    boxes: np.ndarray,  # [T, 4]
    H: int,
    W: int,
) -> np.ndarray:
    """
    Rasterise one box per frame into a stack of masks.

    Args:
        boxes: [T, 4] sequence of ``[x1, y1, x2, y2]`` boxes.
        H: Mask height in pixels.
        W: Mask width in pixels.

    Returns:
        [T, H, W] binary masks (float32).
    """
    T = len(boxes)
    masks = np.zeros((T, H, W), dtype=np.float32)
    for t, box in enumerate(boxes):
        masks[t] = box_to_mask(box, H, W)
    return masks


def expand_box(box: np.ndarray, padding: int = 10) -> np.ndarray:
    """
    Expand box by padding pixels on each side.

    The result is not clamped, so coordinates may become negative or exceed
    the image size.

    Args:
        box: ``[x1, y1, x2, y2]``.
        padding: Number of pixels added on every side.

    Returns:
        Array ``[x1 - padding, y1 - padding, x2 + padding, y2 + padding]``.
    """
    x1, y1, x2, y2 = box
    return np.array([x1 - padding, y1 - padding, x2 + padding, y2 + padding])