# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license

import copy
import time

import cv2
import numpy as np

from boxmot.motion.cmc.base_cmc import BaseCMC
from boxmot.utils import BOXMOT


class ORB(BaseCMC):

    def __init__(
        self,
        feature_detector_threshold: int = 20,
        matcher_norm_type: int = cv2.NORM_HAMMING,
        scale: float = 0.1,
        grayscale: bool = True,
        draw_keypoint_matches: bool = False,
        align: bool = False
    ) -> None:
        """ORB-based camera motion compensation (CMC).

        Parameters
        ----------
        feature_detector_threshold : int, optional
            The threshold for feature extraction. Defaults to 20.
        matcher_norm_type : int, optional
            The norm type of the matcher. Defaults to cv2.NORM_HAMMING.
        scale : float, optional
            Scale ratio. Defaults to 0.1.
        grayscale : bool, optional
            Whether to transform 3-channel RGB to single-channel grayscale
            for faster computation. Defaults to True.
        draw_keypoint_matches : bool, optional
            Whether to draw keypoint matches on the output image. Defaults to False.
        align : bool, optional
            Whether to align the images based on keypoint matches. Defaults to False.
        """
        self.grayscale = grayscale
        self.scale = scale
        self.detector = cv2.FastFeatureDetector_create(threshold=feature_detector_threshold)
        self.extractor = cv2.ORB_create()
        self.matcher = cv2.BFMatcher(matcher_norm_type)
        self.prev_img = None
        # only populated when align=True and enough matches are found
        self.prev_img_aligned = None
        self.draw_keypoint_matches = draw_keypoint_matches
        self.align = align
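
    # Illustrative helper, not part of the original class: a minimal sketch of
    # the FAST-detect / ORB-describe step that apply() performs on every frame.
    # The name `_detect_and_describe` is an assumption for demonstration only.
    def _detect_and_describe(self, img: np.ndarray, mask: np.ndarray = None):
        """Detect FAST keypoints and compute their ORB descriptors (hypothetical helper)."""
        # detect corners only in the unmasked (static background) regions
        keypoints = self.detector.detect(img, mask)
        # describe each surviving keypoint with a binary ORB descriptor
        keypoints, descriptors = self.extractor.compute(img, keypoints)
        return keypoints, descriptors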

    def apply(self, img: np.ndarray, dets: np.ndarray) -> np.ndarray:
        """Apply ORB-based sparse optical flow to compute the warp matrix.

        Parameters
        ----------
        img : ndarray
            The input image.
        dets : ndarray
            Detected bounding boxes in the image.

        Returns
        -------
        ndarray
            The 2x3 warp matrix mapping the matched keypoints in the previous
            image to the current one.
        """
        H = np.eye(2, 3)

        img = self.preprocess(img)
        h, w = img.shape

        # generate dynamic object mask
        mask = self.generate_mask(img, dets, self.scale)

        # find static keypoints
        keypoints = self.detector.detect(img, mask)

        # compute the descriptors
        keypoints, descriptors = self.extractor.compute(img, keypoints)

        # handle first frame
        if self.prev_img is None:
            # Initialize data
            self.prev_dets = dets.copy()
            self.prev_img = img.copy()
            self.prev_keypoints = copy.copy(keypoints)
            self.prev_descriptors = copy.copy(descriptors)
            return H

        # Match descriptors
        knnMatches = self.matcher.knnMatch(self.prev_descriptors, descriptors, k=2)

        # Handle empty matches case
        if len(knnMatches) == 0:
            # Store to next iteration
            self.prev_dets = dets.copy()
            self.prev_img = img.copy()
            self.prev_keypoints = copy.copy(keypoints)
            self.prev_descriptors = copy.copy(descriptors)
            return H

        # filter matches with Lowe's ratio test and a spatial gate:
        # a valid match must not move more than a quarter of the frame
        matches = []
        spatial_distances = []
        max_spatial_distance = 0.25 * np.array([w, h])

        for match_pair in knnMatches:
            # knnMatch may return fewer than k matches for some queries
            if len(match_pair) < 2:
                continue
            m, n = match_pair
            if m.distance < 0.9 * n.distance:
                prevKeyPointLocation = self.prev_keypoints[m.queryIdx].pt
                currKeyPointLocation = keypoints[m.trainIdx].pt

                spatial_distance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
                                    prevKeyPointLocation[1] - currKeyPointLocation[1])

                if (np.abs(spatial_distance[0]) < max_spatial_distance[0]) and \
                   (np.abs(spatial_distance[1]) < max_spatial_distance[1]):
                    spatial_distances.append(spatial_distance)
                    matches.append(m)

        # keep matches whose displacement does not exceed the mean
        # by more than 2.5 standard deviations
        mean_spatial_distances = np.mean(spatial_distances, 0)
        std_spatial_distances = np.std(spatial_distances, 0)
        inliers = (spatial_distances - mean_spatial_distances) < 2.5 * std_spatial_distances

        goodMatches = []
        prevPoints = []
        currPoints = []
        for i in range(len(matches)):
            if inliers[i, 0] and inliers[i, 1]:
                goodMatches.append(matches[i])
                prevPoints.append(self.prev_keypoints[matches[i].queryIdx].pt)
                currPoints.append(keypoints[matches[i].trainIdx].pt)

        prevPoints = np.array(prevPoints)
        currPoints = np.array(currPoints)

        # draw keypoint matches on the output image
        if self.draw_keypoint_matches:
            self.prev_img[:, :][mask == True] = 0  # noqa:E712
            self.matches_img = np.hstack((self.prev_img, img))
            self.matches_img = cv2.cvtColor(self.matches_img, cv2.COLOR_GRAY2BGR)
            W = np.size(self.prev_img, 1)
            for m in goodMatches:
                prev_pt = np.array(self.prev_keypoints[m.queryIdx].pt, dtype=np.int_)
                curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
                curr_pt[0] += W
                color = np.random.randint(0, 255, (3,))
                color = (int(color[0]), int(color[1]), int(color[2]))
                self.matches_img = cv2.line(self.matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
                self.matches_img = cv2.circle(self.matches_img, prev_pt, 2, tuple(color), -1)
                self.matches_img = cv2.circle(self.matches_img, curr_pt, 2, tuple(color), -1)
            for det in dets:
                det = np.multiply(det, self.scale).astype(int)
                start = (det[0] + w, det[1])
                end = (det[2] + w, det[3])
                self.matches_img = cv2.rectangle(self.matches_img, start, end, (0, 0, 255), 2)
            for det in self.prev_dets:
                det = np.multiply(det, self.scale).astype(int)
                start = (det[0], det[1])
                end = (det[2], det[3])
                self.matches_img = cv2.rectangle(self.matches_img, start, end, (0, 0, 255), 2)
        else:
            self.matches_img = None

        # find rigid matrix
        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
            H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, method=cv2.RANSAC)

            # upscale warp matrix to original image size
            if self.scale < 1.0:
                H[0, 2] /= self.scale
                H[1, 2] /= self.scale

            if self.align:
                self.prev_img_aligned = cv2.warpAffine(self.prev_img, H, (w, h), flags=cv2.INTER_LINEAR)
        else:
            print('Warning: not enough matching points')

        # Store to next iteration
        self.prev_dets = dets.copy()
        self.prev_img = img.copy()
        self.prev_keypoints = copy.copy(keypoints)
        self.prev_descriptors = copy.copy(descriptors)

        return H
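

# Illustrative sketch, not part of the original module: one way a tracker
# could use the 2x3 warp matrix returned by ORB.apply() to compensate
# previous-frame boxes for camera motion. `warp_bboxes` is a hypothetical
# helper; boxes are assumed to be in (x1, y1, x2, y2) format, and warping
# only the two corners is an axis-aligned approximation under rotation.
def warp_bboxes(bboxes: np.ndarray, warp_matrix: np.ndarray) -> np.ndarray:
    """Map (x1, y1, x2, y2) boxes through a 2x3 affine warp (hypothetical helper)."""
    R = warp_matrix[:, :2]  # 2x2 rotation/scale block
    t = warp_matrix[:, 2]   # translation vector
    warped = bboxes.astype(float).copy()
    warped[:, 0:2] = bboxes[:, 0:2] @ R.T + t  # warp top-left corners
    warped[:, 2:4] = bboxes[:, 2:4] @ R.T + t  # warp bottom-right corners
    return warped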


def main():
    orb = ORB(scale=0.5, align=True, grayscale=True, draw_keypoint_matches=False)
    curr_img = cv2.imread('assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000005.jpg')
    prev_img = cv2.imread('assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000001.jpg')
    curr_dets = np.array(
        [[1083.8207,  541.5978, 1195.7952,  655.8790],  # noqa:E241
         [1635.6456,  563.8348, 1695.4153,  686.6704],  # noqa:E241
         [ 957.0879,  545.6558, 1042.6743,  611.8740],  # noqa:E241,E201
         [1550.0317,  562.5705, 1600.3931,  684.7425],  # noqa:E241
         [  78.8801,  714.3307,  121.0272,  817.6857],  # noqa:E241,E201
         [1382.9938,  512.2731, 1418.6012,  620.1938],  # noqa:E241
         [1459.7921,  496.2123, 1488.5767,  584.3533],  # noqa:E241
         [ 982.9818,  492.8579, 1013.6625,  517.9271],  # noqa:E241,E201
         [ 496.1809,  541.3972,  531.4617,  638.0989],  # noqa:E241,E201
         [1498.8512,  522.6646, 1526.1145,  587.7672],  # noqa:E241
         [ 536.4527,  548.4061,  569.2723,  635.5656],  # noqa:E241,E201
         [ 247.8834,  580.8851,  287.2241,  735.3685],  # noqa:E241,E201
         [ 151.4096,  572.3918,  203.5401,  731.1011],  # noqa:E241,E201
         [1227.4098,  440.5505, 1252.7986,  489.5295]]  # noqa:E241
    )
    prev_dets = np.array(
        [[2.1069e-02, 6.7026e+02, 4.9816e+01, 8.8407e+02],
         [1.0765e+03, 5.4009e+02, 1.1883e+03, 6.5219e+02],
         [1.5208e+03, 5.6322e+02, 1.5711e+03, 6.7676e+02],
         [1.6111e+03, 5.5926e+02, 1.6640e+03, 6.7443e+02],
         [9.5244e+02, 5.4681e+02, 1.0384e+03, 6.1180e+02],
         [1.3691e+03, 5.1258e+02, 1.4058e+03, 6.1695e+02],
         [1.2043e+02, 7.0780e+02, 1.7309e+02, 8.0518e+02],
         [1.4454e+03, 5.0919e+02, 1.4724e+03, 5.8270e+02],
         [9.7848e+02, 4.9563e+02, 1.0083e+03, 5.1980e+02],
         [5.0166e+02, 5.4778e+02, 5.3796e+02, 6.3940e+02],
         [1.4777e+03, 5.1856e+02, 1.5105e+03, 5.9523e+02],
         [1.9540e+02, 5.7292e+02, 2.3711e+02, 7.2717e+02],
         [2.7373e+02, 5.8564e+02, 3.1335e+02, 7.3281e+02],
         [5.4038e+02, 5.4735e+02, 5.7359e+02, 6.3797e+02],
         [1.2190e+03, 4.4176e+02, 1.2414e+03, 4.9038e+02]]
    )

    # warm up the CMC on one frame pair
    warp_matrix = orb.apply(prev_img, prev_dets)
    warp_matrix = orb.apply(curr_img, curr_dets)

    # benchmark 100 alternating frame pairs
    start = time.process_time()
    for _ in range(100):
        warp_matrix = orb.apply(prev_img, prev_dets)
        warp_matrix = orb.apply(curr_img, curr_dets)
    end = time.process_time()
    print('Total time', end - start)
    print(warp_matrix)

    if orb.prev_img_aligned is not None:
        curr_img = orb.preprocess(curr_img)
        prev_img = orb.preprocess(prev_img)
        weighted_img = cv2.addWeighted(curr_img, 0.5, orb.prev_img_aligned, 0.5, 0)
        cv2.imshow('prev_img_aligned', weighted_img)
        cv2.waitKey(0)
        cv2.imwrite(str(BOXMOT / 'motion/cmc/orb_aligned.jpg'), weighted_img)


if __name__ == "__main__":
    main()
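

# Illustrative sketch, not part of the original script: running the ORB CMC on
# a video stream instead of the two still frames in main(). The video path and
# the empty (0, 4) detection array are assumptions for demonstration only; with
# no detections, the whole frame is treated as static background.
def run_on_video(path: str = 'video.mp4') -> None:
    orb = ORB(scale=0.5, grayscale=True)
    cap = cv2.VideoCapture(path)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        # warp matrix from the previous frame to the current one
        H = orb.apply(frame, np.empty((0, 4)))
        print(H)
    cap.release()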