# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license
import copy
import time
import cv2
import numpy as np
from boxmot.motion.cmc.base_cmc import BaseCMC
from boxmot.utils import BOXMOT
class ORB(BaseCMC):
def __init__(
self,
feature_detector_threshold: int = 20,
matcher_norm_type: int = cv2.NORM_HAMMING,
scale: float = 0.1,
grayscale: bool = True,
draw_keypoint_matches: bool = False,
align: bool = False
) -> None:
"""Compute the warp matrix from src to dst.
Parameters
----------
feature_detector_threshold: int, optional
The threshold for feature extraction. Defaults to 20.
matcher_norm_type: int, optional
The norm type of the matcher. Defaults to cv2.NORM_HAMMING.
scale: float, optional
Scale ratio. Defaults to 0.1.
grayscale: bool, optional
Whether to transform 3-channel RGB to single-channel grayscale for faster computations.
Defaults to True.
draw_keypoint_matches: bool, optional
Whether to draw keypoint matches on the output image. Defaults to False.
align: bool, optional
Whether to align the images based on keypoint matches. Defaults to False.
"""
self.grayscale = grayscale
self.scale = scale
self.detector = cv2.FastFeatureDetector_create(threshold=feature_detector_threshold)
self.extractor = cv2.ORB_create()
self.matcher = cv2.BFMatcher(matcher_norm_type)
        self.prev_img = None
        self.prev_dets = None
        self.prev_img_aligned = None
        self.draw_keypoint_matches = draw_keypoint_matches
        self.align = align
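
    # Minimal usage sketch (the frames/detections below are placeholders, not
    # part of this module): feed consecutive frames with their detections and
    # receive a 2x3 warp mapping the previous frame into the current one.
    #
    #   cmc = ORB(scale=0.25)
    #   for frame, dets in video:        # hypothetical iterable
    #       H = cmc.apply(frame, dets)   # dets: (N, 4) tlbr boxes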
def apply(self, img: np.ndarray, dets: np.ndarray) -> np.ndarray:
"""Apply ORB-based sparse optical flow to compute the warp matrix.
Parameters
----------
img : ndarray
The input image.
dets : ndarray
Detected bounding boxes in the image.
Returns
-------
ndarray
            The warp matrix from the matched keypoints in the previous image to the current one.
The warp matrix is always 2x3.
"""
H = np.eye(2, 3)
img = self.preprocess(img)
h, w = img.shape
        # generate a mask that suppresses keypoints inside detected (dynamic) objects
mask = self.generate_mask(img, dets, self.scale)
# find static keypoints
keypoints = self.detector.detect(img, mask)
# compute the descriptors
keypoints, descriptors = self.extractor.compute(img, keypoints)
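        # FAST supplies the keypoint locations only; ORB adds rotation-aware
        # binary descriptors on top, which is why the matcher defaults to
        # cv2.NORM_HAMMING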
# handle first frame
if self.prev_img is None:
# Initialize data
self.prev_dets = dets.copy()
self.prev_img = img.copy()
self.prev_keypoints = copy.copy(keypoints)
self.prev_descriptors = copy.copy(descriptors)
return H
        # Match descriptors between the previous and the current frame; either
        # frame may yield no descriptors, in which case there is nothing to match
        if self.prev_descriptors is None or descriptors is None:
            knnMatches = []
        else:
            knnMatches = self.matcher.knnMatch(self.prev_descriptors, descriptors, k=2)
# Handle empty matches case
if len(knnMatches) == 0:
            # Store the current frame's data for the next iteration
            self.prev_img = img.copy()
            self.prev_dets = dets.copy()
            self.prev_keypoints = copy.copy(keypoints)
            self.prev_descriptors = copy.copy(descriptors)
return H
        # filter matches with a ratio test and a spatial-displacement gate
matches = []
spatial_distances = []
max_spatial_distance = 0.25 * np.array([w, h])
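        # Lowe's ratio test: accept a match only when the best distance clearly
        # beats the second best (factor 0.9 here), then additionally require the
        # keypoint displacement to stay within a quarter of the frame size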
        for pair in knnMatches:
            if len(pair) < 2:  # knnMatch can return fewer than k neighbours
                continue
            m, n = pair
            if m.distance < 0.9 * n.distance:
prevKeyPointLocation = self.prev_keypoints[m.queryIdx].pt
currKeyPointLocation = keypoints[m.trainIdx].pt
spatial_distance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
prevKeyPointLocation[1] - currKeyPointLocation[1])
if (np.abs(spatial_distance[0]) < max_spatial_distance[0]) and \
(np.abs(spatial_distance[1]) < max_spatial_distance[1]):
spatial_distances.append(spatial_distance)
matches.append(m)
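        # reject outlier matches: keep only displacements that lie within
        # 2.5 standard deviations of the mean displacement, per axis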
        mean_spatial_distances = np.mean(spatial_distances, 0)
        std_spatial_distances = np.std(spatial_distances, 0)
        inliers = (spatial_distances - mean_spatial_distances) < 2.5 * std_spatial_distances
goodMatches = []
prevPoints = []
currPoints = []
for i in range(len(matches)):
            if inliers[i, 0] and inliers[i, 1]:
goodMatches.append(matches[i])
prevPoints.append(self.prev_keypoints[matches[i].queryIdx].pt)
currPoints.append(keypoints[matches[i].trainIdx].pt)
prevPoints = np.array(prevPoints)
currPoints = np.array(currPoints)
# draw keypoint matches on the output image
if self.draw_keypoint_matches:
            # black out the masked-out (dynamic) regions of the previous frame
            self.prev_img[mask == 0] = 0
self.matches_img = np.hstack((self.prev_img, img))
self.matches_img = cv2.cvtColor(self.matches_img, cv2.COLOR_GRAY2BGR)
W = np.size(self.prev_img, 1)
for m in goodMatches:
prev_pt = np.array(self.prev_keypoints[m.queryIdx].pt, dtype=np.int_)
curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
curr_pt[0] += W
color = np.random.randint(0, 255, (3,))
color = (int(color[0]), int(color[1]), int(color[2]))
self.matches_img = cv2.line(self.matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
self.matches_img = cv2.circle(self.matches_img, prev_pt, 2, tuple(color), -1)
self.matches_img = cv2.circle(self.matches_img, curr_pt, 2, tuple(color), -1)
for det in dets:
det = np.multiply(det, self.scale).astype(int)
start = (det[0] + w, det[1])
end = (det[2] + w, det[3])
self.matches_img = cv2.rectangle(self.matches_img, start, end, (0, 0, 255), 2)
for det in self.prev_dets:
det = np.multiply(det, self.scale).astype(int)
start = (det[0], det[1])
end = (det[2], det[3])
self.matches_img = cv2.rectangle(self.matches_img, start, end, (0, 0, 255), 2)
else:
self.matches_img = None
        # estimate the rigid (rotation, uniform scale, translation) warp matrix
        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
            H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, method=cv2.RANSAC)
            if H is None:  # estimation can fail; fall back to identity
                H = np.eye(2, 3)

            # align in the downscaled coordinate frame, where prev_img lives,
            # before the translation components are rescaled below
            if self.align:
                self.prev_img_aligned = cv2.warpAffine(self.prev_img, H, (w, h), flags=cv2.INTER_LINEAR)

            # upscale the translation part of the warp to the original image size
            if self.scale < 1.0:
                H[0, 2] /= self.scale
                H[1, 2] /= self.scale
        else:
            print('Warning: not enough matching points')
        # Store the current frame's data for the next iteration
        self.prev_img = img.copy()
        self.prev_dets = dets.copy()
        self.prev_keypoints = copy.copy(keypoints)
        self.prev_descriptors = copy.copy(descriptors)
return H
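
# Sketch of how the returned warp is typically consumed downstream (the
# `tracks_xy` array of previous-frame track centers is hypothetical, not part
# of this module):
#
#   R, t = H[:, :2], H[:, 2]
#   compensated_xy = tracks_xy @ R.T + t  # previous centers in current-frame coords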
def main():
orb = ORB(scale=0.5, align=True, grayscale=True, draw_keypoint_matches=False)
curr_img = cv2.imread('assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000005.jpg')
prev_img = cv2.imread('assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000001.jpg')
curr_dets = np.array(
[[1083.8207, 541.5978, 1195.7952, 655.8790], # noqa:E241
[1635.6456, 563.8348, 1695.4153, 686.6704], # noqa:E241
[ 957.0879, 545.6558, 1042.6743, 611.8740], # noqa:E241,E261,E201
[1550.0317, 562.5705, 1600.3931, 684.7425], # noqa:E241
[ 78.8801, 714.3307, 121.0272, 817.6857], # noqa:E241,E261,E201
[1382.9938, 512.2731, 1418.6012, 620.1938], # noqa:E241
[1459.7921, 496.2123, 1488.5767, 584.3533], # noqa:E241
[ 982.9818, 492.8579, 1013.6625, 517.9271], # noqa:E241,E261,E201
[ 496.1809, 541.3972, 531.4617, 638.0989], # noqa:E241,E261,E201
[1498.8512, 522.6646, 1526.1145, 587.7672], # noqa:E241
[ 536.4527, 548.4061, 569.2723, 635.5656], # noqa:E241,E261,E201
[ 247.8834, 580.8851, 287.2241, 735.3685], # noqa:E241,E261,E201
[ 151.4096, 572.3918, 203.5401, 731.1011], # noqa:E241,E261,E201
[1227.4098, 440.5505, 1252.7986, 489.5295]] # noqa:E241
)
prev_dets = np.array(
[[2.1069e-02, 6.7026e+02, 4.9816e+01, 8.8407e+02],
[1.0765e+03, 5.4009e+02, 1.1883e+03, 6.5219e+02],
[1.5208e+03, 5.6322e+02, 1.5711e+03, 6.7676e+02],
[1.6111e+03, 5.5926e+02, 1.6640e+03, 6.7443e+02],
[9.5244e+02, 5.4681e+02, 1.0384e+03, 6.1180e+02],
[1.3691e+03, 5.1258e+02, 1.4058e+03, 6.1695e+02],
[1.2043e+02, 7.0780e+02, 1.7309e+02, 8.0518e+02],
[1.4454e+03, 5.0919e+02, 1.4724e+03, 5.8270e+02],
[9.7848e+02, 4.9563e+02, 1.0083e+03, 5.1980e+02],
[5.0166e+02, 5.4778e+02, 5.3796e+02, 6.3940e+02],
[1.4777e+03, 5.1856e+02, 1.5105e+03, 5.9523e+02],
[1.9540e+02, 5.7292e+02, 2.3711e+02, 7.2717e+02],
[2.7373e+02, 5.8564e+02, 3.1335e+02, 7.3281e+02],
[5.4038e+02, 5.4735e+02, 5.7359e+02, 6.3797e+02],
[1.2190e+03, 4.4176e+02, 1.2414e+03, 4.9038e+02]]
)
warp_matrix = orb.apply(prev_img, prev_dets)
warp_matrix = orb.apply(curr_img, curr_dets)
start = time.process_time()
    for _ in range(100):
warp_matrix = orb.apply(prev_img, prev_dets)
warp_matrix = orb.apply(curr_img, curr_dets)
end = time.process_time()
print('Total time', end - start)
print(warp_matrix)
if orb.prev_img_aligned is not None:
curr_img = orb.preprocess(curr_img)
prev_img = orb.preprocess(prev_img)
weighted_img = cv2.addWeighted(curr_img, 0.5, orb.prev_img_aligned, 0.5, 0)
cv2.imshow('prev_img_aligned', weighted_img)
cv2.waitKey(0)
cv2.imwrite(str(BOXMOT / 'motion/cmc/orb_aligned.jpg'), weighted_img)
if __name__ == "__main__":
main()