# Mikel Broström 🔥 Yolo Tracking 🧾 AGPL-3.0 license

import copy
import time

import cv2
import numpy as np

from boxmot.motion.cmc.base_cmc import BaseCMC
from boxmot.utils import BOXMOT


class ORB(BaseCMC):

    def __init__(
        self,
        feature_detector_threshold: int = 20,
        matcher_norm_type: int = cv2.NORM_HAMMING,
        scale: float = 0.1,
        grayscale: bool = True,
        draw_keypoint_matches: bool = False,
        align: bool = False
    ) -> None:
        """ORB-based camera motion compensation (CMC).

        Parameters
        ----------
        feature_detector_threshold : int, optional
            The threshold for feature extraction. Defaults to 20.
        matcher_norm_type : int, optional
            The norm type of the matcher. Defaults to cv2.NORM_HAMMING.
        scale : float, optional
            Scale ratio. Defaults to 0.1.
        grayscale : bool, optional
            Whether to transform 3-channel RGB to single-channel grayscale
            for faster computation. Defaults to True.
        draw_keypoint_matches : bool, optional
            Whether to draw keypoint matches on the output image. Defaults to False.
        align : bool, optional
            Whether to align the images based on keypoint matches. Defaults to False.
        """
        self.grayscale = grayscale
        self.scale = scale
        self.detector = cv2.FastFeatureDetector_create(threshold=feature_detector_threshold)
        self.extractor = cv2.ORB_create()
        self.matcher = cv2.BFMatcher(matcher_norm_type)
        self.prev_img = None
        # only populated when align=True and enough matches are found
        self.prev_img_aligned = None
        self.draw_keypoint_matches = draw_keypoint_matches
        self.align = align
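
    # Illustrative helper, not part of the original class: a minimal sketch of
    # the FAST-detect / ORB-describe step that apply() performs on every frame.
    # The name `_detect_and_describe` is an assumption for demonstration only.
    def _detect_and_describe(self, img: np.ndarray, mask: np.ndarray = None):
        """Detect FAST keypoints and compute their ORB descriptors (hypothetical helper)."""
        # detect corners only in the unmasked (static background) regions
        keypoints = self.detector.detect(img, mask)
        # describe each surviving keypoint with a binary ORB descriptor
        keypoints, descriptors = self.extractor.compute(img, keypoints)
        return keypoints, descriptors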

    def apply(self, img: np.ndarray, dets: np.ndarray) -> np.ndarray:
        """Apply ORB-based sparse optical flow to compute the warp matrix.

        Parameters
        ----------
        img : ndarray
            The input image.
        dets : ndarray
            Detected bounding boxes in the image.

        Returns
        -------
        ndarray
            The 2x3 warp matrix mapping the matched keypoints in the previous
            image to the current one.
        """
        H = np.eye(2, 3)

        img = self.preprocess(img)
        h, w = img.shape

        # generate dynamic object mask
        mask = self.generate_mask(img, dets, self.scale)

        # find static keypoints
        keypoints = self.detector.detect(img, mask)

        # compute the descriptors
        keypoints, descriptors = self.extractor.compute(img, keypoints)

        # handle first frame
        if self.prev_img is None:
            # Initialize data
            self.prev_dets = dets.copy()
            self.prev_img = img.copy()
            self.prev_keypoints = copy.copy(keypoints)
            self.prev_descriptors = copy.copy(descriptors)
            return H

        # Match descriptors
        knnMatches = self.matcher.knnMatch(self.prev_descriptors, descriptors, k=2)

        # Handle empty matches case
        if len(knnMatches) == 0:
            # Store to next iteration
            self.prev_dets = dets.copy()
            self.prev_img = img.copy()
            self.prev_keypoints = copy.copy(keypoints)
            self.prev_descriptors = copy.copy(descriptors)
            return H

        # filter matches with Lowe's ratio test and a spatial gate:
        # a valid match must not move more than a quarter of the frame
        matches = []
        spatial_distances = []
        max_spatial_distance = 0.25 * np.array([w, h])

        for match_pair in knnMatches:
            # knnMatch may return fewer than k matches for some queries
            if len(match_pair) < 2:
                continue
            m, n = match_pair
            if m.distance < 0.9 * n.distance:
                prevKeyPointLocation = self.prev_keypoints[m.queryIdx].pt
                currKeyPointLocation = keypoints[m.trainIdx].pt

                spatial_distance = (prevKeyPointLocation[0] - currKeyPointLocation[0],
                                    prevKeyPointLocation[1] - currKeyPointLocation[1])

                if (np.abs(spatial_distance[0]) < max_spatial_distance[0]) and \
                   (np.abs(spatial_distance[1]) < max_spatial_distance[1]):
                    spatial_distances.append(spatial_distance)
                    matches.append(m)

        # keep matches whose displacement does not exceed the mean
        # by more than 2.5 standard deviations
        mean_spatial_distances = np.mean(spatial_distances, 0)
        std_spatial_distances = np.std(spatial_distances, 0)
        inliers = (spatial_distances - mean_spatial_distances) < 2.5 * std_spatial_distances

        goodMatches = []
        prevPoints = []
        currPoints = []
        for i in range(len(matches)):
            if inliers[i, 0] and inliers[i, 1]:
                goodMatches.append(matches[i])
                prevPoints.append(self.prev_keypoints[matches[i].queryIdx].pt)
                currPoints.append(keypoints[matches[i].trainIdx].pt)

        prevPoints = np.array(prevPoints)
        currPoints = np.array(currPoints)

        # draw keypoint matches on the output image
        if self.draw_keypoint_matches:
            self.prev_img[:, :][mask == True] = 0  # noqa:E712
            self.matches_img = np.hstack((self.prev_img, img))
            self.matches_img = cv2.cvtColor(self.matches_img, cv2.COLOR_GRAY2BGR)
            W = np.size(self.prev_img, 1)
            for m in goodMatches:
                prev_pt = np.array(self.prev_keypoints[m.queryIdx].pt, dtype=np.int_)
                curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)
                curr_pt[0] += W
                color = np.random.randint(0, 255, (3,))
                color = (int(color[0]), int(color[1]), int(color[2]))
                self.matches_img = cv2.line(self.matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)
                self.matches_img = cv2.circle(self.matches_img, prev_pt, 2, tuple(color), -1)
                self.matches_img = cv2.circle(self.matches_img, curr_pt, 2, tuple(color), -1)
            for det in dets:
                det = np.multiply(det, self.scale).astype(int)
                start = (det[0] + w, det[1])
                end = (det[2] + w, det[3])
                self.matches_img = cv2.rectangle(self.matches_img, start, end, (0, 0, 255), 2)
            for det in self.prev_dets:
                det = np.multiply(det, self.scale).astype(int)
                start = (det[0], det[1])
                end = (det[2], det[3])
                self.matches_img = cv2.rectangle(self.matches_img, start, end, (0, 0, 255), 2)
        else:
            self.matches_img = None

        # find rigid matrix
        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):
            H, inliers = cv2.estimateAffinePartial2D(prevPoints, currPoints, method=cv2.RANSAC)

            # upscale warp matrix to original image size
            if self.scale < 1.0:
                H[0, 2] /= self.scale
                H[1, 2] /= self.scale

            if self.align:
                self.prev_img_aligned = cv2.warpAffine(self.prev_img, H, (w, h), flags=cv2.INTER_LINEAR)
        else:
            print('Warning: not enough matching points')

        # Store to next iteration
        self.prev_dets = dets.copy()
        self.prev_img = img.copy()
        self.prev_keypoints = copy.copy(keypoints)
        self.prev_descriptors = copy.copy(descriptors)

        return H
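

# Illustrative sketch, not part of the original module: one way a tracker
# could use the 2x3 warp matrix returned by ORB.apply() to compensate
# previous-frame boxes for camera motion. `warp_bboxes` is a hypothetical
# helper; boxes are assumed to be in (x1, y1, x2, y2) format, and warping
# only the two corners is an axis-aligned approximation under rotation.
def warp_bboxes(bboxes: np.ndarray, warp_matrix: np.ndarray) -> np.ndarray:
    """Map (x1, y1, x2, y2) boxes through a 2x3 affine warp (hypothetical helper)."""
    R = warp_matrix[:, :2]  # 2x2 rotation/scale block
    t = warp_matrix[:, 2]   # translation vector
    warped = bboxes.astype(float).copy()
    warped[:, 0:2] = bboxes[:, 0:2] @ R.T + t  # warp top-left corners
    warped[:, 2:4] = bboxes[:, 2:4] @ R.T + t  # warp bottom-right corners
    return warped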


def main():
    orb = ORB(scale=0.5, align=True, grayscale=True, draw_keypoint_matches=False)
    curr_img = cv2.imread('assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000005.jpg')
    prev_img = cv2.imread('assets/MOT17-mini/train/MOT17-13-FRCNN/img1/000001.jpg')
    curr_dets = np.array(
        [[1083.8207,  541.5978, 1195.7952,  655.8790],  # noqa:E241
         [1635.6456,  563.8348, 1695.4153,  686.6704],  # noqa:E241
         [ 957.0879,  545.6558, 1042.6743,  611.8740],  # noqa:E241,E201
         [1550.0317,  562.5705, 1600.3931,  684.7425],  # noqa:E241
         [  78.8801,  714.3307,  121.0272,  817.6857],  # noqa:E241,E201
         [1382.9938,  512.2731, 1418.6012,  620.1938],  # noqa:E241
         [1459.7921,  496.2123, 1488.5767,  584.3533],  # noqa:E241
         [ 982.9818,  492.8579, 1013.6625,  517.9271],  # noqa:E241,E201
         [ 496.1809,  541.3972,  531.4617,  638.0989],  # noqa:E241,E201
         [1498.8512,  522.6646, 1526.1145,  587.7672],  # noqa:E241
         [ 536.4527,  548.4061,  569.2723,  635.5656],  # noqa:E241,E201
         [ 247.8834,  580.8851,  287.2241,  735.3685],  # noqa:E241,E201
         [ 151.4096,  572.3918,  203.5401,  731.1011],  # noqa:E241,E201
         [1227.4098,  440.5505, 1252.7986,  489.5295]]  # noqa:E241
    )
    prev_dets = np.array(
        [[2.1069e-02, 6.7026e+02, 4.9816e+01, 8.8407e+02],
         [1.0765e+03, 5.4009e+02, 1.1883e+03, 6.5219e+02],
         [1.5208e+03, 5.6322e+02, 1.5711e+03, 6.7676e+02],
         [1.6111e+03, 5.5926e+02, 1.6640e+03, 6.7443e+02],
         [9.5244e+02, 5.4681e+02, 1.0384e+03, 6.1180e+02],
         [1.3691e+03, 5.1258e+02, 1.4058e+03, 6.1695e+02],
         [1.2043e+02, 7.0780e+02, 1.7309e+02, 8.0518e+02],
         [1.4454e+03, 5.0919e+02, 1.4724e+03, 5.8270e+02],
         [9.7848e+02, 4.9563e+02, 1.0083e+03, 5.1980e+02],
         [5.0166e+02, 5.4778e+02, 5.3796e+02, 6.3940e+02],
         [1.4777e+03, 5.1856e+02, 1.5105e+03, 5.9523e+02],
         [1.9540e+02, 5.7292e+02, 2.3711e+02, 7.2717e+02],
         [2.7373e+02, 5.8564e+02, 3.1335e+02, 7.3281e+02],
         [5.4038e+02, 5.4735e+02, 5.7359e+02, 6.3797e+02],
         [1.2190e+03, 4.4176e+02, 1.2414e+03, 4.9038e+02]]
    )

    # warm up the CMC on one frame pair
    warp_matrix = orb.apply(prev_img, prev_dets)
    warp_matrix = orb.apply(curr_img, curr_dets)

    # benchmark 100 alternating frame pairs
    start = time.process_time()
    for _ in range(100):
        warp_matrix = orb.apply(prev_img, prev_dets)
        warp_matrix = orb.apply(curr_img, curr_dets)
    end = time.process_time()
    print('Total time', end - start)
    print(warp_matrix)

    if orb.prev_img_aligned is not None:
        curr_img = orb.preprocess(curr_img)
        prev_img = orb.preprocess(prev_img)
        weighted_img = cv2.addWeighted(curr_img, 0.5, orb.prev_img_aligned, 0.5, 0)
        cv2.imshow('prev_img_aligned', weighted_img)
        cv2.waitKey(0)
        cv2.imwrite(str(BOXMOT / 'motion/cmc/orb_aligned.jpg'), weighted_img)


if __name__ == "__main__":
    main()
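

# Illustrative sketch, not part of the original script: running the ORB CMC on
# a video stream instead of the two still frames in main(). The video path and
# the empty (0, 4) detection array are assumptions for demonstration only; with
# no detections, the whole frame is treated as static background.
def run_on_video(path: str = 'video.mp4') -> None:
    orb = ORB(scale=0.5, grayscale=True)
    cap = cv2.VideoCapture(path)
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        # warp matrix from the previous frame to the current one
        H = orb.apply(frame, np.empty((0, 4)))
        print(H)
    cap.release()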