Spaces:

nishanth-saka
/

dominant-flow-tracker

Sleeping

App Files Files Community

minimal surgery

#13

by nishanth-saka - opened Nov 19, 2025

base: refs/heads/main

←

from: refs/pr/13

Discussion Files changed

+134

-30

Files changed (1) hide show

app.py +134 -30

app.py CHANGED Viewed

@@ -23,6 +23,61 @@ model = YOLO(MODEL_PATH)
 VEHICLE_CLASSES = [2, 3, 5, 7]  # car, motorcycle, bus, truck
 # ---------------------------------------------------------
 # 🔍 SIMPLE KALMAN TRACKER
 # ---------------------------------------------------------
@@ -38,23 +93,28 @@ class Track:
                               [0,1,0,0]])
         self.kf.P *= 1000.0
         self.kf.R *= 10.0
-        self.kf.x[:2] = np.array(self.get_centroid(bbox)).reshape(2,1)
         self.trace = []
-    def get_centroid(self,bbox):
-        x1,y1,x2,y2 = bbox
-        return [(x1+x2)/2,(y1+y2)/2]
     def predict(self):
         self.kf.predict()
         return self.kf.x[:2].reshape(2)
-    def update(self,bbox):
         z = np.array(self.get_centroid(bbox)).reshape(2,1)
         self.kf.update(z)
-        cx,cy = self.kf.x[:2].reshape(2)
-        self.trace.append((float(cx),float(cy)))
-        return (cx,cy)
 # ---------------------------------------------------------
@@ -75,7 +135,13 @@ def process_video(video_path):
     frame_count = 0
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    pbar = tqdm(total=total_frames if total_frames>0 else 100, desc="Processing")
     while True:
         ret, frame = cap.read()
         if not ret:
@@ -94,22 +160,49 @@ def process_video(video_path):
         predicted = [trk.predict() for trk in tracks]
         predicted = np.array(predicted) if predicted else np.empty((0,2))
-        # --- ASSIGN DETECTIONS ---
         assigned = set()
         if len(predicted) > 0 and len(detections) > 0:
-            cost = np.zeros((len(predicted), len(detections)))
-            for i, trk in enumerate(predicted):
                 for j, det in enumerate(detections):
-                    cx, cy = ((det[0]+det[2])/2, (det[1]+det[3])/2)
-                    cost[i, j] = np.linalg.norm(trk - np.array([cx, cy]))
             row_ind, col_ind = linear_sum_assignment(cost)
             for r, c in zip(row_ind, col_ind):
-                if cost[r, c] < 80:  # distance threshold
                     assigned.add(c)
                     tracks[r].update(detections[c])
-        # --- NEW TRACKS ---
-        for j, det in enumerate(detections):
             if j not in assigned:
                 trk = Track(det, next_id)
                 next_id += 1
@@ -118,15 +211,17 @@ def process_video(video_path):
         # --- DRAW OUTPUT ---
         for trk in tracks:
-            if len(trk.trace) < 2:
                 continue
-            x,y = map(int,trk.trace[-1])
-            cv2.circle(frame,(x,y),3,(0,255,0),-1)
-            cv2.putText(frame,f"ID:{trk.id}",(x-10,y-10),cv2.FONT_HERSHEY_SIMPLEX,0.4,(0,255,0),1)
-            for i in range(1,len(trk.trace)):
-                cv2.line(frame,(int(trk.trace[i-1][0]),int(trk.trace[i-1][1])),
-                                (int(trk.trace[i][0]),int(trk.trace[i][1])),
-                                (0,255,0),1)
             trajectories[trk.id] = trk.trace
         out.write(frame)
@@ -161,13 +256,17 @@ def run_app(video_file):
     out_path, json_path = process_video(temp_path)
     end = time.time()
     summary = {
-        "total_time_sec": round(end-start,1),
-        "num_tracks": len(json.load(open(json_path))),
-        "avg_fps": round(cv2.VideoCapture(temp_path).get(cv2.CAP_PROP_FPS),2)
     }
-    return out_path, json.load(open(json_path)), summary
 # ---------------------------------------------------------
@@ -180,6 +279,11 @@ This app detects & tracks vehicles using YOLOv8 + Kalman Filter, and outputs:
 - Annotated tracking video
 - JSON trajectories
 - Summary stats for dominant-flow analysis
 """
 example_video = "assets/examples/sample1.mp4" if os.path.exists("assets/examples/sample1.mp4") else None
@@ -198,4 +302,4 @@ demo = gr.Interface(
 )
 if __name__ == "__main__":
-    demo.launch()

 VEHICLE_CLASSES = [2, 3, 5, 7]  # car, motorcycle, bus, truck
+# ---------------------------------------------------------
+# 🔧 HELPER FUNCTIONS
+# ---------------------------------------------------------
def bbox_centroid(bbox):
    """Return the centre point of an xyxy bounding box.

    Args:
        bbox: Four values ``(x1, y1, x2, y2)``; any iterable that unpacks
            into exactly four numbers works (tuple, list, 1-D array).

    Returns:
        ``(cx, cy)``: the midpoints of the box's x and y extents.
    """
    x1, y1, x2, y2 = bbox
    return ((x1 + x2) / 2.0, (y1 + y2) / 2.0)
def iou(boxA, boxB):
    """Compute the intersection-over-union of two xyxy boxes.

    Args:
        boxA: First box as ``(x1, y1, x2, y2)``.
        boxB: Second box as ``(x1, y1, x2, y2)``.

    Returns:
        The IoU as a plain Python ``float``. ``0.0`` is returned when the
        boxes do not overlap or when the union area is not positive.
    """
    # Overlap rectangle: the innermost of each pair of opposing edges.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Negative extents mean the boxes are disjoint along that axis.
    interArea = max(0, xB - xA) * max(0, yB - yA)
    if interArea <= 0:
        return 0.0

    # Clamp each side so a malformed (inverted) box contributes zero area.
    boxAArea = max(0, boxA[2] - boxA[0]) * max(0, boxA[3] - boxA[1])
    boxBArea = max(0, boxB[2] - boxB[0]) * max(0, boxB[3] - boxB[1])
    denom = float(boxAArea + boxBArea - interArea)
    if denom <= 0:
        return 0.0

    # float() keeps the return type uniform even when the inputs are
    # NumPy arrays (which would otherwise yield a NumPy scalar).
    return float(interArea / denom)
def direction_penalty(track, det_cx, det_cy, lambda_dir=30.0):
    """Penalize assignments that imply a big direction flip.

    The track's most recent displacement (last two trace points) is compared
    with the displacement that would result from moving to the candidate
    detection centroid.

    Args:
        track: Object exposing ``trace``, a sequence of ``(x, y)`` points.
        det_cx: Candidate detection centroid x.
        det_cy: Candidate detection centroid y.
        lambda_dir: Scale of the penalty.

    Returns:
        ``(1 - cos_sim) * lambda_dir``: 0 for the same direction,
        ``lambda_dir`` for a right-angle turn, ``2 * lambda_dir`` for a full
        reversal. ``0.0`` when the trace has fewer than two points or either
        displacement is shorter than 1e-3 (direction is undefined).
    """
    if len(track.trace) < 2:
        return 0.0

    x_prev, y_prev = track.trace[-2]
    x_last, y_last = track.trace[-1]
    v_prev = np.array([x_last - x_prev, y_last - y_prev], dtype=np.float32)
    v_new = np.array([det_cx - x_last, det_cy - y_last], dtype=np.float32)

    norm_prev = np.linalg.norm(v_prev)
    norm_new = np.linalg.norm(v_new)
    if norm_prev < 1e-3 or norm_new < 1e-3:
        return 0.0

    # Both norms are >= 1e-3 here, so the division is safe without an
    # additive epsilon; an epsilon would shrink the cosine for slow-moving
    # tracks and charge a penalty to perfectly aligned motion.
    cos_sim = float(np.dot(v_prev, v_new) / (norm_prev * norm_new))
    # Rounding can push the ratio marginally outside [-1, 1]; clamp so the
    # penalty never goes negative.
    cos_sim = max(-1.0, min(1.0, cos_sim))
    return (1.0 - cos_sim) * lambda_dir
 # ---------------------------------------------------------
 # 🔍 SIMPLE KALMAN TRACKER
 # ---------------------------------------------------------
                               [0,1,0,0]])
         self.kf.P *= 1000.0
         self.kf.R *= 10.0
+        cx, cy = bbox_centroid(bbox)
+        self.kf.x[:2] = np.array([[cx],[cy]])
         self.trace = []
+        self.bbox = np.array(bbox, dtype=np.float32)  # store last bbox
+    def get_centroid(self, bbox):
+        return bbox_centroid(bbox)
     def predict(self):
         """Run the filter's predict step and return the first two state entries.

         Returns:
             A flat length-2 array taken from ``self.kf.x[:2]`` after the
             predict call (presumably the estimated centroid; confirm against
             the filter setup).
         """
         kalman = self.kf
         kalman.predict()
         leading_state = kalman.x[:2]
         return leading_state.reshape(2)
+    def update(self, bbox):
+        """Update KF with new bbox measurement and store trace + bbox."""
+        self.bbox = np.array(bbox, dtype=np.float32)
         z = np.array(self.get_centroid(bbox)).reshape(2,1)
         self.kf.update(z)
+        cx, cy = self.kf.x[:2].reshape(2)
+        self.trace.append((float(cx), float(cy)))
+        return (cx, cy)
 # ---------------------------------------------------------
     frame_count = 0
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    pbar = tqdm(total=total_frames if total_frames > 0 else 100, desc="Processing")
+    # Matching hyperparameters
+    MAX_DIST = 120.0           # hard gate on centroid distance
+    LAMBDA_IOU = 20.0          # weight for IoU term in cost
+    MIN_IOU_FOR_BONUS = 0.05   # if IoU below this, essentially no bonus
     while True:
         ret, frame = cap.read()
         if not ret:
         predicted = [trk.predict() for trk in tracks]
         predicted = np.array(predicted) if predicted else np.empty((0,2))
         assigned = set()
+        # --- ASSIGN DETECTIONS TO TRACKS ---
         if len(predicted) > 0 and len(detections) > 0:
+            detections = np.array(detections, dtype=np.float32)
+            cost = np.full((len(predicted), len(detections)), 1e6, dtype=np.float32)
+            for i, pred_centroid in enumerate(predicted):
+                trk = tracks[i]
                 for j, det in enumerate(detections):
+                    cx, cy = bbox_centroid(det)
+                    dist = np.linalg.norm(pred_centroid - np.array([cx, cy], dtype=np.float32))
+                    # Hard distance gate: don't allow crazy jumps
+                    if dist > MAX_DIST:
+                        continue
+                    # IoU term – prefer boxes overlapping the previous one
+                    if trk.bbox is not None:
+                        iou_val = iou(trk.bbox, det)
+                    else:
+                        iou_val = 0.0
+                    if iou_val < MIN_IOU_FOR_BONUS:
+                        iou_val = 0.0
+                    dir_pen = direction_penalty(trk, cx, cy, lambda_dir=30.0)
+                    # Final cost: lower is better
+                    #   - dist drives proximity
+                    #   - (1 - iou_val) penalizes mismatched shapes/positions
+                    #   - dir_pen penalizes sudden direction flips
+                    cost[i, j] = dist + (1.0 - iou_val) * LAMBDA_IOU + dir_pen
             row_ind, col_ind = linear_sum_assignment(cost)
             for r, c in zip(row_ind, col_ind):
+                # Reject matches that are still effectively "too bad"
+                if cost[r, c] < 1e5:  # anything left at 1e6 was invalid
                     assigned.add(c)
                     tracks[r].update(detections[c])
+        # --- NEW TRACKS FOR UNASSIGNED DETECTIONS ---
+        for j, det in enumerate(detections if len(predicted) > 0 else detections):
             if j not in assigned:
                 trk = Track(det, next_id)
                 next_id += 1
         # --- DRAW OUTPUT ---
         for trk in tracks:
+            if len(trk.trace) < 2:
                 continue
+            x, y = map(int, trk.trace[-1])
+            cv2.circle(frame, (x, y), 3, (0, 255, 0), -1)
+            cv2.putText(frame, f"ID:{trk.id}", (x - 10, y - 10),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
+            for i in range(1, len(trk.trace)):
+                cv2.line(frame,
+                         (int(trk.trace[i-1][0]), int(trk.trace[i-1][1])),
+                         (int(trk.trace[i][0]),   int(trk.trace[i][1])),
+                         (0, 255, 0), 1)
             trajectories[trk.id] = trk.trace
         out.write(frame)
     out_path, json_path = process_video(temp_path)
     end = time.time()
+    with open(json_path, "r") as f:
+        traj_data = json.load(f)
+    # avg_fps here = original video FPS (processing FPS will differ)
     summary = {
+        "total_time_sec": round(end - start, 1),
+        "num_tracks": len(traj_data),
+        "avg_fps": round(cv2.VideoCapture(temp_path).get(cv2.CAP_PROP_FPS) or 25, 2)
     }
+    return out_path, traj_data, summary
 # ---------------------------------------------------------
 - Annotated tracking video
 - JSON trajectories
 - Summary stats for dominant-flow analysis
+🔧 Tracking is enhanced with:
+- Kalman motion model
+- Distance + IoU + direction-aware matching
+to reduce ID swaps when vehicles overtake or are very close.
 """
 example_video = "assets/examples/sample1.mp4" if os.path.exists("assets/examples/sample1.mp4") else None
 )
 if __name__ == "__main__":
+    demo.launch()