Spaces:

BiasLab2025
/

perception

Running

Zhen Ye Claude Opus 4.6 committed 8 days ago

Commit

a70bcf5

1 Parent(s): 0d3be57

Add diagnostic logging to pinpoint tracker dropping detections

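Add four targeted log points across inference.py and utils/tracker.py to
diagnose why the first-frame preview shows bounding boxes but the
processed video has none; the tracker is the prime suspect.

Also store the detector device as a torch.device (not a string) in
models/detectors/drone_yolo.py and models/detectors/yolov8.py, so that
Ultralytics' select_device() does not set CUDA_VISIBLE_DEVICES and
restrict the whole process to one GPU.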

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (4) hide show

inference.py +13 -0
models/detectors/drone_yolo.py +7 -2
models/detectors/yolov8.py +7 -2
utils/tracker.py +21 -0

inference.py CHANGED Viewed

@@ -947,6 +947,7 @@ def run_inference(
                         depth_stats.update(dep_res.depth_map)
             # --- POST PROCESSING ---
             for i, (idx, frame, dep_res) in enumerate(zip(indices, frames, depth_results)):
                 # 1. Detections — use cached for frame 0 if available
                 detections = []
@@ -958,6 +959,7 @@ def run_inference(
                         detections = _build_detection_records(
                             d_res.boxes, d_res.scores, d_res.labels, queries, d_res.label_names
                         )
                 # 2. Frame Rendering
                 processed = frame.copy()
@@ -981,6 +983,13 @@ def run_inference(
                             raise RuntimeError("Writer thread died unexpectedly")
                         if job_id: _check_cancellation(job_id)
             batch_accum.clear()
             logging.info(f"Worker {gpu_idx} finished flushing batch")
@@ -1152,7 +1161,11 @@ def run_inference(
                         # --- SEQUENTIAL TRACKING ---
                         # Run tracker FIRST so detections get real track_id from ByteTracker
                         dets = tracker.update(dets)
                         speed_est.estimate(dets)
                         # --- RELEVANCE GATE (deterministic, fast — stays in writer) ---

                         depth_stats.update(dep_res.depth_map)
             # --- POST PROCESSING ---
+            batch_det_summary = []
             for i, (idx, frame, dep_res) in enumerate(zip(indices, frames, depth_results)):
                 # 1. Detections — use cached for frame 0 if available
                 detections = []
                         detections = _build_detection_records(
                             d_res.boxes, d_res.scores, d_res.labels, queries, d_res.label_names
                         )
+                batch_det_summary.append((idx, len(detections)))
                 # 2. Frame Rendering
                 processed = frame.copy()
                             raise RuntimeError("Writer thread died unexpectedly")
                         if job_id: _check_cancellation(job_id)
+            total_dets = sum(c for _, c in batch_det_summary)
+            if total_dets == 0 or indices[0] % 90 == 0:
+                logging.info("Worker %d batch [frames %s]: %d total detections %s",
+                             gpu_idx,
+                             f"{indices[0]}-{indices[-1]}",
+                             total_dets,
+                             [(idx, cnt) for idx, cnt in batch_det_summary if cnt > 0])
             batch_accum.clear()
             logging.info(f"Worker {gpu_idx} finished flushing batch")
                         # --- SEQUENTIAL TRACKING ---
                         # Run tracker FIRST so detections get real track_id from ByteTracker
+                        pre_track_count = len(dets)
                         dets = tracker.update(dets)
+                        if (next_idx % 30 == 0) or (pre_track_count > 0 and len(dets) == 0):
+                            logging.info("Writer frame %d: %d detections in -> %d tracked out",
+                                         next_idx, pre_track_count, len(dets))
                         speed_est.estimate(dets)
                         # --- RELEVANCE GATE (deterministic, fast — stays in writer) ---

models/detectors/drone_yolo.py CHANGED Viewed

@@ -20,10 +20,15 @@ class DroneYoloDetector(ObjectDetector):
     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
         self.name = "drone_yolo"
         self.score_threshold = score_threshold
         if device:
-            self.device = device
         else:
-            self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
         logging.info(
             "Loading drone YOLO from HuggingFace Hub: %s onto %s",
             self.REPO_ID,

     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
         self.name = "drone_yolo"
         self.score_threshold = score_threshold
+        # CRITICAL: Store device as torch.device, NOT a string.
+        # Ultralytics' select_device() sets CUDA_VISIBLE_DEVICES when it
+        # receives a string like "cuda:0", restricting the entire process to
+        # one GPU.  Passing a torch.device object causes select_device() to
+        # return immediately without touching the environment.
         if device:
+            self.device = torch.device(device)
         else:
+            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         logging.info(
             "Loading drone YOLO from HuggingFace Hub: %s onto %s",
             self.REPO_ID,

models/detectors/yolov8.py CHANGED Viewed

@@ -21,10 +21,15 @@ class HuggingFaceYoloV8Detector(ObjectDetector):
     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
         self.name = "hf_yolov8"
         self.score_threshold = score_threshold
         if device:
-            self.device = device
         else:
-            self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
         logging.info(
             "Loading Hugging Face YOLOv8 weights %s/%s onto %s",
             self.REPO_ID,

     def __init__(self, score_threshold: float = 0.3, device: str = None) -> None:
         self.name = "hf_yolov8"
         self.score_threshold = score_threshold
+        # CRITICAL: Store device as torch.device, NOT a string.
+        # Ultralytics' select_device() sets CUDA_VISIBLE_DEVICES when it
+        # receives a string like "cuda:0", restricting the entire process to
+        # one GPU.  Passing a torch.device object causes select_device() to
+        # return immediately without touching the environment.
         if device:
+            self.device = torch.device(device)
         else:
+            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         logging.info(
             "Loading Hugging Face YOLOv8 weights %s/%s onto %s",
             self.REPO_ID,

utils/tracker.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import numpy as np
 from scipy.optimize import linear_sum_assignment
 import scipy.linalg
@@ -509,14 +510,23 @@ class ByteTracker:
         # 4. Init new tracks from unmatched high score detections
         # Note: Unmatched low score detections are ignored (noise)
         unmatched_dets = [detections[i] for i in u_detection]
         for track in unmatched_dets:
             if track.score < self.new_track_thresh:
                 continue  # Not confident enough to start a new track
             track.activate(self.kalman_filter, self.frame_id)
             activated_stracks.append(track)
             self._sync_data(track, track)  # Sync self
         # 5. Update state
         self.tracked_stracks = [t for t in self.tracked_stracks if t.state == 2]
         self.tracked_stracks = join_stracks(self.tracked_stracks, activated_stracks)
@@ -580,6 +590,17 @@ class ByteTracker:
             results.append(d_out)
         return results
     def _sync_data(self, track, det_source):

+import logging
 import numpy as np
 from scipy.optimize import linear_sum_assignment
 import scipy.linalg
         # 4. Init new tracks from unmatched high score detections
         # Note: Unmatched low score detections are ignored (noise)
         unmatched_dets = [detections[i] for i in u_detection]
+        rejected_by_thresh = 0
         for track in unmatched_dets:
             if track.score < self.new_track_thresh:
+                rejected_by_thresh += 1
                 continue  # Not confident enough to start a new track
             track.activate(self.kalman_filter, self.frame_id)
             activated_stracks.append(track)
             self._sync_data(track, track)  # Sync self
+        if rejected_by_thresh > 0 and self.frame_id <= 5:
+            logging.warning(
+                "Tracker frame %d: %d detections rejected by new_track_thresh=%.2f (scores: %s)",
+                self.frame_id, rejected_by_thresh, self.new_track_thresh,
+                [f"{t.score:.3f}" for t in unmatched_dets if t.score < self.new_track_thresh]
+            )
         # 5. Update state
         self.tracked_stracks = [t for t in self.tracked_stracks if t.state == 2]
         self.tracked_stracks = join_stracks(self.tracked_stracks, activated_stracks)
             results.append(d_out)
+        if self.frame_id % 30 == 0:
+            logging.info(
+                "Tracker frame %d: %d tracked (%d activated), %d lost, %d new this frame, output=%d",
+                self.frame_id,
+                len(self.tracked_stracks),
+                sum(1 for t in self.tracked_stracks if t.is_activated),
+                len(self.lost_stracks),
+                len(activated_stracks),
+                len(results),
+            )
         return results
     def _sync_data(self, track, det_source):