Spaces:

Roboflow
/

Trackers

Running on T4

App Files Files Community

SkalskiP committed on Feb 25

Commit

5cd39e6

1 Parent(s): af526d5

Add Filter IDs feature, new video examples, and improve code readability

Browse files

Files changed (2) hide show

.gitignore +2 -0
app.py +147 -51

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .idea/
2	+ .gradio/

app.py CHANGED Viewed

@@ -1,8 +1,7 @@
-"""Gradio app for the trackers library — run object tracking on uploaded videos."""
 from __future__ import annotations
 import os
 import tempfile
 from pathlib import Path
@@ -44,7 +43,6 @@ COCO_CLASSES = [
     "sports ball",
 ]
-# Device and model pre-loading
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Loading {len(MODELS)} models on {DEVICE}...")
@@ -54,7 +52,6 @@ for model_id in MODELS:
     LOADED_MODELS[model_id] = AutoModel.from_pretrained(model_id, device=DEVICE)
 print("All models loaded.")
-# Visualization
 COLOR_PALETTE = sv.ColorPalette.from_hex(
     [
         "#ffff00",
@@ -158,6 +155,26 @@ VIDEO_EXAMPLES = [
         0.1,
         0.6,
         [],
         True,
         True,
         False,
@@ -176,6 +193,7 @@ VIDEO_EXAMPLES = [
         0.3,
         0.6,
         [],
         True,
         True,
         False,
@@ -184,21 +202,22 @@ VIDEO_EXAMPLES = [
         True,
     ],
     [
-        "https://storage.googleapis.com/com-roboflow-marketing/supervision/video-examples/cars-1280x720-1.mp4",
-        "rfdetr-small",
-        "bytetrack",
         0.2,
         30,
         0.3,
         3,
         0.1,
         0.6,
-        ["car"],
         True,
         True,
-        False,
         True,
         False,
         False,
     ],
     [
@@ -212,6 +231,7 @@ VIDEO_EXAMPLES = [
         0.1,
         0.6,
         [],
         True,
         True,
         False,
@@ -230,16 +250,55 @@ VIDEO_EXAMPLES = [
         0.1,
         0.6,
         [],
         True,
         True,
         False,
         False,
         True,
         False,
     ],
     [
-        "https://storage.googleapis.com/com-roboflow-marketing/supervision/video-examples/vehicles-1280x720.mp4",
         "rfdetr-small",
         "bytetrack",
         0.2,
         30,
@@ -248,6 +307,7 @@ VIDEO_EXAMPLES = [
         0.1,
         0.6,
         [],
         True,
         True,
         True,
@@ -260,15 +320,15 @@ VIDEO_EXAMPLES = [
 def _get_video_info(path: str) -> tuple[float, int]:
     """Return video duration in seconds and frame count using OpenCV."""
-    cap = cv2.VideoCapture(path)
-    if not cap.isOpened():
         raise gr.Error("Could not open the uploaded video.")
-    fps = cap.get(cv2.CAP_PROP_FPS)
-    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    cap.release()
-    if fps <= 0:
         raise gr.Error("Could not determine video frame rate.")
-    return frame_count / fps, frame_count
 def _resolve_class_filter(
@@ -287,6 +347,32 @@ def _resolve_class_filter(
     return class_filter if class_filter else None
 def track(
     video_path: str,
     model_id: str,
@@ -298,6 +384,7 @@ def track(
     minimum_iou_threshold: float,
     high_conf_det_threshold: float,
     classes: list[str] | None = None,
     show_boxes: bool = True,
     show_ids: bool = True,
     show_labels: bool = False,
@@ -314,17 +401,16 @@ def track(
     if duration > MAX_DURATION_SECONDS:
         raise gr.Error(
             f"Video is {duration:.1f}s long. "
-            f"Maximum allowed duration is {MAX_DURATION_SECONDS}s."
         )
-    # Get pre-loaded model
     detection_model = LOADED_MODELS[model_id]
     class_names = getattr(detection_model, "class_names", [])
-    # Resolve class filter
-    class_filter = _resolve_class_filter(classes, class_names)
-    # Create tracker instance and reset ID counter
     if tracker_type == "bytetrack":
         tracker = ByteTrackTracker(
             lost_track_buffer=lost_track_buffer,
@@ -342,7 +428,6 @@ def track(
         )
     tracker.reset()
-    # Setup annotators
     annotators, label_annotator = _init_annotators(
         show_boxes=show_boxes,
         show_masks=show_masks,
@@ -357,39 +442,38 @@ def track(
             color_lookup=sv.ColorLookup.TRACK,
         )
-    # Setup output
-    tmp_dir = tempfile.mkdtemp()
-    output_path = str(Path(tmp_dir) / "output.mp4")
-    # Get video info for output
     video_info = sv.VideoInfo.from_video_path(video_path)
-    # Process video with progress bar
-    frame_gen = frames_from_source(video_path)
     with sv.VideoSink(output_path, video_info=video_info) as sink:
-        for frame_idx, frame in tqdm(frame_gen, total=total_frames, desc="Processing video..."):
-            # Run detection
             predictions = detection_model(frame)
             if predictions:
                 detections = predictions[0].to_supervision()
-                # Filter by confidence
                 if len(detections) > 0 and detections.confidence is not None:
-                    mask = detections.confidence >= confidence
-                    detections = detections[mask]
-                # Filter by class
-                if class_filter is not None and len(detections) > 0:
-                    mask = np.isin(detections.class_id, class_filter)
-                    detections = detections[mask]
             else:
                 detections = sv.Detections.empty()
-            # Run tracker
             tracked = tracker.update(detections)
-            # Annotate frame
             annotated = frame.copy()
             if trace_annotator is not None:
                 annotated = trace_annotator.annotate(annotated, tracked)
@@ -423,7 +507,7 @@ with gr.Blocks(title="Trackers Playground 🔥") as demo:
         input_video = gr.Video(label="Input Video")
         output_video = gr.Video(label="Tracked Video")
-    track_btn = gr.Button(value="Track", variant="primary")
     with gr.Row():
         model_dropdown = gr.Dropdown(
@@ -455,6 +539,16 @@ with gr.Blocks(title="Trackers Playground 🔥") as demo:
                     label="Filter Classes",
                     info="Only track selected classes. None selected means all.",
                 )
             with gr.Column():
                 gr.Markdown("### Tracker")
@@ -474,7 +568,7 @@ with gr.Blocks(title="Trackers Playground 🔥") as demo:
                     label="Track Activation Threshold",
                     info="Minimum score for a track to be activated.",
                 )
-                min_consecutive_slider = gr.Slider(
                     minimum=1,
                     maximum=10,
                     value=2,
@@ -482,7 +576,7 @@ with gr.Blocks(title="Trackers Playground 🔥") as demo:
                     label="Minimum Consecutive Frames",
                     info="Detections needed before a track is confirmed.",
                 )
-                min_iou_slider = gr.Slider(
                     minimum=0.0,
                     maximum=1.0,
                     value=0.1,
@@ -490,7 +584,7 @@ with gr.Blocks(title="Trackers Playground 🔥") as demo:
                     label="Minimum IoU Threshold",
                     info="Overlap required to match a detection to a track.",
                 )
-                high_conf_slider = gr.Slider(
                     minimum=0.0,
                     maximum=1.0,
                     value=0.6,
@@ -543,10 +637,11 @@ with gr.Blocks(title="Trackers Playground 🔥") as demo:
             confidence_slider,
             lost_track_buffer_slider,
             track_activation_slider,
-            min_consecutive_slider,
-            min_iou_slider,
-            high_conf_slider,
             class_filter,
             show_boxes_checkbox,
             show_ids_checkbox,
             show_labels_checkbox,
@@ -557,7 +652,7 @@ with gr.Blocks(title="Trackers Playground 🔥") as demo:
         outputs=output_video,
     )
-    track_btn.click(
         fn=track,
         inputs=[
             input_video,
@@ -566,10 +661,11 @@ with gr.Blocks(title="Trackers Playground 🔥") as demo:
             confidence_slider,
             lost_track_buffer_slider,
             track_activation_slider,
-            min_consecutive_slider,
-            min_iou_slider,
-            high_conf_slider,
             class_filter,
             show_boxes_checkbox,
             show_ids_checkbox,
             show_labels_checkbox,

 from __future__ import annotations
 import os
+import sys
 import tempfile
 from pathlib import Path
     "sports ball",
 ]
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Loading {len(MODELS)} models on {DEVICE}...")
     LOADED_MODELS[model_id] = AutoModel.from_pretrained(model_id, device=DEVICE)
 print("All models loaded.")
 COLOR_PALETTE = sv.ColorPalette.from_hex(
     [
         "#ffff00",
         0.1,
         0.6,
         [],
+        "",
+        True,
+        True,
+        False,
+        False,
+        True,
+        False,
+    ],
+    [
+        "https://storage.googleapis.com/com-roboflow-marketing/supervision/video-examples/bikes-1280x720-1.mp4",
+        "rfdetr-small",
+        "bytetrack",
+        0.2,
+        30,
+        0.3,
+        3,
+        0.1,
+        0.6,
+        ["person"],
+        "",
         True,
         True,
         False,
         0.3,
         0.6,
         [],
+        "",
         True,
         True,
         False,
         True,
     ],
     [
+        "https://storage.googleapis.com/com-roboflow-marketing/supervision/video-examples/apples-1280x720-2.mp4",
+        "rfdetr-nano",
+        "sort",
         0.2,
         30,
         0.3,
         3,
         0.1,
         0.6,
+        [],
+        "",
         True,
         True,
         True,
         False,
+        True,
         False,
     ],
     [
         0.1,
         0.6,
         [],
+        "",
         True,
         True,
         False,
         0.1,
         0.6,
         [],
+        "",
         True,
         True,
         False,
         False,
         True,
+        True,
+    ],
+    [
+        "https://storage.googleapis.com/com-roboflow-marketing/supervision/video-examples/jets-1280x720-2.mp4",
+        "rfdetr-seg-small",
+        "bytetrack",
+        0.2,
+        30,
+        0.3,
+        3,
+        0.1,
+        0.6,
+        [],
+        "1",
+        True,
+        True,
+        False,
         False,
+        True,
+        True,
     ],
     [
+        "https://storage.googleapis.com/com-roboflow-marketing/supervision/video-examples/suitcases-1280x720-4.mp4",
         "rfdetr-small",
+        "sort",
+        0.2,
+        30,
+        0.3,
+        3,
+        0.1,
+        0.6,
+        [],
+        "",
+        True,
+        True,
+        True,
+        False,
+        True,
+        False,
+    ],
+    [
+        "https://storage.googleapis.com/com-roboflow-marketing/supervision/video-examples/vehicles-1280x720.mp4",
+        "rfdetr-medium",
         "bytetrack",
         0.2,
         30,
         0.1,
         0.6,
         [],
+        "",
         True,
         True,
         True,
 def _get_video_info(path: str) -> tuple[float, int]:
     """Return video duration in seconds and frame count using OpenCV.

     Args:
         path: Path of the video file to inspect.

     Returns:
         Tuple of `(duration_in_seconds, frame_count)`, where the duration is
         the reported frame count divided by the reported frame rate.

     Raises:
         gr.Error: If the video cannot be opened, or if the reported frame
             rate is not positive.
     """
     video_capture = cv2.VideoCapture(path)
     try:
         if not video_capture.isOpened():
             raise gr.Error("Could not open the uploaded video.")
         frames_per_second = video_capture.get(cv2.CAP_PROP_FPS)
         frame_count = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
     finally:
         # Release the capture handle on every path, including the error path.
         video_capture.release()
     if frames_per_second <= 0:
         raise gr.Error("Could not determine video frame rate.")
     return frame_count / frames_per_second, frame_count
 def _resolve_class_filter(
     return class_filter if class_filter else None
+def _resolve_track_id_filter(track_ids_arg: str | None) -> list[int] | None:
+    """Resolve a comma-separated string of track IDs to a list of integers.
+    Args:
+        track_ids_arg: Comma-separated string (e.g. `"1,3,5"`). `None` or
+            empty string means no filter.
+    Returns:
+        List of integer track IDs, or `None` when no valid filter remains.
+    """
+    if not track_ids_arg:
+        return None
+    track_ids: list[int] = []
+    for token in track_ids_arg.split(","):
+        token = token.strip()
+        try:
+            track_ids.append(int(token))
+        except ValueError:
+            print(
+                f"Warning: '{token}' is not a valid track ID, skipping.",
+                file=sys.stderr,
+            )
+    return track_ids if track_ids else None
 def track(
     video_path: str,
     model_id: str,
     minimum_iou_threshold: float,
     high_conf_det_threshold: float,
     classes: list[str] | None = None,
+    track_ids: str = "",
     show_boxes: bool = True,
     show_ids: bool = True,
     show_labels: bool = False,
     if duration > MAX_DURATION_SECONDS:
         raise gr.Error(
             f"Video is {duration:.1f}s long. "
+            f"Maximum allowed duration is {MAX_DURATION_SECONDS}s. "
+            f"Please use the trim tool in the Input Video player to shorten it."
         )
     detection_model = LOADED_MODELS[model_id]
     class_names = getattr(detection_model, "class_names", [])
+    selected_class_ids = _resolve_class_filter(classes, class_names)
+    selected_track_ids = _resolve_track_id_filter(track_ids)
     if tracker_type == "bytetrack":
         tracker = ByteTrackTracker(
             lost_track_buffer=lost_track_buffer,
         )
     tracker.reset()
     annotators, label_annotator = _init_annotators(
         show_boxes=show_boxes,
         show_masks=show_masks,
             color_lookup=sv.ColorLookup.TRACK,
         )
+    temporary_directory = tempfile.mkdtemp()
+    output_path = str(Path(temporary_directory) / "output.mp4")
     video_info = sv.VideoInfo.from_video_path(video_path)
+    frame_generator = frames_from_source(video_path)
     with sv.VideoSink(output_path, video_info=video_info) as sink:
+        for frame_idx, frame in tqdm(
+            frame_generator, total=total_frames, desc="Processing video..."
+        ):
             predictions = detection_model(frame)
             if predictions:
                 detections = predictions[0].to_supervision()
                 if len(detections) > 0 and detections.confidence is not None:
+                    confidence_mask = detections.confidence >= confidence
+                    detections = detections[confidence_mask]
+                if selected_class_ids is not None and len(detections) > 0:
+                    class_mask = np.isin(detections.class_id, selected_class_ids)
+                    detections = detections[class_mask]
             else:
                 detections = sv.Detections.empty()
             tracked = tracker.update(detections)
+            if selected_track_ids is not None and len(tracked) > 0:
+                if tracked.tracker_id is not None:
+                    track_id_mask = np.isin(tracked.tracker_id, selected_track_ids)
+                    tracked = tracked[track_id_mask]
             annotated = frame.copy()
             if trace_annotator is not None:
                 annotated = trace_annotator.annotate(annotated, tracked)
         input_video = gr.Video(label="Input Video")
         output_video = gr.Video(label="Tracked Video")
+    track_button = gr.Button(value="Track", variant="primary")
     with gr.Row():
         model_dropdown = gr.Dropdown(
                     label="Filter Classes",
                     info="Only track selected classes. None selected means all.",
                 )
+                track_id_filter = gr.Textbox(
+                    value="",
+                    label="Filter IDs",
+                    info=(
+                        "Only display tracks with specific track IDs "
+                        "(comma-separated, e.g. 1,3,5). "
+                        "Leave empty for all."
+                    ),
+                    placeholder="e.g. 1,3,5",
+                )
             with gr.Column():
                 gr.Markdown("### Tracker")
                     label="Track Activation Threshold",
                     info="Minimum score for a track to be activated.",
                 )
+                minimum_consecutive_slider = gr.Slider(
                     minimum=1,
                     maximum=10,
                     value=2,
                     label="Minimum Consecutive Frames",
                     info="Detections needed before a track is confirmed.",
                 )
+                minimum_iou_slider = gr.Slider(
                     minimum=0.0,
                     maximum=1.0,
                     value=0.1,
                     label="Minimum IoU Threshold",
                     info="Overlap required to match a detection to a track.",
                 )
+                high_confidence_slider = gr.Slider(
                     minimum=0.0,
                     maximum=1.0,
                     value=0.6,
             confidence_slider,
             lost_track_buffer_slider,
             track_activation_slider,
+            minimum_consecutive_slider,
+            minimum_iou_slider,
+            high_confidence_slider,
             class_filter,
+            track_id_filter,
             show_boxes_checkbox,
             show_ids_checkbox,
             show_labels_checkbox,
         outputs=output_video,
     )
+    track_button.click(
         fn=track,
         inputs=[
             input_video,
             confidence_slider,
             lost_track_buffer_slider,
             track_activation_slider,
+            minimum_consecutive_slider,
+            minimum_iou_slider,
+            high_confidence_slider,
             class_filter,
+            track_id_filter,
             show_boxes_checkbox,
             show_ids_checkbox,
             show_labels_checkbox,